File tree 2 files changed +13
-5
lines changed 2 files changed +13
-5
lines changed Original file line number Diff line number Diff line change @@ -1780,12 +1780,15 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
1780
1780
1781
1781
void ggml_backend_sched_reset (ggml_backend_sched_t sched ) {
1782
1782
// reset state for the next run
1783
- size_t hash_size = sched -> hash_set .size ;
1784
- memset (sched -> hash_set .keys , 0 , sizeof (sched -> hash_set .keys [0 ]) * hash_size ); // NOLINT
1785
- memset (sched -> tensor_backend_id , -1 , sizeof (sched -> tensor_backend_id [0 ]) * hash_size );
1786
- memset (sched -> tensor_copies , 0 , sizeof (sched -> tensor_copies [0 ]) * hash_size );
1783
+ if (!sched -> is_reset )
1784
+ {
1785
+ size_t hash_size = sched -> hash_set .size ;
1786
+ memset (sched -> hash_set .keys , 0 , sizeof (sched -> hash_set .keys [0 ]) * hash_size ); // NOLINT
1787
+ memset (sched -> tensor_backend_id , -1 , sizeof (sched -> tensor_backend_id [0 ]) * hash_size );
1788
+ memset (sched -> tensor_copies , 0 , sizeof (sched -> tensor_copies [0 ]) * hash_size );
1787
1789
1788
- sched -> is_reset = true;
1790
+ sched -> is_reset = true;
1791
+ }
1789
1792
sched -> is_alloc = false;
1790
1793
}
1791
1794
Original file line number Diff line number Diff line change @@ -16773,6 +16773,11 @@ float * llama_get_logits(struct llama_context * ctx) {
16773
16773
16774
16774
float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
16775
16775
int32_t j = -1;
16776
+
16777
+ // Reset state for the next run before the following backend sync,
16778
+ // to allow the CPU activities in the reset to overlap with device computation.
16779
+ ggml_backend_sched_reset(ctx->sched);
16780
+
16776
16781
llama_synchronize(ctx);
16777
16782
16778
16783
try {
You can’t perform that action at this time.
0 commit comments