Skip to content

Commit a2beaff

Browse files
committed
Reset schedule earlier to allow overlap with graph computation on device
Refs ggml-org#6763
1 parent 637e9a8 commit a2beaff

File tree

2 files changed: +13 additions, −5 deletions

ggml-backend.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1780,12 +1780,15 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
17801780

17811781
// Reset the scheduler's per-graph state so the next run starts clean.
// The memsets over the hash tables are skipped when the state is already
// clean (sched->is_reset), which keeps repeated resets cheap — this is what
// allows the caller to reset early and overlap the CPU work with device
// computation (see commit message / ggml-org#6763).
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
    // reset state for the next run
    if (!sched->is_reset) {
        const size_t hash_size = sched->hash_set.size;

        memset(sched->hash_set.keys,     0,  sizeof(sched->hash_set.keys[0])     * hash_size); // NOLINT
        memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
        memset(sched->tensor_copies,     0,  sizeof(sched->tensor_copies[0])     * hash_size);

        sched->is_reset = true;
    }
    sched->is_alloc = false;
}
17911794

llama.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16773,6 +16773,11 @@ float * llama_get_logits(struct llama_context * ctx) {
1677316773

1677416774
float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
1677516775
int32_t j = -1;
16776+
16777+
// Reset state for the next run before the following backend sync,
16778+
// to allow the CPU activities in the reset to overlap with device computation.
16779+
ggml_backend_sched_reset(ctx->sched);
16780+
1677616781
llama_synchronize(ctx);
1677716782

1677816783
try {

0 commit comments

Comments (0)