Skip to content

Commit 38d11f5

Browse files
CUDA: always create events for split buffers
1 parent b8deef0 commit 38d11f5

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

ggml/src/ggml-cuda.cu

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -699,6 +699,15 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
699699
ctx->tensor_extras.push_back(extra);
700700

701701
for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) {
702+
ggml_cuda_set_device(id);
703+
704+
// Create events on all devices unconditionally even if they don't actually hold any data.
705+
// This is because for very small matrices it's possible for the active device to not hold any data.
706+
// But in this case the events are still needed to synchronize the other devices.
707+
for (int64_t is = 0; is < GGML_CUDA_MAX_STREAMS; ++is) {
708+
CUDA_CHECK(cudaEventCreateWithFlags(&extra->events[id][is], cudaEventDisableTiming));
709+
}
710+
702711
int64_t row_low, row_high;
703712
get_row_split(&row_low, &row_high, tensor, buft_ctx->tensor_split, id);
704713

@@ -717,7 +726,6 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
717726

718727
// FIXME: do not crash if cudaMalloc fails
719728
// currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
720-
ggml_cuda_set_device(id);
721729
char * buf;
722730
CUDA_CHECK(ggml_cuda_device_malloc((void**)&buf, size, id));
723731

@@ -727,10 +735,6 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
727735
}
728736

729737
extra->data_device[id] = buf;
730-
731-
for (int64_t is = 0; is < GGML_CUDA_MAX_STREAMS; ++is) {
732-
CUDA_CHECK(cudaEventCreateWithFlags(&extra->events[id][is], cudaEventDisableTiming));
733-
}
734738
}
735739
tensor->extra = extra;
736740
}

0 commit comments

Comments (0)