@@ -771,7 +771,11 @@ GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t
771
771
if (src_ctx->device == dst_ctx->device ) {
772
772
CUDA_CHECK (cudaMemcpyAsync (dst->data , src->data , ggml_nbytes (src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
773
773
} else {
774
+ #ifdef GGML_CUDA_NO_PEER_COPY
775
+ return false ;
776
+ #else
774
777
CUDA_CHECK (cudaMemcpyPeerAsync (dst->data , dst_ctx->device , src->data , src_ctx->device , ggml_nbytes (src), cudaStreamPerThread));
778
+ #endif
775
779
}
776
780
CUDA_CHECK (cudaStreamSynchronize (cudaStreamPerThread));
777
781
return true ;
@@ -11322,19 +11326,23 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
11322
11326
GGML_ASSERT (cuda_ctx_src->device == buf_ctx_src->device );
11323
11327
GGML_ASSERT (cuda_ctx_dst->device == buf_ctx_dst->device );
11324
11328
11325
- if (!cuda_ctx_src->copy_event ) {
11326
- ggml_cuda_set_device (cuda_ctx_src->device );
11327
- CUDA_CHECK (cudaEventCreateWithFlags (&cuda_ctx_src->copy_event , cudaEventDisableTiming));
11328
- }
11329
-
11330
11329
// copy on src stream
11331
11330
if (cuda_ctx_src->device == cuda_ctx_dst->device ) {
11332
11331
CUDA_CHECK (cudaMemcpyAsync (dst->data , src->data , ggml_nbytes (dst), cudaMemcpyDeviceToDevice, cuda_ctx_dst->stream ()));
11333
11332
} else {
11333
+ #ifdef GGML_CUDA_NO_PEER_COPY
11334
+ return false ;
11335
+ #else
11334
11336
CUDA_CHECK (cudaMemcpyPeerAsync (dst->data , cuda_ctx_dst->device , src->data , cuda_ctx_src->device , ggml_nbytes (dst), cuda_ctx_src->stream ()));
11337
+ #endif
11335
11338
}
11336
11339
11337
11340
// record event on src stream
11341
+ if (!cuda_ctx_src->copy_event ) {
11342
+ ggml_cuda_set_device (cuda_ctx_src->device );
11343
+ CUDA_CHECK (cudaEventCreateWithFlags (&cuda_ctx_src->copy_event , cudaEventDisableTiming));
11344
+ }
11345
+
11338
11346
CUDA_CHECK (cudaEventRecord (cuda_ctx_src->copy_event , cuda_ctx_src->stream ()));
11339
11347
11340
11348
// wait on dst stream for the copy to complete
@@ -11530,6 +11538,9 @@ GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const
11530
11538
}
11531
11539
11532
11540
static ggml_backend_event_t ggml_backend_cuda_event_new (ggml_backend_t backend) {
11541
+ #ifdef GGML_CUDA_NO_PEER_COPY
11542
+ return nullptr ;
11543
+ #else
11533
11544
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context ;
11534
11545
11535
11546
ggml_cuda_set_device (cuda_ctx->device );
@@ -11541,6 +11552,7 @@ static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend)
11541
11552
/* .backend = */ backend,
11542
11553
/* .context = */ event,
11543
11554
};
11555
+ #endif
11544
11556
}
11545
11557
11546
11558
static void ggml_backend_cuda_event_free (ggml_backend_event_t event) {
0 commit comments