From c1320fd54a5313d06c78b629f9c9349c0708bad0 Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Thu, 3 Aug 2023 16:40:34 -0400
Subject: [PATCH] CUDA: use min compute capability of GPUs actually used

---
 ggml-cuda.cu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index e0192bc6ecebc..16419e2a76076 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -5347,7 +5347,8 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_
         } else {
             int min_compute_capability = INT_MAX;
             for (int id = 0; id < g_device_count; ++id) {
-                if (min_compute_capability > g_compute_capabilities[id]) {
+                if (min_compute_capability > g_compute_capabilities[id]
+                        && g_tensor_split[id] < (id + 1 < g_device_count ? g_tensor_split[id + 1] : 1.0f)) {
                     min_compute_capability = g_compute_capabilities[id];
                 }
             }