Skip to content

Commit 39ddda2

Browse files
committed
disable fp16 mat mul completely with multi GPU
1 parent 59937e4 commit 39ddda2

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6118,7 +6118,7 @@ inline void ggml_cuda_op_mul_mat_cublas(
6118 6118

6119 6119
const int compute_capability = g_compute_capabilities[id];
6120 6120

6121-
if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && ldc == row_diff) {
6121+
if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) {
6122 6122
// convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
6123 6123
half * src0_as_f16 = nullptr;
6124 6124
size_t src0_as = 0;

0 commit comments

Comments
 (0)