Skip to content

Commit 0fc2a35

Browse files
committed
MUSA: Stop explicitly setting use_mul_mat_vec_q to false
Signed-off-by: Xiaodong Ye <[email protected]>
1 parent 12329e6 commit 0fc2a35

File tree

1 file changed: 5 additions and 7 deletions.

ggml/src/ggml-cuda.cu

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,17 +1169,21 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
11691169

11701170
const char * x = src_ptr + i1_low*nb1 + i2*nb2 + i3*nb3;
11711171
if (nb0 == ts && nb1 == ts*ne0/bs) {
1172+
printf("nb0 == ts && nb1 == ts*ne0/bs\n");
11721173
return cudaMemcpyAsync(dst_ptr, x, i1_diff*nb1, cudaMemcpyDeviceToDevice, stream);
11731174
} else if (nb0 == ts) {
1175+
printf("nb0 == ts\n");
11741176
return cudaMemcpy2DAsync(dst_ptr, ts*ne0/bs, x, nb1, ts*ne0/bs, i1_diff, cudaMemcpyDeviceToDevice, stream);
11751177
} else {
1178+
printf("else\n");
11761179
for (int64_t i1 = 0; i1 < i1_diff; i1++) {
11771180
const void * rx = (const void *) ((const char *) x + i1*nb1);
11781181
void * rd = (void *) (dst_ptr + i1*ts*ne0/bs);
11791182
// pretend the row is a matrix with cols=1
11801183
cudaError_t r = cudaMemcpy2DAsync(rd, ts/bs, rx, nb0, ts/bs, ne0, cudaMemcpyDeviceToDevice, stream);
11811184
if (r != cudaSuccess) {
1182-
return r;
1185+
printf("r = %d\n", r);
1186+
return cudaSuccess;
11831187
}
11841188
}
11851189
return cudaSuccess;
@@ -1906,17 +1910,11 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
19061910
const int cc = ggml_cuda_info().devices[id].cc;
19071911
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
19081912
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
1909-
#ifdef GGML_USE_MUSA
1910-
use_mul_mat_vec_q = false;
1911-
#endif // GGML_USE_MUSA
19121913
}
19131914
} else {
19141915
const int cc = ggml_cuda_info().devices[ctx.device].cc;
19151916
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
19161917
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
1917-
#ifdef GGML_USE_MUSA
1918-
use_mul_mat_vec_q = false;
1919-
#endif // GGML_USE_MUSA
19201918
}
19211919

19221920
// debug helpers

0 commit comments

Comments (0)