MUSA: Stop explicitly setting use_mul_mat_vec_q to false

yeahdongcn · yeahdongcn · commit bf41f174f83e · 2024-07-15T19:03:56.000+08:00
Signed-off-by: Xiaodong Ye &lt;xiaodong.ye@mthreads.com&gt;
diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
@@ -1906,17 +1906,11 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
             const int cc            = ggml_cuda_info().devices[id].cc;
             use_mul_mat_q           = use_mul_mat_q           && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
             any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
-#ifdef GGML_USE_MUSA
-            use_mul_mat_vec_q       = false;
-#endif // GGML_USE_MUSA
         }
     } else {
         const int cc            = ggml_cuda_info().devices[ctx.device].cc;
         use_mul_mat_q           = use_mul_mat_q           && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
         any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
-#ifdef GGML_USE_MUSA
-        use_mul_mat_vec_q       = false;
-#endif // GGML_USE_MUSA
     }
 
     // debug helpers
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -181,7 +181,7 @@
 #define cudaMemcpyAsync musaMemcpyAsync
 #define cudaMemcpyPeerAsync musaMemcpyPeerAsync
 #define cudaMemcpy2DAsync musaMemcpy2DAsync
-#define cudaMemcpyDeviceToDevice musaMemcpyDeviceToDevice
+#define cudaMemcpyDeviceToDevice musaMemcpyDefault
 #define cudaMemcpyDeviceToHost musaMemcpyDeviceToHost
 #define cudaMemcpyHostToDevice musaMemcpyHostToDevice
 #define cudaMemcpyKind musaMemcpyKind