try AMD fix: use 4 warps in mul_mat_vec_q_cuda on AMD GPUs older than RDNA2, keep 1 warp on RDNA2 and newer

JohannesGaessler · JohannesGaessler · commit f195490b6304 · 2024-02-08T11:21:33.000+01:00
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
@@ -5310,8 +5310,9 @@ template <bool need_check> static __global__ void
 #endif // __CUDA_ARCH__ >= CC_VOLTA
 }
 
-#define MMVQ_NWARPS_NVIDIA 4
-#define MMVQ_NWARPS_AMD    1
+#define MMVQ_NWARPS_NVIDIA    4
+#define MMVQ_NWARPS_AMD_RDNA2 1
+#define MMVQ_NWARPS_AMD_OLD   4
 
 template <int nwarps, int ncols_y_template, int qk, int qi, typename block_q_t, int vdr, vec_dot_q_cuda_t vec_dot_q_cuda>
 #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__))
@@ -6855,7 +6856,12 @@ static void mul_mat_vec_q_cuda(
     int id;
     CUDA_CHECK(cudaGetDevice(&id));
 
-    const int nwarps = g_device_caps[id].cc >= CC_OFFSET_AMD ? MMVQ_NWARPS_AMD : MMVQ_NWARPS_NVIDIA;
+    int nwarps;
+    if (g_device_caps[id].cc >= CC_OFFSET_AMD) {
+        nwarps = g_device_caps[id].cc >= CC_RDNA2 ? MMVQ_NWARPS_AMD_RDNA2 : MMVQ_NWARPS_AMD_OLD;
+    } else {
+        nwarps = MMVQ_NWARPS_NVIDIA;
+    }
 
     const dim3 block_nums(nrows_x, 1, 1);
     const dim3 block_dims(WARP_SIZE, nwarps, 1);