diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 61cd1747cac4f..91529058e2471 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -7369,9 +7369,9 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const CUBLAS_CHECK( cublasGemmBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N, ne01, ne11, ne10, - &alpha_f16, (const void * const *) (ptrs_as + 0*ne23), CUDA_R_16F, nb01/sizeof(half), - (const void * const *) (ptrs_as + 1*ne23), CUDA_R_16F, nb11/sizeof(float), - &beta_f16, ( void ** ) (ptrs_as + 2*ne23), CUDA_R_16F, ne01, + &alpha_f16, (const void **) (ptrs_as + 0*ne23), CUDA_R_16F, nb01/sizeof(half), + (const void **) (ptrs_as + 1*ne23), CUDA_R_16F, nb11/sizeof(float), + &beta_f16, ( void **) (ptrs_as + 2*ne23), CUDA_R_16F, ne01, ne23, CUBLAS_COMPUTE_16F, CUBLAS_GEMM_DEFAULT_TENSOR_OP));