Skip to content

Commit 637483b

Browse files
Define GGML_CUDA_DMMV_BLOCK_Y if not defined
1 parent fbacfec commit 637483b

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

ggml-cuda.cu

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,11 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 blo
8585

8686
#define CUDA_MUL_BLOCK_SIZE 256
8787
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
88-
#define GGML_CUDA_DMMV_BLOCK_X 32 // dmmv = dequantize_mul_mat_vec
88+
// dmmv = dequantize_mul_mat_vec
89+
#define GGML_CUDA_DMMV_BLOCK_X 32
90+
#ifndef GGML_CUDA_DMMV_BLOCK_Y
91+
#define GGML_CUDA_DMMV_BLOCK_Y 1 // can be set by compiler option LLAMA_CUDA_BY
92+
#endif
8993

9094
static __global__ void mul_f32(const float * x, const float * y, float * dst, const int kx, const int ky) {
9195
const int i = blockDim.x*blockIdx.x + threadIdx.x;

0 commit comments

Comments
 (0)