Skip to content

Commit 2e88d87

Browse files
CUDA: fixed redundant value dequantization
1 parent 3c36213 commit 2e88d87

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

ggml-cuda.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1983,7 +1983,7 @@ static __global__ void k_get_rows_float(
19831983

19841984
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
19851985
static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y, const int k) {
1986-
const int i = blockDim.x*blockIdx.x + 2*threadIdx.x;
1986+
const int i = 2*(blockDim.x*blockIdx.x + threadIdx.x);
19871987

19881988
if (i >= k) {
19891989
return;
@@ -5609,7 +5609,7 @@ static void quantize_row_q8_1_cuda(const float * x, void * vy, const int kx, con
56095609

56105610
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
56115611
static void dequantize_block_cuda(const void * __restrict__ vx, dst_t * __restrict__ y, const int k, cudaStream_t stream) {
5612-
const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE;
5612+
const int num_blocks = (k + 2*CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / (2*CUDA_DEQUANTIZE_BLOCK_SIZE);
56135613
dequantize_block<qk, qr, dequantize_kernel><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>(vx, y, k);
56145614
}
56155615

0 commit comments

Comments
 (0)