Skip to content

Commit 946796f

Browse files
ggml-cuda : slight optimizations for TQ2_0
Co-authored-by: Johannes Gäßler <[email protected]>
1 parent f5fddb6 commit 946796f

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

ggml/src/ggml-cuda/convert.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ static __global__ void dequantize_block_tq2_0(const void * __restrict__ vx, dst_
287 287
const int64_t n = tid/32; // 0 or 1
288 288
const int64_t l = tid - 32*n; // 0..32
289 289

290-
const uint8_t q = x[i].qs[32*n + l];
290+
const uint8_t q = x[i].qs[tid];
291 291
dst_t * y = yy + i*QK_K + 128*n;
292 292

293 293
float d = __half2float(x[i].d);

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1837,7 +1837,9 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
1837 1837
const int qs0 = get_int_b2(bxi->qs, kqsx);
1838 1838

1839 1839
#pragma unroll
1840-
for (int l = 0; l < QR2_0; ++l) {
1840+
for (int l0 = 0; l0 < QR2_0; ++l0) {
1841+
const int l = (l0 + kqsx/8) % QR2_0; // avoid shared memory bank conflicts
1842+
1841 1843
// 0..7, 32..39
1842 1844
// 8..15, 40..47
1843 1845
// 16..23, 48..55

0 commit comments

Comments
 (0)