Skip to content

Commit f5fddb6

Browse files
committed
ggml-cuda : remove some superfluous comments for TQ2_0 tile loading
1 parent 983aa09 commit f5fddb6

File tree

1 file changed

+0
-2
lines changed

1 file changed

+0
-2
lines changed

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1848,13 +1848,11 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
18481848
#ifdef INT8_MMA_AVAILABLE
18491849
x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k] = q;
18501850
#else
1851-
// NOTE: this might assume WARP_SIZE is >= 32
18521851
x_qs[i*(2*WARP_SIZE + 1) + k] = q;
18531852
#endif // INT8_MMA_AVAILABLE
18541853
}
18551854
}
18561855

1857-
// TODO: does this work with WARP_SIZE != 32?
18581856
#pragma unroll
18591857
for (int i0 = 0; i0 < mmq_y; i0 += nwarps * WARP_SIZE/(QI2_0/2)) {
18601858
int i = i0 + threadIdx.y*(2*WARP_SIZE/QI2_0) + threadIdx.x/(QI2_0/2);

0 commit comments

Comments (0)