ggml-cuda : remove some superfluous comments for TQ2_0 tile loading

compilade · compilade · commit f5fddb6d24c4 · 2025-01-10T14:52:49.000-05:00
diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
@@ -1848,13 +1848,11 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
 #ifdef INT8_MMA_AVAILABLE
             x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k] = q;
 #else
-            // NOTE: this might assume WARP_SIZE is >= 32
             x_qs[i*(2*WARP_SIZE + 1) + k] = q;
 #endif // INT8_MMA_AVAILABLE
         }
     }
 
-    // TODO: does this work with WARP_SIZE != 32?
 #pragma unroll
     for (int i0 = 0; i0 < mmq_y; i0 += nwarps * WARP_SIZE/(QI2_0/2)) {
         int i = i0 + threadIdx.y*(2*WARP_SIZE/QI2_0) + threadIdx.x/(QI2_0/2);

Original file line number	Diff line number	Diff line change
`@@ -1848,13 +1848,11 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin`
`1848`	`1848`	`#ifdef INT8_MMA_AVAILABLE`
`1849`	`1849`	`x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k] = q;`
`1850`	`1850`	`#else`
`1851`		`- // NOTE: this might assume WARP_SIZE is >= 32`
`1852`	`1851`	`x_qs[i*(2*WARP_SIZE + 1) + k] = q;`
`1853`	`1852`	`#endif // INT8_MMA_AVAILABLE`
`1854`	`1853`	`}`
`1855`	`1854`	`}`
`1856`	`1855`
`1857`		`- // TODO: does this work with WARP_SIZE != 32?`
`1858`	`1856`	`#pragma unroll`
`1859`	`1857`	`for (int i0 = 0; i0 < mmq_y; i0 += nwarps * WARP_SIZE/(QI2_0/2)) {`
`1860`	`1858`	`int i = i0 + threadIdx.y*(2*WARP_SIZE/QI2_0) + threadIdx.x/(QI2_0/2);`