Skip to content

Commit c8771ab

Browse files
CUDA: fix misaligned shared memory read (#8123)
1 parent 494165f commit c8771ab

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml-cuda/mma.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ struct mma_int_A_I16K4 {
23 23 |
24 24 |   __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
25 25 |   #if defined(INT8_MMA_AVAILABLE)
26    | - const int * xs = xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2);
   26 | + const int * xs = xs0 + (threadIdx.x%I)*stride;
27 27 |   asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
28 28 |   : "+r"(x[0]), "+r"(x[1])
29 29 |   : "l"(xs));

0 commit comments

Comments (0)