Skip to content

Commit 15fb145

Browse files
committed
ggml : fix Q8_0 and Q8_1 rounding
1 parent 96e5d02 commit 15fb145

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

ggml.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -925,8 +925,8 @@ static void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * r
925 925   const float v0 = x[i*QK8_0 + 2*j + 0]*id;
926 926   const float v1 = x[i*QK8_0 + 2*j + 1]*id;
927 927
928     - y[i].qs[ j] = v0 + 0.5f;
929     - y[i].qs[QK8_0/2 + j] = v1 + 0.5f;
    928 + y[i].qs[ j] = roundf(v0);
    929 + y[i].qs[QK8_0/2 + j] = roundf(v1);
930 930   }
931 931   }
932 932   }
@@ -1083,8 +1083,8 @@ static void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * r
1083 1083   const float v0 = x[i*QK8_1 + 2*j + 0]*id;
1084 1084   const float v1 = x[i*QK8_1 + 2*j + 1]*id;
1085 1085
1086      - y[i].qs[ j] = v0 + 0.5f;
1087      - y[i].qs[QK8_1/2 + j] = v1 + 0.5f;
     1086 + y[i].qs[ j] = roundf(v0);
     1087 + y[i].qs[QK8_1/2 + j] = roundf(v1);
1088 1088
1089 1089   sum += y[i].qs[ j];
1090 1090   sum += y[i].qs[QK8_1/2 + j];

0 commit comments

Comments
 (0)