Skip to content

Commit 71e6ae3

Browse files
committed
ggml : continue from #729 (wip)
1 parent bd166f7 commit 71e6ae3

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

ggml.c

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1188,13 +1188,17 @@ static void quantize_row_q4_2_reference(const float * restrict x, block_q4_2 * r
11881188

11891189
for (int i = 0; i < nb; i++) {
11901190
float amax = 0.0f; // absolute max
1191+
float max = 0.0f;
11911192

11921193
for (int l = 0; l < QK4_2; l++) {
11931194
const float v = x[i*QK4_2 + l];
1194-
amax = MAX(amax, fabsf(v));
1195+
if (amax < fabsf(v)) {
1196+
amax = fabsf(v);
1197+
max = v;
1198+
}
11951199
}
11961200

1197-
const float d = amax / ((1 << 3) - 1);
1201+
const float d = max / -8;
11981202

11991203
const float id = d ? 1.0f/d : 0.0f;
12001204

@@ -1204,8 +1208,8 @@ static void quantize_row_q4_2_reference(const float * restrict x, block_q4_2 * r
12041208
const float v0 = x[i*QK4_2 + l + 0]*id;
12051209
const float v1 = x[i*QK4_2 + l + 1]*id;
12061210

1207-
const uint8_t vi0 = (uint8_t)(v0 + 8.5f);
1208-
const uint8_t vi1 = (uint8_t)(v1 + 8.5f);
1211+
const uint8_t vi0 = MIN(15, (int8_t)roundf(v0) + 8);
1212+
const uint8_t vi1 = MIN(15, (int8_t)roundf(v1) + 8);
12091213

12101214
assert(vi0 < 16);
12111215
assert(vi1 < 16);
@@ -1299,9 +1303,9 @@ static void quantize_row_q4_2(const float * restrict x, void * restrict vy, int
12991303

13001304
block_q4_2 * restrict y = vy;
13011305

1302-
//quantize_row_q4_2_reference(x, y, k);
1306+
quantize_row_q4_2_reference(x, y, k);
13031307
// This produces the exact same format, just better match to the input floats ("better" as measured by RMSE)
1304-
quantize_row_q4_2_rmse(x, y, k);
1308+
//quantize_row_q4_2_rmse(x, y, k);
13051309
}
13061310

13071311
static void quantize_row_q4_3_reference(const float * restrict x, block_q4_3 * restrict y, int k) {
@@ -1852,7 +1856,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
18521856
[GGML_TYPE_Q4_2] = {
18531857
.dequantize_row_q = dequantize_row_q4_2,
18541858
.quantize_row_q = quantize_row_q4_2,
1855-
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_2_rmse, //quantize_row_q4_2_reference,
1859+
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_2_reference,
18561860
.quantize_row_q_dot = quantize_row_q8_0,
18571861
.vec_dot_q = ggml_vec_dot_q4_2_q8_0,
18581862
},
@@ -12184,8 +12188,8 @@ size_t ggml_quantize_q4_2(const float * src, void * dst, int n, int k, int64_t *
1218412188
for (int j = 0; j < n; j += k) {
1218512189
block_q4_2 * restrict y = (block_q4_2 *)dst + j/QK4_2;
1218612190

12187-
//quantize_row_q4_2_reference(src + j, y, k);
12188-
quantize_row_q4_2_rmse(src + j, y, k);
12191+
quantize_row_q4_2_reference(src + j, y, k);
12192+
//quantize_row_q4_2_rmse(src + j, y, k);
1218912193

1219012194
for (int i = 0; i < nb; i++) {
1219112195
for (int l = 0; l < QK4_2; l += 2) {

0 commit comments

Comments
 (0)