Skip to content

Commit 46a4ed0

Browse files
committed
ggml : fix bug in Q4_1 x Q8_1 I8MM kernel
ggml-ci
1 parent 20eb20e commit 46a4ed0

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1792,7 +1792,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
1792 1792   const int8x16_t y1_l = vld1q_s8(b_y1->qs);
1793 1793   const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16);
1794 1794
1795      - float32_t _scale[4] = { GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d),
     1795 + float32_t _scale[4] = {
     1796 + GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d),
1796 1797   GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d),
1797 1798   GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d),
1798 1799   GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)};
@@ -2357,10 +2358,12 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
2357 2358   const block_q8_1 * restrict b_y0 = &vy0[i];
2358 2359   const block_q8_1 * restrict b_y1 = &vy1[i];
2359 2360
2360      - float32_t summs_t[4] = {GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s),
     2361 + float32_t summs_t[4] = {
     2362 + GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s),
2361 2363   GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->s),
2362 2364   GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->s),
2363 2365   GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->s)};
     2366 +
2364 2367   summs0 = vaddq_f32(summs0, vld1q_f32(summs_t));
2365 2368
2366 2369   const uint8x16_t m4b = vdupq_n_u8(0x0F);
@@ -2381,10 +2384,11 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
2381 2384   const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16);
2382 2385
2383 2386   // mmla into int32x4_t
2384      - float32_t _scale[4] = {GGML_FP16_TO_FP32(b_x0->d)*b_y0->d,
2385      - GGML_FP16_TO_FP32(b_x0->d)*b_y1->d,
2386      - GGML_FP16_TO_FP32(b_x1->d)*b_y0->d,
2387      - GGML_FP16_TO_FP32(b_x1->d)*b_y1->d};
     2387 + float32_t _scale[4] = {
     2388 + GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d),
     2389 + GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d),
     2390 + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d),
     2391 + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)};
2388 2392   float32x4_t scale = vld1q_f32(_scale);
2389 2393
2390 2394   int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));

0 commit comments

Comments
 (0)