Skip to content

Commit 79e49c9

Browse files
committed
ggml : simplify scalar dot
1 parent f08f6f7 commit 79e49c9

File tree

1 file changed

+4 -12 lines changed

1 file changed

+4 -12 lines changed

ggml.c

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2236,15 +2236,13 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
22362236
float sumf = 0.0;
22372237

22382238
for (int i = 0; i < nb; i++) {
2239-
const int8_t * py = y[i].qs;
2240-
22412239
int sumi = 0;
22422240

22432241
for (int j = 0; j < qk/2; ++j) {
22442242
const int v0 = (x[i].qs[j] & 0xf) - 8;
22452243
const int v1 = (x[i].qs[j] >> 4) - 8;
22462244

2247-
sumi += (v0 * py[j]) + (v1 * py[j + qk/2]);
2245+
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
22482246
}
22492247

22502248
sumf += (x[i].d*y[i].d)*sumi;
@@ -2360,15 +2358,13 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
23602358
float sumf = 0.0;
23612359

23622360
for (int i = 0; i < nb; i++) {
2363-
const int8_t * py = y[i].qs;
2364-
23652361
int sumi = 0;
23662362

23672363
for (int j = 0; j < qk/2; ++j) {
23682364
const int v0 = (x[i].qs[j] & 0xf);
23692365
const int v1 = (x[i].qs[j] >> 4);
23702366

2371-
sumi += (v0 * py[j]) + (v1 * py[j + qk/2]);
2367+
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
23722368
}
23732369

23742370
sumf += (x[i].d*y[i].d)*sumi + x[i].m*(y[i].s0 + y[i].s1);
@@ -2694,8 +2690,6 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
26942690
float sumf = 0.0;
26952691

26962692
for (int i = 0; i < nb; i++) {
2697-
const int8_t * py = y[i].qs;
2698-
26992693
uint32_t qh;
27002694
memcpy(&qh, x[i].qh, sizeof(qh));
27012695

@@ -2708,7 +2702,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
27082702
const int32_t x0 = ((x[i].qs[j] & 0xf) | xh_0) - 16;
27092703
const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
27102704

2711-
sumi += (x0 * py[j]) + (x1 * py[j + qk/2]);
2705+
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
27122706
}
27132707

27142708
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi;
@@ -2889,8 +2883,6 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
28892883
float sumf = 0.0;
28902884

28912885
for (int i = 0; i < nb; i++) {
2892-
const int8_t * py = y[i].qs;
2893-
28942886
uint32_t qh;
28952887
memcpy(&qh, x[i].qh, sizeof(qh));
28962888

@@ -2903,7 +2895,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
29032895
const int32_t x0 = (x[i].qs[j] & 0xF) | xh_0;
29042896
const int32_t x1 = (x[i].qs[j] >> 4) | xh_1;
29052897

2906-
sumi += (x0 * py[j]) + (x1 * py[j + qk/2]);
2898+
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
29072899
}
29082900

29092901
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*(y[i].s0 + y[i].s1);

0 commit comments

Comments (0)