@@ -2236,15 +2236,13 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2236
2236
float sumf = 0.0 ;
2237
2237
2238
2238
for (int i = 0 ; i < nb ; i ++ ) {
2239
- const int8_t * py = y [i ].qs ;
2240
-
2241
2239
int sumi = 0 ;
2242
2240
2243
2241
for (int j = 0 ; j < qk /2 ; ++ j ) {
2244
2242
const int v0 = (x [i ].qs [j ] & 0xf ) - 8 ;
2245
2243
const int v1 = (x [i ].qs [j ] >> 4 ) - 8 ;
2246
2244
2247
- sumi += (v0 * py [ j ]) + (v1 * py [j + qk /2 ]);
2245
+ sumi += (v0 * y [ i ]. qs [ j ]) + (v1 * y [ i ]. qs [j + qk /2 ]);
2248
2246
}
2249
2247
2250
2248
sumf += (x [i ].d * y [i ].d )* sumi ;
@@ -2360,15 +2358,13 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
2360
2358
float sumf = 0.0 ;
2361
2359
2362
2360
for (int i = 0 ; i < nb ; i ++ ) {
2363
- const int8_t * py = y [i ].qs ;
2364
-
2365
2361
int sumi = 0 ;
2366
2362
2367
2363
for (int j = 0 ; j < qk /2 ; ++ j ) {
2368
2364
const int v0 = (x [i ].qs [j ] & 0xf );
2369
2365
const int v1 = (x [i ].qs [j ] >> 4 );
2370
2366
2371
- sumi += (v0 * py [ j ]) + (v1 * py [j + qk /2 ]);
2367
+ sumi += (v0 * y [ i ]. qs [ j ]) + (v1 * y [ i ]. qs [j + qk /2 ]);
2372
2368
}
2373
2369
2374
2370
sumf += (x [i ].d * y [i ].d )* sumi + x [i ].m * (y [i ].s0 + y [i ].s1 );
@@ -2694,8 +2690,6 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
2694
2690
float sumf = 0.0 ;
2695
2691
2696
2692
for (int i = 0 ; i < nb ; i ++ ) {
2697
- const int8_t * py = y [i ].qs ;
2698
-
2699
2693
uint32_t qh ;
2700
2694
memcpy (& qh , x [i ].qh , sizeof (qh ));
2701
2695
@@ -2708,7 +2702,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
2708
2702
const int32_t x0 = ((x [i ].qs [j ] & 0xf ) | xh_0 ) - 16 ;
2709
2703
const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
2710
2704
2711
- sumi += (x0 * py [ j ]) + (x1 * py [j + qk /2 ]);
2705
+ sumi += (x0 * y [ i ]. qs [ j ]) + (x1 * y [ i ]. qs [j + qk /2 ]);
2712
2706
}
2713
2707
2714
2708
sumf += (GGML_FP16_TO_FP32 (x [i ].d )* y [i ].d )* sumi ;
@@ -2889,8 +2883,6 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
2889
2883
float sumf = 0.0 ;
2890
2884
2891
2885
for (int i = 0 ; i < nb ; i ++ ) {
2892
- const int8_t * py = y [i ].qs ;
2893
-
2894
2886
uint32_t qh ;
2895
2887
memcpy (& qh , x [i ].qh , sizeof (qh ));
2896
2888
@@ -2903,7 +2895,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
2903
2895
const int32_t x0 = (x [i ].qs [j ] & 0xF ) | xh_0 ;
2904
2896
const int32_t x1 = (x [i ].qs [j ] >> 4 ) | xh_1 ;
2905
2897
2906
- sumi += (x0 * py [ j ]) + (x1 * py [j + qk /2 ]);
2898
+ sumi += (x0 * y [ i ]. qs [ j ]) + (x1 * y [ i ]. qs [j + qk /2 ]);
2907
2899
}
2908
2900
2909
2901
sumf += (GGML_FP16_TO_FP32 (x [i ].d )* y [i ].d )* sumi + GGML_FP16_TO_FP32 (x [i ].m )* (y [i ].s0 + y [i ].s1 );
0 commit comments