@@ -11543,6 +11543,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
11543
11543
11544
11544
float scales [QK_K /IQ1S_BLOCK_SIZE ];
11545
11545
float weight [IQ1S_BLOCK_SIZE ];
11546
+ float waux [IQ1S_BLOCK_SIZE ];
11546
11547
int8_t L [IQ1S_BLOCK_SIZE ];
11547
11548
float sumx [IQ1S_BLOCK_SIZE + 1 ];
11548
11549
float sumw [IQ1S_BLOCK_SIZE + 1 ];
@@ -11562,12 +11563,13 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
11562
11563
const float * xbl = x + QK_K * ibl ;
11563
11564
float sumx2 = 0 ;
11564
11565
for (int i = 0 ; i < QK_K ; ++ i ) sumx2 += xbl [i ]* xbl [i ];
11565
- float sigma2 = 2 * sumx2 /QK_K ;
11566
+ float sigma2 = sumx2 /QK_K ;
11566
11567
11567
11568
for (int ib = 0 ; ib < QK_K /IQ1S_BLOCK_SIZE ; ++ ib ) {
11568
11569
const float * xb = xbl + IQ1S_BLOCK_SIZE * ib ;
11569
11570
const float * qw = quant_weights + QK_K * ibl + IQ1S_BLOCK_SIZE * ib ;
11570
11571
for (int i = 0 ; i < IQ1S_BLOCK_SIZE ; ++ i ) weight [i ] = qw [i ] * sqrtf (sigma2 + xb [i ]* xb [i ]);
11572
+ for (int i = 0 ; i < IQ1S_BLOCK_SIZE ; ++ i ) waux [i ] = sqrtf (weight [i ]);
11571
11573
float max = fabsf (xb [0 ]);
11572
11574
for (int i = 1 ; i < IQ1S_BLOCK_SIZE ; ++ i ) max = MAX (max , fabsf (xb [i ]));
11573
11575
if (!max ) {
@@ -11629,7 +11631,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
11629
11631
if (grid_index < 0 ) {
11630
11632
all_on_grid = false;
11631
11633
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs [u ] - 1 ;
11632
- grid_index = iq1_find_best_neighbour2 (neighbours , kgrid_q2xs , xb + 8 * k , weight + 8 * k , scale , xx , L + 8 * k , NGRID_IQ1S );
11634
+ grid_index = iq1_find_best_neighbour2 (neighbours , kgrid_q2xs , xb + 8 * k , waux + 8 * k , scale , xx , L + 8 * k , NGRID_IQ1S );
11633
11635
GGML_ASSERT (grid_index >= 0 );
11634
11636
}
11635
11637
index [k ] = grid_index ;
0 commit comments