Skip to content

Commit 230396b

Browse files
update avx2
1 parent fa9a742 commit 230396b

File tree

1 file changed

+21
-12
lines changed

1 file changed

+21
-12
lines changed

ggml-quants.c

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3806,18 +3806,27 @@ void ggml_vec_dot_q2_2_q8_0(int n, float * restrict s, size_t bs, const void * r
38063806

38073807
const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(y[i].d) );
38083808

3809-
__m256i xq8 = _mm256_set_epi32(
3810-
(int)q22_grid[x[i].qs[7]],
3811-
(int)q22_grid[x[i].qs[6]],
3812-
(int)q22_grid[x[i].qs[5]],
3813-
(int)q22_grid[x[i].qs[4]],
3814-
(int)q22_grid[x[i].qs[3]],
3815-
(int)q22_grid[x[i].qs[2]],
3816-
(int)q22_grid[x[i].qs[1]],
3817-
(int)q22_grid[x[i].qs[0]]
3818-
);
3819-
3820-
__m256i yq8 = _mm256_loadu_si256((const __m256i*)(y[i].qs));
3809+
__m128i xq8b = _mm_loadu_si64(x[i].qs);
3810+
__m256i xq8 = MM256_SET_M128I(xq8b, xq8b);
3811+
__m256i xq8l = _mm256_shuffle_epi8(xq8, _mm256_set_epi8(5, -1, 5, -1, 5, -1, 5, -1,
3812+
4, -1, 4, -1, 4, -1, 4, -1,
3813+
1, -1, 1, -1, 1, -1, 1, -1,
3814+
0, -1, 0, -1, 0, -1, 0, -1));
3815+
__m256i xq8h = _mm256_shuffle_epi8(xq8, _mm256_set_epi8(7, -1, 7, -1, 7, -1, 7, -1,
3816+
6, -1, 6, -1, 6, -1, 6, -1,
3817+
3, -1, 3, -1, 3, -1, 3, -1,
3818+
2, -1, 2, -1, 2, -1, 2, -1));
3819+
__m256i shift = _mm256_set_epi16(64, 16, 4, 1,
3820+
64, 16, 4, 1,
3821+
64, 16, 4, 1,
3822+
64, 16, 4, 1);
3823+
xq8l = _mm256_mullo_epi16(xq8l, shift);
3824+
xq8h = _mm256_mullo_epi16(xq8h, shift);
3825+
xq8l = _mm256_srai_epi16(xq8l, 14);
3826+
xq8h = _mm256_srai_epi16(xq8h, 14);
3827+
xq8 = _mm256_packs_epi16(xq8l, xq8h);
3828+
3829+
__m256i yq8 = _mm256_lddqu_si256((const __m256i*)(y[i].qs));
38213830
const __m256 q = mul_sum_i8_pairs_float(xq8, yq8);
38223831

38233832
acc = _mm256_fmadd_ps( d, q, acc );

0 commit comments

Comments
 (0)