Skip to content

Commit 81cb1ee

Browse files
committed
A better mul_sum_i8_pairs_float implementation using AVX-VNNI-INT8
1 parent bde28f2 commit 81cb1ee

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

ggml.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,19 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
505505

506506
// multiply int8_t, add results pairwise twice and return as float vector
507507
static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) {
508+
#if __AVXVNNIINT8__
509+
const __m256i zero = _mm256_setzero_si256();
510+
const __m256i summed_pairs = _mm256_dpbssd_epi32(zero, x, y);
511+
return _mm256_cvtepi32_ps(summed_pairs);
512+
#else
508513
// Get absolute values of x vectors
509514
const __m256i ax = _mm256_sign_epi8(x, x);
510515
// Sign the values of the y vectors
511516
const __m256i sy = _mm256_sign_epi8(y, x);
512517
// Perform multiplication and create 16-bit values
513518
const __m256i dot = _mm256_maddubs_epi16(ax, sy);
514519
return sum_i16_pairs_float(dot);
520+
#endif
515521
}
516522

517523
static inline __m128i packNibbles( __m256i bytes )

0 commit comments

Comments
 (0)