File tree 1 file changed +6
-0
lines changed
1 file changed +6
-0
lines changed Original file line number Diff line number Diff line change @@ -505,13 +505,19 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
505
505
506
506
// multiply int8_t, add results pairwise twice and return as float vector
507
507
static inline __m256 mul_sum_i8_pairs_float (const __m256i x , const __m256i y ) {
508
+ #if __AVXVNNIINT8__
509
+ const __m256i zero = _mm256_setzero_si256 ();
510
+ const __m256i summed_pairs = _mm256_dpbssd_epi32 (zero , x , y );
511
+ return _mm256_cvtepi32_ps (summed_pairs );
512
+ #else
508
513
// Get absolute values of x vectors
509
514
const __m256i ax = _mm256_sign_epi8 (x , x );
510
515
// Sign the values of the y vectors
511
516
const __m256i sy = _mm256_sign_epi8 (y , x );
512
517
// Perform multiplication and create 16-bit values
513
518
const __m256i dot = _mm256_maddubs_epi16 (ax , sy );
514
519
return sum_i16_pairs_float (dot );
520
+ #endif
515
521
}
516
522
517
523
static inline __m128i packNibbles ( __m256i bytes )
You can’t perform that action at this time.
0 commit comments