@@ -957,8 +957,56 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: u32x8, b: u32x8) -> u32x8 {
957
957
permd ( a, b)
958
958
}
959
959
960
+ /// Permutes 64-bit integers from `a` using control mask `imm8`.
961
+ #[ inline( always) ]
962
+ #[ target_feature = "+avx2" ]
963
+ #[ cfg_attr( test, assert_instr( vpermq, imm8 = 9 ) ) ]
964
+ pub unsafe fn _mm256_permute4x64_epi64 ( a : i64x4 , imm8 : i32 ) -> i64x4 {
965
+ let imm8 = ( imm8 & 0xFF ) as u8 ;
966
+ macro_rules! permute4 {
967
+ ( $a: expr, $b: expr, $c: expr, $d: expr) => {
968
+ simd_shuffle4( a, i64x4:: splat( 0 ) , [ $a, $b, $c, $d] ) ;
969
+ }
970
+ }
971
+ macro_rules! permute3 {
972
+ ( $a: expr, $b: expr, $c: expr) => {
973
+ match ( imm8 >> 6 ) & 0b11 {
974
+ 0b00 => permute4!( $a, $b, $c, 0 ) ,
975
+ 0b01 => permute4!( $a, $b, $c, 1 ) ,
976
+ 0b10 => permute4!( $a, $b, $c, 2 ) ,
977
+ _ => permute4!( $a, $b, $c, 3 ) ,
978
+ }
979
+ }
980
+ }
981
+ macro_rules! permute2 {
982
+ ( $a: expr, $b: expr) => {
983
+ match ( imm8 >> 4 ) & 0b11 {
984
+ 0b00 => permute3!( $a, $b, 0 ) ,
985
+ 0b01 => permute3!( $a, $b, 1 ) ,
986
+ 0b10 => permute3!( $a, $b, 2 ) ,
987
+ _ => permute3!( $a, $b, 3 ) ,
988
+ }
989
+ }
990
+ }
991
+ macro_rules! permute1 {
992
+ ( $a: expr) => {
993
+ match ( imm8 >> 2 ) & 0b11 {
994
+ 0b00 => permute2!( $a, 0 ) ,
995
+ 0b01 => permute2!( $a, 1 ) ,
996
+ 0b10 => permute2!( $a, 2 ) ,
997
+ _ => permute2!( $a, 3 ) ,
998
+ }
999
+ }
1000
+ }
1001
+ match imm8 & 0b11 {
1002
+ 0b00 => permute1 ! ( 0 ) ,
1003
+ 0b01 => permute1 ! ( 1 ) ,
1004
+ 0b10 => permute1 ! ( 2 ) ,
1005
+ _ => permute1 ! ( 3 ) ,
1006
+ }
1007
+ }
1008
+
960
1009
// TODO _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
961
- // TODO _mm256_permute4x64_epi64 (__m256i a, const int imm8)
962
1010
// TODO _mm256_permute4x64_pd (__m256d a, const int imm8)
963
1011
// TODO _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
964
1012
@@ -2738,4 +2786,12 @@ mod tests {
2738
2786
let r = avx2:: _mm256_permutevar8x32_epi32 ( a, b) ;
2739
2787
assert_eq ! ( r, expected) ;
2740
2788
}
2789
+
2790
+ #[ simd_test = "avx2" ]
2791
+ unsafe fn _mm256_permute4x64_epi64 ( ) {
2792
+ let a = i64x4:: new ( 100 , 200 , 300 , 400 ) ;
2793
+ let expected = i64x4:: new ( 400 , 100 , 200 , 100 ) ;
2794
+ let r = avx2:: _mm256_permute4x64_epi64 ( a, 0b00010011 ) ;
2795
+ assert_eq ! ( r, expected) ;
2796
+ }
2741
2797
}
0 commit comments