Skip to content

Commit 8d87901

Browse files
sfackler authored and
alexcrichton committed
Implement _mm256_permute4x64_epi64 (#144)
1 parent 6fdfce5 commit 8d87901

File tree

1 file changed

+57
-1
lines changed

1 file changed

+57
-1
lines changed

src/x86/avx2.rs

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -957,8 +957,56 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: u32x8, b: u32x8) -> u32x8 {
957957
permd(a, b)
958958
}
959959

960+
/// Permutes 64-bit integers from `a` using control mask `imm8`.
961+
#[inline(always)]
962+
#[target_feature = "+avx2"]
963+
#[cfg_attr(test, assert_instr(vpermq, imm8 = 9))]
964+
pub unsafe fn _mm256_permute4x64_epi64(a: i64x4, imm8: i32) -> i64x4 {
965+
let imm8 = (imm8 & 0xFF) as u8;
966+
macro_rules! permute4 {
967+
($a:expr, $b:expr, $c:expr, $d:expr) => {
968+
simd_shuffle4(a, i64x4::splat(0), [$a, $b, $c, $d]);
969+
}
970+
}
971+
macro_rules! permute3 {
972+
($a:expr, $b:expr, $c:expr) => {
973+
match (imm8 >> 6) & 0b11 {
974+
0b00 => permute4!($a, $b, $c, 0),
975+
0b01 => permute4!($a, $b, $c, 1),
976+
0b10 => permute4!($a, $b, $c, 2),
977+
_ => permute4!($a, $b, $c, 3),
978+
}
979+
}
980+
}
981+
macro_rules! permute2 {
982+
($a:expr, $b:expr) => {
983+
match (imm8 >> 4) & 0b11 {
984+
0b00 => permute3!($a, $b, 0),
985+
0b01 => permute3!($a, $b, 1),
986+
0b10 => permute3!($a, $b, 2),
987+
_ => permute3!($a, $b, 3),
988+
}
989+
}
990+
}
991+
macro_rules! permute1 {
992+
($a:expr) => {
993+
match (imm8 >> 2) & 0b11 {
994+
0b00 => permute2!($a, 0),
995+
0b01 => permute2!($a, 1),
996+
0b10 => permute2!($a, 2),
997+
_ => permute2!($a, 3),
998+
}
999+
}
1000+
}
1001+
match imm8 & 0b11 {
1002+
0b00 => permute1!(0),
1003+
0b01 => permute1!(1),
1004+
0b10 => permute1!(2),
1005+
_ => permute1!(3),
1006+
}
1007+
}
1008+
9601009
// TODO _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
961-
// TODO _mm256_permute4x64_epi64 (__m256i a, const int imm8)
9621010
// TODO _mm256_permute4x64_pd (__m256d a, const int imm8)
9631011
// TODO _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
9641012

@@ -2738,4 +2786,12 @@ mod tests {
27382786
let r = avx2::_mm256_permutevar8x32_epi32(a, b);
27392787
assert_eq!(r, expected);
27402788
}
2789+
2790+
#[simd_test = "avx2"]
2791+
unsafe fn _mm256_permute4x64_epi64() {
2792+
let a = i64x4::new(100, 200, 300, 400);
2793+
let expected = i64x4::new(400, 100, 200, 100);
2794+
let r = avx2::_mm256_permute4x64_epi64(a, 0b00010011);
2795+
assert_eq!(r, expected);
2796+
}
27412797
}

0 commit comments

Comments
 (0)