diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 1c7ddff7f9..37ca8a94ca 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -1664,6 +1664,34 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { vcvtpq_u64_f64_(a) } +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + /// Floating-point multiply-add to accumulator #[inline] #[target_feature(enable = "neon")] @@ -5614,6 +5642,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vextq_p64() { + let a: i64x2 = i64x2::new(0, 8); + let b: i64x2 = i64x2::new(9, 11); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vextq_p64::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_f64() { + let a: f64x2 = f64x2::new(0., 2.); + let b: f64x2 = f64x2::new(3., 4.); + let e: f64x2 = f64x2::new(2., 3.); + let r: f64x2 = transmute(vextq_f64::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmla_f64() { let a: f64 = 0.; diff --git a/crates/core_arch/src/aarch64/neon/mod.rs 
b/crates/core_arch/src/aarch64/neon/mod.rs index 719aa2ed60..26971a54dd 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -15,6 +15,7 @@ use crate::{ }; #[cfg(test)] use stdarch_test::assert_instr; +use core::hint::unreachable_unchecked; types! { /// ARM-specific 64-bit wide vector of one packed `f64`. @@ -1427,6 +1428,29 @@ pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vpmaxq_f64_(a, b) } +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str, N = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_p64(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t { + if N != 0 { + unreachable_unchecked() + } + a +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str, N = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_f64(a: float64x1_t, _b: float64x1_t) -> float64x1_t { + if N != 0 { + unreachable_unchecked() + } + a +} /// Vector combine #[inline] #[target_feature(enable = "neon")] @@ -3470,6 +3494,24 @@ mod tests { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vext_p64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vext_p64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_f64() { + let a: f64x1 = f64x1::new(0.); + let b: f64x1 = f64x1::new(1.); + let e: f64x1 = f64x1::new(0.); + let r: f64x1 = transmute(vext_f64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + macro_rules! 
test_vcombine { ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => { #[allow(unused_assignments)] diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs index f01a244354..c5528fad9a 100644 --- a/crates/core_arch/src/arm/neon/generated.rs +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -2044,6 +2044,416 @@ pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { simd_cast(a) } +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_imm4!(N); + match N & 0b1111 { + 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16]), + 2 => simd_shuffle16(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 => simd_shuffle16(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => simd_shuffle16(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + _ => 
unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => 
simd_shuffle4(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_imm4!(N); + match N & 0b1111 { + 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), + 2 => simd_shuffle16(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 => simd_shuffle16(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => simd_shuffle16(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 
4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_imm4!(N); + match N & 0b1111 { + 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), + 2 => simd_shuffle16(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 => simd_shuffle16(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16(a, b, 
[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => simd_shuffle16(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => 
simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vext_f32(a: float32x2_t, b: float32x2_t) 
-> float32x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2(a, b, [0, 1]), + 1 => simd_shuffle2(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} + /// Multiply-add to accumulator #[inline] #[target_feature(enable = "neon")] @@ -8987,6 +9397,186 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vext_s8() { + let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i8x8 = transmute(vext_s8::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s8() { + let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_s8::<8>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s16() { + let a: i16x4 = i16x4::new(0, 8, 8, 9); + let b: i16x4 = i16x4::new(9, 11, 14, 15); + let e: i16x4 = i16x4::new(8, 9, 9, 11); + let r: i16x4 = transmute(vext_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + 
#[simd_test(enable = "neon")] + unsafe fn test_vextq_s16() { + let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i16x8 = transmute(vextq_s16::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s32() { + let a: i32x2 = i32x2::new(0, 8); + let b: i32x2 = i32x2::new(9, 11); + let e: i32x2 = i32x2::new(8, 9); + let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s32() { + let a: i32x4 = i32x4::new(0, 8, 8, 9); + let b: i32x4 = i32x4::new(9, 11, 14, 15); + let e: i32x4 = i32x4::new(8, 9, 9, 11); + let r: i32x4 = transmute(vextq_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u8() { + let a: u8x8 = u8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: u8x8 = u8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: u8x8 = u8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: u8x8 = transmute(vext_u8::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u8() { + let a: u8x16 = u8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: u8x16 = u8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: u8x16 = u8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: u8x16 = transmute(vextq_u8::<8>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u16() { + let a: u16x4 = u16x4::new(0, 8, 8, 9); + let b: u16x4 = u16x4::new(9, 11, 14, 15); + let e: u16x4 = u16x4::new(8, 9, 9, 11); + let r: u16x4 = transmute(vext_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u16() { + let a: u16x8 = u16x8::new(0, 
8, 8, 9, 8, 9, 9, 11); + let b: u16x8 = u16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: u16x8 = u16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: u16x8 = transmute(vextq_u16::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u32() { + let a: u32x2 = u32x2::new(0, 8); + let b: u32x2 = u32x2::new(9, 11); + let e: u32x2 = u32x2::new(8, 9); + let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u32() { + let a: u32x4 = u32x4::new(0, 8, 8, 9); + let b: u32x4 = u32x4::new(9, 11, 14, 15); + let e: u32x4 = u32x4::new(8, 9, 9, 11); + let r: u32x4 = transmute(vextq_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_p8() { + let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i8x8 = transmute(vext_p8::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_p8() { + let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_p8::<8>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_p16() { + let a: i16x4 = i16x4::new(0, 8, 8, 9); + let b: i16x4 = i16x4::new(9, 11, 14, 15); + let e: i16x4 = i16x4::new(8, 9, 9, 11); + let r: i16x4 = transmute(vext_p16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_p16() { + let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: 
i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i16x8 = transmute(vextq_p16::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s64() { + let a: i64x2 = i64x2::new(0, 8); + let b: i64x2 = i64x2::new(9, 11); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u64() { + let a: u64x2 = u64x2::new(0, 8); + let b: u64x2 = u64x2::new(9, 11); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_f32() { + let a: f32x2 = f32x2::new(0., 2.); + let b: f32x2 = f32x2::new(3., 4.); + let e: f32x2 = f32x2::new(2., 3.); + let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_f32() { + let a: f32x4 = f32x4::new(0., 2., 2., 3.); + let b: f32x4 = f32x4::new(3., 4., 5., 6.); + let e: f32x4 = f32x4::new(2., 3., 3., 4.); + let r: f32x4 = transmute(vextq_f32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmla_s8() { let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs index 65d10d1797..cdb0f82c9f 100644 --- a/crates/core_arch/src/arm/neon/mod.rs +++ b/crates/core_arch/src/arm/neon/mod.rs @@ -4479,212 +4479,28 @@ pub unsafe fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("str", N = 0))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr("str", N = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - if N < 0 || N > 15 { - unreachable_unchecked(); - }; - match N & 0b1111 { - 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16( - a, - b, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], - ), - 2 => simd_shuffle16( - a, - b, - [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], - ), - 3 => simd_shuffle16( - a, - b, - [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - ), - 4 => simd_shuffle16( - a, - b, - [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - ), - 5 => simd_shuffle16( - a, - b, - [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - ), - 6 => simd_shuffle16( - a, - b, - [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], - ), - 7 => simd_shuffle16( - a, - b, - [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], - ), - 8 => simd_shuffle16( - a, - b, - [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], - ), - 9 => simd_shuffle16( - a, - b, - [ - 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - ], - ), - 10 => simd_shuffle16( - a, - b, - [ - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - ], - ), - 11 => simd_shuffle16( - a, - b, - [ - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - ], - ), - 12 => simd_shuffle16( - a, - b, - [ - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - ], - ), - 13 => simd_shuffle16( - a, - b, - [ - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - ], - ), - 14 => simd_shuffle16( - a, - b, - [ - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - ], - ), - 15 => simd_shuffle16( - a, - b, - [ - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - ], - ), - _ => unreachable_unchecked(), +pub unsafe fn vext_s64(a: int64x1_t, _b: 
int64x1_t) -> int64x1_t { + if N != 0 { + unreachable_unchecked() } + a } /// Extract vector from pair of vectors #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("str", N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("str", N = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - if N < 0 || N > 15 { - unreachable_unchecked(); - }; - match N & 0b1111 { - 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16( - a, - b, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], - ), - 2 => simd_shuffle16( - a, - b, - [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], - ), - 3 => simd_shuffle16( - a, - b, - [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - ), - 4 => simd_shuffle16( - a, - b, - [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - ), - 5 => simd_shuffle16( - a, - b, - [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - ), - 6 => simd_shuffle16( - a, - b, - [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], - ), - 7 => simd_shuffle16( - a, - b, - [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], - ), - 8 => simd_shuffle16( - a, - b, - [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], - ), - 9 => simd_shuffle16( - a, - b, - [ - 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - ], - ), - 10 => simd_shuffle16( - a, - b, - [ - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - ], - ), - 11 => simd_shuffle16( - a, - b, - [ - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - ], - ), - 12 => simd_shuffle16( - a, - b, - [ - 12, 13, 14, 15, 16, 17, 
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - ], - ), - 13 => simd_shuffle16( - a, - b, - [ - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - ], - ), - 14 => simd_shuffle16( - a, - b, - [ - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - ], - ), - 15 => simd_shuffle16( - a, - b, - [ - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - ], - ), - _ => unreachable_unchecked(), +pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { + if N != 0 { + unreachable_unchecked() } + a } // These float-to-int implementations have undefined behaviour when `a` overflows @@ -6093,40 +5909,36 @@ mod tests { } #[simd_test(enable = "neon")] - unsafe fn test_vextq_s8() { - let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = i8x16::new( - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 31, 32, - ); - let e = i8x16::new(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); - let r: i8x16 = transmute(vextq_s8::<3>(transmute(a), transmute(b))); + unsafe fn test_vshrq_n_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x16::new(0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4); + let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u8() { + unsafe fn test_vshlq_n_u8() { let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = u8x16::new( - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 31, 32, - ); - let e = u8x16::new(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); - let r: u8x16 = transmute(vextq_u8::<3>(transmute(a), transmute(b))); + let e = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vshlq_n_u8::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16); - let e = u8x16::new(0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4); - let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a))); + unsafe fn test_vext_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vext_s64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: u8x16 = transmute(vshlq_n_u8::<2>(transmute(a))); + unsafe fn test_vext_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vext_u64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index e9f82943f1..baa664b81f 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -890,6 +890,52 @@ aarch64 = fcvtpu link-aarch64 = fcvtpu._EXT2_._EXT_ generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Extract vector from pair of vectors +name = vext +constn = N +multi_fn = static_assert_imm-out_exp_len-N +multi_fn = matchn-out_exp_len, simd_shuffle-out_len-noext, a, b, {asc-n-out_len} +a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15 +b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11 +n = HFLEN +validate 8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19 + +arm = "vext.8" +aarch64 = ext +generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t + +/// Extract vector from pair of vectors +name = vext +constn = N +multi_fn = static_assert_imm-out_exp_len-N +multi_fn = matchn-out_exp_len, simd_shuffle-out_len-noext, a, b, {asc-n-out_len} +a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15 +b = 9, 11, 14, 15, 16, 17, 18, 19, 
0, 8, 8, 9, 8, 9, 9, 11 +n = HFLEN +validate 8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19 + +aarch64 = ext +generate poly64x2_t + +arm = vmov +generate int64x2_t, uint64x2_t + +/// Extract vector from pair of vectors +name = vext +constn = N +multi_fn = static_assert_imm-out_exp_len-N +multi_fn = matchn-out_exp_len, simd_shuffle-out_len-noext, a, b, {asc-n-out_len} +a = 0., 2., 2., 3. +b = 3., 4., 5., 6., +n = HFLEN +validate 2., 3., 3., 4. + +aarch64 = ext +generate float64x2_t + +arm = "vext.8" +generate float*_t + /// Multiply-add to accumulator name = vmla multi_fn = simd_add, a, {simd_mul, b, c} @@ -1054,7 +1100,7 @@ generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint name = vmovn_high no-q multi_fn = simd_cast, c:in_t0, b -multi_fn = simd_shuffle-out_len-noext, a, c, {asc-out_len} +multi_fn = simd_shuffle-out_len-noext, a, c, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 4, 5 b = 2, 3, 4, 5, 12, 13, 14, 15 validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15 @@ -1408,7 +1454,7 @@ generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t name = vsubhn_high no-q multi_fn = vsubhn-noqself-noext, d:in_t0, b, c -multi_fn = simd_shuffle-out_len-noext, a, d, {asc-out_len} +multi_fn = simd_shuffle-out_len-noext, a, d, {asc-0-out_len} a = MAX, 0, MAX, 0, MAX, 0, MAX, 0 b = MAX, 1, MAX, 1, MAX, 1, MAX, 1 c = 1, 0, 1, 0, 1, 0, 1, 0 diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 3251b9f6bf..bde7d06da6 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -83,6 +83,40 @@ fn type_len(t: &str) -> usize { } } +fn type_exp_len(t: &str) -> usize { + match t { + "int8x8_t" => 3, + "int8x16_t" => 4, + "int16x4_t" => 2, + "int16x8_t" => 3, + "int32x2_t" => 1, + "int32x4_t" => 2, + "int64x1_t" => 0, + "int64x2_t" => 1, + "uint8x8_t" => 3, + "uint8x16_t" => 4, + "uint16x4_t" => 2, + "uint16x8_t" => 3, + "uint32x2_t" => 1, + "uint32x4_t" => 2, + "uint64x1_t" 
=> 0, + "uint64x2_t" => 1, + "float16x4_t" => 2, + "float16x8_t" => 3, + "float32x2_t" => 1, + "float32x4_t" => 2, + "float64x1_t" => 0, + "float64x2_t" => 1, + "poly8x8_t" => 3, + "poly8x16_t" => 4, + "poly16x4_t" => 2, + "poly16x8_t" => 3, + "poly64x1_t" => 0, + "poly64x2_t" => 1, + _ => panic!("unknown type: {}", t), + } +} + fn type_to_suffix(t: &str) -> &str { match t { "int8x8_t" => "_s8", @@ -328,14 +362,17 @@ fn type_to_half(t: &str) -> &str { } } -fn asc(x: usize) -> &'static str { - match x { - 2 => "[0, 1]", - 4 => "[0, 1, 2, 3]", - 8 => "[0, 1, 2, 3, 4, 5, 6, 7]", - 16 => "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]", - _ => panic!("unknown transpose order of len {}", x), +fn asc(start: i32, len: usize) -> String { + let mut s = String::from("["); + for i in 0..len { + if i != 0 { + s.push_str(", "); + } + let n = start + i as i32; + s.push_str(&n.to_string()); } + s.push_str("]"); + s } fn transpose1(x: usize) -> &'static str { @@ -508,6 +545,74 @@ fn half_bits(t: &str) -> &'static str { } } +fn type_len_str(t: &str) -> &'static str { + match t { + "int8x8_t" => "8", + "int8x16_t" => "16", + "int16x4_t" => "4", + "int16x8_t" => "8", + "int32x2_t" => "2", + "int32x4_t" => "4", + "int64x1_t" => "1", + "int64x2_t" => "2", + "uint8x8_t" => "8", + "uint8x16_t" => "16", + "uint16x4_t" => "4", + "uint16x8_t" => "8", + "uint32x2_t" => "2", + "uint32x4_t" => "4", + "uint64x1_t" => "1", + "uint64x2_t" => "2", + "float16x4_t" => "4", + "float16x8_t" => "8", + "float32x2_t" => "2", + "float32x4_t" => "4", + "float64x1_t" => "1", + "float64x2_t" => "2", + "poly8x8_t" => "8", + "poly8x16_t" => "16", + "poly16x4_t" => "4", + "poly16x8_t" => "8", + "poly64x1_t" => "1", + "poly64x2_t" => "2", + _ => panic!("unknown type: {}", t), + } +} + +fn type_half_len_str(t: &str) -> &'static str { + match t { + "int8x8_t" => "4", + "int8x16_t" => "8", + "int16x4_t" => "2", + "int16x8_t" => "4", + "int32x2_t" => "1", + "int32x4_t" => "2", + "int64x1_t" => "0", + 
"int64x2_t" => "1", + "uint8x8_t" => "4", + "uint8x16_t" => "8", + "uint16x4_t" => "2", + "uint16x8_t" => "4", + "uint32x2_t" => "1", + "uint32x4_t" => "2", + "uint64x1_t" => "0", + "uint64x2_t" => "1", + "float16x4_t" => "2", + "float16x8_t" => "4", + "float32x2_t" => "1", + "float32x4_t" => "2", + "float64x1_t" => "0", + "float64x2_t" => "1", + "poly8x8_t" => "4", + "poly8x16_t" => "8", + "poly16x4_t" => "2", + "poly16x8_t" => "4", + "poly64x1_t" => "0", + "poly64x2_t" => "1", + _ => panic!("unknown type: {}", t), + } +} + fn map_val<'v>(t: &str, v: &'v str) -> &'v str { match v { "FALSE" => false_val(t), @@ -518,6 +623,8 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str { "BITS" => bits(t), "BITS_M1" => bits_minus_one(t), "HFBITS" => half_bits(t), + "LEN" => type_len_str(t), + "HFLEN" => type_half_len_str(t), o => o, } } @@ -529,9 +636,16 @@ fn gen_aarch64( current_name: &str, current_aarch64: &Option, link_aarch64: &Option, + constn: &Option, in_t: &[&str; 3], out_t: &str, - current_tests: &[(Vec, Vec, Vec, Vec)], + current_tests: &[( + Vec, + Vec, + Vec, + Option, + Vec, + )], suffix: Suffix, para_num: i32, fixed: &Vec, @@ -621,27 +735,54 @@ fn gen_aarch64( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); + calls.push_str(&get_call( + &multi_fn[i], + current_name, + in_t, + out_t, + fixed, + None, + )); } calls } else { String::new() }; + let const_declare = if let Some(constn) = constn { + format!(r#""#, constn) + } else { + String::new() + }; + let const_assert = if let Some(constn) = constn { + format!( + r#", {} = {}"#, + constn, + map_val(in_t[1], current_tests[0].3.as_ref().unwrap()) + ) + } else { + String::new() + }; + let const_legacy = if constn.is_some() { + format!("\n#[rustc_legacy_const_generics({})]", para_num) + } else { + String::new() + }; let call = match (multi_calls.len(), para_num, fixed.len()) { (0, 1, 0) => format!( - r#"pub unsafe fn {}(a: {}) -> {} {{ + r#"pub unsafe fn 
{}{}(a: {}) -> {} {{ {}{}(a) }}"#, - name, in_t[0], out_t, ext_c, current_fn, + name, const_declare, in_t[0], out_t, ext_c, current_fn, ), (0, 1, _) => { let fixed: Vec = fixed.iter().take(type_len(in_t[0])).cloned().collect(); format!( - r#"pub unsafe fn {}(a: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}) -> {} {{ let b{}; {}{}(a, transmute(b)) }}"#, name, + const_declare, in_t[0], out_t, values(in_t[0], &fixed), @@ -650,34 +791,34 @@ fn gen_aarch64( ) } (0, 2, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ {}{}(a, b) }}"#, - name, in_t[0], in_t[1], out_t, ext_c, current_fn, + name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn, ), (0, 3, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ {}{}(a, b, c) }}"#, - name, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, + name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, ), (_, 1, _) => format!( - r#"pub unsafe fn {}(a: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}) -> {} {{ {}{} }}"#, - name, in_t[0], out_t, ext_c, multi_calls, + name, const_declare, in_t[0], out_t, ext_c, multi_calls, ), (_, 2, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ {}{} }}"#, - name, in_t[0], in_t[1], out_t, ext_c, multi_calls, + name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls, ), (_, 3, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ {}{} }}"#, - name, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, + name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, ), (_, _, _) => String::new(), }; @@ -686,10 +827,10 @@ fn gen_aarch64( {} #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr({}))] +#[cfg_attr(test, assert_instr({}{}))]{} {} "#, - current_comment, current_aarch64, call + 
current_comment, current_aarch64, const_assert, const_legacy, call ); let test = gen_test( @@ -708,7 +849,13 @@ fn gen_test( name: &str, in_t: &[&str; 3], out_t: &str, - current_tests: &[(Vec, Vec, Vec, Vec)], + current_tests: &[( + Vec, + Vec, + Vec, + Option, + Vec, + )], len_in: [usize; 3], len_out: usize, para_num: i32, @@ -719,11 +866,16 @@ fn gen_test( unsafe fn test_{}() {{"#, name, ); - for (a, b, c, e) in current_tests { + for (a, b, c, n, e) in current_tests { let a: Vec = a.iter().take(len_in[0]).cloned().collect(); let b: Vec = b.iter().take(len_in[1]).cloned().collect(); let c: Vec = c.iter().take(len_in[2]).cloned().collect(); let e: Vec = e.iter().take(len_out).cloned().collect(); + let const_value = if let Some(constn) = n { + format!(r#"::<{}>"#, map_val(in_t[1], constn)) + } else { + String::new() + }; let t = { match para_num { 1 => { @@ -731,13 +883,14 @@ fn gen_test( r#" let a{}; let e{}; - let r: {} = transmute({}(transmute(a))); + let r: {} = transmute({}{}(transmute(a))); assert_eq!(r, e); "#, values(in_t[0], &a), values(out_t, &e), type_to_global_type(out_t), - name + name, + const_value ) } 2 => { @@ -746,14 +899,15 @@ fn gen_test( let a{}; let b{}; let e{}; - let r: {} = transmute({}(transmute(a), transmute(b))); + let r: {} = transmute({}{}(transmute(a), transmute(b))); assert_eq!(r, e); "#, values(in_t[0], &a), values(in_t[1], &b), values(out_t, &e), type_to_global_type(out_t), - name + name, + const_value ) } 3 => { @@ -763,7 +917,7 @@ fn gen_test( let b{}; let c{}; let e{}; - let r: {} = transmute({}(transmute(a), transmute(b), transmute(c))); + let r: {} = transmute({}{}(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); "#, values(in_t[0], &a), @@ -771,7 +925,8 @@ fn gen_test( values(in_t[2], &c), values(out_t, &e), type_to_global_type(out_t), - name + name, + const_value ) } _ => { @@ -795,9 +950,16 @@ fn gen_arm( link_arm: &Option, current_aarch64: &Option, link_aarch64: &Option, + constn: &Option, in_t: &[&str; 3], 
out_t: &str, - current_tests: &[(Vec, Vec, Vec, Vec)], + current_tests: &[( + Vec, + Vec, + Vec, + Option, + Vec, + )], suffix: Suffix, para_num: i32, fixed: &Vec, @@ -900,27 +1062,54 @@ fn gen_arm( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); + calls.push_str(&get_call( + &multi_fn[i], + current_name, + in_t, + out_t, + fixed, + None, + )); } calls } else { String::new() }; + let const_declare = if let Some(constn) = constn { + format!(r#""#, constn) + } else { + String::new() + }; + let const_assert = if let Some(constn) = constn { + format!( + r#", {} = {}"#, + constn, + map_val(in_t[1], current_tests[0].3.as_ref().unwrap()) + ) + } else { + String::new() + }; + let const_legacy = if constn.is_some() { + format!("\n#[rustc_legacy_const_generics({})]", para_num) + } else { + String::new() + }; let call = match (multi_calls.len(), para_num, fixed.len()) { (0, 1, 0) => format!( - r#"pub unsafe fn {}(a: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}) -> {} {{ {}{}(a) }}"#, - name, in_t[0], out_t, ext_c, current_fn, + name, const_declare, in_t[0], out_t, ext_c, current_fn, ), (0, 1, _) => { let fixed: Vec = fixed.iter().take(type_len(in_t[0])).cloned().collect(); format!( - r#"pub unsafe fn {}(a: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}) -> {} {{ let b{}; {}{}(a, transmute(b)) }}"#, name, + const_declare, in_t[0], out_t, values(in_t[0], &fixed), @@ -929,34 +1118,34 @@ fn gen_arm( ) } (0, 2, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ {}{}(a, b) }}"#, - name, in_t[0], in_t[1], out_t, ext_c, current_fn, + name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn, ), (0, 3, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ {}{}(a, b) }}"#, - name, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, + name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, 
current_fn, ), (_, 1, _) => format!( - r#"pub unsafe fn {}(a: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}) -> {} {{ {}{} }}"#, - name, in_t[0], out_t, ext_c, multi_calls, + name, const_declare, in_t[0], out_t, ext_c, multi_calls, ), (_, 2, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ {}{} }}"#, - name, in_t[0], in_t[1], out_t, ext_c, multi_calls, + name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls, ), (_, 3, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ {}{} }}"#, - name, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, + name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, ), (_, _, _) => String::new(), }; @@ -966,13 +1155,16 @@ fn gen_arm( #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}{}))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{} {} "#, current_comment, expand_intrinsic(¤t_arm, in_t[1]), + const_assert, expand_intrinsic(¤t_aarch64, in_t[1]), + const_assert, + const_legacy, call, ); let test = gen_test( @@ -1070,10 +1262,73 @@ fn get_call( in_t: &[&str; 3], out_t: &str, fixed: &Vec, + n: Option, ) -> String { let params: Vec<_> = in_str.split(',').map(|v| v.trim().to_string()).collect(); assert!(params.len() > 0); let mut fn_name = params[0].clone(); + if fn_name == "transpose-1-in_len" { + return transpose1(type_len(in_t[1])).to_string(); + } + if fn_name == "transpose-2-in_len" { + return transpose2(type_len(in_t[1])).to_string(); + } + if fn_name == "zip-1-in_len" { + return zip1(type_len(in_t[1])).to_string(); + } + if fn_name == "zip-2-in_len" { + return zip2(type_len(in_t[1])).to_string(); + } + if 
fn_name.starts_with("asc") { + let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); + let start = match &*fn_format[1] { + "0" => 0, + "n" => n.unwrap(), + s => s.parse::().unwrap(), + }; + let len = match &*fn_format[2] { + "out_len" => type_len(out_t), + "in_len" => type_len(in_t[1]), + _ => 0, + }; + return asc(start, len); + } + if fn_name.starts_with("static_assert_imm") { + let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); + let len = match &*fn_format[1] { + "out_exp_len" => type_exp_len(out_t), + "in_exp_len" => type_exp_len(in_t[1]), + _ => 0, + }; + let sa = format!(r#"static_assert_imm{}!({});"#, len, fn_format[2]); + return sa; + } + if fn_name.starts_with("matchn") { + let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); + let len = match &*fn_format[1] { + "out_exp_len" => type_exp_len(out_t), + "in_exp_len" => type_exp_len(in_t[1]), + _ => 0, + }; + let mut call = format!("match N & 0b{} {{\n", "1".repeat(len)); + let mut sub_call = String::new(); + for p in 1..params.len() { + if !sub_call.is_empty() { + sub_call.push_str(", "); + } + sub_call.push_str(¶ms[p]); + } + for i in 0..(2u32.pow(len as u32) as usize) { + let sub_match = format!( + " {} => {},\n", + i, + get_call(&sub_call, current_name, in_t, out_t, fixed, Some(i as i32)) + ); + call.push_str(&sub_match); + } + call.push_str(" _ => unreachable_unchecked(),\n }"); + return call; + } let mut re: Option<(String, String)> = None; let mut param_str = String::new(); let mut i = 1; @@ -1104,6 +1359,7 @@ fn get_call( in_t, out_t, fixed, + n.clone(), ); if !param_str.is_empty() { param_str.push_str(", "); @@ -1150,21 +1406,6 @@ fn get_call( }); return format!(r#"[{}]"#, &half[..half.len() - 2]); } - if fn_name == "asc-out_len" { - return asc(type_len(out_t)).to_string(); - } - if fn_name == "transpose-1-in_len" { - return transpose1(type_len(in_t[1])).to_string(); - } - if fn_name == "transpose-2-in_len" { - return 
transpose2(type_len(in_t[1])).to_string(); - } - if fn_name == "zip-1-in_len" { - return zip1(type_len(in_t[1])).to_string(); - } - if fn_name == "zip-2-in_len" { - return zip2(type_len(in_t[1])).to_string(); - } if fn_name.contains('-') { let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); assert_eq!(fn_format.len(), 3); @@ -1225,13 +1466,21 @@ fn main() -> io::Result<()> { let mut current_aarch64: Option = None; let mut link_arm: Option = None; let mut link_aarch64: Option = None; + let mut constn: Option = None; let mut para_num = 2; let mut suffix: Suffix = Normal; let mut a: Vec = Vec::new(); let mut b: Vec = Vec::new(); let mut c: Vec = Vec::new(); + let mut n: Option = None; let mut fixed: Vec = Vec::new(); - let mut current_tests: Vec<(Vec, Vec, Vec, Vec)> = Vec::new(); + let mut current_tests: Vec<( + Vec, + Vec, + Vec, + Option, + Vec, + )> = Vec::new(); let mut multi_fn: Vec = Vec::new(); // @@ -1302,12 +1551,14 @@ mod test { link_aarch64 = None; link_arm = None; current_tests = Vec::new(); + constn = None; para_num = 2; suffix = Normal; a = Vec::new(); b = Vec::new(); c = Vec::new(); fixed = Vec::new(); + n = None; multi_fn = Vec::new(); } else if line.starts_with("//") { } else if line.starts_with("name = ") { @@ -1316,6 +1567,8 @@ mod test { current_fn = Some(String::from(&line[5..])); } else if line.starts_with("multi_fn = ") { multi_fn.push(String::from(&line[11..])); + } else if line.starts_with("constn = ") { + constn = Some(String::from(&line[9..])); } else if line.starts_with("arm = ") { current_arm = Some(String::from(&line[6..])); } else if line.starts_with("aarch64 = ") { @@ -1332,11 +1585,13 @@ mod test { b = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("c = ") { c = line[4..].split(',').map(|v| v.trim().to_string()).collect(); + } else if line.starts_with("n = ") { + n = Some(String::from(&line[4..])); } else if line.starts_with("fixed = ") { fixed = 
line[8..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("validate ") { let e = line[9..].split(',').map(|v| v.trim().to_string()).collect(); - current_tests.push((a.clone(), b.clone(), c.clone(), e)); + current_tests.push((a.clone(), b.clone(), c.clone(), n.clone(), e)); } else if line.starts_with("link-aarch64 = ") { link_aarch64 = Some(String::from(&line[15..])); } else if line.starts_with("link-arm = ") { @@ -1391,6 +1646,7 @@ mod test { &link_arm, &current_aarch64, &link_aarch64, + &constn, &in_t, &out_t, &current_tests, @@ -1408,6 +1664,7 @@ mod test { &current_name, &current_aarch64, &link_aarch64, + &constn, &in_t, &out_t, &current_tests,