diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 9dc1b049d2..28ea2592a0 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -2427,7 +2427,12 @@ pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzs))]
 pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v1i64.v1f64")]
+        fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
+    }
+    vcvt_s64_f64_(a)
 }
 
 /// Floating-point convert to signed fixed-point, rounding toward zero
@@ -2435,7 +2440,12 @@ pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzs))]
 pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i64.v2f64")]
+        fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
+    }
+    vcvtq_s64_f64_(a)
 }
 
 /// Floating-point convert to unsigned fixed-point, rounding toward zero
@@ -2443,7 +2453,12 @@ pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzu))]
 pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v1i64.v1f64")]
+        fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
+    }
+    vcvt_u64_f64_(a)
 }
 
 /// Floating-point convert to unsigned fixed-point, rounding toward zero
@@ -2451,7 +2466,12 @@ pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzu))]
 pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i64.v2f64")]
+        fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+    }
+    vcvtq_u64_f64_(a)
 }
 
 /// Floating-point convert to signed integer, rounding to nearest with ties to away
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index b1ec4dbc5a..7dc5b53e10 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -2407,7 +2407,13 @@ vcvtq_n_u32_f32_(a, N)
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
 pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
+        fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t;
+    }
+vcvt_s32_f32_(a)
 }
 
 /// Floating-point convert to signed fixed-point, rounding toward zero
@@ -2417,7 +2423,13 @@ pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
 pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
+        fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
+    }
+vcvtq_s32_f32_(a)
 }
 
 /// Floating-point convert to unsigned fixed-point, rounding toward zero
@@ -2427,7 +2439,13 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
 pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
+        fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t;
+    }
+vcvt_u32_f32_(a)
 }
 
 /// Floating-point convert to unsigned fixed-point, rounding toward zero
@@ -2437,7 +2455,13 @@ pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
 pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    simd_cast(a)
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
+        fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
+    }
+vcvtq_u32_f32_(a)
 }
 
 /// Set all vector lanes to the same value
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index 7d4d878284..5850be482b 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -1040,26 +1040,28 @@ generate f32:u32, f64:u64
 /// Floating-point convert to signed fixed-point, rounding toward zero
 name = vcvt
 double-suffixes
-fn = simd_cast
+link-aarch64 = llvm.fptosi.sat._EXT2_._EXT_
 a = -1.1, 2.1, -2.9, 3.9
 validate -1, 2, -2, 3
 
 aarch64 = fcvtzs
 generate float64x1_t:int64x1_t, float64x2_t:int64x2_t
 
+link-arm = llvm.fptosi.sat._EXT2_._EXT_
 arm = vcvt
 generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
 
 /// Floating-point convert to unsigned fixed-point, rounding toward zero
 name = vcvt
 double-suffixes
-fn = simd_cast
+link-aarch64 = llvm.fptoui.sat._EXT2_._EXT_
 a = 1.1, 2.1, 2.9, 3.9
 validate 1, 2, 2, 3
 
 aarch64 = fcvtzu
 generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 
+link-arm = llvm.fptoui.sat._EXT2_._EXT_
 arm = vcvt
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs
index 8ea14f0f7f..10834c00e7 100644
--- a/crates/stdarch-test/src/lib.rs
+++ b/crates/stdarch-test/src/lib.rs
@@ -125,6 +125,11 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
                 "usad8" | "vfma" | "vfms" => 27,
                 "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
 
+                // Temporary, currently the fptosi.sat and fptoui.sat LLVM
+                // intrinsics emit unnecessary code on arm. This can be
+                // removed once it has been addressed in LLVM.
+                "fcvtzu" | "fcvtzs" | "vcvt" => 64,
+
                 // Original limit was 20 instructions, but ARM DSP Intrinsics
                 // are exactly 20 instructions long. So, bump the limit to 22
                 // instead of adding here a long list of exceptions.