Skip to content

Commit d688b9b

Browse files
committed
Update arm vcvt intrinsics to use llvm.fpto(su)i.sat
These intrinsics have the correct semantics for the desired fcvtz instruction, without any undefined behaviour. The previous simd_cast was undefined for infinite and NaN inputs, which could cause issues.
1 parent c158cfd commit d688b9b

File tree

4 files changed

+61
-10
lines changed

4 files changed

+61
-10
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

+24-4
Original file line numberDiff line numberDiff line change
@@ -2427,31 +2427,51 @@ pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
24272427
#[target_feature(enable = "neon")]
24282428
#[cfg_attr(test, assert_instr(fcvtzs))]
24292429
pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
2430-
simd_cast(a)
2430+
#[allow(improper_ctypes)]
2431+
extern "C" {
2432+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v1i64.v1f64")]
2433+
fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
2434+
}
2435+
vcvt_s64_f64_(a)
24312436
}
24322437

24332438
/// Floating-point convert to signed fixed-point, rounding toward zero
24342439
#[inline]
24352440
#[target_feature(enable = "neon")]
24362441
#[cfg_attr(test, assert_instr(fcvtzs))]
24372442
pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
2438-
simd_cast(a)
2443+
#[allow(improper_ctypes)]
2444+
extern "C" {
2445+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i64.v2f64")]
2446+
fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
2447+
}
2448+
vcvtq_s64_f64_(a)
24392449
}
24402450

24412451
/// Floating-point convert to unsigned fixed-point, rounding toward zero
24422452
#[inline]
24432453
#[target_feature(enable = "neon")]
24442454
#[cfg_attr(test, assert_instr(fcvtzu))]
24452455
pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
2446-
simd_cast(a)
2456+
#[allow(improper_ctypes)]
2457+
extern "C" {
2458+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v1i64.v1f64")]
2459+
fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
2460+
}
2461+
vcvt_u64_f64_(a)
24472462
}
24482463

24492464
/// Floating-point convert to unsigned fixed-point, rounding toward zero
24502465
#[inline]
24512466
#[target_feature(enable = "neon")]
24522467
#[cfg_attr(test, assert_instr(fcvtzu))]
24532468
pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
2454-
simd_cast(a)
2469+
#[allow(improper_ctypes)]
2470+
extern "C" {
2471+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i64.v2f64")]
2472+
fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
2473+
}
2474+
vcvtq_u64_f64_(a)
24552475
}
24562476

24572477
/// Floating-point convert to signed integer, rounding to nearest with ties to away

crates/core_arch/src/arm_shared/neon/generated.rs

+28-4
Original file line numberDiff line numberDiff line change
@@ -2407,7 +2407,13 @@ vcvtq_n_u32_f32_(a, N)
24072407
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
24082408
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
24092409
pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
2410-
simd_cast(a)
2410+
#[allow(improper_ctypes)]
2411+
extern "C" {
2412+
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
2413+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
2414+
fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t;
2415+
}
2416+
vcvt_s32_f32_(a)
24112417
}
24122418

24132419
/// Floating-point convert to signed fixed-point, rounding toward zero
@@ -2417,7 +2423,13 @@ pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
24172423
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
24182424
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
24192425
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
2420-
simd_cast(a)
2426+
#[allow(improper_ctypes)]
2427+
extern "C" {
2428+
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
2429+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
2430+
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
2431+
}
2432+
vcvtq_s32_f32_(a)
24212433
}
24222434

24232435
/// Floating-point convert to unsigned fixed-point, rounding toward zero
@@ -2427,7 +2439,13 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
24272439
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
24282440
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
24292441
pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
2430-
simd_cast(a)
2442+
#[allow(improper_ctypes)]
2443+
extern "C" {
2444+
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
2445+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
2446+
fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t;
2447+
}
2448+
vcvt_u32_f32_(a)
24312449
}
24322450

24332451
/// Floating-point convert to unsigned fixed-point, rounding toward zero
@@ -2437,7 +2455,13 @@ pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
24372455
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
24382456
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
24392457
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
2440-
simd_cast(a)
2458+
#[allow(improper_ctypes)]
2459+
extern "C" {
2460+
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
2461+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
2462+
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
2463+
}
2464+
vcvtq_u32_f32_(a)
24412465
}
24422466

24432467
/// Set all vector lanes to the same value

crates/stdarch-gen/neon.spec

+4-2
Original file line numberDiff line numberDiff line change
@@ -1040,26 +1040,28 @@ generate f32:u32, f64:u64
10401040
/// Floating-point convert to signed fixed-point, rounding toward zero
10411041
name = vcvt
10421042
double-suffixes
1043-
fn = simd_cast
1043+
link-aarch64 = llvm.fptosi.sat._EXT2_._EXT_
10441044
a = -1.1, 2.1, -2.9, 3.9
10451045
validate -1, 2, -2, 3
10461046

10471047
aarch64 = fcvtzs
10481048
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t
10491049

1050+
link-arm = llvm.fptosi.sat._EXT2_._EXT_
10501051
arm = vcvt
10511052
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
10521053

10531054
/// Floating-point convert to unsigned fixed-point, rounding toward zero
10541055
name = vcvt
10551056
double-suffixes
1056-
fn = simd_cast
1057+
link-aarch64 = llvm.fptoui.sat._EXT2_._EXT_
10571058
a = 1.1, 2.1, 2.9, 3.9
10581059
validate 1, 2, 2, 3
10591060

10601061
aarch64 = fcvtzu
10611062
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
10621063

1064+
link-arm = llvm.fptoui.sat._EXT2_._EXT_
10631065
arm = vcvt
10641066
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
10651067

crates/stdarch-test/src/lib.rs

+5
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,11 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
125125
"usad8" | "vfma" | "vfms" => 27,
126126
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
127127

128+
// Temporary, currently the fptosi.sat and fptoui.sat LLVM
129+
// intrinsics emit unnecessary code on arm. This can be
130+
// removed once it has been addressed in LLVM.
131+
"fcvtzu" | "fcvtzs" | "vcvt" => 64,
132+
128133
// Original limit was 20 instructions, but ARM DSP Intrinsics
129134
// are exactly 20 instructions long. So, bump the limit to 22
130135
// instead of adding here a long list of exceptions.

0 commit comments

Comments
 (0)