Skip to content

Update arm vcvt intrinsics to use llvm.fpto(su)i.sat #1194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2427,31 +2427,51 @@ pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v1i64.v1f64")]
fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
}
vcvt_s64_f64_(a)
}

/// Floating-point convert to signed fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i64.v2f64")]
fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
}
vcvtq_s64_f64_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v1i64.v1f64")]
fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
}
vcvt_u64_f64_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i64.v2f64")]
fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
}
vcvtq_u64_f64_(a)
}

/// Floating-point convert to signed integer, rounding to nearest with ties to away
Expand Down
32 changes: 28 additions & 4 deletions crates/core_arch/src/arm_shared/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2407,7 +2407,13 @@ vcvtq_n_u32_f32_(a, N)
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t;
}
vcvt_s32_f32_(a)
}

/// Floating-point convert to signed fixed-point, rounding toward zero
Expand All @@ -2417,7 +2423,13 @@ pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
}
vcvtq_s32_f32_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
Expand All @@ -2427,7 +2439,13 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t;
}
vcvt_u32_f32_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
Expand All @@ -2437,7 +2455,13 @@ pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
simd_cast(a)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
}
vcvtq_u32_f32_(a)
}

/// Set all vector lanes to the same value
Expand Down
6 changes: 4 additions & 2 deletions crates/stdarch-gen/neon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -1040,26 +1040,28 @@ generate f32:u32, f64:u64
/// Floating-point convert to signed fixed-point, rounding toward zero
name = vcvt
double-suffixes
fn = simd_cast
link-aarch64 = llvm.fptosi.sat._EXT2_._EXT_
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -2, 3

aarch64 = fcvtzs
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t

link-arm = llvm.fptosi.sat._EXT2_._EXT_
arm = vcvt
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

/// Floating-point convert to unsigned fixed-point, rounding toward zero
name = vcvt
double-suffixes
fn = simd_cast
link-aarch64 = llvm.fptoui.sat._EXT2_._EXT_
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3

aarch64 = fcvtzu
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

link-arm = llvm.fptoui.sat._EXT2_._EXT_
arm = vcvt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

Expand Down
5 changes: 5 additions & 0 deletions crates/stdarch-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
"usad8" | "vfma" | "vfms" => 27,
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,

// Temporary, currently the fptosi.sat and fptoui.sat LLVM
// intrinsics emit unnecessary code on arm. This can be
// removed once it has been addressed in LLVM.
"fcvtzu" | "fcvtzs" | "vcvt" => 64,

// Original limit was 20 instructions, but ARM DSP Intrinsics
// are exactly 20 instructions long. So, bump the limit to 22
// instead of adding here a long list of exceptions.
Expand Down