Skip to content

Commit b2028d5

Browse files
gnzlbg authored and alexcrichton committed
[neon] reciprocal square-root estimate (#121)
1 parent 93cc250 commit b2028d5

File tree

3 files changed

+54
-0
lines changed

3 files changed

+54
-0
lines changed

ci/run.sh

+11
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,16 @@ export RUSTFLAGS="$RUSTFLAGS -C codegen-units=1"
99
# having only one thread increases debuggability to be worth it.
1010
export RUST_TEST_THREADS=1
1111

12+
# FIXME(rust-lang-nursery/stdsimd#120) run-time feature detection for ARM Neon
13+
case ${TARGET} in
14+
aarch*)
15+
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+neon"
16+
;;
17+
*)
18+
;;
19+
esac
20+
21+
echo "RUSTFLAGS=${RUSTFLAGS}"
22+
1223
cargo test --target $TARGET
1324
cargo test --release --target $TARGET

src/arm/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,13 @@ pub use self::v6::*;
33
pub use self::v7::*;
44
#[cfg(target_arch = "aarch64")]
55
pub use self::v8::*;
6+
#[cfg(target_feature = "neon")]
7+
pub use self::neon::*;
68

79
mod v6;
810
mod v7;
911
#[cfg(target_arch = "aarch64")]
1012
mod v8;
13+
14+
#[cfg(target_feature = "neon")]
15+
mod neon;

src/arm/neon.rs

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//! ARM NEON intrinsics
2+
//!
3+
//! The reference is [ARM's NEON Intrinsics Reference](http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf). [ARM's NEON Intrinsics Online Database](https://developer.arm.com/technologies/neon/intrinsics) is also useful.
4+
5+
#[cfg(test)]
6+
use stdsimd_test::assert_instr;
7+
8+
use v64::{f32x2};
9+
10+
#[allow(improper_ctypes)]
11+
extern "C" {
12+
#[link_name = "llvm.aarch64.neon.frsqrte.v2f32"]
13+
fn frsqrte_v2f32(a: f32x2) -> f32x2;
14+
}
15+
16+
/// Reciprocal square-root estimate.
17+
#[inline(always)]
18+
#[target_feature = "+neon"]
19+
#[cfg_attr(test, assert_instr(frsqrte))]
20+
pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
21+
frsqrte_v2f32(a)
22+
}
23+
24+
#[cfg(test)]
25+
mod tests {
26+
use stdsimd_test::simd_test;
27+
28+
use v64::{f32x2};
29+
use arm::neon;
30+
31+
#[test]
32+
fn vrsqrt_f32() {
33+
let a = f32x2::new(1.0, 2.0);
34+
let e = f32x2::new(0.9980469, 0.7050781);
35+
let r = unsafe { neon::vrsqrte_f32(a) };
36+
assert_eq!(r, e);
37+
}
38+
}

0 commit comments

Comments
 (0)