From 0632492a0edd4d1fa94ae693944c846105598321 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 17 Mar 2018 15:31:55 +0100 Subject: [PATCH 1/2] split the portable vector tests into separate crates --- ci/run.sh | 14 +- coresimd/ppsv/api/arithmetic_ops.rs | 27 ++- coresimd/ppsv/api/arithmetic_reductions.rs | 5 +- coresimd/ppsv/api/bitwise_ops.rs | 20 +- coresimd/ppsv/api/bitwise_reductions.rs | 13 +- coresimd/ppsv/api/bool_vectors.rs | 5 +- coresimd/ppsv/api/boolean_reductions.rs | 1 + coresimd/ppsv/api/cmp.rs | 14 +- coresimd/ppsv/api/default.rs | 7 +- coresimd/ppsv/api/eq.rs | 3 +- coresimd/ppsv/api/fmt.rs | 8 +- coresimd/ppsv/api/from.rs | 62 +++-- coresimd/ppsv/api/from_bits.rs | 48 ++-- coresimd/ppsv/api/hash.rs | 9 +- coresimd/ppsv/api/load_store.rs | 22 +- coresimd/ppsv/api/minimal.rs | 13 +- coresimd/ppsv/api/minmax_reductions.rs | 5 +- coresimd/ppsv/api/mod.rs | 266 ++++++++++----------- coresimd/ppsv/api/neg.rs | 4 +- coresimd/ppsv/api/partial_eq.rs | 4 +- coresimd/ppsv/api/shifts.rs | 5 +- coresimd/ppsv/codegen/max.rs | 4 +- coresimd/ppsv/codegen/min.rs | 4 +- coresimd/ppsv/mod.rs | 6 +- coresimd/ppsv/v128.rs | 89 ++++--- coresimd/ppsv/v16.rs | 18 +- coresimd/ppsv/v256.rs | 93 +++---- coresimd/ppsv/v32.rs | 60 +++-- coresimd/ppsv/v512.rs | 81 ++++--- coresimd/ppsv/v64.rs | 68 +++--- crates/coresimd/src/lib.rs | 10 + crates/coresimd/tests/v128.rs | 42 ++++ crates/coresimd/tests/v16.rs | 42 ++++ crates/coresimd/tests/v256.rs | 42 ++++ crates/coresimd/tests/v32.rs | 42 ++++ crates/coresimd/tests/v512.rs | 42 ++++ crates/coresimd/tests/v64.rs | 42 ++++ 37 files changed, 814 insertions(+), 426 deletions(-) create mode 100644 crates/coresimd/tests/v128.rs create mode 100644 crates/coresimd/tests/v16.rs create mode 100644 crates/coresimd/tests/v256.rs create mode 100644 crates/coresimd/tests/v32.rs create mode 100644 crates/coresimd/tests/v512.rs create mode 100644 crates/coresimd/tests/v64.rs diff --git a/ci/run.sh b/ci/run.sh index 37fa4a79e5..1ca1180d18 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -2,21 +2,14 @@ set -ex +: ${TARGET?"The TARGET environment variable must be set."} + # Tests are all super fast anyway, and they fault often enough on travis that # having only one thread increases debuggability to be worth it. export RUST_TEST_THREADS=1 #export RUST_BACKTRACE=1 #export RUST_TEST_NOCAPTURE=1 -# FIXME(rust-lang-nursery/stdsimd#120) run-time feature detection for ARM Neon -case ${TARGET} in - aarch*) - export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+neon" - ;; - *) - ;; -esac - FEATURES="strict,$FEATURES" echo "RUSTFLAGS=${RUSTFLAGS}" @@ -25,7 +18,8 @@ echo "OBJDUMP=${OBJDUMP}" cargo_test() { cmd="cargo test --target=$TARGET --features $FEATURES $1" - cmd="$cmd -p coresimd -p stdsimd --manifest-path crates/stdsimd/Cargo.toml" + cmd="$cmd -p coresimd -p stdsimd" + cmd="$cmd --manifest-path crates/stdsimd/Cargo.toml" cmd="$cmd -- $2" $cmd } diff --git a/coresimd/ppsv/api/arithmetic_ops.rs b/coresimd/ppsv/api/arithmetic_ops.rs index e98745014b..3f1a0060c6 100644 --- a/coresimd/ppsv/api/arithmetic_ops.rs +++ b/coresimd/ppsv/api/arithmetic_ops.rs @@ -1,76 +1,82 @@ //! Lane-wise arithmetic operations. +#![allow(unused)] macro_rules! 
impl_arithmetic_ops { ($id:ident) => { - impl ops::Add for $id { + impl ::ops::Add for $id { type Output = Self; #[inline] fn add(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_add; unsafe { simd_add(self, other) } } } - impl ops::Sub for $id { + impl ::ops::Sub for $id { type Output = Self; #[inline] fn sub(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_sub; unsafe { simd_sub(self, other) } } } - impl ops::Mul for $id { + impl ::ops::Mul for $id { type Output = Self; #[inline] fn mul(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_mul; unsafe { simd_mul(self, other) } } } - impl ops::Div for $id { + impl ::ops::Div for $id { type Output = Self; #[inline] fn div(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_div; unsafe { simd_div(self, other) } } } - impl ops::Rem for $id { + impl ::ops::Rem for $id { type Output = Self; #[inline] fn rem(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_rem; unsafe { simd_rem(self, other) } } } - impl ops::AddAssign for $id { + impl ::ops::AddAssign for $id { #[inline] fn add_assign(&mut self, other: Self) { *self = *self + other; } } - impl ops::SubAssign for $id { + impl ::ops::SubAssign for $id { #[inline] fn sub_assign(&mut self, other: Self) { *self = *self - other; } } - impl ops::MulAssign for $id { + impl ::ops::MulAssign for $id { #[inline] fn mul_assign(&mut self, other: Self) { *self = *self * other; } } - impl ops::DivAssign for $id { + impl ::ops::DivAssign for $id { #[inline] fn div_assign(&mut self, other: Self) { *self = *self / other; } } - impl ops::RemAssign for $id { + impl ::ops::RemAssign for $id { #[inline] fn rem_assign(&mut self, other: Self) { *self = *self % other; @@ -80,7 +86,6 @@ macro_rules! impl_arithmetic_ops { } #[cfg(test)] -#[macro_export] macro_rules! test_arithmetic_ops { ($id:ident, $elem_ty:ident) => { #[test] diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs index 88bba0291a..932ac49eb2 100644 --- a/coresimd/ppsv/api/arithmetic_reductions.rs +++ b/coresimd/ppsv/api/arithmetic_reductions.rs @@ -1,4 +1,5 @@ //! Implements portable arithmetic vector reductions. +#![allow(unused)] macro_rules! impl_arithmetic_reductions { ($id:ident, $elem_ty:ident) => { @@ -6,12 +7,12 @@ macro_rules! impl_arithmetic_reductions { /// Lane-wise addition of the vector elements. #[inline] pub fn sum(self) -> $elem_ty { - ReduceAdd::reduce_add(self) + super::codegen::sum::ReduceAdd::reduce_add(self) } /// Lane-wise multiplication of the vector elements. #[inline] pub fn product(self) -> $elem_ty { - ReduceMul::reduce_mul(self) + super::codegen::product::ReduceMul::reduce_mul(self) } } } diff --git a/coresimd/ppsv/api/bitwise_ops.rs b/coresimd/ppsv/api/bitwise_ops.rs index aa82b2e797..d6b8a6c01b 100644 --- a/coresimd/ppsv/api/bitwise_ops.rs +++ b/coresimd/ppsv/api/bitwise_ops.rs @@ -1,48 +1,52 @@ //! Lane-wise bitwise operations for integer and boolean vectors. +#![allow(unused)] macro_rules! 
impl_bitwise_ops { ($ty:ident, $true_val:expr) => { - impl ops::Not for $ty { + impl ::ops::Not for $ty { type Output = Self; #[inline] fn not(self) -> Self { Self::splat($true_val) ^ self } } - impl ops::BitXor for $ty { + impl ::ops::BitXor for $ty { type Output = Self; #[inline] fn bitxor(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_xor; unsafe { simd_xor(self, other) } } } - impl ops::BitAnd for $ty { + impl ::ops::BitAnd for $ty { type Output = Self; #[inline] fn bitand(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_and; unsafe { simd_and(self, other) } } } - impl ops::BitOr for $ty { + impl ::ops::BitOr for $ty { type Output = Self; #[inline] fn bitor(self, other: Self) -> Self { + use coresimd::simd_llvm::simd_or; unsafe { simd_or(self, other) } } } - impl ops::BitAndAssign for $ty { + impl ::ops::BitAndAssign for $ty { #[inline] fn bitand_assign(&mut self, other: Self) { *self = *self & other; } } - impl ops::BitOrAssign for $ty { + impl ::ops::BitOrAssign for $ty { #[inline] fn bitor_assign(&mut self, other: Self) { *self = *self | other; } } - impl ops::BitXorAssign for $ty { + impl ::ops::BitXorAssign for $ty { #[inline] fn bitxor_assign(&mut self, other: Self) { *self = *self ^ other; @@ -52,7 +56,6 @@ macro_rules! impl_bitwise_ops { } #[cfg(test)] -#[macro_export] macro_rules! test_int_bitwise_ops { ($id:ident, $elem_ty:ident) => { #[test] @@ -117,7 +120,6 @@ macro_rules! test_int_bitwise_ops { } #[cfg(test)] -#[macro_export] macro_rules! test_bool_bitwise_ops { ($id:ident) => { #[test] diff --git a/coresimd/ppsv/api/bitwise_reductions.rs b/coresimd/ppsv/api/bitwise_reductions.rs index 24c0c80038..124cc0b2c9 100644 --- a/coresimd/ppsv/api/bitwise_reductions.rs +++ b/coresimd/ppsv/api/bitwise_reductions.rs @@ -1,4 +1,5 @@ //! Implements portable bitwise vector reductions. +#![allow(unused)] macro_rules! impl_bitwise_reductions { ($id:ident, $elem_ty:ident) => { @@ -6,17 +7,17 @@ macro_rules! impl_bitwise_reductions { /// Lane-wise bitwise `and` of the vector elements. #[inline] pub fn and(self) -> $elem_ty { - ReduceAnd::reduce_and(self) + super::codegen::and::ReduceAnd::reduce_and(self) } /// Lane-wise bitwise `or` of the vector elements. #[inline] pub fn or(self) -> $elem_ty { - ReduceOr::reduce_or(self) + super::codegen::or::ReduceOr::reduce_or(self) } /// Lane-wise bitwise `xor` of the vector elements. #[inline] pub fn xor(self) -> $elem_ty { - ReduceXor::reduce_xor(self) + super::codegen::xor::ReduceXor::reduce_xor(self) } } } @@ -28,17 +29,17 @@ macro_rules! impl_bool_bitwise_reductions { /// Lane-wise bitwise `and` of the vector elements. #[inline] pub fn and(self) -> $elem_ty { - ReduceAnd::reduce_and(self) !=0 + super::codegen::and::ReduceAnd::reduce_and(self) !=0 } /// Lane-wise bitwise `or` of the vector elements. #[inline] pub fn or(self) -> $elem_ty { - ReduceOr::reduce_or(self) != 0 + super::codegen::or::ReduceOr::reduce_or(self) != 0 } /// Lane-wise bitwise `xor` of the vector elements. #[inline] pub fn xor(self) -> $elem_ty { - ReduceXor::reduce_xor(self) != 0 + super::codegen::xor::ReduceXor::reduce_xor(self) != 0 } } } diff --git a/coresimd/ppsv/api/bool_vectors.rs b/coresimd/ppsv/api/bool_vectors.rs index 01fa13f0b8..cde5360e7f 100644 --- a/coresimd/ppsv/api/bool_vectors.rs +++ b/coresimd/ppsv/api/bool_vectors.rs @@ -1,11 +1,12 @@ //! Minimal boolean vector implementation +#![allow(unused)] /// Minimal interface: all packed SIMD boolean vector types implement this. macro_rules! 
impl_bool_minimal { ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => { #[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))] - impl Clone for $id { + impl ::clone::Clone for $id { #[inline] // currently needed for correctness fn clone(&self) -> Self { *self @@ -59,6 +60,7 @@ macro_rules! impl_bool_minimal { /// If `index >= Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn extract_unchecked(self, index: usize) -> bool { + use coresimd::simd_llvm::simd_extract; let x: $elem_ty = simd_extract(self, index as u32); x != 0 } @@ -87,6 +89,7 @@ macro_rules! impl_bool_minimal { index: usize, new_value: bool, ) -> Self { + use coresimd::simd_llvm::simd_insert; simd_insert(self, index as u32, Self::bool_to_internal(new_value)) } } diff --git a/coresimd/ppsv/api/boolean_reductions.rs b/coresimd/ppsv/api/boolean_reductions.rs index 3c45fee48d..3e22a6edbc 100644 --- a/coresimd/ppsv/api/boolean_reductions.rs +++ b/coresimd/ppsv/api/boolean_reductions.rs @@ -1,4 +1,5 @@ //! Lane-wise boolean vector reductions. +#![allow(unused)] macro_rules! impl_bool_reductions { ($id:ident) => { diff --git a/coresimd/ppsv/api/cmp.rs b/coresimd/ppsv/api/cmp.rs index df5a9957bf..1d31a294a0 100644 --- a/coresimd/ppsv/api/cmp.rs +++ b/coresimd/ppsv/api/cmp.rs @@ -1,4 +1,5 @@ //! Lane-wise vector comparisons returning boolean vectors. +#![allow(unused)] macro_rules! impl_cmp { ($id:ident, $bool_ty:ident) => { @@ -6,36 +7,42 @@ macro_rules! impl_cmp { /// Lane-wise equality comparison. #[inline] pub fn eq(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_eq; unsafe { simd_eq(self, other) } } /// Lane-wise inequality comparison. #[inline] pub fn ne(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_ne; unsafe { simd_ne(self, other) } } /// Lane-wise less-than comparison. #[inline] pub fn lt(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_lt; unsafe { simd_lt(self, other) } } /// Lane-wise less-than-or-equals comparison. #[inline] pub fn le(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_le; unsafe { simd_le(self, other) } } /// Lane-wise greater-than comparison. #[inline] pub fn gt(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_gt; unsafe { simd_gt(self, other) } } /// Lane-wise greater-than-or-equals comparison. #[inline] pub fn ge(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_ge; unsafe { simd_ge(self, other) } } } @@ -48,36 +55,42 @@ macro_rules! impl_bool_cmp { /// Lane-wise equality comparison. #[inline] pub fn eq(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_eq; unsafe { simd_eq(self, other) } } /// Lane-wise inequality comparison. #[inline] pub fn ne(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_ne; unsafe { simd_ne(self, other) } } /// Lane-wise less-than comparison. #[inline] pub fn lt(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_gt; unsafe { simd_gt(self, other) } } /// Lane-wise less-than-or-equals comparison. #[inline] pub fn le(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_ge; unsafe { simd_ge(self, other) } } /// Lane-wise greater-than comparison. #[inline] pub fn gt(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_lt; unsafe { simd_lt(self, other) } } /// Lane-wise greater-than-or-equals comparison. #[inline] pub fn ge(self, other: $id) -> $bool_ty { + use coresimd::simd_llvm::simd_le; unsafe { simd_le(self, other) } } } @@ -85,7 +98,6 @@ macro_rules! 
impl_bool_cmp { } #[cfg(test)] -#[macro_export] macro_rules! test_cmp { ($id:ident, $elem_ty:ident, $bool_ty:ident, $true:expr, $false:expr) => { diff --git a/coresimd/ppsv/api/default.rs b/coresimd/ppsv/api/default.rs index 153bbe1ae3..98ff3b205a 100644 --- a/coresimd/ppsv/api/default.rs +++ b/coresimd/ppsv/api/default.rs @@ -1,8 +1,9 @@ //! Implements `Default` for vector types. +#![allow(unused)] macro_rules! impl_default { ($id:ident, $elem_ty:ident) => { - impl Default for $id { + impl ::default::Default for $id { #[inline] fn default() -> Self { Self::splat($elem_ty::default()) @@ -12,13 +13,11 @@ macro_rules! impl_default { } #[cfg(test)] -#[macro_export] macro_rules! test_default { ($id:ident, $elem_ty:ident) => { #[test] fn default() { - use ::coresimd::simd::*; - use std::default::Default; + use ::coresimd::simd::{$id}; let a = $id::default(); for i in 0..$id::lanes() { assert_eq!(a.extract(i), $elem_ty::default()); diff --git a/coresimd/ppsv/api/eq.rs b/coresimd/ppsv/api/eq.rs index bcbee31041..18c289e0d2 100644 --- a/coresimd/ppsv/api/eq.rs +++ b/coresimd/ppsv/api/eq.rs @@ -1,5 +1,6 @@ //! Implements `Eq` for vector types. +#![allow(unused)] macro_rules! impl_eq { - ($id:ident) => { impl Eq for $id {} } + ($id:ident) => { impl ::cmp::Eq for $id {} } } diff --git a/coresimd/ppsv/api/fmt.rs b/coresimd/ppsv/api/fmt.rs index 159a049bae..095a197bf0 100644 --- a/coresimd/ppsv/api/fmt.rs +++ b/coresimd/ppsv/api/fmt.rs @@ -1,10 +1,12 @@ //! Implements formating traits. +#![allow(unused)] macro_rules! impl_hex_fmt { ($id:ident, $elem_ty:ident) => { - impl fmt::LowerHex for $id { - fn fmt(&self, f: &mut fmt::Formatter) - -> fmt::Result { + impl ::fmt::LowerHex for $id { + fn fmt(&self, f: &mut ::fmt::Formatter) + -> ::fmt::Result { + use ::mem; write!(f, "{}(", stringify!($id))?; let n = mem::size_of_val(self) / mem::size_of::<$elem_ty>(); diff --git a/coresimd/ppsv/api/from.rs b/coresimd/ppsv/api/from.rs index f1008c1ea6..f9129860f6 100644 --- a/coresimd/ppsv/api/from.rs +++ b/coresimd/ppsv/api/from.rs @@ -1,32 +1,48 @@ //! Implements the From trait for vector types, which performs a lane-wise //! cast vector types with the same number of lanes. +#![allow(unused)] + +macro_rules! impl_from_impl { + ($from:ident, $to:ident) => { + impl ::convert::From<::simd::$from> for $to { + #[inline] + fn from(f: ::simd::$from) -> $to { + use coresimd::simd_llvm::simd_cast; + unsafe { simd_cast(f) } + } + } + } +} + +macro_rules! impl_from_ { + ($to:ident, $from:ident) => { + vector_impl!([impl_from_impl, $to, $from]); + } +} macro_rules! 
impl_from { - ($to:ident: $elem_ty:ident, $test_mod:ident | $($from:ident),+) => { + ($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => { $( - impl From<::simd::$from> for $to { - #[inline] - fn from(f: ::simd::$from) -> $to { - unsafe { simd_cast(f) } - } - } + impl_from_!($from, $to); )+ - #[cfg(test)] - mod $test_mod { - $( - #[test] - fn $from() { - use ::std::convert::{From, Into}; - use ::coresimd::simd::{$from, $to}; - use ::std::default::Default; - assert_eq!($to::lanes(), $from::lanes()); - let a: $from = $from::default(); - let b_0: $to = From::from(a); - let b_1: $to = a.into(); - assert_eq!(b_0, b_1); - } - )+ - } + $test_macro!( + #[cfg(test)] + mod $test_mod { + $( + #[test] + fn $from() { + use std::convert::{From, Into}; + use ::coresimd::simd::{$from, $to}; + use ::std::default::Default; + assert_eq!($to::lanes(), $from::lanes()); + let a: $from = $from::default(); + let b_0: $to = From::from(a); + let b_1: $to = a.into(); + assert_eq!(b_0, b_1); + } + )+ + } + ); } } diff --git a/coresimd/ppsv/api/from_bits.rs b/coresimd/ppsv/api/from_bits.rs index 43f82696dc..2658b61fe8 100644 --- a/coresimd/ppsv/api/from_bits.rs +++ b/coresimd/ppsv/api/from_bits.rs @@ -1,39 +1,47 @@ //! Implements the `FromBits` trait for vector types, which performs bitwise //! lossless transmutes between equally-sized vector types. +#![allow(unused)] -macro_rules! impl_from_bits_ { +macro_rules! impl_from_bits__ { ($to:ident: $($from:ident),+) => { $( impl ::simd::FromBits<$from> for $to { #[inline] fn from_bits(f: $from) -> $to { - unsafe { mem::transmute(f) } + unsafe { ::mem::transmute(f) } } } )+ } } +macro_rules! impl_from_bits_ { + ($to:ident: $($from:ident),+) => { + vector_impl!([impl_from_bits__, $to: $($from),+]); + } +} macro_rules! impl_from_bits { - ($to:ident: $elem_ty:ident, $test_mod:ident | $($from:ident),+) => { + ($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => { impl_from_bits_!($to: $($from),+); - #[cfg(test)] - mod $test_mod { - $( - #[test] - fn $from() { - use ::coresimd::simd::{$from, $to, FromBits, IntoBits}; - use ::std::{mem, default}; - use default::Default; - assert_eq!(mem::size_of::<$from>(), - mem::size_of::<$to>()); - let a: $from = $from::default(); - let b_0: $to = FromBits::from_bits(a); - let b_1: $to = a.into_bits(); - assert_eq!(b_0, b_1); - } - )+ - } + $test_macro!( + #[cfg(test)] + mod $test_mod { + $( + #[test] + fn $from() { + use ::coresimd::simd::*; + use ::std::mem; + assert_eq!(mem::size_of::<$from>(), + mem::size_of::<$to>()); + let a: $from = $from::default(); + let b_0: $to = FromBits::from_bits(a); + let b_1: $to = a.into_bits(); + assert_eq!(b_0, b_1); + } + )+ + } + ); } } diff --git a/coresimd/ppsv/api/hash.rs b/coresimd/ppsv/api/hash.rs index 47c135b4e1..02fe416f74 100644 --- a/coresimd/ppsv/api/hash.rs +++ b/coresimd/ppsv/api/hash.rs @@ -1,10 +1,11 @@ //! Implements `Hash`. +#![allow(unused)] macro_rules! impl_hash { ($id:ident, $elem_ty:ident) => { - impl hash::Hash for $id { + impl ::hash::Hash for $id { #[inline] - fn hash<H: hash::Hasher>(&self, state: &mut H) { + fn hash<H: ::hash::Hasher>(&self, state: &mut H) { union A { data: [$elem_ty; $id::lanes()], vec: $id } @@ -18,7 +19,6 @@ macro_rules! impl_hash { } #[cfg(test)] -#[macro_export] macro_rules! 
test_hash { ($id:ident, $elem_ty:ident) => { #[test] fn hash() { use ::coresimd::simd::$id; use ::std::collections::hash_map::DefaultHasher; use ::std::hash::{Hash, Hasher}; - use ::std::{mem, clone}; - use clone::Clone; + use ::std::mem; type A = [$elem_ty; $id::lanes()]; let a: A = [42 as $elem_ty; $id::lanes()]; assert!(mem::size_of::<A>() == mem::size_of::<$id>()); diff --git a/coresimd/ppsv/api/load_store.rs b/coresimd/ppsv/api/load_store.rs index fe21f74c98..0014f236c3 100644 --- a/coresimd/ppsv/api/load_store.rs +++ b/coresimd/ppsv/api/load_store.rs @@ -1,4 +1,5 @@ //! Implements the load/store API. +#![allow(unused)] macro_rules! impl_load_store { ($id:ident, $elem_ty:ident, $elem_count:expr) => { @@ -11,10 +12,11 @@ macro_rules! impl_load_store { /// aligned to an `align_of::<Self>()` boundary. #[inline] pub fn store_aligned(self, slice: &mut [$elem_ty]) { + use ::slice::SliceExt; unsafe { assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; - assert!(target_ptr.align_offset(mem::align_of::<Self>()) == 0); + assert!(target_ptr.align_offset(::mem::align_of::<Self>()) == 0); self.store_aligned_unchecked(slice); } } @@ -26,6 +28,7 @@ macro_rules! impl_load_store { /// If `slice.len() < Self::lanes()`. #[inline] pub fn store_unaligned(self, slice: &mut [$elem_ty]) { + use ::slice::SliceExt; unsafe { assert!(slice.len() >= $elem_count); self.store_unaligned_unchecked(slice); @@ -45,6 +48,7 @@ macro_rules! impl_load_store { slice: &mut [$elem_ty] ) { + use ::slice::SliceExt; *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = self; } @@ -58,9 +62,10 @@ macro_rules! impl_load_store { self, slice: &mut [$elem_ty] ) { + use ::slice::SliceExt; let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; let self_ptr = &self as *const Self as *const u8; - ptr::copy_nonoverlapping(self_ptr, target_ptr, mem::size_of::<Self>()); + ::ptr::copy_nonoverlapping(self_ptr, target_ptr, ::mem::size_of::<Self>()); } /// Instantiates a new vector with the values of the `slice`. @@ -72,9 +77,10 @@ macro_rules! impl_load_store { #[inline] pub fn load_aligned(slice: &[$elem_ty]) -> Self { unsafe { + use ::slice::SliceExt; assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked(0) as *const $elem_ty; - assert!(target_ptr.align_offset(mem::align_of::<Self>()) == 0); + assert!(target_ptr.align_offset(::mem::align_of::<Self>()) == 0); Self::load_aligned_unchecked(slice) } } @@ -86,6 +92,7 @@ macro_rules! impl_load_store { /// If `slice.len() < Self::lanes()`. #[inline] pub fn load_unaligned(slice: &[$elem_ty]) -> Self { + use ::slice::SliceExt; unsafe { assert!(slice.len() >= $elem_count); Self::load_unaligned_unchecked(slice) @@ -100,6 +107,7 @@ macro_rules! impl_load_store { /// to an `align_of::<Self>()` boundary, the behavior is undefined. #[inline] pub unsafe fn load_aligned_unchecked(slice: &[$elem_ty]) -> Self { + use ::slice::SliceExt; *(slice.get_unchecked(0) as *const $elem_ty as *const Self) } @@ -110,11 +118,12 @@ macro_rules! impl_load_store { /// If `slice.len() < Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn load_unaligned_unchecked(slice: &[$elem_ty]) -> Self { - use mem::size_of; + use ::slice::SliceExt; + use ::mem::size_of; let target_ptr = slice.get_unchecked(0) as *const $elem_ty as *const u8; let mut x = Self::splat(0 as $elem_ty); let self_ptr = &mut x as *mut Self as *mut u8; - ptr::copy_nonoverlapping(target_ptr,self_ptr,size_of::<Self>()); + ::ptr::copy_nonoverlapping(target_ptr,self_ptr,size_of::<Self>()); x } } @@ -122,7 +131,6 @@ macro_rules! 
impl_load_store { } #[cfg(test)] -#[macro_export] macro_rules! test_load_store { ($id:ident, $elem_ty:ident) => { #[test] @@ -177,7 +185,7 @@ macro_rules! test_load_store { union A { data: [$elem_ty; 2 * ::coresimd::simd::$id::lanes()], - vec: ::coresimd::simd::$id, + _vec: ::coresimd::simd::$id, } #[test] diff --git a/coresimd/ppsv/api/minimal.rs b/coresimd/ppsv/api/minimal.rs index 3def265e21..d833a2af67 100644 --- a/coresimd/ppsv/api/minimal.rs +++ b/coresimd/ppsv/api/minimal.rs @@ -1,10 +1,11 @@ -//! +//! Minimal portable vector types API. +#![allow(unused)] /// Minimal interface: all packed SIMD vector types implement this. macro_rules! impl_minimal { ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => { #[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))] - impl Clone for $id { + impl ::clone::Clone for $id { #[inline] // currently needed for correctness fn clone(&self) -> Self { *self @@ -49,9 +50,12 @@ macro_rules! impl_minimal { /// Extracts the value at `index`. /// + /// # Precondition + /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { + use coresimd::simd_llvm::simd_extract; simd_extract(self, index as u32) } @@ -69,9 +73,9 @@ macro_rules! impl_minimal { /// Returns a new vector where the value at `index` is replaced by `new_value`. /// - /// # Panics + /// # Precondition /// - /// If `index >= Self::lanes()`. + /// If `index >= Self::lanes()` the behavior is undefined. #[inline] #[must_use = "replace_unchecked does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"] pub unsafe fn replace_unchecked( @@ -79,6 +83,7 @@ macro_rules! impl_minimal { index: usize, new_value: $elem_ty, ) -> Self { + use coresimd::simd_llvm::simd_insert; simd_insert(self, index as u32, new_value) } } diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs index 7c158ab404..35232d17a6 100644 --- a/coresimd/ppsv/api/minmax_reductions.rs +++ b/coresimd/ppsv/api/minmax_reductions.rs @@ -1,4 +1,5 @@ //! Implements portable arithmetic vector reductions. +#![allow(unused)] macro_rules! impl_minmax_reductions { ($id:ident, $elem_ty:ident) => { @@ -6,12 +7,12 @@ macro_rules! impl_minmax_reductions { /// Largest vector value. #[inline] pub fn max(self) -> $elem_ty { - ReduceMax::reduce_max(self) + super::codegen::max::ReduceMax::reduce_max(self) } /// Smallest vector value. #[inline] pub fn min(self) -> $elem_ty { - ReduceMin::reduce_min(self) + super::codegen::min::ReduceMin::reduce_min(self) } } } diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs index cfd201ed26..ed4e0e2c22 100644 --- a/coresimd/ppsv/api/mod.rs +++ b/coresimd/ppsv/api/mod.rs @@ -53,6 +53,7 @@ //! * [x] boolean reductions: implemented by boolean vectors. //! * [ ] portable shuffles: `shufflevector`. //! * [ ] portable `gather`/`scatter`: +#![allow(unused)] /// Adds the vector type `$id`, with elements of types `$elem_tys`. macro_rules! define_ty { @@ -113,170 +114,155 @@ mod partial_eq; #[macro_use] mod shifts; -/// Imports required to implement vector types using the macros. - -macro_rules! 
simd_api_imports { - () => { - use ::coresimd::simd_llvm::*; - use fmt; - use hash; - use ops; - #[allow(unused_imports)] - use num; - use cmp::{Eq, PartialEq}; - use ptr; - use mem; - #[allow(unused_imports)] - use convert::{From, Into}; - use slice::SliceExt; - #[allow(unused_imports)] - use iter::Iterator; - #[allow(unused_imports)] - use default::Default; - use clone::Clone; - use super::codegen::sum::{ReduceAdd}; - use super::codegen::product::{ReduceMul}; - use super::codegen::and::{ReduceAnd}; - use super::codegen::or::{ReduceOr}; - use super::codegen::xor::{ReduceXor}; - use super::codegen::min::{ReduceMin}; - use super::codegen::max::{ReduceMax}; - } -} - /// Defines a portable packed SIMD floating-point vector type. macro_rules! simd_f_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - define_ty!($id, $($elem_tys),+ | $(#[$doc])*); - impl_minimal!($id, $elem_ty, $elem_count, $($elem_name),*); - impl_load_store!($id, $elem_ty, $elem_count); - impl_cmp!($id, $bool_ty); - impl_arithmetic_ops!($id); - impl_arithmetic_reductions!($id, $elem_ty); - impl_minmax_reductions!($id, $elem_ty); - impl_neg_op!($id, $elem_ty); - impl_partial_eq!($id); - impl_default!($id, $elem_ty); + vector_impl!( + [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], + [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], + [impl_load_store, $id, $elem_ty, $elem_count], + [impl_cmp, $id, $bool_ty], + [impl_arithmetic_ops, $id], + [impl_arithmetic_reductions, $id, $elem_ty], + [impl_minmax_reductions, $id, $elem_ty], + [impl_neg_op, $id, $elem_ty], + [impl_partial_eq, $id], + [impl_default, $id, $elem_ty] + ); - #[cfg(test)] - mod $test_mod { - test_minimal!($id, $elem_ty, $elem_count); - test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $bool_ty, 1. as $elem_ty, 0. as $elem_ty); - test_arithmetic_ops!($id, $elem_ty); - test_arithmetic_reductions!($id, $elem_ty); - test_minmax_reductions!($id, $elem_ty); - test_neg_op!($id, $elem_ty); - test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty); - test_default!($id, $elem_ty); - } + $test_macro!( + #[cfg(test)] + mod $test_mod { + test_minimal!($id, $elem_ty, $elem_count); + test_load_store!($id, $elem_ty); + test_cmp!($id, $elem_ty, $bool_ty, 1. as $elem_ty, 0. as $elem_ty); + test_arithmetic_ops!($id, $elem_ty); + test_arithmetic_reductions!($id, $elem_ty); + test_minmax_reductions!($id, $elem_ty); + test_neg_op!($id, $elem_ty); + test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty); + test_default!($id, $elem_ty); + } + ); } } /// Defines a portable packed SIMD signed-integer vector type. macro_rules! 
simd_i_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - define_ty!($id, $($elem_tys),+ | $(#[$doc])*); - impl_minimal!($id, $elem_ty, $elem_count, $($elem_name),*); - impl_load_store!($id, $elem_ty, $elem_count); - impl_cmp!($id, $bool_ty); - impl_hash!($id, $elem_ty); - impl_arithmetic_ops!($id); - impl_arithmetic_reductions!($id, $elem_ty); - impl_minmax_reductions!($id, $elem_ty); - impl_neg_op!($id, $elem_ty); - impl_bitwise_ops!($id, !(0 as $elem_ty)); - impl_bitwise_reductions!($id, $elem_ty); - impl_all_shifts!($id, $elem_ty); - impl_hex_fmt!($id, $elem_ty); - impl_eq!($id); - impl_partial_eq!($id); - impl_default!($id, $elem_ty); + vector_impl!( + [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], + [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], + [impl_load_store, $id, $elem_ty, $elem_count], + [impl_cmp, $id, $bool_ty], + [impl_hash, $id, $elem_ty], + [impl_arithmetic_ops, $id], + [impl_arithmetic_reductions, $id, $elem_ty], + [impl_minmax_reductions, $id, $elem_ty], + [impl_neg_op, $id, $elem_ty], + [impl_bitwise_ops, $id, !(0 as $elem_ty)], + [impl_bitwise_reductions, $id, $elem_ty], + [impl_all_shifts, $id, $elem_ty], + [impl_hex_fmt, $id, $elem_ty], + [impl_eq, $id], + [impl_partial_eq, $id], + [impl_default, $id, $elem_ty] + ); - #[cfg(test)] - mod $test_mod { - test_minimal!($id, $elem_ty, $elem_count); - test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty); - test_hash!($id, $elem_ty); - test_arithmetic_ops!($id, $elem_ty); - test_arithmetic_reductions!($id, $elem_ty); - test_minmax_reductions!($id, $elem_ty); - test_neg_op!($id, $elem_ty); - test_int_bitwise_ops!($id, $elem_ty); - test_bitwise_reductions!($id, !(0 as $elem_ty)); - test_all_shift_ops!($id, $elem_ty); - test_hex_fmt!($id, $elem_ty); - test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); - test_default!($id, $elem_ty); - } + $test_macro!( + #[cfg(test)] + mod $test_mod { + test_minimal!($id, $elem_ty, $elem_count); + test_load_store!($id, $elem_ty); + test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty); + test_hash!($id, $elem_ty); + test_arithmetic_ops!($id, $elem_ty); + test_arithmetic_reductions!($id, $elem_ty); + test_minmax_reductions!($id, $elem_ty); + test_neg_op!($id, $elem_ty); + test_int_bitwise_ops!($id, $elem_ty); + test_bitwise_reductions!($id, !(0 as $elem_ty)); + test_all_shift_ops!($id, $elem_ty); + test_hex_fmt!($id, $elem_ty); + test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); + test_default!($id, $elem_ty); + } + ); } } /// Defines a portable packed SIMD unsigned-integer vector type. macro_rules! 
simd_u_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - define_ty!($id, $($elem_tys),+ | $(#[$doc])*); - impl_minimal!($id, $elem_ty, $elem_count, $($elem_name),*); - impl_load_store!($id, $elem_ty, $elem_count); - impl_cmp!($id, $bool_ty); - impl_hash!($id, $elem_ty); - impl_arithmetic_ops!($id); - impl_arithmetic_reductions!($id, $elem_ty); - impl_minmax_reductions!($id, $elem_ty); - impl_bitwise_ops!($id, !(0 as $elem_ty)); - impl_bitwise_reductions!($id, $elem_ty); - impl_all_shifts!($id, $elem_ty); - impl_hex_fmt!($id, $elem_ty); - impl_eq!($id); - impl_partial_eq!($id); - impl_default!($id, $elem_ty); + vector_impl!( + [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], + [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], + [impl_load_store, $id, $elem_ty, $elem_count], + [impl_cmp, $id, $bool_ty], + [impl_hash, $id, $elem_ty], + [impl_arithmetic_ops, $id], + [impl_arithmetic_reductions, $id, $elem_ty], + [impl_minmax_reductions, $id, $elem_ty], + [impl_bitwise_ops, $id, !(0 as $elem_ty)], + [impl_bitwise_reductions, $id, $elem_ty], + [impl_all_shifts, $id, $elem_ty], + [impl_hex_fmt, $id, $elem_ty], + [impl_eq, $id], + [impl_partial_eq, $id], + [impl_default, $id, $elem_ty] + ); - #[cfg(test)] - mod $test_mod { - test_minimal!($id, $elem_ty, $elem_count); - test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty); - test_hash!($id, $elem_ty); - test_arithmetic_ops!($id, $elem_ty); - test_arithmetic_reductions!($id, $elem_ty); - test_minmax_reductions!($id, $elem_ty); - test_int_bitwise_ops!($id, $elem_ty); - test_bitwise_reductions!($id, !(0 as $elem_ty)); - test_all_shift_ops!($id, $elem_ty); - test_hex_fmt!($id, $elem_ty); - test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); - test_default!($id, $elem_ty); - } + $test_macro!( + #[cfg(test)] + mod $test_mod { + test_minimal!($id, $elem_ty, $elem_count); + test_load_store!($id, $elem_ty); + test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty); + test_hash!($id, $elem_ty); + test_arithmetic_ops!($id, $elem_ty); + test_arithmetic_reductions!($id, $elem_ty); + test_minmax_reductions!($id, $elem_ty); + test_int_bitwise_ops!($id, $elem_ty); + test_bitwise_reductions!($id, !(0 as $elem_ty)); + test_all_shift_ops!($id, $elem_ty); + test_hex_fmt!($id, $elem_ty); + test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); + test_default!($id, $elem_ty); + } + ); } } /// Defines a portable packed SIMD boolean vector type. macro_rules! 
simd_b_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - define_ty!($id, $($elem_tys),+ | $(#[$doc])*); - impl_bool_minimal!($id, $elem_ty, $elem_count, $($elem_name),*); - impl_bitwise_ops!($id, true); - impl_bool_bitwise_reductions!($id, bool); - impl_bool_reductions!($id); - impl_bool_cmp!($id, $id); - impl_eq!($id); - impl_partial_eq!($id); - impl_default!($id, bool); + vector_impl!( + [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], + [impl_bool_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], + [impl_bitwise_ops, $id, true], + [impl_bool_bitwise_reductions, $id, bool], + [impl_bool_reductions, $id], + [impl_bool_cmp, $id, $id], + [impl_eq, $id], + [impl_partial_eq, $id], + [impl_default, $id, bool] + ); - #[cfg(test)] - mod $test_mod { - test_bool_minimal!($id, $elem_count); - test_bool_bitwise_ops!($id); - test_bool_reductions!($id); - test_bitwise_reductions!($id, true); - test_cmp!($id, $elem_ty, $id, true, false); - test_partial_eq!($id, true, false); - test_default!($id, bool); - } + $test_macro!( + #[cfg(test)] + mod $test_mod { + test_bool_minimal!($id, $elem_count); + test_bool_bitwise_ops!($id); + test_bool_reductions!($id); + test_bitwise_reductions!($id, true); + test_cmp!($id, $elem_ty, $id, true, false); + test_partial_eq!($id, true, false); + test_default!($id, bool); + } + ); } } diff --git a/coresimd/ppsv/api/neg.rs b/coresimd/ppsv/api/neg.rs index aa1cffbf7f..138162215c 100644 --- a/coresimd/ppsv/api/neg.rs +++ b/coresimd/ppsv/api/neg.rs @@ -1,8 +1,9 @@ //! Implements `std::ops::Neg` for signed vector types. +#![allow(unused)] macro_rules! impl_neg_op { ($id:ident, $elem_ty:ident) => { - impl ops::Neg for $id { + impl ::ops::Neg for $id { type Output = Self; #[inline] fn neg(self) -> Self { @@ -13,7 +14,6 @@ macro_rules! impl_neg_op { } #[cfg(test)] -#[macro_export] macro_rules! test_neg_op { ($id:ident, $elem_ty:ident) => { #[test] diff --git a/coresimd/ppsv/api/partial_eq.rs b/coresimd/ppsv/api/partial_eq.rs index 70e7a9f966..324442c623 100644 --- a/coresimd/ppsv/api/partial_eq.rs +++ b/coresimd/ppsv/api/partial_eq.rs @@ -1,8 +1,9 @@ //! Implements `PartialEq` for vector types. +#![allow(unused)] macro_rules! impl_partial_eq { ($id:ident) => { - impl PartialEq<$id> for $id { + impl ::cmp::PartialEq<$id> for $id { #[inline] fn eq(&self, other: &Self) -> bool { $id::eq(*self, *other).all() @@ -16,7 +17,6 @@ macro_rules! impl_partial_eq { } #[cfg(test)] -#[macro_export] macro_rules! test_partial_eq { ($id:ident, $true:expr, $false:expr) => { #[test] diff --git a/coresimd/ppsv/api/shifts.rs b/coresimd/ppsv/api/shifts.rs index 1447447eea..397f10646a 100644 --- a/coresimd/ppsv/api/shifts.rs +++ b/coresimd/ppsv/api/shifts.rs @@ -1,4 +1,5 @@ //! Implements integer shifts. +#![allow(unused)] macro_rules! impl_shifts { ($id:ident, $elem_ty:ident, $($by:ident),+) => { @@ -7,6 +8,7 @@ macro_rules! impl_shifts { type Output = Self; #[inline] fn shl(self, other: $by) -> Self { + use coresimd::simd_llvm::simd_shl; unsafe { simd_shl(self, $id::splat(other as $elem_ty)) } } } @@ -14,6 +16,7 @@ macro_rules! impl_shifts { type Output = Self; #[inline] fn shr(self, other: $by) -> Self { + use coresimd::simd_llvm::simd_shr; unsafe { simd_shr(self, $id::splat(other as $elem_ty)) } } } @@ -46,7 +49,6 @@ macro_rules! impl_all_shifts { } #[cfg(test)] -#[macro_export] macro_rules! 
test_shift_ops { ($id:ident, $elem_ty:ident, $($index_ty:ident),+) => { #[test] @@ -111,7 +113,6 @@ macro_rules! test_shift_ops { } #[cfg(test)] -#[macro_export] macro_rules! test_all_shift_ops { ($id:ident, $elem_ty:ident) => { test_shift_ops!( diff --git a/coresimd/ppsv/codegen/max.rs b/coresimd/ppsv/codegen/max.rs index 420aca447c..497fbe46f3 100644 --- a/coresimd/ppsv/codegen/max.rs +++ b/coresimd/ppsv/codegen/max.rs @@ -115,8 +115,8 @@ macro_rules! red_max { #[allow(unused_imports)] #[inline] fn reduce_max(self) -> Self::Acc { - use num::Float; - use cmp::Ord; + use ::num::Float; + use ::cmp::Ord; let mut x = self.extract(0); for i in 1..$id::lanes() { x = x.max(self.extract(i)); diff --git a/coresimd/ppsv/codegen/min.rs b/coresimd/ppsv/codegen/min.rs index 064b591e76..d0434ce4be 100644 --- a/coresimd/ppsv/codegen/min.rs +++ b/coresimd/ppsv/codegen/min.rs @@ -115,8 +115,8 @@ macro_rules! red_min { #[allow(unused_imports)] #[inline] fn reduce_min(self) -> Self::Acc { - use num::Float; - use cmp::Ord; + use ::num::Float; + use ::cmp::Ord; let mut x = self.extract(0); for i in 1..$id::lanes() { x = x.min(self.extract(i)); diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs index 69de61d906..177f3c97c4 100644 --- a/coresimd/ppsv/mod.rs +++ b/coresimd/ppsv/mod.rs @@ -48,16 +48,14 @@ pub use self::v128::*; pub use self::v256::*; pub use self::v512::*; -use marker; - /// Safe lossless bitwise conversion from `T` to `Self`. -pub trait FromBits<T>: marker::Sized { +pub trait FromBits<T>: ::marker::Sized { /// Safe lossless bitwise from `T` to `Self`. fn from_bits(T) -> Self; } /// Safe lossless bitwise conversion from `Self` to `T`. -pub trait IntoBits<T>: marker::Sized { +pub trait IntoBits<T>: ::marker::Sized { /// Safe lossless bitwise transmute from `self` to `T`. fn into_bits(self) -> T; } diff --git a/coresimd/ppsv/v128.rs b/coresimd/ppsv/v128.rs index 5528a6d1bf..c0beb3ada1 100644 --- a/coresimd/ppsv/v128.rs +++ b/coresimd/ppsv/v128.rs @@ -1,81 +1,78 @@ //! 128-bit wide portable packed vector types. - -simd_api_imports!(); - use coresimd::simd::{b8x2, b8x4, b8x8}; simd_i_ty! { - i8x16: 16, i8, b8x16, i8x16_tests | + i8x16: 16, i8, b8x16, i8x16_tests, test_v128 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | /// A 128-bit vector with 16 `i8` lanes. } simd_u_ty! { - u8x16: 16, u8, b8x16, u8x16_tests | + u8x16: 16, u8, b8x16, u8x16_tests, test_v128 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | /// A 128-bit vector with 16 `u8` lanes. } simd_b_ty! { - b8x16: 16, i8, b8x16_tests | + b8x16: 16, i8, b8x16_tests, test_v128 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | /// A 128-bit vector with 16 `bool` lanes. } simd_i_ty! { - i16x8: 8, i16, b8x8, i16x8_tests | + i16x8: 8, i16, b8x8, i16x8_tests, test_v128 | i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 128-bit vector with 8 `i16` lanes. } simd_u_ty! { - u16x8: 8, u16, b8x8, u16x8_tests | + u16x8: 8, u16, b8x8, u16x8_tests, test_v128 | u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 128-bit vector with 8 `u16` lanes. } simd_i_ty! { - i32x4: 4, i32, b8x4, i32x4_tests | + i32x4: 4, i32, b8x4, i32x4_tests, test_v128 | i32, i32, i32, i32 | x0, x1, x2, x3 | /// A 128-bit vector with 4 `i32` lanes. } simd_u_ty! 
{ - u32x4: 4, u32, b8x4, u32x4_tests | + u32x4: 4, u32, b8x4, u32x4_tests, test_v128 | u32, u32, u32, u32 | x0, x1, x2, x3 | /// A 128-bit vector with 4 `u32` lanes. } simd_f_ty! { - f32x4: 4, f32, b8x4, f32x4_tests | + f32x4: 4, f32, b8x4, f32x4_tests, test_v128 | f32, f32, f32, f32 | x0, x1, x2, x3 | /// A 128-bit vector with 4 `f32` lanes. } simd_i_ty! { - i64x2: 2, i64, b8x2, i64x2_tests | + i64x2: 2, i64, b8x2, i64x2_tests, test_v128 | i64, i64 | x0, x1 | /// A 128-bit vector with 2 `u64` lanes. } simd_u_ty! { - u64x2: 2, u64, b8x2, u64x2_tests | + u64x2: 2, u64, b8x2, u64x2_tests, test_v128 | u64, u64 | x0, x1 | /// A 128-bit vector with 2 `u64` lanes. } simd_f_ty! { - f64x2: 2, f64, b8x2, f64x2_tests | + f64x2: 2, f64, b8x2, f64x2_tests, test_v128 | f64, f64 | x0, x1 | /// A 128-bit vector with 2 `f64` lanes. @@ -83,7 +80,8 @@ simd_f_ty! { impl_from_bits!( u64x2: u64, - u64x2_from_bits | i64x2, + u64x2_from_bits, + test_v128 | i64x2, f64x2, u32x4, i32x4, @@ -96,7 +94,8 @@ impl_from_bits!( ); impl_from_bits!( i64x2: i64, - i64x2_from_bits | u64x2, + i64x2_from_bits, + test_v128 | u64x2, f64x2, u32x4, i32x4, @@ -109,7 +108,8 @@ impl_from_bits!( ); impl_from_bits!( f64x2: f64, - f64x2_from_bits | i64x2, + f64x2_from_bits, + test_v128 | i64x2, u64x2, u32x4, i32x4, @@ -122,7 +122,8 @@ impl_from_bits!( ); impl_from_bits!( u32x4: u32, - u32x4_from_bits | u64x2, + u32x4_from_bits, + test_v128 | u64x2, i64x2, f64x2, i32x4, @@ -135,7 +136,8 @@ impl_from_bits!( ); impl_from_bits!( i32x4: i32, - i32x4_from_bits | u64x2, + i32x4_from_bits, + test_v128 | u64x2, i64x2, f64x2, u32x4, @@ -148,7 +150,8 @@ impl_from_bits!( ); impl_from_bits!( f32x4: f32, - f32x4_from_bits | u64x2, + f32x4_from_bits, + test_v128 | u64x2, i64x2, f64x2, i32x4, @@ -161,7 +164,8 @@ impl_from_bits!( ); impl_from_bits!( u16x8: u16, - u16x8_from_bits | u64x2, + u16x8_from_bits, + test_v128 | u64x2, i64x2, f64x2, u32x4, @@ -174,7 +178,8 @@ impl_from_bits!( ); impl_from_bits!( i16x8: i16, - i16x8_from_bits | u64x2, + i16x8_from_bits, + test_v128 | u64x2, i64x2, f64x2, u32x4, @@ -187,7 +192,8 @@ impl_from_bits!( ); impl_from_bits!( u8x16: u8, - u8x16_from_bits | u64x2, + u8x16_from_bits, + test_v128 | u64x2, i64x2, f64x2, u32x4, @@ -200,7 +206,8 @@ impl_from_bits!( ); impl_from_bits!( i8x16: i8, - i8x16_from_bits | u64x2, + i8x16_from_bits, + test_v128 | u64x2, i64x2, f64x2, u32x4, @@ -212,12 +219,10 @@ impl_from_bits!( b8x16 ); -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m128; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m128i; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m128d; +#[cfg(target_arch = "x86")] +use coresimd::arch::x86::{__m128, __m128d, __m128i}; +#[cfg(target_arch = "x86_64")] +use coresimd::arch::x86_64::{__m128, __m128d, __m128i}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] impl_from_bits_!(f64x2: __m128, __m128i, __m128d); @@ -242,7 +247,8 @@ impl_from_bits_!(i8x16: __m128, __m128i, __m128d); impl_from!( f64x2: f64, - f64x2_from | f32x2, + f64x2_from, + test_v128 | f32x2, u64x2, i64x2, u32x2, @@ -254,7 +260,8 @@ impl_from!( ); impl_from!( f32x4: f32, - f32x4_from | f64x4, + f32x4_from, + test_v128 | f64x4, u64x4, i64x4, u32x4, @@ -266,7 +273,8 @@ impl_from!( ); impl_from!( u64x2: u64, - u64x2_from | f32x2, + u64x2_from, + test_v128 | f32x2, f64x2, i64x2, i32x2, @@ -278,7 +286,8 @@ impl_from!( ); impl_from!( i64x2: i64, - i64x2_from | f32x2, + i64x2_from, + test_v128 | f32x2, f64x2, u64x2, 
i32x2, @@ -290,7 +299,8 @@ impl_from!( ); impl_from!( u32x4: u32, - u32x4_from | f64x4, + u32x4_from, + test_v128 | f64x4, u64x4, i64x4, f32x4, @@ -302,7 +312,8 @@ impl_from!( ); impl_from!( i32x4: i32, - i32x4_from | f64x4, + i32x4_from, + test_v128 | f64x4, u64x4, i64x4, f32x4, @@ -314,7 +325,8 @@ impl_from!( ); impl_from!( i16x8: i16, - i16x8_from | f64x8, + i16x8_from, + test_v128 | f64x8, u64x8, i64x8, f32x8, @@ -326,7 +338,8 @@ impl_from!( ); impl_from!( u16x8: u16, - u16x8_from | f64x8, + u16x8_from, + test_v128 | f64x8, u64x8, i64x8, f32x8, diff --git a/coresimd/ppsv/v16.rs b/coresimd/ppsv/v16.rs index 5bde9079f9..389aaf0e6a 100644 --- a/coresimd/ppsv/v16.rs +++ b/coresimd/ppsv/v16.rs @@ -1,34 +1,33 @@ //! 16-bit wide portable packed vector types. -simd_api_imports!(); - simd_i_ty! { - i8x2: 2, i8, b8x2, i8x2_tests | + i8x2: 2, i8, b8x2, i8x2_tests, test_v16 | i8, i8 | x0, x1 | /// A 16-bit wide vector with 2 `i8` lanes. } simd_u_ty! { - u8x2: 2, u8, b8x2, u8x2_tests | + u8x2: 2, u8, b8x2, u8x2_tests, test_v16 | u8, u8 | x0, x1 | /// A 16-bit wide vector with 2 `u8` lanes. } simd_b_ty! { - b8x2: 2, i8, b8x2_tests | + b8x2: 2, i8, b8x2_tests, test_v16 | i8, i8 | x0, x1 | /// A 16-bit wide vector with 2 `bool` lanes. } -impl_from_bits!(i8x2: i8, i8x2_from_bits | u8x2, b8x2); -impl_from_bits!(u8x2: u8, u8x2_from_bits | i8x2, b8x2); +impl_from_bits!(i8x2: i8, i8x2_from_bits, test_v16 | u8x2, b8x2); +impl_from_bits!(u8x2: u8, u8x2_from_bits, test_v16 | i8x2, b8x2); impl_from!( i8x2: i8, - i8x2_from | f64x2, + i8x2_from, + test_v16 | f64x2, u64x2, i64x2, f32x2, @@ -39,7 +38,8 @@ impl_from!( ); impl_from!( u8x2: u8, - u8x2_from | f64x2, + u8x2_from, + test_v16 | f64x2, u64x2, i64x2, f32x2, diff --git a/coresimd/ppsv/v256.rs b/coresimd/ppsv/v256.rs index d7994dfc7d..f88a209b96 100644 --- a/coresimd/ppsv/v256.rs +++ b/coresimd/ppsv/v256.rs @@ -1,11 +1,8 @@ //! 256-bit wide portable packed vector types. - -simd_api_imports!(); - use coresimd::simd::{b8x16, b8x4, b8x8}; simd_i_ty! { - i8x32: 32, i8, b8x32, i8x32_tests | + i8x32: 32, i8, b8x32, i8x32_tests, test_v256 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -16,7 +13,7 @@ simd_i_ty! { } simd_u_ty! { - u8x32: 32, u8, b8x32, u8x32_tests | + u8x32: 32, u8, b8x32, u8x32_tests, test_v256 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -27,7 +24,7 @@ simd_u_ty! { } simd_b_ty! { - b8x32: 32, i8, b8x32_tests | + b8x32: 32, i8, b8x32_tests, test_v256 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -38,7 +35,7 @@ simd_b_ty! { } simd_i_ty! { - i16x16: 16, i16, b8x16, i16x16_tests | + i16x16: 16, i16, b8x16, i16x16_tests, test_v256 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -47,7 +44,7 @@ simd_i_ty! { } simd_u_ty! { - u16x16: 16, u16, b8x16, u16x16_tests | + u16x16: 16, u16, b8x16, u16x16_tests, test_v256 | u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -56,42 +53,42 @@ simd_u_ty! { } simd_i_ty! 
{ - i32x8: 8, i32, b8x8, i32x8_tests | + i32x8: 8, i32, b8x8, i32x8_tests, test_v256 | i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 256-bit vector with 8 `i32` lanes. } simd_u_ty! { - u32x8: 8, u32, b8x8, u32x8_tests | + u32x8: 8, u32, b8x8, u32x8_tests, test_v256 | u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 256-bit vector with 8 `u32` lanes. } simd_f_ty! { - f32x8: 8, f32, b8x8, f32x8_tests | + f32x8: 8, f32, b8x8, f32x8_tests, test_v256 | f32, f32, f32, f32, f32, f32, f32, f32 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 256-bit vector with 8 `f32` lanes. } simd_i_ty! { - i64x4: 4, i64, b8x4, i64x4_tests | + i64x4: 4, i64, b8x4, i64x4_tests, test_v256 | i64, i64, i64, i64 | x0, x1, x2, x3 | /// A 256-bit vector with 4 `i64` lanes. } simd_u_ty! { - u64x4: 4, u64, b8x4, u64x4_tests | + u64x4: 4, u64, b8x4, u64x4_tests, test_v256 | u64, u64, u64, u64 | x0, x1, x2, x3 | /// A 256-bit vector with 4 `u64` lanes. } simd_f_ty! { - f64x4: 4, f64, b8x4, f64x4_tests | + f64x4: 4, f64, b8x4, f64x4_tests, test_v256 | f64, f64, f64, f64 | x0, x1, x2, x3 | /// A 256-bit vector with 4 `f64` lanes. @@ -99,7 +96,8 @@ simd_f_ty! { impl_from_bits!( i8x32: i8, - i8x32_from_bits | u64x4, + i8x32_from_bits, + test_v256 | u64x4, i64x4, f64x4, u32x8, @@ -112,7 +110,8 @@ impl_from_bits!( ); impl_from_bits!( u8x32: u8, - u8x32_from_bits | u64x4, + u8x32_from_bits, + test_v256 | u64x4, i64x4, f64x4, u32x8, @@ -125,7 +124,8 @@ impl_from_bits!( ); impl_from_bits!( i16x16: i16, - i16x16_from_bits | u64x4, + i16x16_from_bits, + test_v256 | u64x4, i64x4, f64x4, u32x8, @@ -138,7 +138,8 @@ impl_from_bits!( ); impl_from_bits!( u16x16: u16, - u16x16_from_bits | u64x4, + u16x16_from_bits, + test_v256 | u64x4, i64x4, f64x4, u32x8, @@ -151,7 +152,8 @@ impl_from_bits!( ); impl_from_bits!( i32x8: i32, - i32x8_from_bits | u64x4, + i32x8_from_bits, + test_v256 | u64x4, i64x4, f64x4, u32x8, @@ -164,7 +166,8 @@ impl_from_bits!( ); impl_from_bits!( u32x8: u32, - u32x8_from_bits | u64x4, + u32x8_from_bits, + test_v256 | u64x4, i64x4, f64x4, i32x8, @@ -177,7 +180,8 @@ impl_from_bits!( ); impl_from_bits!( f32x8: f32, - f32x8_from_bits | u64x4, + f32x8_from_bits, + test_v256 | u64x4, i64x4, f64x4, i32x8, @@ -190,7 +194,8 @@ impl_from_bits!( ); impl_from_bits!( i64x4: i64, - i64x4_from_bits | u64x4, + i64x4_from_bits, + test_v256 | u64x4, f64x4, i32x8, u32x8, @@ -203,7 +208,8 @@ impl_from_bits!( ); impl_from_bits!( u64x4: u64, - u64x4_from_bits | i64x4, + u64x4_from_bits, + test_v256 | i64x4, f64x4, i32x8, u32x8, @@ -216,7 +222,8 @@ impl_from_bits!( ); impl_from_bits!( f64x4: f64, - f64x4_from_bits | i64x4, + f64x4_from_bits, + test_v256 | i64x4, u64x4, i32x8, u32x8, @@ -228,12 +235,10 @@ impl_from_bits!( b8x32 ); -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m256; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m256i; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m256d; +#[cfg(target_arch = "x86")] +use coresimd::arch::x86::{__m256, __m256d, __m256i}; +#[cfg(target_arch = "x86_64")] +use coresimd::arch::x86_64::{__m256, __m256d, __m256i}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] impl_from_bits_!(f64x4: __m256, __m256i, __m256d); @@ -258,7 +263,8 @@ impl_from_bits_!(i8x32: __m256, __m256i, __m256d); impl_from!( f64x4: f64, - f64x4_from | u64x4, + f64x4_from, + test_v256 | u64x4, i64x4, u32x4, i32x4, @@ -270,7 +276,8 @@ impl_from!( ); impl_from!( i64x4: i64, 
- i64x4_from | u64x4, + i64x4_from, + test_v256 | u64x4, f64x4, u32x4, i32x4, @@ -282,7 +289,8 @@ impl_from!( ); impl_from!( u64x4: u64, - u64x4_from | i64x4, + u64x4_from, + test_v256 | i64x4, f64x4, u32x4, i32x4, @@ -294,7 +302,8 @@ impl_from!( ); impl_from!( f32x8: f32, - f32x8_from | u64x8, + f32x8_from, + test_v256 | u64x8, i64x8, f64x8, u32x8, @@ -306,7 +315,8 @@ impl_from!( ); impl_from!( i32x8: i32, - i32x8_from | u64x8, + i32x8_from, + test_v256 | u64x8, i64x8, f64x8, u32x8, @@ -318,7 +328,8 @@ impl_from!( ); impl_from!( u32x8: u32, - u32x8_from | u64x8, + u32x8_from, + test_v256 | u64x8, i64x8, f64x8, i32x8, @@ -330,7 +341,8 @@ impl_from!( ); impl_from!( i16x16: i16, - i16x16_from | u32x16, + i16x16_from, + test_v256 | u32x16, i32x16, f32x16, u16x16, @@ -339,12 +351,13 @@ impl_from!( ); impl_from!( u16x16: u16, - u16x16_from | u32x16, + u16x16_from, + test_v256 | u32x16, i32x16, f32x16, i16x16, u8x16, i8x16 ); -impl_from!(i8x32: i8, i8x32_from | u16x32, i16x32, u8x32); -impl_from!(u8x32: u8, u8x32_from | u16x32, i16x32, i8x32); +impl_from!(i8x32: i8, i8x32_from, test_v256 | u16x32, i16x32, u8x32); +impl_from!(u8x32: u8, u8x32_from, test_v256 | u16x32, i16x32, i8x32); diff --git a/coresimd/ppsv/v32.rs b/coresimd/ppsv/v32.rs index e007abcebe..fc51344bc3 100644 --- a/coresimd/ppsv/v32.rs +++ b/coresimd/ppsv/v32.rs @@ -1,51 +1,78 @@ //! 32-bit wide portable packed vector types. - -simd_api_imports!(); use coresimd::simd::b8x2; simd_i_ty! { - i16x2: 2, i16, b8x2, i16x2_tests | + i16x2: 2, i16, b8x2, i16x2_tests, test_v32 | i16, i16 | x0, x1 | /// A 32-bit wide vector with 2 `i16` lanes. } simd_u_ty! { - u16x2: 2, u16, b8x2, u16x2_tests | + u16x2: 2, u16, b8x2, u16x2_tests, test_v32 | u16, u16 | x0, x1 | /// A 32-bit wide vector with 2 `u16` lanes. } simd_i_ty! { - i8x4: 4, i8, b8x4, i8x4_tests | + i8x4: 4, i8, b8x4, i8x4_tests, test_v32 | i8, i8, i8, i8 | x0, x1, x2, x3 | /// A 32-bit wide vector with 4 `i8` lanes. } simd_u_ty! { - u8x4: 4, u8, b8x4, u8x4_tests | + u8x4: 4, u8, b8x4, u8x4_tests, test_v32 | u8, u8, u8, u8 | x0, x1, x2, x3 | /// A 32-bit wide vector with 4 `u8` lanes. } simd_b_ty! { - b8x4: 4, i8, b8x4_tests | + b8x4: 4, i8, b8x4_tests, test_v32 | i8, i8, i8, i8 | x0, x1, x2, x3 | /// A 32-bit wide vector with 4 `bool` lanes. 
} -impl_from_bits!(i16x2: i16, i16x2_from_bits | u16x2, i8x4, u8x4, b8x4); -impl_from_bits!(u16x2: u16, u16x2_from_bits | i16x2, i8x4, u8x4, b8x4); -impl_from_bits!(i8x4: i8, i8x2_from_bits | i16x2, u16x2, u8x4, b8x4); -impl_from_bits!(u8x4: u8, u8x2_from_bits | i16x2, u16x2, i8x4, b8x4); +impl_from_bits!( + i16x2: i16, + i16x2_from_bits, + test_v32 | u16x2, + i8x4, + u8x4, + b8x4 +); +impl_from_bits!( + u16x2: u16, + u16x2_from_bits, + test_v32 | i16x2, + i8x4, + u8x4, + b8x4 +); +impl_from_bits!( + i8x4: i8, + i8x2_from_bits, + test_v32 | i16x2, + u16x2, + u8x4, + b8x4 +); +impl_from_bits!( + u8x4: u8, + u8x2_from_bits, + test_v32 | i16x2, + u16x2, + i8x4, + b8x4 +); impl_from!( i16x2: i16, - i16x2_from | f64x2, + i16x2_from, + test_v32 | f64x2, u64x2, i64x2, f32x2, @@ -58,7 +85,8 @@ impl_from!( impl_from!( u16x2: u16, - u16x2_from | f64x2, + u16x2_from, + test_v32 | f64x2, u64x2, i64x2, f32x2, @@ -71,7 +99,8 @@ impl_from!( impl_from!( i8x4: i8, - i8x4_from | f64x4, + i8x4_from, + test_v32 | f64x4, u64x4, i64x4, u32x4, @@ -84,7 +113,8 @@ impl_from!( impl_from!( u8x4: u8, - u8x4_from | f64x4, + u8x4_from, + test_v32 | f64x4, u64x4, i64x4, u32x4, diff --git a/coresimd/ppsv/v512.rs b/coresimd/ppsv/v512.rs index 471e59eca5..8bfc249b1b 100644 --- a/coresimd/ppsv/v512.rs +++ b/coresimd/ppsv/v512.rs @@ -1,11 +1,8 @@ //! 512-bit wide portable packed vector types. - -simd_api_imports!(); - use coresimd::simd::{b8x16, b8x32, b8x8}; simd_i_ty! { - i8x64: 64, i8, b8x64, i8x64_tests | + i8x64: 64, i8, b8x64, i8x64_tests, test_v512 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, @@ -22,7 +19,7 @@ simd_i_ty! { } simd_u_ty! { - u8x64: 64, u8, b8x64, u8x64_tests | + u8x64: 64, u8, b8x64, u8x64_tests, test_v512 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, @@ -39,7 +36,7 @@ simd_u_ty! { } simd_b_ty! { - b8x64: 64, i8, b8x64_tests | + b8x64: 64, i8, b8x64_tests, test_v512 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, @@ -56,7 +53,7 @@ simd_b_ty! { } simd_i_ty! { - i16x32: 32, i16, b8x32, i16x32_tests | + i16x32: 32, i16, b8x32, i16x32_tests, test_v512 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, @@ -69,7 +66,7 @@ simd_i_ty! { } simd_u_ty! { - u16x32: 32, u16, b8x32, u16x32_tests | + u16x32: 32, u16, b8x32, u16x32_tests, test_v512 | u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, @@ -81,7 +78,7 @@ simd_u_ty! { /// A 512-bit vector with 32 `u16` lanes. } simd_i_ty! { - i32x16: 16, i32, b8x16, i32x16_tests | + i32x16: 16, i32, b8x16, i32x16_tests, test_v512 | i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -90,7 +87,7 @@ simd_i_ty! { } simd_u_ty! { - u32x16: 16, u32, b8x16, u32x16_tests | + u32x16: 16, u32, b8x16, u32x16_tests, test_v512 | u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -99,7 +96,7 @@ simd_u_ty! { } simd_f_ty! 
{ - f32x16: 16, f32, b8x16, f32x16_tests | + f32x16: 16, f32, b8x16, f32x16_tests, test_v512 | f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -108,21 +105,21 @@ simd_f_ty! { } simd_i_ty! { - i64x8: 8, i64, b8x8, i64x8_tests | + i64x8: 8, i64, b8x8, i64x8_tests, test_v512 | i64, i64, i64, i64, i64, i64, i64, i64 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 512-bit vector with 8 `i64` lanes. } simd_u_ty! { - u64x8: 8, u64, b8x8, u64x8_tests | + u64x8: 8, u64, b8x8, u64x8_tests, test_v512 | u64, u64, u64, u64, u64, u64, u64, u64 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 512-bit vector with 8 `u64` lanes. } simd_f_ty! { - f64x8: 8, f64, b8x8, f64x8_tests | + f64x8: 8, f64, b8x8, f64x8_tests, test_v512 | f64, f64, f64, f64, f64, f64, f64, f64 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 512-bit vector with 8 `f64` lanes. @@ -130,7 +127,8 @@ simd_f_ty! { impl_from_bits!( i8x64: i8, - i8x64_from_bits | u64x8, + i8x64_from_bits, + test_v512 | u64x8, i64x8, f64x8, u32x16, @@ -143,7 +141,8 @@ impl_from_bits!( ); impl_from_bits!( u8x64: u8, - u8x64_from_bits | u64x8, + u8x64_from_bits, + test_v512 | u64x8, i64x8, f64x8, u32x16, @@ -156,7 +155,8 @@ impl_from_bits!( ); impl_from_bits!( i16x32: i16, - i16x32_from_bits | u64x8, + i16x32_from_bits, + test_v512 | u64x8, i64x8, f64x8, u32x16, @@ -169,7 +169,8 @@ impl_from_bits!( ); impl_from_bits!( u16x32: u16, - u16x32_from_bits | u64x8, + u16x32_from_bits, + test_v512 | u64x8, i64x8, f64x8, u32x16, @@ -182,7 +183,8 @@ impl_from_bits!( ); impl_from_bits!( i32x16: i32, - i32x16_from_bits | u64x8, + i32x16_from_bits, + test_v512 | u64x8, i64x8, f64x8, u32x16, @@ -195,7 +197,8 @@ impl_from_bits!( ); impl_from_bits!( u32x16: u32, - u32x16_from_bits | u64x8, + u32x16_from_bits, + test_v512 | u64x8, i64x8, f64x8, i32x16, @@ -208,7 +211,8 @@ impl_from_bits!( ); impl_from_bits!( f32x16: f32, - f32x16_from_bits | u64x8, + f32x16_from_bits, + test_v512 | u64x8, i64x8, f64x8, u32x16, @@ -221,7 +225,8 @@ impl_from_bits!( ); impl_from_bits!( i64x8: i64, - i64x8_from_bits | u64x8, + i64x8_from_bits, + test_v512 | u64x8, f64x8, u32x16, i32x16, @@ -234,7 +239,8 @@ impl_from_bits!( ); impl_from_bits!( u64x8: u64, - u64x8_from_bits | i64x8, + u64x8_from_bits, + test_v512 | i64x8, f64x8, u32x16, i32x16, @@ -247,7 +253,8 @@ impl_from_bits!( ); impl_from_bits!( f64x8: f64, - f64x8_from_bits | u64x8, + f64x8_from_bits, + test_v512 | u64x8, i64x8, u32x16, i32x16, @@ -261,7 +268,8 @@ impl_from_bits!( impl_from!( f64x8: f64, - f64x8_from | u64x8, + f64x8_from, + test_v512 | u64x8, i64x8, u32x8, i32x8, @@ -273,7 +281,8 @@ impl_from!( ); impl_from!( i64x8: i64, - i64x8_from | u64x8, + i64x8_from, + test_v512 | u64x8, f64x8, u32x8, i32x8, @@ -285,7 +294,8 @@ impl_from!( ); impl_from!( u64x8: u64, - u64x8_from | i64x8, + u64x8_from, + test_v512 | i64x8, f64x8, u32x8, i32x8, @@ -298,7 +308,8 @@ impl_from!( impl_from!( f32x16: f32, - f32x16_from | u32x16, + f32x16_from, + test_v512 | u32x16, i32x16, u16x16, i16x16, @@ -307,7 +318,8 @@ impl_from!( ); impl_from!( i32x16: i32, - i32x16_from | u32x16, + i32x16_from, + test_v512 | u32x16, f32x16, u16x16, i16x16, @@ -316,7 +328,8 @@ impl_from!( ); impl_from!( u32x16: u32, - u32x16_from | i32x16, + u32x16_from, + test_v512 | i32x16, f32x16, u16x16, i16x16, @@ -324,8 +337,8 @@ impl_from!( i8x16 ); -impl_from!(i16x32: i16, i16x32_from | u16x32, u8x32, i8x32); -impl_from!(u16x32: u16, u16x32_from | i16x32, u8x32, i8x32); +impl_from!(i16x32: i16, i16x32_from, test_v512 | u16x32, u8x32, 
i8x32); +impl_from!(u16x32: u16, u16x32_from, test_v512 | i16x32, u8x32, i8x32); -impl_from!(i8x64: i8, i8x64_from | u8x64); -impl_from!(u8x64: u8, u8x64_from | i8x64); +impl_from!(i8x64: i8, i8x64_from, test_v512 | u8x64); +impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64); diff --git a/coresimd/ppsv/v64.rs b/coresimd/ppsv/v64.rs index ee8262a384..6da839861d 100644 --- a/coresimd/ppsv/v64.rs +++ b/coresimd/ppsv/v64.rs @@ -1,60 +1,57 @@ //! 64-bit wide portable packed vector types. - -simd_api_imports!(); - use coresimd::simd::{b8x2, b8x4}; simd_i_ty! { - i8x8: 8, i8, b8x8, i8x8_tests | + i8x8: 8, i8, b8x8, i8x8_tests, test_v64 | i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 64-bit vector with 8 `i8` lanes. } simd_u_ty! { - u8x8: 8, u8, b8x8, u8x8_tests | + u8x8: 8, u8, b8x8, u8x8_tests, test_v64 | u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 64-bit vector with 8 `u8` lanes. } simd_b_ty! { - b8x8: 8, i8, b8x8_tests | + b8x8: 8, i8, b8x8_tests, test_v64 | i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 64-bit vector with 8 `bool` lanes. } simd_i_ty! { - i16x4: 4, i16, b8x4, i16x4_tests | + i16x4: 4, i16, b8x4, i16x4_tests, test_v64 | i16, i16, i16, i16 | x0, x1, x2, x3 | /// A 64-bit vector with 4 `i16` lanes. } simd_u_ty! { - u16x4: 4, u16, b8x4, u16x4_tests | + u16x4: 4, u16, b8x4, u16x4_tests, test_v64 | u16, u16, u16, u16 | x0, x1, x2, x3 | /// A 64-bit vector with 4 `u16` lanes. } simd_i_ty! { - i32x2: 2, i32, b8x2, i32x2_tests | + i32x2: 2, i32, b8x2, i32x2_tests, test_v64 | i32, i32 | x0, x1 | /// A 64-bit vector with 2 `i32` lanes. } simd_u_ty! { - u32x2: 2, u32, b8x2, u32x2_tests | + u32x2: 2, u32, b8x2, u32x2_tests, test_v64 | u32, u32 | x0, x1 | /// A 64-bit vector with 2 `u32` lanes. } simd_f_ty! { - f32x2: 2, f32, b8x2, f32x2_tests | + f32x2: 2, f32, b8x2, f32x2_tests, test_v64 | f32, f32 | x0, x1 | /// A 64-bit vector with 2 `f32` lanes. @@ -62,7 +59,8 @@ simd_f_ty! 
{ impl_from_bits!( u32x2: u32, - u32x2_from_bits | i32x2, + u32x2_from_bits, + test_v64 | i32x2, f32x2, u16x4, i16x4, @@ -72,7 +70,8 @@ impl_from_bits!( ); impl_from_bits!( i32x2: i32, - i32x2_from_bits | u32x2, + i32x2_from_bits, + test_v64 | u32x2, f32x2, u16x4, i16x4, @@ -82,7 +81,8 @@ impl_from_bits!( ); impl_from_bits!( f32x2: f32, - f32x2_from_bits | i32x2, + f32x2_from_bits, + test_v64 | i32x2, u32x2, u16x4, i16x4, @@ -92,7 +92,8 @@ impl_from_bits!( ); impl_from_bits!( u16x4: u16, - u16x4_from_bits | u32x2, + u16x4_from_bits, + test_v64 | u32x2, i32x2, i16x4, u8x8, @@ -101,7 +102,8 @@ impl_from_bits!( ); impl_from_bits!( i16x4: i16, - i16x4_from_bits | u32x2, + i16x4_from_bits, + test_v64 | u32x2, i32x2, u16x4, u8x8, @@ -110,7 +112,8 @@ impl_from_bits!( ); impl_from_bits!( u8x8: u8, - u8x8_from_bits | u32x2, + u8x8_from_bits, + test_v64 | u32x2, i32x2, u16x4, i16x4, @@ -119,7 +122,8 @@ impl_from_bits!( ); impl_from_bits!( i8x8: i8, - i8x8_from_bits | u32x2, + i8x8_from_bits, + test_v64 | u32x2, i32x2, u16x4, i16x4, @@ -127,8 +131,11 @@ impl_from_bits!( b8x8 ); -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use coresimd::x86::__m64; +#[cfg(target_arch = "x86")] +use coresimd::arch::x86::__m64; + +#[cfg(target_arch = "x86_64")] +use coresimd::arch::x86_64::__m64; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] impl_from_bits_!(f32x2: __m64); @@ -147,7 +154,8 @@ impl_from_bits_!(i8x8: __m64); impl_from!( f32x2: f32, - f32x2_from | f64x2, + f32x2_from, + test_v64 | f64x2, u64x2, i64x2, u32x2, @@ -160,7 +168,8 @@ impl_from!( impl_from!( u32x2: u32, - u32x2_from | f64x2, + u32x2_from, + test_v64 | f64x2, u64x2, i64x2, f32x2, @@ -173,7 +182,8 @@ impl_from!( impl_from!( i32x2: i32, - i32x2_from | f64x2, + i32x2_from, + test_v64 | f64x2, u64x2, i64x2, f32x2, @@ -186,7 +196,8 @@ impl_from!( impl_from!( u16x4: u16, - u16x4_from | f64x4, + u16x4_from, + test_v64 | f64x4, u64x4, i64x4, f32x4, @@ -199,7 +210,8 @@ impl_from!( impl_from!( i16x4: i16, - i16x4_from | f64x4, + i16x4_from, + test_v64 | f64x4, u64x4, i64x4, f32x4, @@ -211,7 +223,8 @@ impl_from!( ); impl_from!( i8x8: i8, - i8x8_from | f64x8, + i8x8_from, + test_v64 | f64x8, u64x8, i64x8, f32x8, @@ -223,7 +236,8 @@ impl_from!( ); impl_from!( u8x8: u8, - u8x8_from | f64x8, + u8x8_from, + test_v64 | f64x8, u64x8, i64x8, f32x8, diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs index 76a1ad3a65..b4918b85ac 100644 --- a/crates/coresimd/src/lib.rs +++ b/crates/coresimd/src/lib.rs @@ -44,6 +44,16 @@ extern crate stdsimd_test; #[cfg(test)] extern crate test; +macro_rules! test_v16 { ($item:item) => {} } +macro_rules! test_v32 { ($item:item) => {} } +macro_rules! test_v64 { ($item:item) => {} } +macro_rules! test_v128 { ($item:item) => {} } +macro_rules! test_v256 { ($item:item) => {} } +macro_rules! test_v512 { ($item:item) => {} } +macro_rules! vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { $($f!($($args)*);)* } +} + #[path = "../../../coresimd/mod.rs"] mod coresimd; diff --git a/crates/coresimd/tests/v128.rs b/crates/coresimd/tests/v128.rs new file mode 100644 index 0000000000..8eb1e1801c --- /dev/null +++ b/crates/coresimd/tests/v128.rs @@ -0,0 +1,42 @@ +//! coresimd 128-bit wide vector tests + +#![cfg_attr(feature = "strict", deny(warnings))] +#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] +#![allow(unused_imports, dead_code)] + +#[cfg(test)] +extern crate coresimd; + +#[cfg(test)] +macro_rules! test_v16 { ($item:item) => {} } +#[cfg(test)] +macro_rules! 
test_v32 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v64 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v128 { ($item:item) => { $item } } +#[cfg(test)] +macro_rules! test_v256 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v512 { ($item:item) => {} } + +#[cfg(test)] +macro_rules! vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { } +} + +#[cfg(test)] +#[path = "../../../coresimd/ppsv/mod.rs"] +mod ppsv; + +#[cfg(test)] +use std::marker; + +#[cfg(all(test, target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(test, target_arch = "aarch64"))] +extern crate core as _core; + +#[cfg(all(test, target_arch = "aarch64"))] +use _core::num; diff --git a/crates/coresimd/tests/v16.rs b/crates/coresimd/tests/v16.rs new file mode 100644 index 0000000000..b44c03f281 --- /dev/null +++ b/crates/coresimd/tests/v16.rs @@ -0,0 +1,42 @@ +//! coresimd 16-bit wide vector tests + +#![cfg_attr(feature = "strict", deny(warnings))] +#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] +#![allow(unused_imports, dead_code)] + +#[cfg(test)] +extern crate coresimd; + +#[cfg(test)] +macro_rules! test_v16 { ($item:item) => { $item } } +#[cfg(test)] +macro_rules! test_v32 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v64 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v128 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v256 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v512 { ($item:item) => {} } + +#[cfg(test)] +macro_rules! vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { } +} + +#[cfg(test)] +#[path = "../../../coresimd/ppsv/mod.rs"] +mod ppsv; + +#[cfg(test)] +use std::marker; + +#[cfg(all(test, target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(test, target_arch = "aarch64"))] +extern crate core as _core; + +#[cfg(all(test, target_arch = "aarch64"))] +use _core::num; diff --git a/crates/coresimd/tests/v256.rs b/crates/coresimd/tests/v256.rs new file mode 100644 index 0000000000..e4f7416d2d --- /dev/null +++ b/crates/coresimd/tests/v256.rs @@ -0,0 +1,42 @@ +//! coresimd 256-bit wide vector tests + +#![cfg_attr(feature = "strict", deny(warnings))] +#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] +#![allow(unused_imports)] + +#[cfg(test)] +extern crate coresimd; + +#[cfg(test)] +macro_rules! test_v16 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v32 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v64 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v128 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v256 { ($item:item) => { $item } } +#[cfg(test)] +macro_rules! test_v512 { ($item:item) => {} } + +#[cfg(test)] +macro_rules! vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { } +} + +#[cfg(test)] +#[path = "../../../coresimd/ppsv/mod.rs"] +mod ppsv; + +#[cfg(test)] +use std::marker; + +#[cfg(all(test, target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(test, target_arch = "aarch64"))] +extern crate core as _core; + +#[cfg(all(test, target_arch = "aarch64"))] +use _core::num; diff --git a/crates/coresimd/tests/v32.rs b/crates/coresimd/tests/v32.rs new file mode 100644 index 0000000000..83991fc8b2 --- /dev/null +++ b/crates/coresimd/tests/v32.rs @@ -0,0 +1,42 @@ +//! coresimd 32-bit wide vector tests + +#![cfg_attr(feature = "strict", deny(warnings))] +#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] +#![allow(unused_imports, dead_code)] + +#[cfg(test)] +extern crate coresimd; + +#[cfg(test)] +macro_rules! test_v16 { ($item:item) => { } } +#[cfg(test)] +macro_rules! 
test_v32 { ($item:item) => { $item } } +#[cfg(test)] +macro_rules! test_v64 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v128 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v256 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v512 { ($item:item) => {} } + +#[cfg(test)] +macro_rules! vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { } +} + +#[cfg(test)] +#[path = "../../../coresimd/ppsv/mod.rs"] +mod ppsv; + +#[cfg(test)] +use std::marker; + +#[cfg(all(test, target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(test, target_arch = "aarch64"))] +extern crate core as _core; + +#[cfg(all(test, target_arch = "aarch64"))] +use _core::num; diff --git a/crates/coresimd/tests/v512.rs b/crates/coresimd/tests/v512.rs new file mode 100644 index 0000000000..6420ecb68e --- /dev/null +++ b/crates/coresimd/tests/v512.rs @@ -0,0 +1,42 @@ +//! coresimd 512-bit wide vector tests + +#![cfg_attr(feature = "strict", deny(warnings))] +#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] +#![allow(unused_imports)] + +#[cfg(test)] +extern crate coresimd; + +#[cfg(test)] +macro_rules! test_v16 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v32 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v64 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v128 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v256 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v512 { ($item:item) => { $item } } + +#[cfg(test)] +macro_rules! vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { } +} + +#[cfg(test)] +#[path = "../../../coresimd/ppsv/mod.rs"] +mod ppsv; + +#[cfg(test)] +use std::marker; + +#[cfg(all(test, target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(test, target_arch = "aarch64"))] +extern crate core as _core; + +#[cfg(all(test, target_arch = "aarch64"))] +use _core::num; diff --git a/crates/coresimd/tests/v64.rs b/crates/coresimd/tests/v64.rs new file mode 100644 index 0000000000..5434b4c5ab --- /dev/null +++ b/crates/coresimd/tests/v64.rs @@ -0,0 +1,42 @@ +//! coresimd 64-bit wide vector tests + +#![cfg_attr(feature = "strict", deny(warnings))] +#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] +#![allow(unused_imports, dead_code)] + +#[cfg(test)] +extern crate coresimd; + +#[cfg(test)] +macro_rules! test_v16 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v32 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v64 { ($item:item) => { $item } } +#[cfg(test)] +macro_rules! test_v128 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v256 { ($item:item) => {} } +#[cfg(test)] +macro_rules! test_v512 { ($item:item) => {} } + +#[cfg(test)] +macro_rules! 
vector_impl { + ($([$f:ident, $($args:tt)*]),*) => { } +} + +#[cfg(test)] +#[path = "../../../coresimd/ppsv/mod.rs"] +mod ppsv; + +#[cfg(test)] +use std::marker; + +#[cfg(all(test, target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(test, target_arch = "aarch64"))] +extern crate core as _core; + +#[cfg(all(test, target_arch = "aarch64"))] +use _core::num; From bdf0d254a43cc48d9a66927e7ff42c04421ba811 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 18 Mar 2018 12:00:16 +0100 Subject: [PATCH 2/2] use rustc reductions --- coresimd/ppsv/api/arithmetic_reductions.rs | 51 ++++- coresimd/ppsv/api/bitwise_reductions.rs | 111 ++++++++++- coresimd/ppsv/api/boolean_reductions.rs | 26 ++- coresimd/ppsv/api/minmax_reductions.rs | 49 ++++- coresimd/ppsv/api/mod.rs | 2 +- coresimd/ppsv/codegen/and.rs | 170 ----------------- coresimd/ppsv/codegen/max.rs | 196 ------------------- coresimd/ppsv/codegen/min.rs | 196 ------------------- coresimd/ppsv/codegen/mod.rs | 9 - coresimd/ppsv/codegen/or.rs | 170 ----------------- coresimd/ppsv/codegen/product.rs | 210 --------------------- coresimd/ppsv/codegen/sum.rs | 210 --------------------- coresimd/ppsv/codegen/xor.rs | 170 ----------------- coresimd/ppsv/mod.rs | 1 - coresimd/simd_llvm.rs | 14 ++ 15 files changed, 240 insertions(+), 1345 deletions(-) delete mode 100644 coresimd/ppsv/codegen/and.rs delete mode 100644 coresimd/ppsv/codegen/max.rs delete mode 100644 coresimd/ppsv/codegen/min.rs delete mode 100644 coresimd/ppsv/codegen/mod.rs delete mode 100644 coresimd/ppsv/codegen/or.rs delete mode 100644 coresimd/ppsv/codegen/product.rs delete mode 100644 coresimd/ppsv/codegen/sum.rs delete mode 100644 coresimd/ppsv/codegen/xor.rs diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs index 932ac49eb2..1e92205b8b 100644 --- a/coresimd/ppsv/api/arithmetic_reductions.rs +++ b/coresimd/ppsv/api/arithmetic_reductions.rs @@ -5,14 +5,61 @@ macro_rules! impl_arithmetic_reductions { ($id:ident, $elem_ty:ident) => { impl $id { /// Lane-wise addition of the vector elements. + /// + /// FIXME: document guarantees with respect to: + /// * integers: overflow behavior + /// * floats: order and NaNs + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn sum(self) -> $elem_ty { - super::codegen::sum::ReduceAdd::reduce_add(self) + use ::coresimd::simd_llvm::simd_reduce_add_ordered; + unsafe { + simd_reduce_add_ordered(self, 0 as $elem_ty) + } } + /// Lane-wise addition of the vector elements. + /// + /// FIXME: document guarantees with respect to: + /// * integers: overflow behavior + /// * floats: order and NaNs + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn sum(self) -> $elem_ty { + // FIXME: broken on AArch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x += self.extract(i) as $elem_ty; + } + x + } + /// Lane-wise multiplication of the vector elements. + /// + /// FIXME: document guarantees with respect to: + /// * integers: overflow behavior + /// * floats: order and NaNs + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn product(self) -> $elem_ty { - super::codegen::product::ReduceMul::reduce_mul(self) + use ::coresimd::simd_llvm::simd_reduce_mul_ordered; + unsafe { + simd_reduce_mul_ordered(self, 1 as $elem_ty) + } + } + /// Lane-wise multiplication of the vector elements. 
+ /// + /// FIXME: document guarantees with respect to: + /// * integers: overflow behavior + /// * floats: order and NaNs + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn product(self) -> $elem_ty { + // FIXME: broken on AArch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x *= self.extract(i) as $elem_ty; + } + x } } } diff --git a/coresimd/ppsv/api/bitwise_reductions.rs b/coresimd/ppsv/api/bitwise_reductions.rs index 124cc0b2c9..60eb648e42 100644 --- a/coresimd/ppsv/api/bitwise_reductions.rs +++ b/coresimd/ppsv/api/bitwise_reductions.rs @@ -5,41 +5,138 @@ macro_rules! impl_bitwise_reductions { ($id:ident, $elem_ty:ident) => { impl $id { /// Lane-wise bitwise `and` of the vector elements. + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn and(self) -> $elem_ty { - super::codegen::and::ReduceAnd::reduce_and(self) + use ::coresimd::simd_llvm::simd_reduce_and; + unsafe { + simd_reduce_and(self) + } } + /// Lane-wise bitwise `and` of the vector elements. + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn and(self) -> $elem_ty { + // FIXME: broken on aarch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x &= self.extract(i) as $elem_ty; + } + x + } + /// Lane-wise bitwise `or` of the vector elements. + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn or(self) -> $elem_ty { - super::codegen::or::ReduceOr::reduce_or(self) + use ::coresimd::simd_llvm::simd_reduce_or; + unsafe { + simd_reduce_or(self) + } } + /// Lane-wise bitwise `or` of the vector elements. + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn or(self) -> $elem_ty { + // FIXME: broken on aarch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x |= self.extract(i) as $elem_ty; + } + x + } + /// Lane-wise bitwise `xor` of the vector elements. + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn xor(self) -> $elem_ty { - super::codegen::xor::ReduceXor::reduce_xor(self) + use ::coresimd::simd_llvm::simd_reduce_xor; + unsafe { + simd_reduce_xor(self) + } + } + /// Lane-wise bitwise `xor` of the vector elements. + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn xor(self) -> $elem_ty { + // FIXME: broken on aarch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x ^= self.extract(i) as $elem_ty; + } + x } } } } macro_rules! impl_bool_bitwise_reductions { - ($id:ident, $elem_ty:ident) => { + ($id:ident, $elem_ty:ident, $internal_ty:ident) => { impl $id { /// Lane-wise bitwise `and` of the vector elements. + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn and(self) -> $elem_ty { - super::codegen::and::ReduceAnd::reduce_and(self) !=0 + use ::coresimd::simd_llvm::simd_reduce_and; + unsafe { + let r: $internal_ty = simd_reduce_and(self); + r != 0 + } + } + /// Lane-wise bitwise `and` of the vector elements. + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn and(self) -> $elem_ty { + // FIXME: broken on aarch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x &= self.extract(i) as $elem_ty; + } + x + } + + /// Lane-wise bitwise `or` of the vector elements. + #[cfg(not(target_arch = "aarch64"))] + #[inline] + pub fn or(self) -> $elem_ty { + use ::coresimd::simd_llvm::simd_reduce_or; + unsafe { + let r: $internal_ty = simd_reduce_or(self); + r != 0 + } } /// Lane-wise bitwise `or` of the vector elements. 
+ #[cfg(target_arch = "aarch64")] #[inline] pub fn or(self) -> $elem_ty { - super::codegen::or::ReduceOr::reduce_or(self) != 0 + // FIXME: broken on aarch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x |= self.extract(i) as $elem_ty; + } + x + } + + /// Lane-wise bitwise `xor` of the vector elements. + #[cfg(not(target_arch = "aarch64"))] + #[inline] + pub fn xor(self) -> $elem_ty { + use ::coresimd::simd_llvm::simd_reduce_xor; + unsafe { + let r: $internal_ty = simd_reduce_xor(self); + r != 0 + } } /// Lane-wise bitwise `xor` of the vector elements. + #[cfg(target_arch = "aarch64")] #[inline] pub fn xor(self) -> $elem_ty { - super::codegen::xor::ReduceXor::reduce_xor(self) != 0 + // FIXME: broken on aarch64 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x ^= self.extract(i) as $elem_ty; + } + x } } } diff --git a/coresimd/ppsv/api/boolean_reductions.rs b/coresimd/ppsv/api/boolean_reductions.rs index 3e22a6edbc..bb302e385b 100644 --- a/coresimd/ppsv/api/boolean_reductions.rs +++ b/coresimd/ppsv/api/boolean_reductions.rs @@ -5,19 +5,43 @@ macro_rules! impl_bool_reductions { ($id:ident) => { impl $id { /// Are `all` vector lanes `true`? + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn all(self) -> bool { + use ::coresimd::simd_llvm::simd_reduce_all; + unsafe { + simd_reduce_all(self) + } + } + /// Are `all` vector lanes `true`? + #[cfg(target_arch = "aarch64")] + #[inline] + pub fn all(self) -> bool { + // FIXME: Broken on AArch64 self.and() } + + /// Is `any` vector lanes `true`? + #[cfg(not(target_arch = "aarch64"))] + #[inline] + pub fn any(self) -> bool { + use ::coresimd::simd_llvm::simd_reduce_any; + unsafe { + simd_reduce_any(self) + } + } /// Is `any` vector lanes `true`? + #[cfg(target_arch = "aarch64")] #[inline] pub fn any(self) -> bool { + // FIXME: Broken on AArch64 self.or() } + /// Are `all` vector lanes `false`? #[inline] pub fn none(self) -> bool { - !self.or() + !self.any() } } } diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs index 35232d17a6..cdb862dfab 100644 --- a/coresimd/ppsv/api/minmax_reductions.rs +++ b/coresimd/ppsv/api/minmax_reductions.rs @@ -5,14 +5,59 @@ macro_rules! impl_minmax_reductions { ($id:ident, $elem_ty:ident) => { impl $id { /// Largest vector value. + /// + /// FIXME: document behavior for float vectors with NaNs. + #[cfg(not(target_arch = "aarch64"))] #[inline] pub fn max(self) -> $elem_ty { - super::codegen::max::ReduceMax::reduce_max(self) + use ::coresimd::simd_llvm::simd_reduce_max; + unsafe { + simd_reduce_max(self) + } + } + /// Largest vector value. + /// + /// FIXME: document behavior for float vectors with NaNs. + #[cfg(target_arch = "aarch64")] + #[allow(unused_imports)] + #[inline] + pub fn max(self) -> $elem_ty { + // FIXME: broken on AArch64 + use ::num::Float; + use ::cmp::Ord; + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.max(self.extract(i)); + } + x + } + + /// Smallest vector value. + /// + /// FIXME: document behavior for float vectors with NaNs. + #[cfg(not(target_arch = "aarch64"))] + #[inline] + pub fn min(self) -> $elem_ty { + use ::coresimd::simd_llvm::simd_reduce_min; + unsafe { + simd_reduce_min(self) + } } /// Smallest vector value. + /// + /// FIXME: document behavior for float vectors with NaNs. 
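A usage sketch of the reductions defined above (illustrative only; it assumes the portable vector types from coresimd::simd and the reduction methods exactly as generated by the macros in this patch):

use coresimd::simd::{b8x4, i32x4};

fn reduction_sketch() {
    let v = i32x4::new(1, 2, 3, 4);
    assert_eq!(v.sum(), 10);     // ordered add reduction, accumulator starts at 0
    assert_eq!(v.product(), 24); // ordered mul reduction, accumulator starts at 1
    assert_eq!(v.and(), 0);      // 1 & 2 & 3 & 4
    assert_eq!(v.or(), 7);       // 1 | 2 | 3 | 4
    assert_eq!(v.xor(), 4);      // 1 ^ 2 ^ 3 ^ 4
    assert_eq!(v.max(), 4);
    assert_eq!(v.min(), 1);

    let m = b8x4::new(true, true, false, true);
    assert!(m.any());   // at least one lane is true
    assert!(!m.all());  // not every lane is true
    assert!(!m.none()); // `none` is now defined as `!self.any()`
}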
+ #[cfg(target_arch = "aarch64")] + #[allow(unused_imports)] #[inline] pub fn min(self) -> $elem_ty { - super::codegen::min::ReduceMin::reduce_min(self) + // FIXME: broken on AArch64 + use ::num::Float; + use ::cmp::Ord; + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.min(self.extract(i)); + } + x } } } diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs index ed4e0e2c22..732c1c0b93 100644 --- a/coresimd/ppsv/api/mod.rs +++ b/coresimd/ppsv/api/mod.rs @@ -244,7 +244,7 @@ macro_rules! simd_b_ty { [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], [impl_bool_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], [impl_bitwise_ops, $id, true], - [impl_bool_bitwise_reductions, $id, bool], + [impl_bool_bitwise_reductions, $id, bool, $elem_ty], [impl_bool_reductions, $id], [impl_bool_cmp, $id, $id], [impl_eq, $id], diff --git a/coresimd/ppsv/codegen/and.rs b/coresimd/ppsv/codegen/and.rs deleted file mode 100644 index aaba2b3c85..0000000000 --- a/coresimd/ppsv/codegen/and.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! Code generation for the and reduction. -use coresimd::simd::*; - -/// LLVM intrinsics used in the and reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.and.i8.v2i8"] - fn reduce_and_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.and.u8.v2u8"] - fn reduce_and_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.and.i16.v2i16"] - fn reduce_and_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.and.u16.v2u16"] - fn reduce_and_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.and.i32.v2i32"] - fn reduce_and_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.and.u32.v2u32"] - fn reduce_and_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.and.i64.v2i64"] - fn reduce_and_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.and.u64.v2u64"] - fn reduce_and_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.and.i8.v4i8"] - fn reduce_and_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.and.u8.v4u8"] - fn reduce_and_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.and.i16.v4i16"] - fn reduce_and_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.and.u16.v4u16"] - fn reduce_and_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.and.i32.v4i32"] - fn reduce_and_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.and.u32.v4u32"] - fn reduce_and_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.and.i64.v4i64"] - fn reduce_and_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.and.u64.v4u64"] - fn reduce_and_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.and.i8.v8i8"] - fn reduce_and_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.and.u8.v8u8"] - fn reduce_and_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.and.i16.v8i16"] - fn reduce_and_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.and.u16.v8u16"] - fn reduce_and_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.and.i32.v8i32"] - fn reduce_and_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.and.u32.v8u32"] - fn reduce_and_u32x8(x: u32x8) -> u32; - #[link_name = "llvm.experimental.vector.reduce.and.i64.v8i64"] - fn 
reduce_and_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.and.u64.v8u64"] - fn reduce_and_u64x8(x: u64x8) -> u64; - #[link_name = "llvm.experimental.vector.reduce.and.i8.v16i8"] - fn reduce_and_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.and.u8.v16u8"] - fn reduce_and_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.and.i16.v16i16"] - fn reduce_and_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.and.u16.v16u16"] - fn reduce_and_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.and.i32.v16i32"] - fn reduce_and_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.and.u32.v16u32"] - fn reduce_and_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.and.i8.v32i8"] - fn reduce_and_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.and.u8.v32u8"] - fn reduce_and_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.and.i16.v32i16"] - fn reduce_and_i16x32(x: i16x32) -> i16; - #[link_name = "llvm.experimental.vector.reduce.and.u16.v32u16"] - fn reduce_and_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.and.i8.v64i8"] - fn reduce_and_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.and.u8.v64u8"] - fn reduce_and_u8x64(x: u8x64) -> u8; -} - -/// Reduction: horizontal bitwise and of the vector elements. -#[cfg_attr(feature = "cargo-clippy", allow(stutter))] -pub trait ReduceAnd { - /// Result type of the reduction. - type Acc; - /// Computes the horizontal bitwise and of the vector elements - fn reduce_and(self) -> Self::Acc; -} - -macro_rules! red_and { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceAnd for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_and(self) -> Self::Acc { - unsafe { $llvm_intr(self.into_bits()) } - } - // FIXME: broken in AArch64 - #[cfg(target_arch = "aarch64")] - #[inline] - fn reduce_and(self) -> Self::Acc { - let mut x = self.extract(0) as Self::Acc; - for i in 1..$id::lanes() { - x &= self.extract(i) as Self::Acc; - } - x - } - } - }; -} -red_and!(i8x2, i8, reduce_and_i8x2); -red_and!(u8x2, u8, reduce_and_u8x2); -red_and!(i16x2, i16, reduce_and_i16x2); -red_and!(u16x2, u16, reduce_and_u16x2); -red_and!(i32x2, i32, reduce_and_i32x2); -red_and!(u32x2, u32, reduce_and_u32x2); -red_and!(i64x2, i64, reduce_and_i64x2); -red_and!(u64x2, u64, reduce_and_u64x2); -red_and!(i8x4, i8, reduce_and_i8x4); -red_and!(u8x4, u8, reduce_and_u8x4); -red_and!(i16x4, i16, reduce_and_i16x4); -red_and!(u16x4, u16, reduce_and_u16x4); -red_and!(i32x4, i32, reduce_and_i32x4); -red_and!(u32x4, u32, reduce_and_u32x4); -red_and!(i64x4, i64, reduce_and_i64x4); -red_and!(u64x4, u64, reduce_and_u64x4); -red_and!(i8x8, i8, reduce_and_i8x8); -red_and!(u8x8, u8, reduce_and_u8x8); -red_and!(i16x8, i16, reduce_and_i16x8); -red_and!(u16x8, u16, reduce_and_u16x8); -red_and!(i32x8, i32, reduce_and_i32x8); -red_and!(u32x8, u32, reduce_and_u32x8); -red_and!(i64x8, i64, reduce_and_i64x8); -red_and!(u64x8, u64, reduce_and_u64x8); -red_and!(i8x16, i8, reduce_and_i8x16); -red_and!(u8x16, u8, reduce_and_u8x16); -red_and!(i16x16, i16, reduce_and_i16x16); -red_and!(u16x16, u16, reduce_and_u16x16); -red_and!(i32x16, i32, reduce_and_i32x16); -red_and!(u32x16, u32, reduce_and_u32x16); -red_and!(i8x32, i8, reduce_and_i8x32); -red_and!(u8x32, u8, reduce_and_u8x32); -red_and!(i16x32, i16, reduce_and_i16x32); 
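The block of per-type `link_name` bindings being deleted here is what each of these codegen modules consisted of; the replacement is a single generic rustc intrinsic per operation. A sketch of the new shape for the `and` reduction (assumption: the `simd_reduce_*` intrinsics are declared as platform intrinsics in coresimd/simd_llvm.rs, which this patch touches but which is not shown here):

// Sketch only: the real declarations live in coresimd/simd_llvm.rs and the
// real callers are generated by impl_bitwise_reductions! above.
extern "platform-intrinsic" {
    fn simd_reduce_and<T, U>(x: T) -> U;
}

use coresimd::simd::i32x4;

#[inline]
fn horizontal_and(x: i32x4) -> i32 {
    // One generic intrinsic replaces the dozens of monomorphic
    // llvm.experimental.vector.reduce.and.* bindings deleted above.
    unsafe { simd_reduce_and(x) }
}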
-red_and!(u16x32, u16, reduce_and_u16x32); -red_and!(i8x64, i8, reduce_and_i8x64); -red_and!(u8x64, u8, reduce_and_u8x64); - -red_and!(b8x2, i8, reduce_and_i8x2); -red_and!(b8x4, i8, reduce_and_i8x4); -red_and!(b8x8, i8, reduce_and_i8x8); -red_and!(b8x16, i8, reduce_and_i8x16); -red_and!(b8x32, i8, reduce_and_i8x32); -red_and!(b8x64, i8, reduce_and_i8x64); - -#[cfg(test)] -mod tests { - use super::ReduceAnd; - use coresimd::simd::*; - - // note: these are tested in the portable vector API tests - - #[test] - fn reduce_and_i32x4() { - let v = i32x4::splat(1); - assert_eq!(v.reduce_and(), 1_i32); - let v = i32x4::new(1, 1, 0, 1); - assert_eq!(v.reduce_and(), 0_i32); - } -} diff --git a/coresimd/ppsv/codegen/max.rs b/coresimd/ppsv/codegen/max.rs deleted file mode 100644 index 497fbe46f3..0000000000 --- a/coresimd/ppsv/codegen/max.rs +++ /dev/null @@ -1,196 +0,0 @@ -//! Code generation for the max reduction. -use coresimd::simd::*; - -/// LLVM intrinsics used in the max reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.smax.i8.v2i8"] - fn reduce_max_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umax.u8.v2u8"] - fn reduce_max_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smax.i16.v2i16"] - fn reduce_max_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umax.u16.v2u16"] - fn reduce_max_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smax.i32.v2i32"] - fn reduce_max_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umax.u32.v2u32"] - fn reduce_max_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smax.i64.v2i64"] - fn reduce_max_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.umax.u64.v2u64"] - fn reduce_max_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.smax.i8.v4i8"] - fn reduce_max_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umax.u8.v4u8"] - fn reduce_max_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smax.i16.v4i16"] - fn reduce_max_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umax.u16.v4u16"] - fn reduce_max_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smax.i32.v4i32"] - fn reduce_max_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umax.u32.v4u32"] - fn reduce_max_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smax.i64.v4i64"] - fn reduce_max_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.umax.u64.v4u64"] - fn reduce_max_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.smax.i8.v8i8"] - fn reduce_max_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umax.u8.v8u8"] - fn reduce_max_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smax.i16.v8i16"] - fn reduce_max_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umax.u16.v8u16"] - fn reduce_max_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smax.i32.v8i32"] - fn reduce_max_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umax.u32.v8u32"] - fn reduce_max_u32x8(x: u32x8) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smax.i64.v8i64"] - fn reduce_max_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.umax.u64.v8u64"] - fn reduce_max_u64x8(x: u64x8) -> u64; - 
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v16i8"] - fn reduce_max_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umax.u8.v16u8"] - fn reduce_max_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smax.i16.v16i16"] - fn reduce_max_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umax.u16.v16u16"] - fn reduce_max_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smax.i32.v16i32"] - fn reduce_max_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umax.u32.v16u32"] - fn reduce_max_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smax.i8.v32i8"] - fn reduce_max_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umax.u8.v32u8"] - fn reduce_max_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smax.i16.v32i16"] - fn reduce_max_i16x32(x: i16x32) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umax.u16.v32u16"] - fn reduce_max_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smax.i8.v64i8"] - fn reduce_max_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umax.u8.v64u8"] - fn reduce_max_u8x64(x: u8x64) -> u8; - #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v2f32"] - fn reduce_fmax_f32x2(x: f32x2) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmax.f64.v2f64"] - fn reduce_fmax_f64x2(x: f64x2) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v4f32"] - fn reduce_fmax_f32x4(x: f32x4) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmax.f64.v4f64"] - fn reduce_fmax_f64x4(x: f64x4) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v8f32"] - fn reduce_fmax_f32x8(x: f32x8) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmax.f64.v8f64"] - fn reduce_fmax_f64x8(x: f64x8) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v16f32"] - fn reduce_fmax_f32x16(x: f32x16) -> f32; -} - -/// Reduction: horizontal max of the vector elements. -#[cfg_attr(feature = "cargo-clippy", allow(stutter))] -pub trait ReduceMax { - /// Result type of the reduction. - type Acc; - /// Computes the horizontal max of the vector elements. - fn reduce_max(self) -> Self::Acc; -} - -macro_rules! 
red_max { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceMax for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_max(self) -> Self::Acc { - unsafe { $llvm_intr(self) } - } - // FIXME: broken on AArch64 - #[cfg(target_arch = "aarch64")] - #[allow(unused_imports)] - #[inline] - fn reduce_max(self) -> Self::Acc { - use ::num::Float; - use ::cmp::Ord; - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x = x.max(self.extract(i)); - } - x - } - } - }; -} -red_max!(i8x2, i8, reduce_max_i8x2); -red_max!(u8x2, u8, reduce_max_u8x2); -red_max!(i16x2, i16, reduce_max_i16x2); -red_max!(u16x2, u16, reduce_max_u16x2); -red_max!(i32x2, i32, reduce_max_i32x2); -red_max!(u32x2, u32, reduce_max_u32x2); -red_max!(i64x2, i64, reduce_max_i64x2); -red_max!(u64x2, u64, reduce_max_u64x2); -red_max!(i8x4, i8, reduce_max_i8x4); -red_max!(u8x4, u8, reduce_max_u8x4); -red_max!(i16x4, i16, reduce_max_i16x4); -red_max!(u16x4, u16, reduce_max_u16x4); -red_max!(i32x4, i32, reduce_max_i32x4); -red_max!(u32x4, u32, reduce_max_u32x4); -red_max!(i64x4, i64, reduce_max_i64x4); -red_max!(u64x4, u64, reduce_max_u64x4); -red_max!(i8x8, i8, reduce_max_i8x8); -red_max!(u8x8, u8, reduce_max_u8x8); -red_max!(i16x8, i16, reduce_max_i16x8); -red_max!(u16x8, u16, reduce_max_u16x8); -red_max!(i32x8, i32, reduce_max_i32x8); -red_max!(u32x8, u32, reduce_max_u32x8); -red_max!(i64x8, i64, reduce_max_i64x8); -red_max!(u64x8, u64, reduce_max_u64x8); -red_max!(i8x16, i8, reduce_max_i8x16); -red_max!(u8x16, u8, reduce_max_u8x16); -red_max!(i16x16, i16, reduce_max_i16x16); -red_max!(u16x16, u16, reduce_max_u16x16); -red_max!(i32x16, i32, reduce_max_i32x16); -red_max!(u32x16, u32, reduce_max_u32x16); -red_max!(i8x32, i8, reduce_max_i8x32); -red_max!(u8x32, u8, reduce_max_u8x32); -red_max!(i16x32, i16, reduce_max_i16x32); -red_max!(u16x32, u16, reduce_max_u16x32); -red_max!(i8x64, i8, reduce_max_i8x64); -red_max!(u8x64, u8, reduce_max_u8x64); - -red_max!(f32x2, f32, reduce_fmax_f32x2); -red_max!(f64x2, f64, reduce_fmax_f64x2); -red_max!(f32x4, f32, reduce_fmax_f32x4); -red_max!(f64x4, f64, reduce_fmax_f64x4); -red_max!(f32x8, f32, reduce_fmax_f32x8); -red_max!(f64x8, f64, reduce_fmax_f64x8); -red_max!(f32x16, f32, reduce_fmax_f32x16); - -#[cfg(test)] -mod tests { - use super::ReduceMax; - use coresimd::simd::*; - - // note: these are tested in the portable vector API tests - - #[test] - fn reduce_max_i32x4() { - let v = i32x4::new(1, 2, -1, 3); - assert_eq!(v.reduce_max(), 3_i32); - } - #[test] - fn reduce_max_u32x4() { - let v = u32x4::new(4, 2, 7, 3); - assert_eq!(v.reduce_max(), 7_u32); - } - #[test] - fn reduce_max_f32x4() { - let v = f32x4::new(4., 2., -1., 3.); - assert_eq!(v.reduce_max(), 4.); - } -} diff --git a/coresimd/ppsv/codegen/min.rs b/coresimd/ppsv/codegen/min.rs deleted file mode 100644 index d0434ce4be..0000000000 --- a/coresimd/ppsv/codegen/min.rs +++ /dev/null @@ -1,196 +0,0 @@ -//! Code generation for the min reduction. 
-use coresimd::simd::*; - -/// LLVM intrinsics used in the min reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.smin.i8.v2i8"] - fn reduce_min_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umin.u8.v2u8"] - fn reduce_min_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smin.i16.v2i16"] - fn reduce_min_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umin.u16.v2u16"] - fn reduce_min_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smin.i32.v2i32"] - fn reduce_min_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umin.u32.v2u32"] - fn reduce_min_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smin.i64.v2i64"] - fn reduce_min_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.umin.u64.v2u64"] - fn reduce_min_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.smin.i8.v4i8"] - fn reduce_min_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umin.u8.v4u8"] - fn reduce_min_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smin.i16.v4i16"] - fn reduce_min_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umin.u16.v4u16"] - fn reduce_min_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smin.i32.v4i32"] - fn reduce_min_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umin.u32.v4u32"] - fn reduce_min_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smin.i64.v4i64"] - fn reduce_min_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.umin.u64.v4u64"] - fn reduce_min_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.smin.i8.v8i8"] - fn reduce_min_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umin.u8.v8u8"] - fn reduce_min_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smin.i16.v8i16"] - fn reduce_min_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umin.u16.v8u16"] - fn reduce_min_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smin.i32.v8i32"] - fn reduce_min_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umin.u32.v8u32"] - fn reduce_min_u32x8(x: u32x8) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smin.i64.v8i64"] - fn reduce_min_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.umin.u64.v8u64"] - fn reduce_min_u64x8(x: u64x8) -> u64; - #[link_name = "llvm.experimental.vector.reduce.smin.i8.v16i8"] - fn reduce_min_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umin.u8.v16u8"] - fn reduce_min_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smin.i16.v16i16"] - fn reduce_min_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umin.u16.v16u16"] - fn reduce_min_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smin.i32.v16i32"] - fn reduce_min_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.umin.u32.v16u32"] - fn reduce_min_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.smin.i8.v32i8"] - fn reduce_min_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umin.u8.v32u8"] - fn reduce_min_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.smin.i16.v32i16"] - fn 
reduce_min_i16x32(x: i16x32) -> i16; - #[link_name = "llvm.experimental.vector.reduce.umin.u16.v32u16"] - fn reduce_min_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.smin.i8.v64i8"] - fn reduce_min_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.umin.u8.v64u8"] - fn reduce_min_u8x64(x: u8x64) -> u8; - #[link_name = "llvm.experimental.vector.reduce.fmin.f32.v2f32"] - fn reduce_fmin_f32x2(x: f32x2) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmin.f64.v2f64"] - fn reduce_fmin_f64x2(x: f64x2) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmin.f32.v4f32"] - fn reduce_fmin_f32x4(x: f32x4) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmin.f64.v4f64"] - fn reduce_fmin_f64x4(x: f64x4) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmin.f32.v8f32"] - fn reduce_fmin_f32x8(x: f32x8) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmin.f64.v8f64"] - fn reduce_fmin_f64x8(x: f64x8) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmin.f32.v16f32"] - fn reduce_fmin_f32x16(x: f32x16) -> f32; -} - -/// Reduction: horizontal max of the vector elements. -#[cfg_attr(feature = "cargo-clippy", allow(stutter))] -pub trait ReduceMin { - /// Result type of the reduction. - type Acc; - /// Computes the horizontal max of the vector elements. - fn reduce_min(self) -> Self::Acc; -} - -macro_rules! red_min { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceMin for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_min(self) -> Self::Acc { - unsafe { $llvm_intr(self) } - } - // FIXME: broken on AArch64 - #[cfg(target_arch = "aarch64")] - #[allow(unused_imports)] - #[inline] - fn reduce_min(self) -> Self::Acc { - use ::num::Float; - use ::cmp::Ord; - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x = x.min(self.extract(i)); - } - x - } - } - }; -} -red_min!(i8x2, i8, reduce_min_i8x2); -red_min!(u8x2, u8, reduce_min_u8x2); -red_min!(i16x2, i16, reduce_min_i16x2); -red_min!(u16x2, u16, reduce_min_u16x2); -red_min!(i32x2, i32, reduce_min_i32x2); -red_min!(u32x2, u32, reduce_min_u32x2); -red_min!(i64x2, i64, reduce_min_i64x2); -red_min!(u64x2, u64, reduce_min_u64x2); -red_min!(i8x4, i8, reduce_min_i8x4); -red_min!(u8x4, u8, reduce_min_u8x4); -red_min!(i16x4, i16, reduce_min_i16x4); -red_min!(u16x4, u16, reduce_min_u16x4); -red_min!(i32x4, i32, reduce_min_i32x4); -red_min!(u32x4, u32, reduce_min_u32x4); -red_min!(i64x4, i64, reduce_min_i64x4); -red_min!(u64x4, u64, reduce_min_u64x4); -red_min!(i8x8, i8, reduce_min_i8x8); -red_min!(u8x8, u8, reduce_min_u8x8); -red_min!(i16x8, i16, reduce_min_i16x8); -red_min!(u16x8, u16, reduce_min_u16x8); -red_min!(i32x8, i32, reduce_min_i32x8); -red_min!(u32x8, u32, reduce_min_u32x8); -red_min!(i64x8, i64, reduce_min_i64x8); -red_min!(u64x8, u64, reduce_min_u64x8); -red_min!(i8x16, i8, reduce_min_i8x16); -red_min!(u8x16, u8, reduce_min_u8x16); -red_min!(i16x16, i16, reduce_min_i16x16); -red_min!(u16x16, u16, reduce_min_u16x16); -red_min!(i32x16, i32, reduce_min_i32x16); -red_min!(u32x16, u32, reduce_min_u32x16); -red_min!(i8x32, i8, reduce_min_i8x32); -red_min!(u8x32, u8, reduce_min_u8x32); -red_min!(i16x32, i16, reduce_min_i16x32); -red_min!(u16x32, u16, reduce_min_u16x32); -red_min!(i8x64, i8, reduce_min_i8x64); -red_min!(u8x64, u8, reduce_min_u8x64); - -red_min!(f32x2, f32, reduce_fmin_f32x2); -red_min!(f64x2, f64, reduce_fmin_f64x2); -red_min!(f32x4, f32, reduce_fmin_f32x4); -red_min!(f64x4, f64, 
reduce_fmin_f64x4); -red_min!(f32x8, f32, reduce_fmin_f32x8); -red_min!(f64x8, f64, reduce_fmin_f64x8); -red_min!(f32x16, f32, reduce_fmin_f32x16); - -#[cfg(test)] -mod tests { - use super::ReduceMin; - use coresimd::simd::*; - - // note: these are tested in the portable vector API tests - - #[test] - fn reduce_min_i32x4() { - let v = i32x4::new(1, 2, -1, 3); - assert_eq!(v.reduce_min(), -1_i32); - } - #[test] - fn reduce_min_u32x4() { - let v = u32x4::new(4, 2, 7, 3); - assert_eq!(v.reduce_min(), 2_u32); - } - #[test] - fn reduce_min_f32x4() { - let v = f32x4::new(4., 2., -1., 3.); - assert_eq!(v.reduce_min(), -1.); - } -} diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs deleted file mode 100644 index 26beb14563..0000000000 --- a/coresimd/ppsv/codegen/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Code Generation - -pub mod sum; -pub mod product; -pub mod and; -pub mod or; -pub mod xor; -pub mod min; -pub mod max; diff --git a/coresimd/ppsv/codegen/or.rs b/coresimd/ppsv/codegen/or.rs deleted file mode 100644 index c5b8711f6a..0000000000 --- a/coresimd/ppsv/codegen/or.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! Code generation for the or reduction. -use coresimd::simd::*; - -/// LLVM intrinsics used in the or reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.or.i8.v2i8"] - fn reduce_or_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.or.u8.v2u8"] - fn reduce_or_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.or.i16.v2i16"] - fn reduce_or_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.or.u16.v2u16"] - fn reduce_or_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.or.i32.v2i32"] - fn reduce_or_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.or.u32.v2u32"] - fn reduce_or_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.or.i64.v2i64"] - fn reduce_or_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.or.u64.v2u64"] - fn reduce_or_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.or.i8.v4i8"] - fn reduce_or_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.or.u8.v4u8"] - fn reduce_or_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.or.i16.v4i16"] - fn reduce_or_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.or.u16.v4u16"] - fn reduce_or_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.or.i32.v4i32"] - fn reduce_or_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.or.u32.v4u32"] - fn reduce_or_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.or.i64.v4i64"] - fn reduce_or_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.or.u64.v4u64"] - fn reduce_or_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.or.i8.v8i8"] - fn reduce_or_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.or.u8.v8u8"] - fn reduce_or_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.or.i16.v8i16"] - fn reduce_or_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.or.u16.v8u16"] - fn reduce_or_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.or.i32.v8i32"] - fn reduce_or_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.or.u32.v8u32"] - fn reduce_or_u32x8(x: u32x8) -> u32; - #[link_name = 
"llvm.experimental.vector.reduce.or.i64.v8i64"] - fn reduce_or_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.or.u64.v8u64"] - fn reduce_or_u64x8(x: u64x8) -> u64; - #[link_name = "llvm.experimental.vector.reduce.or.i8.v16i8"] - fn reduce_or_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.or.u8.v16u8"] - fn reduce_or_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.or.i16.v16i16"] - fn reduce_or_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.or.u16.v16u16"] - fn reduce_or_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.or.i32.v16i32"] - fn reduce_or_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.or.u32.v16u32"] - fn reduce_or_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.or.i8.v32i8"] - fn reduce_or_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.or.u8.v32u8"] - fn reduce_or_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.or.i16.v32i16"] - fn reduce_or_i16x32(x: i16x32) -> i16; - #[link_name = "llvm.experimental.vector.reduce.or.u16.v32u16"] - fn reduce_or_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.or.i8.v64i8"] - fn reduce_or_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.or.u8.v64u8"] - fn reduce_or_u8x64(x: u8x64) -> u8; -} - -/// Reduction: horizontal bitwise or of the vector elements. -#[cfg_attr(feature = "cargo-clippy", allow(stutter))] -pub trait ReduceOr { - /// Result of the reduction. - type Acc; - /// Computes the horizontal bitwise or of the vector elements. - fn reduce_or(self) -> Self::Acc; -} - -macro_rules! red_or { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceOr for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_or(self) -> Self::Acc { - unsafe { $llvm_intr(self.into_bits()) } - } - // FIXME: broken in AArch64 - #[cfg(target_arch = "aarch64")] - #[inline] - fn reduce_or(self) -> Self::Acc { - let mut x = self.extract(0) as Self::Acc; - for i in 1..$id::lanes() { - x |= self.extract(i) as Self::Acc; - } - x - } - } - }; -} -red_or!(i8x2, i8, reduce_or_i8x2); -red_or!(u8x2, u8, reduce_or_u8x2); -red_or!(i16x2, i16, reduce_or_i16x2); -red_or!(u16x2, u16, reduce_or_u16x2); -red_or!(i32x2, i32, reduce_or_i32x2); -red_or!(u32x2, u32, reduce_or_u32x2); -red_or!(i64x2, i64, reduce_or_i64x2); -red_or!(u64x2, u64, reduce_or_u64x2); -red_or!(i8x4, i8, reduce_or_i8x4); -red_or!(u8x4, u8, reduce_or_u8x4); -red_or!(i16x4, i16, reduce_or_i16x4); -red_or!(u16x4, u16, reduce_or_u16x4); -red_or!(i32x4, i32, reduce_or_i32x4); -red_or!(u32x4, u32, reduce_or_u32x4); -red_or!(i64x4, i64, reduce_or_i64x4); -red_or!(u64x4, u64, reduce_or_u64x4); -red_or!(i8x8, i8, reduce_or_i8x8); -red_or!(u8x8, u8, reduce_or_u8x8); -red_or!(i16x8, i16, reduce_or_i16x8); -red_or!(u16x8, u16, reduce_or_u16x8); -red_or!(i32x8, i32, reduce_or_i32x8); -red_or!(u32x8, u32, reduce_or_u32x8); -red_or!(i64x8, i64, reduce_or_i64x8); -red_or!(u64x8, u64, reduce_or_u64x8); -red_or!(i8x16, i8, reduce_or_i8x16); -red_or!(u8x16, u8, reduce_or_u8x16); -red_or!(i16x16, i16, reduce_or_i16x16); -red_or!(u16x16, u16, reduce_or_u16x16); -red_or!(i32x16, i32, reduce_or_i32x16); -red_or!(u32x16, u32, reduce_or_u32x16); -red_or!(i8x32, i8, reduce_or_i8x32); -red_or!(u8x32, u8, reduce_or_u8x32); -red_or!(i16x32, i16, reduce_or_i16x32); -red_or!(u16x32, u16, reduce_or_u16x32); 
-red_or!(i8x64, i8, reduce_or_i8x64); -red_or!(u8x64, u8, reduce_or_u8x64); - -red_or!(b8x2, i8, reduce_or_i8x2); -red_or!(b8x4, i8, reduce_or_i8x4); -red_or!(b8x8, i8, reduce_or_i8x8); -red_or!(b8x16, i8, reduce_or_i8x16); -red_or!(b8x32, i8, reduce_or_i8x32); -red_or!(b8x64, i8, reduce_or_i8x64); - -#[cfg(test)] -mod tests { - use super::ReduceOr; - use coresimd::simd::*; - - // note: these are tested in the portable vector API tests - - #[test] - fn reduce_or_i32x4() { - let v = i32x4::splat(1); - assert_eq!(v.reduce_or(), 1_i32); - let v = i32x4::new(1, 1, 0, 1); - assert_eq!(v.reduce_or(), 1_i32); - } -} diff --git a/coresimd/ppsv/codegen/product.rs b/coresimd/ppsv/codegen/product.rs deleted file mode 100644 index 00bce740f1..0000000000 --- a/coresimd/ppsv/codegen/product.rs +++ /dev/null @@ -1,210 +0,0 @@ -//! Code generation for the product reduction. -use coresimd::simd::*; - -/// LLVM intrinsics used in the product reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.mul.i8.v2i8"] - fn reduce_mul_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.mul.u8.v2u8"] - fn reduce_mul_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.mul.i16.v2i16"] - fn reduce_mul_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.mul.u16.v2u16"] - fn reduce_mul_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.mul.i32.v2i32"] - fn reduce_mul_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.mul.u32.v2u32"] - fn reduce_mul_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.mul.i64.v2i64"] - fn reduce_mul_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.mul.u64.v2u64"] - fn reduce_mul_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.mul.i8.v4i8"] - fn reduce_mul_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.mul.u8.v4u8"] - fn reduce_mul_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.mul.i16.v4i16"] - fn reduce_mul_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.mul.u16.v4u16"] - fn reduce_mul_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.mul.i32.v4i32"] - fn reduce_mul_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.mul.u32.v4u32"] - fn reduce_mul_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.mul.i64.v4i64"] - fn reduce_mul_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.mul.u64.v4u64"] - fn reduce_mul_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.mul.i8.v8i8"] - fn reduce_mul_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.mul.u8.v8u8"] - fn reduce_mul_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.mul.i16.v8i16"] - fn reduce_mul_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.mul.u16.v8u16"] - fn reduce_mul_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.mul.i32.v8i32"] - fn reduce_mul_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.mul.u32.v8u32"] - fn reduce_mul_u32x8(x: u32x8) -> u32; - #[link_name = "llvm.experimental.vector.reduce.mul.i64.v8i64"] - fn reduce_mul_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.mul.u64.v8u64"] - fn reduce_mul_u64x8(x: u64x8) -> u64; - #[link_name = "llvm.experimental.vector.reduce.mul.i8.v16i8"] - fn 
reduce_mul_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.mul.u8.v16u8"] - fn reduce_mul_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.mul.i16.v16i16"] - fn reduce_mul_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.mul.u16.v16u16"] - fn reduce_mul_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.mul.i32.v16i32"] - fn reduce_mul_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.mul.u32.v16u32"] - fn reduce_mul_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.mul.i8.v32i8"] - fn reduce_mul_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.mul.u8.v32u8"] - fn reduce_mul_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.mul.i16.v32i16"] - fn reduce_mul_i16x32(x: i16x32) -> i16; - #[link_name = "llvm.experimental.vector.reduce.mul.u16.v32u16"] - fn reduce_mul_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.mul.i8.v64i8"] - fn reduce_mul_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.mul.u8.v64u8"] - fn reduce_mul_u8x64(x: u8x64) -> u8; - #[link_name = "llvm.experimental.vector.reduce.fmul.f32.v2f32"] - fn reduce_fmul_f32x2(acc: f32, x: f32x2) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmul.f64.v2f64"] - fn reduce_fmul_f64x2(acc: f64, x: f64x2) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmul.f32.v4f32"] - fn reduce_fmul_f32x4(acc: f32, x: f32x4) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmul.f64.v4f64"] - fn reduce_fmul_f64x4(acc: f64, x: f64x4) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmul.f32.v8f32"] - fn reduce_fmul_f32x8(acc: f32, x: f32x8) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fmul.f64.v8f64"] - fn reduce_fmul_f64x8(acc: f64, x: f64x8) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fmul.f32.v16f32"] - fn reduce_fmul_f32x16(acc: f32, x: f32x16) -> f32; -} - -/// Reduction: horizontal product of the vector elements. -pub trait ReduceMul { - /// Result type of the reduction. - type Acc; - /// Computes the horizontal product of the vector elements. - fn reduce_mul(self) -> Self::Acc; -} - -macro_rules! 
red_mul { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceMul for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_mul(self) -> Self::Acc { - unsafe { $llvm_intr(self) } - } - // FIXME: broken in AArch64 - #[cfg(target_arch = "aarch64")] - #[inline] - fn reduce_mul(self) -> Self::Acc { - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x *= self.extract(i); - } - x - } - } - }; -} -red_mul!(i8x2, i8, reduce_mul_i8x2); -red_mul!(u8x2, u8, reduce_mul_u8x2); -red_mul!(i16x2, i16, reduce_mul_i16x2); -red_mul!(u16x2, u16, reduce_mul_u16x2); -red_mul!(i32x2, i32, reduce_mul_i32x2); -red_mul!(u32x2, u32, reduce_mul_u32x2); -red_mul!(i64x2, i64, reduce_mul_i64x2); -red_mul!(u64x2, u64, reduce_mul_u64x2); -red_mul!(i8x4, i8, reduce_mul_i8x4); -red_mul!(u8x4, u8, reduce_mul_u8x4); -red_mul!(i16x4, i16, reduce_mul_i16x4); -red_mul!(u16x4, u16, reduce_mul_u16x4); -red_mul!(i32x4, i32, reduce_mul_i32x4); -red_mul!(u32x4, u32, reduce_mul_u32x4); -red_mul!(i64x4, i64, reduce_mul_i64x4); -red_mul!(u64x4, u64, reduce_mul_u64x4); -red_mul!(i8x8, i8, reduce_mul_i8x8); -red_mul!(u8x8, u8, reduce_mul_u8x8); -red_mul!(i16x8, i16, reduce_mul_i16x8); -red_mul!(u16x8, u16, reduce_mul_u16x8); -red_mul!(i32x8, i32, reduce_mul_i32x8); -red_mul!(u32x8, u32, reduce_mul_u32x8); -red_mul!(i64x8, i64, reduce_mul_i64x8); -red_mul!(u64x8, u64, reduce_mul_u64x8); -red_mul!(i8x16, i8, reduce_mul_i8x16); -red_mul!(u8x16, u8, reduce_mul_u8x16); -red_mul!(i16x16, i16, reduce_mul_i16x16); -red_mul!(u16x16, u16, reduce_mul_u16x16); -red_mul!(i32x16, i32, reduce_mul_i32x16); -red_mul!(u32x16, u32, reduce_mul_u32x16); -red_mul!(i8x32, i8, reduce_mul_i8x32); -red_mul!(u8x32, u8, reduce_mul_u8x32); -red_mul!(i16x32, i16, reduce_mul_i16x32); -red_mul!(u16x32, u16, reduce_mul_u16x32); -red_mul!(i8x64, i8, reduce_mul_i8x64); -red_mul!(u8x64, u8, reduce_mul_u8x64); - -macro_rules! red_fmul { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceMul for $id { - type Acc = $elem_ty; - #[inline] - fn reduce_mul(self) -> Self::Acc { - // FIXME: - // unsafe { $llvm_intr(1. as $elem_ty, self) } - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x *= self.extract(i); - } - x - } - } - }; -} - -red_fmul!(f32x2, f32, reduce_fmul_f32x2); -red_fmul!(f64x2, f64, reduce_fmul_f64x2); -red_fmul!(f32x4, f32, reduce_fmul_f32x4); -red_fmul!(f64x4, f64, reduce_fmul_f64x4); -red_fmul!(f32x8, f32, reduce_fmul_f32x8); -red_fmul!(f64x8, f64, reduce_fmul_f64x8); -red_fmul!(f32x16, f32, reduce_fmul_f32x16); - -#[cfg(test)] -mod tests { - use super::ReduceMul; - use coresimd::simd::*; - - // note: these are tested in the portable vector API tests - - #[test] - fn reduce_mul_i32x4() { - let v = i32x4::splat(2); - assert_eq!(v.reduce_mul(), 16_i32); - } - #[test] - fn reduce_mul_u32x4() { - let v = u32x4::splat(2); - assert_eq!(v.reduce_mul(), 16_u32); - } - #[test] - fn reduce_mul_f32x4() { - let v = f32x4::splat(2.); - assert_eq!(v.reduce_mul(), 16.); - } -} diff --git a/coresimd/ppsv/codegen/sum.rs b/coresimd/ppsv/codegen/sum.rs deleted file mode 100644 index b67a598f9f..0000000000 --- a/coresimd/ppsv/codegen/sum.rs +++ /dev/null @@ -1,210 +0,0 @@ -//! Code generation for the sum reduction. 
-use coresimd::simd::*; - -/// LLVM intrinsics used in the sum reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.add.i8.v2i8"] - fn reduce_add_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.add.u8.v2u8"] - fn reduce_add_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.add.i16.v2i16"] - fn reduce_add_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.add.u16.v2u16"] - fn reduce_add_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.add.i32.v2i32"] - fn reduce_add_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.add.u32.v2u32"] - fn reduce_add_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.add.i64.v2i64"] - fn reduce_add_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.add.u64.v2u64"] - fn reduce_add_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.add.i8.v4i8"] - fn reduce_add_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.add.u8.v4u8"] - fn reduce_add_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.add.i16.v4i16"] - fn reduce_add_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.add.u16.v4u16"] - fn reduce_add_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.add.i32.v4i32"] - fn reduce_add_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.add.u32.v4u32"] - fn reduce_add_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.add.i64.v4i64"] - fn reduce_add_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.add.u64.v4u64"] - fn reduce_add_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.add.i8.v8i8"] - fn reduce_add_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.add.u8.v8u8"] - fn reduce_add_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.add.i16.v8i16"] - fn reduce_add_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.add.u16.v8u16"] - fn reduce_add_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.add.i32.v8i32"] - fn reduce_add_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.add.u32.v8u32"] - fn reduce_add_u32x8(x: u32x8) -> u32; - #[link_name = "llvm.experimental.vector.reduce.add.i64.v8i64"] - fn reduce_add_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.add.u64.v8u64"] - fn reduce_add_u64x8(x: u64x8) -> u64; - #[link_name = "llvm.experimental.vector.reduce.add.i8.v16i8"] - fn reduce_add_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.add.u8.v16u8"] - fn reduce_add_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.add.i16.v16i16"] - fn reduce_add_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.add.u16.v16u16"] - fn reduce_add_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.add.i32.v16i32"] - fn reduce_add_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.add.u32.v16u32"] - fn reduce_add_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.add.i8.v32i8"] - fn reduce_add_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.add.u8.v32u8"] - fn reduce_add_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.add.i16.v32i16"] - fn reduce_add_i16x32(x: i16x32) -> 
i16; - #[link_name = "llvm.experimental.vector.reduce.add.u16.v32u16"] - fn reduce_add_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.add.i8.v64i8"] - fn reduce_add_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.add.u8.v64u8"] - fn reduce_add_u8x64(x: u8x64) -> u8; - #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v2f32"] - fn reduce_fadd_f32x2(acc: f32, x: f32x2) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fadd.f64.v2f64"] - fn reduce_fadd_f64x2(acc: f64, x: f64x2) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v4f32"] - fn reduce_fadd_f32x4(acc: f32, x: f32x4) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fadd.f64.v4f64"] - fn reduce_fadd_f64x4(acc: f64, x: f64x4) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v8f32"] - fn reduce_fadd_f32x8(acc: f32, x: f32x8) -> f32; - #[link_name = "llvm.experimental.vector.reduce.fadd.f64.v8f64"] - fn reduce_fadd_f64x8(acc: f64, x: f64x8) -> f64; - #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v16f32"] - fn reduce_fadd_f32x16(acc: f32, x: f32x16) -> f32; -} - -/// Reduction: horizontal sum of the vector elements. -pub trait ReduceAdd { - /// Result type of the reduction. - type Acc; - /// Computes the horizontal sum of the vector elements. - fn reduce_add(self) -> Self::Acc; -} - -macro_rules! red_add { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceAdd for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_add(self) -> Self::Acc { - unsafe { $llvm_intr(self) } - } - // FIXME: broken in AArch64 - #[cfg(target_arch = "aarch64")] - #[inline] - fn reduce_add(self) -> Self::Acc { - let mut x = self.extract(0) as Self::Acc; - for i in 1..$id::lanes() { - x += self.extract(i) as Self::Acc; - } - x - } - } - }; -} -red_add!(i8x2, i8, reduce_add_i8x2); -red_add!(u8x2, u8, reduce_add_u8x2); -red_add!(i16x2, i16, reduce_add_i16x2); -red_add!(u16x2, u16, reduce_add_u16x2); -red_add!(i32x2, i32, reduce_add_i32x2); -red_add!(u32x2, u32, reduce_add_u32x2); -red_add!(i64x2, i64, reduce_add_i64x2); -red_add!(u64x2, u64, reduce_add_u64x2); -red_add!(i8x4, i8, reduce_add_i8x4); -red_add!(u8x4, u8, reduce_add_u8x4); -red_add!(i16x4, i16, reduce_add_i16x4); -red_add!(u16x4, u16, reduce_add_u16x4); -red_add!(i32x4, i32, reduce_add_i32x4); -red_add!(u32x4, u32, reduce_add_u32x4); -red_add!(i64x4, i64, reduce_add_i64x4); -red_add!(u64x4, u64, reduce_add_u64x4); -red_add!(i8x8, i8, reduce_add_i8x8); -red_add!(u8x8, u8, reduce_add_u8x8); -red_add!(i16x8, i16, reduce_add_i16x8); -red_add!(u16x8, u16, reduce_add_u16x8); -red_add!(i32x8, i32, reduce_add_i32x8); -red_add!(u32x8, u32, reduce_add_u32x8); -red_add!(i64x8, i64, reduce_add_i64x8); -red_add!(u64x8, u64, reduce_add_u64x8); -red_add!(i8x16, i8, reduce_add_i8x16); -red_add!(u8x16, u8, reduce_add_u8x16); -red_add!(i16x16, i16, reduce_add_i16x16); -red_add!(u16x16, u16, reduce_add_u16x16); -red_add!(i32x16, i32, reduce_add_i32x16); -red_add!(u32x16, u32, reduce_add_u32x16); -red_add!(i8x32, i8, reduce_add_i8x32); -red_add!(u8x32, u8, reduce_add_u8x32); -red_add!(i16x32, i16, reduce_add_i16x32); -red_add!(u16x32, u16, reduce_add_u16x32); -red_add!(i8x64, i8, reduce_add_i8x64); -red_add!(u8x64, u8, reduce_add_u8x64); - -macro_rules! red_fadd { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceAdd for $id { - type Acc = $elem_ty; - #[inline] - fn reduce_add(self) -> Self::Acc { - // FIXME: - //unsafe { $llvm_intr(0. 
as $elem_ty, self) } - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x += self.extract(i); - } - x - } - } - }; -} - -red_fadd!(f32x2, f32, reduce_fadd_f32x2); -red_fadd!(f64x2, f64, reduce_fadd_f64x2); -red_fadd!(f32x4, f32, reduce_fadd_f32x4); -red_fadd!(f64x4, f64, reduce_fadd_f64x4); -red_fadd!(f32x8, f32, reduce_fadd_f32x8); -red_fadd!(f64x8, f64, reduce_fadd_f64x8); -red_fadd!(f32x16, f32, reduce_fadd_f32x16); - -#[cfg(test)] -mod tests { - use super::ReduceAdd; - use coresimd::simd::*; - - // note: these are tested in the portable vector API tests - - #[test] - fn reduce_add_i32x4() { - let v = i32x4::splat(1); - assert_eq!(v.reduce_add(), 4_i32); - } - #[test] - fn reduce_add_u32x4() { - let v = u32x4::splat(1); - assert_eq!(v.reduce_add(), 4_u32); - } - #[test] - fn reduce_add_f32x4() { - let v = f32x4::splat(1.); - assert_eq!(v.reduce_add(), 4.); - } -} diff --git a/coresimd/ppsv/codegen/xor.rs b/coresimd/ppsv/codegen/xor.rs deleted file mode 100644 index 5d4695fa6f..0000000000 --- a/coresimd/ppsv/codegen/xor.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! Code generation for the xor reduction. -use coresimd::simd::*; - -/// LLVM intrinsics used in the xor reduction -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.experimental.vector.reduce.xor.i8.v2i8"] - fn reduce_xor_i8x2(x: i8x2) -> i8; - #[link_name = "llvm.experimental.vector.reduce.xor.u8.v2u8"] - fn reduce_xor_u8x2(x: u8x2) -> u8; - #[link_name = "llvm.experimental.vector.reduce.xor.i16.v2i16"] - fn reduce_xor_i16x2(x: i16x2) -> i16; - #[link_name = "llvm.experimental.vector.reduce.xor.u16.v2u16"] - fn reduce_xor_u16x2(x: u16x2) -> u16; - #[link_name = "llvm.experimental.vector.reduce.xor.i32.v2i32"] - fn reduce_xor_i32x2(x: i32x2) -> i32; - #[link_name = "llvm.experimental.vector.reduce.xor.u32.v2u32"] - fn reduce_xor_u32x2(x: u32x2) -> u32; - #[link_name = "llvm.experimental.vector.reduce.xor.i64.v2i64"] - fn reduce_xor_i64x2(x: i64x2) -> i64; - #[link_name = "llvm.experimental.vector.reduce.xor.u64.v2u64"] - fn reduce_xor_u64x2(x: u64x2) -> u64; - #[link_name = "llvm.experimental.vector.reduce.xor.i8.v4i8"] - fn reduce_xor_i8x4(x: i8x4) -> i8; - #[link_name = "llvm.experimental.vector.reduce.xor.u8.v4u8"] - fn reduce_xor_u8x4(x: u8x4) -> u8; - #[link_name = "llvm.experimental.vector.reduce.xor.i16.v4i16"] - fn reduce_xor_i16x4(x: i16x4) -> i16; - #[link_name = "llvm.experimental.vector.reduce.xor.u16.v4u16"] - fn reduce_xor_u16x4(x: u16x4) -> u16; - #[link_name = "llvm.experimental.vector.reduce.xor.i32.v4i32"] - fn reduce_xor_i32x4(x: i32x4) -> i32; - #[link_name = "llvm.experimental.vector.reduce.xor.u32.v4u32"] - fn reduce_xor_u32x4(x: u32x4) -> u32; - #[link_name = "llvm.experimental.vector.reduce.xor.i64.v4i64"] - fn reduce_xor_i64x4(x: i64x4) -> i64; - #[link_name = "llvm.experimental.vector.reduce.xor.u64.v4u64"] - fn reduce_xor_u64x4(x: u64x4) -> u64; - #[link_name = "llvm.experimental.vector.reduce.xor.i8.v8i8"] - fn reduce_xor_i8x8(x: i8x8) -> i8; - #[link_name = "llvm.experimental.vector.reduce.xor.u8.v8u8"] - fn reduce_xor_u8x8(x: u8x8) -> u8; - #[link_name = "llvm.experimental.vector.reduce.xor.i16.v8i16"] - fn reduce_xor_i16x8(x: i16x8) -> i16; - #[link_name = "llvm.experimental.vector.reduce.xor.u16.v8u16"] - fn reduce_xor_u16x8(x: u16x8) -> u16; - #[link_name = "llvm.experimental.vector.reduce.xor.i32.v8i32"] - fn reduce_xor_i32x8(x: i32x8) -> i32; - #[link_name = "llvm.experimental.vector.reduce.xor.u32.v8u32"] - fn reduce_xor_u32x8(x: u32x8) -> u32; - #[link_name = 
"llvm.experimental.vector.reduce.xor.i64.v8i64"] - fn reduce_xor_i64x8(x: i64x8) -> i64; - #[link_name = "llvm.experimental.vector.reduce.xor.u64.v8u64"] - fn reduce_xor_u64x8(x: u64x8) -> u64; - #[link_name = "llvm.experimental.vector.reduce.xor.i8.v16i8"] - fn reduce_xor_i8x16(x: i8x16) -> i8; - #[link_name = "llvm.experimental.vector.reduce.xor.u8.v16u8"] - fn reduce_xor_u8x16(x: u8x16) -> u8; - #[link_name = "llvm.experimental.vector.reduce.xor.i16.v16i16"] - fn reduce_xor_i16x16(x: i16x16) -> i16; - #[link_name = "llvm.experimental.vector.reduce.xor.u16.v16u16"] - fn reduce_xor_u16x16(x: u16x16) -> u16; - #[link_name = "llvm.experimental.vector.reduce.xor.i32.v16i32"] - fn reduce_xor_i32x16(x: i32x16) -> i32; - #[link_name = "llvm.experimental.vector.reduce.xor.u32.v16u32"] - fn reduce_xor_u32x16(x: u32x16) -> u32; - #[link_name = "llvm.experimental.vector.reduce.xor.i8.v32i8"] - fn reduce_xor_i8x32(x: i8x32) -> i8; - #[link_name = "llvm.experimental.vector.reduce.xor.u8.v32u8"] - fn reduce_xor_u8x32(x: u8x32) -> u8; - #[link_name = "llvm.experimental.vector.reduce.xor.i16.v32i16"] - fn reduce_xor_i16x32(x: i16x32) -> i16; - #[link_name = "llvm.experimental.vector.reduce.xor.u16.v32u16"] - fn reduce_xor_u16x32(x: u16x32) -> u16; - #[link_name = "llvm.experimental.vector.reduce.xor.i8.v64i8"] - fn reduce_xor_i8x64(x: i8x64) -> i8; - #[link_name = "llvm.experimental.vector.reduce.xor.u8.v64u8"] - fn reduce_xor_u8x64(x: u8x64) -> u8; -} - -/// Reduction: horizontal bitwise xor of the vector elements. -#[cfg_attr(feature = "cargo-clippy", allow(stutter))] -pub trait ReduceXor { - /// Result type of the reduction. - type Acc; - /// Computes the horizontal bitwise xor of the vector elements. - fn reduce_xor(self) -> Self::Acc; -} - -macro_rules! red_xor { - ($id:ident, $elem_ty:ident, $llvm_intr:ident) => { - impl ReduceXor for $id { - type Acc = $elem_ty; - #[cfg(not(target_arch = "aarch64"))] - #[inline] - fn reduce_xor(self) -> Self::Acc { - unsafe { $llvm_intr(self.into_bits()) } - } - // FIXME: broken in AArch64 - #[cfg(target_arch = "aarch64")] - #[inline] - fn reduce_xor(self) -> Self::Acc { - let mut x = self.extract(0) as Self::Acc; - for i in 1..$id::lanes() { - x ^= self.extract(i) as Self::Acc; - } - x - } - } - }; -} -red_xor!(i8x2, i8, reduce_xor_i8x2); -red_xor!(u8x2, u8, reduce_xor_u8x2); -red_xor!(i16x2, i16, reduce_xor_i16x2); -red_xor!(u16x2, u16, reduce_xor_u16x2); -red_xor!(i32x2, i32, reduce_xor_i32x2); -red_xor!(u32x2, u32, reduce_xor_u32x2); -red_xor!(i64x2, i64, reduce_xor_i64x2); -red_xor!(u64x2, u64, reduce_xor_u64x2); -red_xor!(i8x4, i8, reduce_xor_i8x4); -red_xor!(u8x4, u8, reduce_xor_u8x4); -red_xor!(i16x4, i16, reduce_xor_i16x4); -red_xor!(u16x4, u16, reduce_xor_u16x4); -red_xor!(i32x4, i32, reduce_xor_i32x4); -red_xor!(u32x4, u32, reduce_xor_u32x4); -red_xor!(i64x4, i64, reduce_xor_i64x4); -red_xor!(u64x4, u64, reduce_xor_u64x4); -red_xor!(i8x8, i8, reduce_xor_i8x8); -red_xor!(u8x8, u8, reduce_xor_u8x8); -red_xor!(i16x8, i16, reduce_xor_i16x8); -red_xor!(u16x8, u16, reduce_xor_u16x8); -red_xor!(i32x8, i32, reduce_xor_i32x8); -red_xor!(u32x8, u32, reduce_xor_u32x8); -red_xor!(i64x8, i64, reduce_xor_i64x8); -red_xor!(u64x8, u64, reduce_xor_u64x8); -red_xor!(i8x16, i8, reduce_xor_i8x16); -red_xor!(u8x16, u8, reduce_xor_u8x16); -red_xor!(i16x16, i16, reduce_xor_i16x16); -red_xor!(u16x16, u16, reduce_xor_u16x16); -red_xor!(i32x16, i32, reduce_xor_i32x16); -red_xor!(u32x16, u32, reduce_xor_u32x16); -red_xor!(i8x32, i8, reduce_xor_i8x32); -red_xor!(u8x32, u8, 
reduce_xor_u8x32);
-red_xor!(i16x32, i16, reduce_xor_i16x32);
-red_xor!(u16x32, u16, reduce_xor_u16x32);
-red_xor!(i8x64, i8, reduce_xor_i8x64);
-red_xor!(u8x64, u8, reduce_xor_u8x64);
-
-red_xor!(b8x2, i8, reduce_xor_i8x2);
-red_xor!(b8x4, i8, reduce_xor_i8x4);
-red_xor!(b8x8, i8, reduce_xor_i8x8);
-red_xor!(b8x16, i8, reduce_xor_i8x16);
-red_xor!(b8x32, i8, reduce_xor_i8x32);
-red_xor!(b8x64, i8, reduce_xor_i8x64);
-
-#[cfg(test)]
-mod tests {
-    use super::ReduceXor;
-    use coresimd::simd::*;
-
-    // note: these are tested in the portable vector API tests
-
-    #[test]
-    fn reduce_xor_i32x4() {
-        let v = i32x4::splat(1);
-        assert_eq!(v.reduce_xor(), 0_i32);
-        let v = i32x4::new(1, 0, 0, 0);
-        assert_eq!(v.reduce_xor(), 1_i32);
-    }
-}
diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs
index 177f3c97c4..8590ad41f2 100644
--- a/coresimd/ppsv/mod.rs
+++ b/coresimd/ppsv/mod.rs
@@ -32,7 +32,6 @@
 
 #[macro_use]
 mod api;
-mod codegen;
 
 mod v16;
 mod v32;
diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs
index c4ae8a2a90..790ec31afb 100644
--- a/coresimd/simd_llvm.rs
+++ b/coresimd/simd_llvm.rs
@@ -31,4 +31,18 @@ extern "platform-intrinsic" {
     pub fn simd_and<T>(x: T, y: T) -> T;
     pub fn simd_or<T>(x: T, y: T) -> T;
     pub fn simd_xor<T>(x: T, y: T) -> T;
+
+    pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
+    pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
+    pub fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
+    pub fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
+    pub fn simd_reduce_min<T, U>(x: T) -> U;
+    pub fn simd_reduce_max<T, U>(x: T) -> U;
+    pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
+    pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
+    pub fn simd_reduce_and<T, U>(x: T) -> U;
+    pub fn simd_reduce_or<T, U>(x: T) -> U;
+    pub fn simd_reduce_xor<T, U>(x: T) -> U;
+    pub fn simd_reduce_all<T>(x: T) -> bool;
+    pub fn simd_reduce_any<T>(x: T) -> bool;
 }
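
Note on the change above: the deleted codegen modules (or, product, sum, xor) each declared one llvm.experimental.vector.reduce.* extern per element type and lane count, while the new simd_reduce_* declarations in coresimd/simd_llvm.rs are generic platform intrinsics that cover all of them. The following is a minimal, hedged sketch of that direction, not code from this patch: it assumes a nightly compiler with the repr_simd and platform_intrinsics feature gates, defines its own toy i32x4 so it is self-contained, and only uses the two intrinsics declared in the hunk above.

    #![feature(repr_simd, platform_intrinsics)]
    #![allow(non_camel_case_types)]

    // Toy 4-lane vector for illustration only; the real portable vector
    // types are generated by the macros in coresimd/ppsv.
    #[repr(simd)]
    #[derive(Copy, Clone)]
    struct i32x4(i32, i32, i32, i32);

    // Generic reduction intrinsics, matching the declarations added to
    // coresimd/simd_llvm.rs in this patch.
    extern "platform-intrinsic" {
        fn simd_reduce_add_unordered<T, U>(x: T) -> U;
        fn simd_reduce_or<T, U>(x: T) -> U;
    }

    fn main() {
        let v = i32x4(1, 2, 4, 8);
        // A single generic intrinsic handles every lane count and element
        // type, replacing the per-width reduce_add_* / reduce_or_* externs
        // removed above.
        let sum: i32 = unsafe { simd_reduce_add_unordered(v) };
        let or: i32 = unsafe { simd_reduce_or(v) };
        assert_eq!(sum, 15);
        assert_eq!(or, 15);
    }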