From fd131cd7aa774bfd2b9efe9944e9aaf7f77702a9 Mon Sep 17 00:00:00 2001 From: miguel raz Date: Fri, 4 Jun 2021 14:24:47 -0500 Subject: [PATCH 1/5] add dot_product example --- crates/core_simd/examples/dot_product.rs | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 crates/core_simd/examples/dot_product.rs diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs new file mode 100644 index 00000000000..812b0b23eeb --- /dev/null +++ b/crates/core_simd/examples/dot_product.rs @@ -0,0 +1,31 @@ +// Code taken from the `packed_simd` crate +// Run this code with `cargo test --example dot_product` +#![feature(array_chunks)] +use core_simd::*; + +pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + + // TODO handle remainder when a.len() % 4 != 0 + a.array_chunks::<4>() + .map(|&a| f32x4::from_array(a)) + .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .map(|(a, b)| (a * b).horizontal_sum()) + .sum() +} + +fn main() { + // Empty main to make cargo happy +} + +#[cfg(test)] +mod tests { + #[test] + fn test() { + use super::*; + let a: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let b: Vec = vec![-8.0, -7.0, -6.0, -5.0, 4.0, 3.0, 2.0, 1.0]; + + assert_eq!(0.0, dot_prod(&a, &b)); + } +} From 390111f5a1058f40bb05568acb7c919dcc6293e1 Mon Sep 17 00:00:00 2001 From: miguel raz Date: Fri, 4 Jun 2021 16:38:32 -0500 Subject: [PATCH 2/5] add matrix_inversion example --- crates/core_simd/examples/matrix_inversion.rs | 319 ++++++++++++++++++ 1 file changed, 319 insertions(+) create mode 100644 crates/core_simd/examples/matrix_inversion.rs diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs new file mode 100644 index 00000000000..d108d4e2d00 --- /dev/null +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -0,0 +1,319 @@ +//! 
4x4 matrix inverse +// Code ported from the `packed_simd` crate +// Run this code with `cargo test --example matrix_inversion` +#![feature(custom_inner_attributes)] +#![deny(rust_2018_idioms)] +#![allow(clippy::must_use_candidate)] +#![feature(array_chunks)] +use core_simd::*; + +// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] +pub struct Matrix4x4([[f32; 4]; 4]); + +#[allow(clippy::too_many_lines)] +pub fn scalar_inv4x4(m: Matrix4x4) -> Option { + let m = m.0; + + let mut inv = [ + [ // row 0: + // 0,0: + m[1][1] * m[2][2] * m[3][3] - + m[1][1] * m[2][3] * m[3][2] - + m[2][1] * m[1][2] * m[3][3] + + m[2][1] * m[1][3] * m[3][2] + + m[3][1] * m[1][2] * m[2][3] - + m[3][1] * m[1][3] * m[2][2], + // 0,1: + -m[0][1] * m[2][2] * m[3][3] + + m[0][1] * m[2][3] * m[3][2] + + m[2][1] * m[0][2] * m[3][3] - + m[2][1] * m[0][3] * m[3][2] - + m[3][1] * m[0][2] * m[2][3] + + m[3][1] * m[0][3] * m[2][2], + // 0,2: + m[0][1] * m[1][2] * m[3][3] - + m[0][1] * m[1][3] * m[3][2] - + m[1][1] * m[0][2] * m[3][3] + + m[1][1] * m[0][3] * m[3][2] + + m[3][1] * m[0][2] * m[1][3] - + m[3][1] * m[0][3] * m[1][2], + // 0,3: + -m[0][1] * m[1][2] * m[2][3] + + m[0][1] * m[1][3] * m[2][2] + + m[1][1] * m[0][2] * m[2][3] - + m[1][1] * m[0][3] * m[2][2] - + m[2][1] * m[0][2] * m[1][3] + + m[2][1] * m[0][3] * m[1][2], + ], + [ // row 1 + // 1,0: + -m[1][0] * m[2][2] * m[3][3] + + m[1][0] * m[2][3] * m[3][2] + + m[2][0] * m[1][2] * m[3][3] - + m[2][0] * m[1][3] * m[3][2] - + m[3][0] * m[1][2] * m[2][3] + + m[3][0] * m[1][3] * m[2][2], + // 1,1: + m[0][0] * m[2][2] * m[3][3] - + m[0][0] * m[2][3] * m[3][2] - + m[2][0] * m[0][2] * m[3][3] + + m[2][0] * m[0][3] * m[3][2] + + m[3][0] * m[0][2] * m[2][3] - + m[3][0] * m[0][3] * m[2][2], + // 1,2: + -m[0][0] * m[1][2] * m[3][3] + + m[0][0] * m[1][3] * m[3][2] + + m[1][0] * m[0][2] * m[3][3] - + m[1][0] * m[0][3] * m[3][2] - + m[3][0] * m[0][2] * m[1][3] + + m[3][0] * 
m[0][3] * m[1][2], + // 1,3: + m[0][0] * m[1][2] * m[2][3] - + m[0][0] * m[1][3] * m[2][2] - + m[1][0] * m[0][2] * m[2][3] + + m[1][0] * m[0][3] * m[2][2] + + m[2][0] * m[0][2] * m[1][3] - + m[2][0] * m[0][3] * m[1][2], + ], + [ // row 2 + // 2,0: + m[1][0] * m[2][1] * m[3][3] - + m[1][0] * m[2][3] * m[3][1] - + m[2][0] * m[1][1] * m[3][3] + + m[2][0] * m[1][3] * m[3][1] + + m[3][0] * m[1][1] * m[2][3] - + m[3][0] * m[1][3] * m[2][1], + // 2,1: + -m[0][0] * m[2][1] * m[3][3] + + m[0][0] * m[2][3] * m[3][1] + + m[2][0] * m[0][1] * m[3][3] - + m[2][0] * m[0][3] * m[3][1] - + m[3][0] * m[0][1] * m[2][3] + + m[3][0] * m[0][3] * m[2][1], + // 2,2: + m[0][0] * m[1][1] * m[3][3] - + m[0][0] * m[1][3] * m[3][1] - + m[1][0] * m[0][1] * m[3][3] + + m[1][0] * m[0][3] * m[3][1] + + m[3][0] * m[0][1] * m[1][3] - + m[3][0] * m[0][3] * m[1][1], + // 2,3: + -m[0][0] * m[1][1] * m[2][3] + + m[0][0] * m[1][3] * m[2][1] + + m[1][0] * m[0][1] * m[2][3] - + m[1][0] * m[0][3] * m[2][1] - + m[2][0] * m[0][1] * m[1][3] + + m[2][0] * m[0][3] * m[1][1], + ], + [ // row 3 + // 3,0: + -m[1][0] * m[2][1] * m[3][2] + + m[1][0] * m[2][2] * m[3][1] + + m[2][0] * m[1][1] * m[3][2] - + m[2][0] * m[1][2] * m[3][1] - + m[3][0] * m[1][1] * m[2][2] + + m[3][0] * m[1][2] * m[2][1], + // 3,1: + m[0][0] * m[2][1] * m[3][2] - + m[0][0] * m[2][2] * m[3][1] - + m[2][0] * m[0][1] * m[3][2] + + m[2][0] * m[0][2] * m[3][1] + + m[3][0] * m[0][1] * m[2][2] - + m[3][0] * m[0][2] * m[2][1], + // 3,2: + -m[0][0] * m[1][1] * m[3][2] + + m[0][0] * m[1][2] * m[3][1] + + m[1][0] * m[0][1] * m[3][2] - + m[1][0] * m[0][2] * m[3][1] - + m[3][0] * m[0][1] * m[1][2] + + m[3][0] * m[0][2] * m[1][1], + // 3,3: + m[0][0] * m[1][1] * m[2][2] - + m[0][0] * m[1][2] * m[2][1] - + m[1][0] * m[0][1] * m[2][2] + + m[1][0] * m[0][2] * m[2][1] + + m[2][0] * m[0][1] * m[1][2] - + m[2][0] * m[0][2] * m[1][1], + ], + ]; + + let det = m[0][0] * inv[0][0] + m[0][1] * inv[1][0] + + m[0][2] * inv[2][0] + m[0][3] * inv[3][0]; + if det == 0. 
{ return None; } + + let det_inv = 1. / det; + + for row in &mut inv { + for elem in row.iter_mut() { + *elem *= det_inv; + } + } + + Some(Matrix4x4(inv)) +} + +pub fn simd_inv4x4(m: Matrix4x4) -> Option { + let m = m.0; + let m_0 = f32x4::from_array(m[0]); + let m_1 = f32x4::from_array(m[1]); + let m_2 = f32x4::from_array(m[2]); + let m_3 = f32x4::from_array(m[3]); + + // 2 argument shuffle, returns an f32x4 + // the first f32x4 is indexes 0..=3 + // the second f32x4 is indexed 4..=7 + let tmp1 = f32x4::shuffle::<{[0, 1, 4, 5]}>(m_0, m_1); + let row1 = f32x4::shuffle::<{[0, 1, 4, 5]}>(m_2, m_3,); + + let row0 = f32x4::shuffle::<{[0, 2, 4, 6]}>(tmp1, row1); + let row1 = f32x4::shuffle::<{[1, 3, 5, 7]}>(row1, tmp1); + + let tmp1 = f32x4::shuffle::<{[2, 3, 6, 7]}>(m_0, m_1); + let row3 = f32x4::shuffle::<{[2, 3, 6, 7]}>(m_2, m_3); + let row2 = f32x4::shuffle::<{[0, 2, 4, 6]}>(tmp1, row3); + let row3 = f32x4::shuffle::<{[1, 3, 5, 7]}>(row3, tmp1); + + let tmp1 = row2 * row3; + // there's no syntax for a 1 arg shuffle yet, + // so we just pass the same f32x4 twice + let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); + + let minor0 = row1 * tmp1; + let minor1 = row0 * tmp1; + let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); + let minor0 = (row1 * tmp1) - minor0; + let minor1 = (row0 * tmp1) - minor1; + let minor1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(minor1, minor1); + + let tmp1 = row1 * row2; + let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); + let minor0 = (row3 * tmp1) + minor0; + let minor3 = row0 * tmp1; + let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); + + let minor0 = minor0 - row3 * tmp1; + let minor3 = row0 * tmp1 - minor3; + let minor3 = f32x4::shuffle::<{[2, 3, 0, 1]}>(minor3, minor3); + + let tmp1 = row3 * f32x4::shuffle::<{[2, 3, 0, 1]}>(row1, row1); + let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); + let row2 = f32x4::shuffle::<{[2, 3, 0, 1]}>(row2, row2); + let minor0 = row2 * tmp1 + minor0; + let minor2 = row0 * 
tmp1; + let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); + let minor0 = minor0 - row2 * tmp1; + let minor2 = row0 * tmp1 - minor2; + let minor2 = f32x4::shuffle::<{[2, 3, 0, 1]}>(minor2, minor2); + + let tmp1 = row0 * row1; + let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); + let minor2 = minor2 + row3 * tmp1; + let minor3 = row2 * tmp1 - minor3; + let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); + let minor2 = row3 * tmp1 - minor2; + let minor3 = minor3 - row2 * tmp1; + + let tmp1 = row0 * row3; + let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); + let minor1 = minor1 - row2 * tmp1; + let minor2 = row1 * tmp1 + minor2; + let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); + let minor1 = row2 * tmp1 + minor1; + let minor2 = minor2 - row1 * tmp1; + + let tmp1 = row0 * row2; + let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); + let minor1 = row3 * tmp1 + minor1; + let minor3 = minor3 - row1 * tmp1; + let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); + let minor1 = minor1 - row3 * tmp1; + let minor3 = row1 * tmp1 + minor3; + + let det = row0 * minor0; + let det = f32x4::shuffle::<{[2, 3, 0, 1]}>(det, det) + det; + let det = f32x4::shuffle::<{[1, 0, 3, 2]}>(det, det) + det; + + if det.horizontal_sum() == 0. 
{ + return None; + } + // calculate the reciprocal + let tmp1 = f32x4::splat(1.0) / det; + let det = tmp1 + tmp1 - det * tmp1 * tmp1; + + let res0 = minor0 * det; + let res1 = minor1 * det; + let res2 = minor2 * det; + let res3 = minor3 * det; + + let mut m = m; + + m[0] = res0.to_array(); + m[1] = res1.to_array(); + m[2] = res2.to_array(); + m[3] = res3.to_array(); + + Some(Matrix4x4(m)) +} + + +#[cfg(test)] +#[rustfmt::skip] +mod tests { + use super::*; + + #[test] + fn test() { + let tests: &[(Matrix4x4, Option)] = &[ + // Identity: + (Matrix4x4([ + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + ]), + Some(Matrix4x4([ + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + ])) + ), + // None: + (Matrix4x4([ + [1., 2., 3., 4.], + [12., 11., 10., 9.], + [5., 6., 7., 8.], + [16., 15., 14., 13.], + ]), + None + ), + // Other: + (Matrix4x4([ + [1., 1., 1., 0.], + [0., 3., 1., 2.], + [2., 3., 1., 0.], + [1., 0., 2., 1.], + ]), + Some(Matrix4x4([ + [-3., -0.5, 1.5, 1.0], + [ 1., 0.25, -0.25, -0.5], + [ 3., 0.25, -1.25, -0.5], + [-3., 0.0, 1.0, 1.0], + ])) + ), + + + ]; + + for &(input, output) in tests { + assert_eq!(scalar_inv4x4(input), output); + assert_eq!(simd_inv4x4(input), output); + } + } +} + + +fn main() { + // Empty main to make cargo happy +} From b041fce3d204640e5c4cf2624ccdebceba749ed3 Mon Sep 17 00:00:00 2001 From: miguel raz Date: Sat, 5 Jun 2021 16:26:32 -0500 Subject: [PATCH 3/5] add more doctests --- crates/core_simd/src/first.rs | 41 ++++++++++++++++ crates/core_simd/src/reduction.rs | 71 +++++++++++++++++++++++++++- crates/core_simd/src/round.rs | 37 ++++++++++++++- crates/core_simd/src/vector/float.rs | 14 ++++++ 4 files changed, 161 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/first.rs b/crates/core_simd/src/first.rs index 50602829d48..f3f88f7aa44 100644 --- a/crates/core_simd/src/first.rs +++ b/crates/core_simd/src/first.rs @@ -3,26 +3,54 @@ macro_rules! 
impl_vector { { $name:ident, $type:ty } => { impl $name where Self: crate::LanesAtMost32 { /// Construct a SIMD vector by setting all lanes to the given value. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 1.0, 1.0, 1.0]); + /// let b = f32x4::splat(1.0); + /// assert_eq!(a, b); + /// ``` pub const fn splat(value: $type) -> Self { Self([value; LANES]) } /// Returns a slice containing the entire SIMD vector. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 1.0, 1.0, 1.0]); + /// let b = a.as_slice(); + /// ``` pub const fn as_slice(&self) -> &[$type] { &self.0 } /// Returns a mutable slice containing the entire SIMD vector. + /// ``` + /// # use core_simd::*; + /// let mut a = f32x4::from_array([1.0, 1.0, 1.0, 1.0]); + /// let b = a.as_mut_slice(); + /// ``` pub fn as_mut_slice(&mut self) -> &mut [$type] { &mut self.0 } /// Converts an array to a SIMD vector. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 1.0, 1.0, 1.0]); + /// let b = f32x4::splat(1.0); + /// assert_eq!(a, b); + /// ``` pub const fn from_array(array: [$type; LANES]) -> Self { Self(array) } /// Converts a SIMD vector to an array. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 1.0, 1.0, 1.0]); + /// let b = [1.0, 1.0, 1.0, 1.0]; + /// assert_eq!(f32x4::to_array(a), b); + /// ``` pub const fn to_array(self) -> [$type; LANES] { // workaround for rust-lang/rust#80108 // TODO fix this @@ -47,6 +75,13 @@ macro_rules! impl_vector { impl Copy for $name where Self: crate::LanesAtMost32 {} impl Clone for $name where Self: crate::LanesAtMost32 { + /// Clone a SIMD vector. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 1.0, 1.0, 1.0]); + /// assert_eq!(a, f32x4::clone(&a)); + /// assert_eq!(a, a.clone()); + /// ``` #[inline] fn clone(&self) -> Self { *self @@ -61,6 +96,12 @@ macro_rules! 
impl_vector { } impl PartialEq for $name where Self: crate::LanesAtMost32 { + /// ``` + /// # use core_simd::*; + /// let a = f32x4::splat(1.0); + /// let b = f32x4::splat(1.0); + /// assert!(a == b); + /// ``` #[inline] fn eq(&self, other: &Self) -> bool { // TODO use SIMD equality diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 8687d1af516..1c053c4fde9 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -5,12 +5,26 @@ macro_rules! impl_integer_reductions { Self: crate::LanesAtMost32 { /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. + /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([1, 2, 3, 4]).horizontal_sum(); + /// assert_eq!(10, a); + /// let b = u32x4::from_array([u32::MAX, 1, 0, 0]).horizontal_sum(); + /// assert_eq!(0, b); + /// ``` #[inline] pub fn horizontal_sum(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) } } /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. + /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([2, 2, 2, 2]).horizontal_product(); + /// assert_eq!(16, a); + /// let b = u32x4::from_array([u32::MAX, 2, 1, 1]).horizontal_product(); + /// assert_eq!(u32::MAX, b + 1); // Because 2*u32::MAX wraps, and is 1 off from the u32::MAX + /// ``` #[inline] pub fn horizontal_product(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) } @@ -18,6 +32,13 @@ macro_rules! impl_integer_reductions { /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of /// the vector. 
+ /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([3, 3, 3, 3]).horizontal_and(); + /// assert_eq!(3, a); + /// let b = u32x4::from_array([1, 1, 0, 0]).horizontal_and(); + /// assert_eq!(0, b); + /// ``` #[inline] pub fn horizontal_and(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_and(self) } @@ -25,6 +46,11 @@ macro_rules! impl_integer_reductions { /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of /// the vector. + /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([1, 2, 0, 0]).horizontal_or(); + /// assert_eq!(3, a); + /// ``` #[inline] pub fn horizontal_or(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_or(self) } @@ -32,18 +58,33 @@ macro_rules! impl_integer_reductions { /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of /// the vector. + /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([5, 5, 5, 0]).horizontal_xor(); + /// assert_eq!(5, a); + /// ``` #[inline] pub fn horizontal_xor(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_xor(self) } } /// Horizontal maximum. Returns the maximum lane in the vector. + /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([1, 2, 42, 0]).horizontal_max(); + /// assert_eq!(42, a); + /// ``` #[inline] pub fn horizontal_max(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_max(self) } } /// Horizontal minimum. Returns the minimum lane in the vector. + /// ``` + /// # use core_simd::*; + /// let a = u32x4::from_array([1, 2, 42, 0]).horizontal_min(); + /// assert_eq!(0, a); + /// ``` #[inline] pub fn horizontal_min(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_min(self) } @@ -59,7 +100,14 @@ macro_rules! impl_float_reductions { Self: crate::LanesAtMost32 { - /// Horizontal add. Returns the sum of the lanes of the vector. + /// Horizontal add. Returns the sum of the lanes of the vector, rounded as in ordinary floating-point addition (an overflowing sum becomes infinity; it does not saturate).
+ /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]).horizontal_sum(); + /// assert_eq!(10.0, a); + /// let b = f32x4::from_array([f32::MAX, 2.0, 0.0, 0.0]).horizontal_sum(); + /// assert_eq!(f32::MAX, b); + /// ``` #[inline] pub fn horizontal_sum(self) -> $scalar { // LLVM sum is inaccurate on i586 @@ -71,6 +119,13 @@ macro_rules! impl_float_reductions { } /// Horizontal multiply. Returns the product of the lanes of the vector. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]).horizontal_product(); + /// assert_eq!(24.0, a); + /// let b = f32x4::from_array([f32::MAX, 2.0, 1.0, 1.0]).horizontal_product(); + /// assert_eq!(f32::INFINITY, b); // f32::MAX * 2.0 overflows to infinity + /// ``` #[inline] pub fn horizontal_product(self) -> $scalar { // LLVM product is inaccurate on i586 @@ -85,6 +140,13 @@ macro_rules! impl_float_reductions { /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]).horizontal_max(); + /// assert_eq!(4.0, a); + /// let b = f32x4::from_array([f32::NAN, 2.0, 0.0, 0.0]).horizontal_max(); + /// assert_eq!(2.0, b); + /// ``` #[inline] pub fn horizontal_max(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_max(self) } @@ -94,6 +156,13 @@ macro_rules! impl_float_reductions { /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`.
+ /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]).horizontal_min(); + /// assert_eq!(1.0, a); + /// let b = f32x4::from_array([f32::NAN, 2.0, 0.0, 0.0]).horizontal_min(); + /// assert_eq!(0.0, b); + /// ``` #[inline] pub fn horizontal_min(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_min(self) } diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 281851c68ac..7bd97f300ba 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -8,6 +8,13 @@ macro_rules! implement { Self: crate::LanesAtMost32, { /// Returns the smallest integer greater than or equal to each lane. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([-3.3, -0.7, 2.9, 0.2]); + /// let b = f32x4::from_array([-3.0, 0.0, 3.0, 1.0]); + /// let c = f32x4::ceil(a); + /// assert_eq!(c, b); + /// ``` #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn ceil(self) -> Self { @@ -15,13 +22,27 @@ macro_rules! implement { } /// Returns the largest integer value less than or equal to each lane. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([-3.3, -0.7, 2.3, 1.3]); + /// let b = f32x4::from_array([-4.0, -1.0, 2.0, 1.0]); + /// let c = f32x4::floor(a); + /// assert_eq!(c, b); + /// ``` #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn floor(self) -> Self { unsafe { crate::intrinsics::simd_floor(self) } } - /// Rounds to the nearest integer value. Ties round toward zero. + /// Rounds to the nearest integer value. Ties round away from zero. 
+ /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([-3.6, -0.5, 2.5, 0.6]); + /// let b = f32x4::from_array([-4.0, -1.0, 3.0, 1.0]); + /// let c = f32x4::round(a); + /// assert_eq!(c, b); + /// ``` #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn round(self) -> Self { @@ -29,6 +50,13 @@ macro_rules! implement { } /// Returns the floating point's integer value, with its fractional part removed. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([-3.47, -0.1234, 2.3, 0.1234]); + /// let b = f32x4::from_array([-3.0, -0.0, 2.0, 0.0]); + /// let c = f32x4::trunc(a); + /// assert_eq!(c, b); + /// ``` #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn trunc(self) -> Self { @@ -36,6 +64,13 @@ macro_rules! implement { } /// Returns the floating point's fractional value, with its integer part removed. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([-3.25, -0.75, 2.5, 10.0]); + /// let b = f32x4::from_array([-0.25, -0.75, 0.5, 0.0]); + /// let c = f32x4::fract(a); + /// assert_eq!(c, b); + /// ``` #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn fract(self) -> Self { diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index c4565a9dd90..73a834b9bb2 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -31,6 +31,13 @@ macro_rules! impl_float_vector { /// Produces a vector where every lane has the absolute value of the /// equivalently-indexed lane in `self`. + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([-3.0, -0.0, 2.0, 0.0]); + /// let b = f32x4::from_array([3.0, 0.0, 2.0, 0.0]); + /// let c = f32x4::abs(a); + /// assert_eq!(c, b); + /// ``` #[inline] pub fn abs(self) -> Self { unsafe { crate::intrinsics::simd_fabs(self) } @@ -38,6 +45,13 @@ macro_rules! 
impl_float_vector { /// Produces a vector where every lane has the square root value /// of the equivalently-indexed lane in `self` + /// ``` + /// # use core_simd::*; + /// let a = f32x4::from_array([9.0, 16.0, 4.0, 0.0]); + /// let b = f32x4::from_array([3.0, 4.0, 2.0, 0.0]); + /// let c = f32x4::sqrt(a); + /// assert_eq!(c, b); + /// ``` #[inline] #[cfg(feature = "std")] pub fn sqrt(self) -> Self { From 5add73c04a7934032dba375d3663cc2965c1232e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Sat, 5 Jun 2021 16:27:29 -0500 Subject: [PATCH 4/5] Delete dot_product.rs --- crates/core_simd/examples/dot_product.rs | 31 ------------------------ 1 file changed, 31 deletions(-) delete mode 100644 crates/core_simd/examples/dot_product.rs diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs deleted file mode 100644 index 812b0b23eeb..00000000000 --- a/crates/core_simd/examples/dot_product.rs +++ /dev/null @@ -1,31 +0,0 @@ -// Code taken from the `packed_simd` crate -// Run this code with `cargo test --example dot_product` -#![feature(array_chunks)] -use core_simd::*; - -pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 { - assert_eq!(a.len(), b.len()); - - // TODO handle remainder when a.len() % 4 != 0 - a.array_chunks::<4>() - .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) - .map(|(a, b)| (a * b).horizontal_sum()) - .sum() -} - -fn main() { - // Empty main to make cargo happy -} - -#[cfg(test)] -mod tests { - #[test] - fn test() { - use super::*; - let a: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; - let b: Vec = vec![-8.0, -7.0, -6.0, -5.0, 4.0, 3.0, 2.0, 1.0]; - - assert_eq!(0.0, dot_prod(&a, &b)); - } -} From e8661a2b9be83c518a4617e04aeb2d3308634ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Sat, 5 Jun 2021 16:27:49 -0500 Subject: [PATCH 5/5] Delete matrix_inversion.rs --- 
crates/core_simd/examples/matrix_inversion.rs | 319 ------------------ 1 file changed, 319 deletions(-) delete mode 100644 crates/core_simd/examples/matrix_inversion.rs diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs deleted file mode 100644 index d108d4e2d00..00000000000 --- a/crates/core_simd/examples/matrix_inversion.rs +++ /dev/null @@ -1,319 +0,0 @@ -//! 4x4 matrix inverse -// Code ported from the `packed_simd` crate -// Run this code with `cargo test --example matrix_inversion` -#![feature(custom_inner_attributes)] -#![deny(rust_2018_idioms)] -#![allow(clippy::must_use_candidate)] -#![feature(array_chunks)] -use core_simd::*; - -// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) -#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] -pub struct Matrix4x4([[f32; 4]; 4]); - -#[allow(clippy::too_many_lines)] -pub fn scalar_inv4x4(m: Matrix4x4) -> Option { - let m = m.0; - - let mut inv = [ - [ // row 0: - // 0,0: - m[1][1] * m[2][2] * m[3][3] - - m[1][1] * m[2][3] * m[3][2] - - m[2][1] * m[1][2] * m[3][3] + - m[2][1] * m[1][3] * m[3][2] + - m[3][1] * m[1][2] * m[2][3] - - m[3][1] * m[1][3] * m[2][2], - // 0,1: - -m[0][1] * m[2][2] * m[3][3] + - m[0][1] * m[2][3] * m[3][2] + - m[2][1] * m[0][2] * m[3][3] - - m[2][1] * m[0][3] * m[3][2] - - m[3][1] * m[0][2] * m[2][3] + - m[3][1] * m[0][3] * m[2][2], - // 0,2: - m[0][1] * m[1][2] * m[3][3] - - m[0][1] * m[1][3] * m[3][2] - - m[1][1] * m[0][2] * m[3][3] + - m[1][1] * m[0][3] * m[3][2] + - m[3][1] * m[0][2] * m[1][3] - - m[3][1] * m[0][3] * m[1][2], - // 0,3: - -m[0][1] * m[1][2] * m[2][3] + - m[0][1] * m[1][3] * m[2][2] + - m[1][1] * m[0][2] * m[2][3] - - m[1][1] * m[0][3] * m[2][2] - - m[2][1] * m[0][2] * m[1][3] + - m[2][1] * m[0][3] * m[1][2], - ], - [ // row 1 - // 1,0: - -m[1][0] * m[2][2] * m[3][3] + - m[1][0] * m[2][3] * m[3][2] + - m[2][0] * m[1][2] * m[3][3] - - m[2][0] * m[1][3] * m[3][2] - - m[3][0] * m[1][2] * 
m[2][3] + - m[3][0] * m[1][3] * m[2][2], - // 1,1: - m[0][0] * m[2][2] * m[3][3] - - m[0][0] * m[2][3] * m[3][2] - - m[2][0] * m[0][2] * m[3][3] + - m[2][0] * m[0][3] * m[3][2] + - m[3][0] * m[0][2] * m[2][3] - - m[3][0] * m[0][3] * m[2][2], - // 1,2: - -m[0][0] * m[1][2] * m[3][3] + - m[0][0] * m[1][3] * m[3][2] + - m[1][0] * m[0][2] * m[3][3] - - m[1][0] * m[0][3] * m[3][2] - - m[3][0] * m[0][2] * m[1][3] + - m[3][0] * m[0][3] * m[1][2], - // 1,3: - m[0][0] * m[1][2] * m[2][3] - - m[0][0] * m[1][3] * m[2][2] - - m[1][0] * m[0][2] * m[2][3] + - m[1][0] * m[0][3] * m[2][2] + - m[2][0] * m[0][2] * m[1][3] - - m[2][0] * m[0][3] * m[1][2], - ], - [ // row 2 - // 2,0: - m[1][0] * m[2][1] * m[3][3] - - m[1][0] * m[2][3] * m[3][1] - - m[2][0] * m[1][1] * m[3][3] + - m[2][0] * m[1][3] * m[3][1] + - m[3][0] * m[1][1] * m[2][3] - - m[3][0] * m[1][3] * m[2][1], - // 2,1: - -m[0][0] * m[2][1] * m[3][3] + - m[0][0] * m[2][3] * m[3][1] + - m[2][0] * m[0][1] * m[3][3] - - m[2][0] * m[0][3] * m[3][1] - - m[3][0] * m[0][1] * m[2][3] + - m[3][0] * m[0][3] * m[2][1], - // 2,2: - m[0][0] * m[1][1] * m[3][3] - - m[0][0] * m[1][3] * m[3][1] - - m[1][0] * m[0][1] * m[3][3] + - m[1][0] * m[0][3] * m[3][1] + - m[3][0] * m[0][1] * m[1][3] - - m[3][0] * m[0][3] * m[1][1], - // 2,3: - -m[0][0] * m[1][1] * m[2][3] + - m[0][0] * m[1][3] * m[2][1] + - m[1][0] * m[0][1] * m[2][3] - - m[1][0] * m[0][3] * m[2][1] - - m[2][0] * m[0][1] * m[1][3] + - m[2][0] * m[0][3] * m[1][1], - ], - [ // row 3 - // 3,0: - -m[1][0] * m[2][1] * m[3][2] + - m[1][0] * m[2][2] * m[3][1] + - m[2][0] * m[1][1] * m[3][2] - - m[2][0] * m[1][2] * m[3][1] - - m[3][0] * m[1][1] * m[2][2] + - m[3][0] * m[1][2] * m[2][1], - // 3,1: - m[0][0] * m[2][1] * m[3][2] - - m[0][0] * m[2][2] * m[3][1] - - m[2][0] * m[0][1] * m[3][2] + - m[2][0] * m[0][2] * m[3][1] + - m[3][0] * m[0][1] * m[2][2] - - m[3][0] * m[0][2] * m[2][1], - // 3,2: - -m[0][0] * m[1][1] * m[3][2] + - m[0][0] * m[1][2] * m[3][1] + - m[1][0] * m[0][1] * m[3][2] - - 
m[1][0] * m[0][2] * m[3][1] - - m[3][0] * m[0][1] * m[1][2] + - m[3][0] * m[0][2] * m[1][1], - // 3,3: - m[0][0] * m[1][1] * m[2][2] - - m[0][0] * m[1][2] * m[2][1] - - m[1][0] * m[0][1] * m[2][2] + - m[1][0] * m[0][2] * m[2][1] + - m[2][0] * m[0][1] * m[1][2] - - m[2][0] * m[0][2] * m[1][1], - ], - ]; - - let det = m[0][0] * inv[0][0] + m[0][1] * inv[1][0] + - m[0][2] * inv[2][0] + m[0][3] * inv[3][0]; - if det == 0. { return None; } - - let det_inv = 1. / det; - - for row in &mut inv { - for elem in row.iter_mut() { - *elem *= det_inv; - } - } - - Some(Matrix4x4(inv)) -} - -pub fn simd_inv4x4(m: Matrix4x4) -> Option { - let m = m.0; - let m_0 = f32x4::from_array(m[0]); - let m_1 = f32x4::from_array(m[1]); - let m_2 = f32x4::from_array(m[2]); - let m_3 = f32x4::from_array(m[3]); - - // 2 argument shuffle, returns an f32x4 - // the first f32x4 is indexes 0..=3 - // the second f32x4 is indexed 4..=7 - let tmp1 = f32x4::shuffle::<{[0, 1, 4, 5]}>(m_0, m_1); - let row1 = f32x4::shuffle::<{[0, 1, 4, 5]}>(m_2, m_3,); - - let row0 = f32x4::shuffle::<{[0, 2, 4, 6]}>(tmp1, row1); - let row1 = f32x4::shuffle::<{[1, 3, 5, 7]}>(row1, tmp1); - - let tmp1 = f32x4::shuffle::<{[2, 3, 6, 7]}>(m_0, m_1); - let row3 = f32x4::shuffle::<{[2, 3, 6, 7]}>(m_2, m_3); - let row2 = f32x4::shuffle::<{[0, 2, 4, 6]}>(tmp1, row3); - let row3 = f32x4::shuffle::<{[1, 3, 5, 7]}>(row3, tmp1); - - let tmp1 = row2 * row3; - // there's no syntax for a 1 arg shuffle yet, - // so we just pass the same f32x4 twice - let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); - - let minor0 = row1 * tmp1; - let minor1 = row0 * tmp1; - let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); - let minor0 = (row1 * tmp1) - minor0; - let minor1 = (row0 * tmp1) - minor1; - let minor1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(minor1, minor1); - - let tmp1 = row1 * row2; - let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); - let minor0 = (row3 * tmp1) + minor0; - let minor3 = row0 * tmp1; - let tmp1 = 
f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); - - let minor0 = minor0 - row3 * tmp1; - let minor3 = row0 * tmp1 - minor3; - let minor3 = f32x4::shuffle::<{[2, 3, 0, 1]}>(minor3, minor3); - - let tmp1 = row3 * f32x4::shuffle::<{[2, 3, 0, 1]}>(row1, row1); - let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); - let row2 = f32x4::shuffle::<{[2, 3, 0, 1]}>(row2, row2); - let minor0 = row2 * tmp1 + minor0; - let minor2 = row0 * tmp1; - let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); - let minor0 = minor0 - row2 * tmp1; - let minor2 = row0 * tmp1 - minor2; - let minor2 = f32x4::shuffle::<{[2, 3, 0, 1]}>(minor2, minor2); - - let tmp1 = row0 * row1; - let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); - let minor2 = minor2 + row3 * tmp1; - let minor3 = row2 * tmp1 - minor3; - let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); - let minor2 = row3 * tmp1 - minor2; - let minor3 = minor3 - row2 * tmp1; - - let tmp1 = row0 * row3; - let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); - let minor1 = minor1 - row2 * tmp1; - let minor2 = row1 * tmp1 + minor2; - let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); - let minor1 = row2 * tmp1 + minor1; - let minor2 = minor2 - row1 * tmp1; - - let tmp1 = row0 * row2; - let tmp1 = f32x4::shuffle::<{[1, 0, 3, 2]}>(tmp1, tmp1); - let minor1 = row3 * tmp1 + minor1; - let minor3 = minor3 - row1 * tmp1; - let tmp1 = f32x4::shuffle::<{[2, 3, 0, 1]}>(tmp1, tmp1); - let minor1 = minor1 - row3 * tmp1; - let minor3 = row1 * tmp1 + minor3; - - let det = row0 * minor0; - let det = f32x4::shuffle::<{[2, 3, 0, 1]}>(det, det) + det; - let det = f32x4::shuffle::<{[1, 0, 3, 2]}>(det, det) + det; - - if det.horizontal_sum() == 0. 
{ - return None; - } - // calculate the reciprocal - let tmp1 = f32x4::splat(1.0) / det; - let det = tmp1 + tmp1 - det * tmp1 * tmp1; - - let res0 = minor0 * det; - let res1 = minor1 * det; - let res2 = minor2 * det; - let res3 = minor3 * det; - - let mut m = m; - - m[0] = res0.to_array(); - m[1] = res1.to_array(); - m[2] = res2.to_array(); - m[3] = res3.to_array(); - - Some(Matrix4x4(m)) -} - - -#[cfg(test)] -#[rustfmt::skip] -mod tests { - use super::*; - - #[test] - fn test() { - let tests: &[(Matrix4x4, Option)] = &[ - // Identity: - (Matrix4x4([ - [1., 0., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 1., 0.], - [0., 0., 0., 1.], - ]), - Some(Matrix4x4([ - [1., 0., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 1., 0.], - [0., 0., 0., 1.], - ])) - ), - // None: - (Matrix4x4([ - [1., 2., 3., 4.], - [12., 11., 10., 9.], - [5., 6., 7., 8.], - [16., 15., 14., 13.], - ]), - None - ), - // Other: - (Matrix4x4([ - [1., 1., 1., 0.], - [0., 3., 1., 2.], - [2., 3., 1., 0.], - [1., 0., 2., 1.], - ]), - Some(Matrix4x4([ - [-3., -0.5, 1.5, 1.0], - [ 1., 0.25, -0.25, -0.5], - [ 3., 0.25, -1.25, -0.5], - [-3., 0.0, 1.0, 1.0], - ])) - ), - - - ]; - - for &(input, output) in tests { - assert_eq!(scalar_inv4x4(input), output); - assert_eq!(simd_inv4x4(input), output); - } - } -} - - -fn main() { - // Empty main to make cargo happy -}