From f182819bd197432dbcb386c54611ad750647deb9 Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 00:31:42 +0200
Subject: [PATCH 1/7] added _mm_cvtps_pd

---
 src/x86/sse2.rs | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index 618f3df234..ba8eff911f 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1726,6 +1726,13 @@ pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
     cvtpd2ps(a)
 }
 
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(cvtps2pd))]
+pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 {
+    cvtps2pd(a)
+}
+
 /// Convert packed double-precision (64-bit) floating-point elements in `a` to packed 32-bit integers.
 #[inline(always)]
 #[target_feature = "+sse2"]
@@ -1991,6 +1998,8 @@ extern {
     fn movmskpd(a: f64x2) -> i32;
     #[link_name = "llvm.x86.sse2.cvtpd2ps"]
     fn cvtpd2ps(a: f64x2) -> f32x4;
+    #[link_name = "llvm.x86.sse2.cvtps2pd"]
+    fn cvtps2pd(a: f32x4) -> f64x2;
     #[link_name = "llvm.x86.sse2.cvtpd2dq"]
     fn cvtpd2dq(a: f64x2) -> i32x4;
     #[link_name = "llvm.x86.sse2.cvtsd2si"]
@@ -3551,6 +3560,17 @@ mod tests {
         assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0,0.0));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_cvtps_pd() {
+        use std::{f64, f32};
+
+        let r = sse2::_mm_cvtps_pd(f32x4::new(-1.0, 2.0, -3.0, 5.0));
+        assert_eq!(r, f64x2::new(-1.0, 2.0));
+
+        let r = sse2::_mm_cvtps_pd(f32x4::new(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
+        assert_eq!(r, f64x2::new(f32::MAX as f64, f64::INFINITY));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_cvtpd_epi32() {
         use std::{f64, i32};

From fa716a8e1257e24758244cfba177d1f0d3b7d8c7 Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 00:54:31 +0200
Subject: [PATCH 2/7] added _mm_set_sd

---
 src/x86/sse2.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index ba8eff911f..c1de07ce59 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1815,6 +1815,13 @@ pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 {
     cvttps2dq(a)
 }
 
+/// Copy double-precision (64-bit) floating-point element `a` to the lower element of the
+/// packed 64-bit return value
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
+    f64x2::new(a, 0_f64)
+}
 
 /// Return a mask of the most significant bit of each element in `a`.
 ///
@@ -3709,6 +3716,13 @@ mod tests {
         assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_set_sd() {
+
+        let r = sse2::_mm_set_sd(-1.0_f64);
+        assert_eq!(r, f64x2::new(-1.0_f64, 0_f64));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_load1_pd() {
         let d = -5.0;

From 4e59fe5cb1eb6df7aaef5322ebae0beb64c70591 Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 01:00:13 +0200
Subject: [PATCH 3/7] added _mm_set1_pd

---
 src/x86/sse2.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index c1de07ce59..c6ef91eed0 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1823,6 +1823,13 @@ pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
     f64x2::new(a, 0_f64)
 }
 
+/// Broadcast double-precision (64-bit) floating-point value `a` to all elements of the return value.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
+    f64x2::new(a, a)
+}
+
 /// Return a mask of the most significant bit of each element in `a`.
 ///
 /// The mask is stored in the 2 least significant bits of the return value.
@@ -3718,11 +3725,16 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_set_sd() {
-
         let r = sse2::_mm_set_sd(-1.0_f64);
         assert_eq!(r, f64x2::new(-1.0_f64, 0_f64));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_set1_pd() {
+        let r = sse2::_mm_set1_pd(-1.0_f64);
+        assert_eq!(r, f64x2::new(-1.0_f64, -1.0_f64));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_load1_pd() {
         let d = -5.0;

From bb6a8e83f15ed920832bbe9100a811cd4eb52a16 Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 01:05:32 +0200
Subject: [PATCH 4/7] added _mm_set_pd1

---
 src/x86/sse2.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index c6ef91eed0..65b835b0d7 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1830,6 +1830,13 @@ pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
     f64x2::new(a, a)
 }
 
+/// Broadcast double-precision (64-bit) floating-point value `a` to all elements of the return value.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
+    f64x2::new(a, a)
+}
+
 /// Return a mask of the most significant bit of each element in `a`.
 ///
 /// The mask is stored in the 2 least significant bits of the return value.
@@ -3735,6 +3742,12 @@ mod tests {
         assert_eq!(r, f64x2::new(-1.0_f64, -1.0_f64));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_set_pd1() {
+        let r = sse2::_mm_set_pd1(-2.0_f64);
+        assert_eq!(r, f64x2::new(-2.0_f64, -2.0_f64));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_load1_pd() {
         let d = -5.0;

From db9cf0d3bcee06a12985971137809a96bc1f91fc Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 01:11:46 +0200
Subject: [PATCH 5/7] added _mm_set_pd

---
 src/x86/sse2.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index 65b835b0d7..542fde56a2 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1837,6 +1837,14 @@ pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
     f64x2::new(a, a)
 }
 
+/// Set packed double-precision (64-bit) floating-point elements in the return value with the
+/// supplied values: `a` is stored in the upper element and `b` in the lower element.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
+    f64x2::new(b, a)
+}
+
 /// Return a mask of the most significant bit of each element in `a`.
 ///
 /// The mask is stored in the 2 least significant bits of the return value.
@@ -3748,6 +3756,12 @@ mod tests {
         assert_eq!(r, f64x2::new(-2.0_f64, -2.0_f64));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_set_pd() {
+        let r = sse2::_mm_set_pd(1.0_f64, 5.0_f64);
+        assert_eq!(r, f64x2::new(5.0_f64, 1.0_f64));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_load1_pd() {
         let d = -5.0;

From 49659a6991b01bbe407d8d42816d8f61ce551eee Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 01:15:50 +0200
Subject: [PATCH 6/7] added _mm_setr_pd

---
 src/x86/sse2.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index 542fde56a2..997cfa9d9d 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1845,6 +1845,14 @@ pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
     f64x2::new(b, a)
 }
 
+/// Set packed double-precision (64-bit) floating-point elements in the return value with the
+/// supplied values in reverse order: `a` is stored in the lower element and `b` in the upper.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
+    f64x2::new(a, b)
+}
+
 /// Return a mask of the most significant bit of each element in `a`.
 ///
 /// The mask is stored in the 2 least significant bits of the return value.
@@ -3762,6 +3770,12 @@ mod tests {
         assert_eq!(r, f64x2::new(5.0_f64, 1.0_f64));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_setr_pd() {
+        let r = sse2::_mm_setr_pd(1.0_f64, -5.0_f64);
+        assert_eq!(r, f64x2::new(1.0_f64, -5.0_f64));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_load1_pd() {
         let d = -5.0;

From a76ac5cb33b1429731fc90e28c2be362c4599c60 Mon Sep 17 00:00:00 2001
From: Dustin Bensing
Date: Sun, 15 Oct 2017 01:22:46 +0200
Subject: [PATCH 7/7] added _mm_setzero_pd

---
 src/x86/sse2.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index 997cfa9d9d..c71a82202f 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1853,6 +1853,13 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
     f64x2::new(a, b)
 }
 
+/// Returns packed double-precision (64-bit) floating-point elements with all zeros.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_setzero_pd() -> f64x2 {
+    f64x2::splat(0_f64)
+}
+
 /// Return a mask of the most significant bit of each element in `a`.
 ///
 /// The mask is stored in the 2 least significant bits of the return value.
@@ -3776,6 +3783,12 @@ mod tests {
         assert_eq!(r, f64x2::new(1.0_f64, -5.0_f64));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_setzero_pd() {
+        let r = sse2::_mm_setzero_pd();
+        assert_eq!(r, f64x2::new(0_f64, 0_f64));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_load1_pd() {
         let d = -5.0;