From 56e9370af91d8e2262a902a1cfdfb891206255b6 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 16:28:15 +0200 Subject: [PATCH 01/37] avx: _mm256_movedup_pd --- src/x86/avx.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 4de3ee9dc7..ff31bc760c 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1316,6 +1316,15 @@ pub unsafe fn _mm256_moveldup_ps(a: f32x8) -> f32x8 { simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) } +/// Duplicate even-indexed double-precision (64-bit) floating-point elements +/// from "a", and return the results. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vmovddup))] +pub unsafe fn _mm256_movedup_pd(a: f64x4) -> f64x4 { + simd_shuffle4(a, a, [0, 0, 2, 2]) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -2400,4 +2409,12 @@ mod tests { let e = f32x8::new(1., 1., 3., 3., 5., 5., 7., 7.); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_movedup_pd() { + let a = f64x4::new(1., 2., 3., 4.); + let r = avx::_mm256_movedup_pd(a); + let e = f64x4::new(1., 1., 3., 3.); + assert_eq!(r, e); + } } From 8517ab9fe4afec8a6fca5cdd305d7523c7ae4fc0 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 16:44:12 +0200 Subject: [PATCH 02/37] avx: _mm256_lddqu_si256 --- src/x86/avx.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index ff31bc760c..b5c3283b97 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1325,6 +1325,16 @@ pub unsafe fn _mm256_movedup_pd(a: f64x4) -> f64x4 { simd_shuffle4(a, a, [0, 0, 2, 2]) } +/// Load 256-bits of integer data from unaligned memory into result. +/// This intrinsic may perform better than `_mm256_loadu_si256` when the +/// data crosses a cache line boundary. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vlddqu))] +pub unsafe fn _mm256_lddqu_si256(mem_addr: *const i8x32) -> i8x32 { + vlddqu(mem_addr as *const i8) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1478,6 +1488,8 @@ extern "C" { fn maskloadps(mem_addr: *const i8, mask: i32x4) -> f32x4; #[link_name = "llvm.x86.avx.maskstore.ps"] fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: f32x4); + #[link_name = "llvm.x86.avx.ldu.dq.256"] + fn vlddqu(mem_addr: *const i8) -> i8x32; } #[cfg(test)] @@ -2417,4 +2429,21 @@ mod tests { let e = f64x4::new(1., 1., 3., 3.); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_lddqu_si256() { + let a = i8x32::new( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32); + let p = &a as *const _; + let r = avx::_mm256_lddqu_si256(black_box(p)); + let e = i8x32::new( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32); + assert_eq!(r, e); + } } From 1c6be55e1fd98c2c536f3a13fa8433f59665122c Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 16:57:47 +0200 Subject: [PATCH 03/37] avx: _mm256_rcp_ps --- src/x86/avx.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index b5c3283b97..a88842b80f 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1335,6 +1335,16 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const i8x32) -> i8x32 { vlddqu(mem_addr as *const i8) } +/// Compute the approximate reciprocal of packed single-precision (32-bit) +/// floating-point elements in `a`, and return the results. The maximum +/// relative error for this approximation is less than 1.5*2^-12. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vrcpps))] +pub unsafe fn _mm256_rcp_ps(a: f32x8) -> f32x8 { + vrcpps(a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1490,6 +1500,8 @@ extern "C" { fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: f32x4); #[link_name = "llvm.x86.avx.ldu.dq.256"] fn vlddqu(mem_addr: *const i8) -> i8x32; + #[link_name = "llvm.x86.avx.rcp.ps.256"] + fn vrcpps(a: f32x8) -> f32x8; } #[cfg(test)] @@ -2446,4 +2458,13 @@ mod tests { 25, 26, 27, 28, 29, 30, 31, 32); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_rcp_ps() { + let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let r = avx::_mm256_rcp_ps(a); + let e = f32x8::new(0.99975586, 0.49987793, 0.33325195, 0.24993896, + 0.19995117, 0.16662598, 0.14282227, 0.12496948); + assert_eq!(r, e); + } } From 5aee0b97bf1b38c87169a956f3e5f8c0ee1b98bb Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 17:05:30 +0200 Subject: [PATCH 04/37] avx: _mm256_rsqrt_ps --- src/x86/avx.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index a88842b80f..bad960ebf1 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1345,6 +1345,16 @@ pub unsafe fn _mm256_rcp_ps(a: f32x8) -> f32x8 { vrcpps(a) } +/// Compute the approximate reciprocal square root of packed single-precision +/// (32-bit) floating-point elements in `a`, and return the results. +/// The maximum relative error for this approximation is less than 1.5*2^-12. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vrsqrtps))] +pub unsafe fn _mm256_rsqrt_ps(a: f32x8) -> f32x8 { + vrsqrtps(a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -1502,6 +1512,8 @@ extern "C" { fn vlddqu(mem_addr: *const i8) -> i8x32; #[link_name = "llvm.x86.avx.rcp.ps.256"] fn vrcpps(a: f32x8) -> f32x8; + #[link_name = "llvm.x86.avx.rsqrt.ps.256"] + fn vrsqrtps(a: f32x8) -> f32x8; } #[cfg(test)] @@ -2467,4 +2479,13 @@ mod tests { 0.19995117, 0.16662598, 0.14282227, 0.12496948); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_rsqrt_ps() { + let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let r = avx::_mm256_rsqrt_ps(a); + let e = f32x8::new(0.99975586, 0.7069092, 0.5772705, 0.49987793, + 0.44714355, 0.40820313, 0.3779297, 0.3534546); + assert_eq!(r, e); + } } From e3a51026119e6a1802e2e14211479150156fd2b3 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 17:14:46 +0200 Subject: [PATCH 05/37] avx: _mm256_unpackhi_pd --- src/x86/avx.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index bad960ebf1..df04712528 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1355,6 +1355,15 @@ pub unsafe fn _mm256_rsqrt_ps(a: f32x8) -> f32x8 { vrsqrtps(a) } +/// Unpack and interleave double-precision (64-bit) floating-point elements +/// from the high half of each 128-bit lane in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vunpckhpd))] +pub unsafe fn _mm256_unpackhi_pd(a: f64x4, b: f64x4) -> f64x4 { + simd_shuffle4(a, b, [1, 5, 3, 7]) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -2488,4 +2497,13 @@ mod tests { 0.44714355, 0.40820313, 0.3779297, 0.3534546); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_unpackhi_pd() { + let a = f64x4::new(1., 2., 3., 4.); + let b = f64x4::new(5., 6., 7., 8.); + let r = avx::_mm256_unpackhi_pd(a, b); + let e = f64x4::new(2., 6., 4., 8.); + assert_eq!(r, e); + } } From 01c3227276809fc0b3c1cacbd21e2442c10e6e63 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 17:25:26 +0200 Subject: [PATCH 06/37] avx: _mm256_unpackhi_ps --- src/x86/avx.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index df04712528..d65de5baad 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1364,6 +1364,15 @@ pub unsafe fn _mm256_unpackhi_pd(a: f64x4, b: f64x4) -> f64x4 { simd_shuffle4(a, b, [1, 5, 3, 7]) } +/// Unpack and interleave single-precision (32-bit) floating-point elements +/// from the high half of each 128-bit lane in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vunpckhps))] +pub unsafe fn _mm256_unpackhi_ps(a: f32x8, b: f32x8) -> f32x8 { + simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -2506,4 +2515,13 @@ mod tests { let e = f64x4::new(2., 6., 4., 8.); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_unpackhi_ps() { + let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); + let b = f32x8::new(5., 6., 7., 8., 5., 6., 7., 8.); + let r = avx::_mm256_unpackhi_ps(a, b); + let e = f32x8::new(3., 7., 4., 8., 3., 7., 4., 8.); + assert_eq!(r, e); + } } From 3060970e18c29b30a776091b1d81831aaf324618 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 14 Oct 2017 17:40:00 +0200 Subject: [PATCH 07/37] avx: _mm256_unpacklo_pd, _mm256_unpacklo_ps --- src/x86/avx.rs | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index d65de5baad..96b3587d62 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1373,6 +1373,24 @@ pub unsafe fn _mm256_unpackhi_ps(a: f32x8, b: f32x8) -> f32x8 { simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } +/// Unpack and interleave double-precision (64-bit) floating-point elements +/// from the low half of each 128-bit lane in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub unsafe fn _mm256_unpacklo_pd(a: f64x4, b: f64x4) -> f64x4 { + simd_shuffle4(a, b, [0, 4, 2, 6]) +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements +/// from the low half of each 128-bit lane in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub unsafe fn _mm256_unpacklo_ps(a: f32x8, b: f32x8) -> f32x8 { + simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -2518,10 +2536,28 @@ mod tests { #[simd_test = "avx"] unsafe fn _mm256_unpackhi_ps() { - let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); - let b = f32x8::new(5., 6., 7., 8., 5., 6., 7., 8.); + let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let b = f32x8::new(9., 10., 11., 12., 13., 14., 15., 16.); let r = avx::_mm256_unpackhi_ps(a, b); - let e = f32x8::new(3., 7., 4., 8., 3., 7., 4., 8.); + let e = f32x8::new(3., 11., 4., 12., 7., 15., 8., 16.); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_unpacklo_pd() { + let a = f64x4::new(1., 2., 3., 4.); + let b = f64x4::new(5., 6., 7., 8.); + let r = avx::_mm256_unpacklo_pd(a, b); + let e = f64x4::new(1., 5., 3., 7.); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_unpacklo_ps() { + let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let b = f32x8::new(9., 10., 11., 12., 13., 14., 15., 16.); + let r = avx::_mm256_unpacklo_ps(a, b); + let e = f32x8::new(1., 9., 2., 10., 5., 13., 6., 14.); assert_eq!(r, e); } } From 09d480a5b04f05fc84da554ded6fad5780b8f9ed Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:03:19 +0200 Subject: [PATCH 08/37] avx: _mm256_testz_si256 --- src/x86/avx.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 96b3587d62..10e232458e 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1391,6 +1391,17 @@ pub unsafe fn _mm256_unpacklo_ps(a: f32x8, b: f32x8) -> f32x8 { simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } +/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. +/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vptest))] +pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 { + ptestz256(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1550,6 +1561,8 @@ extern "C" { fn vrcpps(a: f32x8) -> f32x8; #[link_name = "llvm.x86.avx.rsqrt.ps.256"] fn vrsqrtps(a: f32x8) -> f32x8; + #[link_name = "llvm.x86.avx.ptestz.256"] + fn ptestz256(a: i64x4, b: i64x4) -> i32; } #[cfg(test)] @@ -2560,4 +2573,15 @@ mod tests { let e = f32x8::new(1., 9., 2., 10., 5., 13., 6., 14.); assert_eq!(r, e); } + + #[simd_test = "avx"] + unsafe fn _mm256_testz_si256() { + let a = i64x4::new(1, 2, 3, 4); + let b = i64x4::new(5, 6, 7, 8); + let r = avx::_mm256_testz_si256(a, b); + assert_eq!(r, 0); + let b = i64x4::splat(0); + let r = avx::_mm256_testz_si256(a, b); + assert_eq!(r, 1); + } } From 35dbf8e495b982eaa89bbbc6710f205697db4cb4 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:12:05 +0200 Subject: [PATCH 09/37] avx: _mm256_testc_si256 --- src/x86/avx.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 10e232458e..efa72ed5c6 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1402,6 +1402,17 @@ pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 { ptestz256(a, b) } +/// Compute the bitwise AND of 256 bits (representing integer data) in "a" and +/// "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. +/// Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if +/// the result is zero, otherwise set "CF" to 0. Return the "CF" value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vptest))] +pub unsafe fn _mm256_testc_si256(a: i64x4, b: i64x4) -> i32 { + ptestc256(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -1563,6 +1574,8 @@ extern "C" { fn vrsqrtps(a: f32x8) -> f32x8; #[link_name = "llvm.x86.avx.ptestz.256"] fn ptestz256(a: i64x4, b: i64x4) -> i32; + #[link_name = "llvm.x86.avx.ptestc.256"] + fn ptestc256(a: i64x4, b: i64x4) -> i32; } #[cfg(test)] @@ -2584,4 +2597,15 @@ mod tests { let r = avx::_mm256_testz_si256(a, b); assert_eq!(r, 1); } + + #[simd_test = "avx"] + unsafe fn _mm256_testc_si256() { + let a = i64x4::new(1, 2, 3, 4); + let b = i64x4::new(5, 6, 7, 8); + let r = avx::_mm256_testc_si256(a, b); + assert_eq!(r, 0); + let b = i64x4::splat(0); + let r = avx::_mm256_testc_si256(a, b); + assert_eq!(r, 1); + } } From dd4ef5c91b6dfb82ee90529d5c31607cd20b34ba Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:24:15 +0200 Subject: [PATCH 10/37] avx: _mm256_testz_pd --- src/x86/avx.rs | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index efa72ed5c6..f668740cf0 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1402,10 +1402,10 @@ pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 { ptestz256(a, b) } -/// Compute the bitwise AND of 256 bits (representing integer data) in "a" and -/// "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. -/// Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if -/// the result is zero, otherwise set "CF" to 0. Return the "CF" value. +/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. +/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// the result is zero, otherwise set `CF` to 0. Return the `CF` value. 
#[inline(always)] #[target_feature = "+avx"] #[cfg_attr(test, assert_instr(vptest))] @@ -1413,6 +1413,20 @@ pub unsafe fn _mm256_testc_si256(a: i64x4, b: i64x4) -> i32 { ptestc256(a, b) } +/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestpd))] +pub unsafe fn _mm256_testz_pd(a: f64x4, b: f64x4) -> i32 { + vtestzpd256(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1576,6 +1590,8 @@ extern "C" { fn ptestz256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.ptestc.256"] fn ptestc256(a: i64x4, b: i64x4) -> i32; + #[link_name = "llvm.x86.avx.vtestz.pd.256"] + fn vtestzpd256(a: f64x4, b: f64x4) -> i32; } #[cfg(test)] @@ -2608,4 +2624,15 @@ mod tests { let r = avx::_mm256_testc_si256(a, b); assert_eq!(r, 1); } + + #[simd_test = "avx"] + unsafe fn _mm256_testz_pd() { + let a = f64x4::new(1., 2., 3., 4.); + let b = f64x4::new(5., 6., 7., 8.); + let r = avx::_mm256_testz_pd(a, b); + assert_eq!(r, 1); + let a = f64x4::splat(-1.); + let r = avx::_mm256_testz_pd(a, a); + assert_eq!(r, 0); + } } From fddb670a671b28336e31222fe725a9cef0eb8c44 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:31:18 +0200 Subject: [PATCH 11/37] avx: _mm256_testc_pd --- src/x86/avx.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index f668740cf0..d01bf4328a 100644 --- 
a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1427,6 +1427,20 @@ pub unsafe fn _mm256_testz_pd(a: f64x4, b: f64x4) -> i32 { vtestzpd256(a, b) } +/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestpd))] +pub unsafe fn _mm256_testc_pd(a: f64x4, b: f64x4) -> i32 { + vtestcpd256(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1592,6 +1606,8 @@ extern "C" { fn ptestc256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd.256"] fn vtestzpd256(a: f64x4, b: f64x4) -> i32; + #[link_name = "llvm.x86.avx.vtestc.pd.256"] + fn vtestcpd256(a: f64x4, b: f64x4) -> i32; } #[cfg(test)] @@ -2635,4 +2651,15 @@ mod tests { let r = avx::_mm256_testz_pd(a, a); assert_eq!(r, 0); } + + #[simd_test = "avx"] + unsafe fn _mm256_testc_pd() { + let a = f64x4::new(1., 2., 3., 4.); + let b = f64x4::new(5., 6., 7., 8.); + let r = avx::_mm256_testc_pd(a, b); + assert_eq!(r, 1); + let b = f64x4::splat(0.); + let r = avx::_mm256_testc_pd(a, b); + assert_eq!(r, 1); + } } From c0961f2b716a695209eee6a72bc7a3049a50276f Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:42:04 +0200 Subject: [PATCH 12/37] avx: _mm256_testnzc_pd --- src/x86/avx.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index d01bf4328a..d6ced27a89 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1441,6 
+1441,21 @@ pub unsafe fn _mm256_testc_pd(a: f64x4, b: f64x4) -> i32 { vtestcpd256(a, b) } +/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestpd))] +pub unsafe fn _mm256_testnzc_pd(a: f64x4, b: f64x4) -> i32 { + vtestnzcpd256(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1608,6 +1623,8 @@ extern "C" { fn vtestzpd256(a: f64x4, b: f64x4) -> i32; #[link_name = "llvm.x86.avx.vtestc.pd.256"] fn vtestcpd256(a: f64x4, b: f64x4) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.pd.256"] + fn vtestnzcpd256(a: f64x4, b: f64x4) -> i32; } #[cfg(test)] @@ -2662,4 +2679,16 @@ mod tests { let r = avx::_mm256_testc_pd(a, b); assert_eq!(r, 1); } + + #[simd_test = "avx"] + unsafe fn _mm256_testnzc_pd() { + let a = f64x4::new(1., 2., 3., 4.); + let b = f64x4::new(5., 6., 7., 8.); + let r = avx::_mm256_testnzc_pd(a, b); + assert_eq!(r, 0); + let a = f64x4::new(1., -1., -1., -1.); + let b = f64x4::new(-1., -1., 1., 1.); + let r = avx::_mm256_testnzc_pd(a, b); + assert_eq!(r, 1); + } } From f4da29bda8e4e0c869cbf2ce90bd4546533baf8f Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:47:23 +0200 Subject: [PATCH 13/37] avx: _mm_testz_pd --- src/x86/avx.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 
d6ced27a89..b68d2c8f45 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1456,6 +1456,20 @@ pub unsafe fn _mm256_testnzc_pd(a: f64x4, b: f64x4) -> i32 { vtestnzcpd256(a, b) } +/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestpd))] +pub unsafe fn _mm_testz_pd(a: f64x2, b: f64x2) -> i32 { + vtestzpd(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1625,6 +1639,8 @@ extern "C" { fn vtestcpd256(a: f64x4, b: f64x4) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd.256"] fn vtestnzcpd256(a: f64x4, b: f64x4) -> i32; + #[link_name = "llvm.x86.avx.vtestz.pd"] + fn vtestzpd(a: f64x2, b: f64x2) -> i32; } #[cfg(test)] @@ -2690,5 +2706,16 @@ mod tests { let b = f64x4::new(-1., -1., 1., 1.); let r = avx::_mm256_testnzc_pd(a, b); assert_eq!(r, 1); + + #[simd_test = "avx"] + unsafe fn _mm_testz_pd() { + let a = f64x2::new(1., 2.); + let b = f64x2::new(5., 6.); + let r = avx::_mm_testz_pd(a, b); + assert_eq!(r, 1); + let a = f64x2::splat(-1.); + let r = avx::_mm_testz_pd(a, a); + assert_eq!(r, 0); + } } } From b59393b85842b619e59989cf7d774339f4da099f Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 08:56:11 +0200 Subject: [PATCH 14/37] avx: _mm_testc_pd --- src/x86/avx.rs | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 
b68d2c8f45..ba54541993 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1470,6 +1470,20 @@ pub unsafe fn _mm_testz_pd(a: f64x2, b: f64x2) -> i32 { vtestzpd(a, b) } +/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestpd))] +pub unsafe fn _mm_testc_pd(a: f64x2, b: f64x2) -> i32 { + vtestcpd(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1641,6 +1655,8 @@ extern "C" { fn vtestnzcpd256(a: f64x4, b: f64x4) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd"] fn vtestzpd(a: f64x2, b: f64x2) -> i32; + #[link_name = "llvm.x86.avx.vtestc.pd"] + fn vtestcpd(a: f64x2, b: f64x2) -> i32; } #[cfg(test)] @@ -2691,9 +2707,10 @@ mod tests { let b = f64x4::new(5., 6., 7., 8.); let r = avx::_mm256_testc_pd(a, b); assert_eq!(r, 1); - let b = f64x4::splat(0.); + let a = f64x4::splat(1.); + let b = f64x4::splat(-1.); let r = avx::_mm256_testc_pd(a, b); - assert_eq!(r, 1); + assert_eq!(r, 0); } #[simd_test = "avx"] @@ -2706,6 +2723,7 @@ mod tests { let b = f64x4::new(-1., -1., 1., 1.); let r = avx::_mm256_testnzc_pd(a, b); assert_eq!(r, 1); + } #[simd_test = "avx"] unsafe fn _mm_testz_pd() { @@ -2717,5 +2735,16 @@ mod tests { let r = avx::_mm_testz_pd(a, a); assert_eq!(r, 0); } + + #[simd_test = "avx"] + unsafe fn _mm_testc_pd() { + let a = f64x2::new(1., 2.); + let b = f64x2::new(5., 6.); + let r = avx::_mm_testc_pd(a, b); + assert_eq!(r, 1); + 
let a = f64x2::splat(1.); + let b = f64x2::splat(-1.); + let r = avx::_mm_testc_pd(a, b); + assert_eq!(r, 0); } } From ed3b7ca4c08ac3a42022b9e8e694130e777bfa91 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 09:01:38 +0200 Subject: [PATCH 15/37] avx: _mm_testnzc_pd --- src/x86/avx.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index ba54541993..180a1331b0 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1484,6 +1484,21 @@ pub unsafe fn _mm_testc_pd(a: f64x2, b: f64x2) -> i32 { vtestcpd(a, b) } +/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestpd))] +pub unsafe fn _mm_testnzc_pd(a: f64x2, b: f64x2) -> i32 { + vtestnzcpd(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -1657,6 +1672,8 @@ extern "C" { fn vtestzpd(a: f64x2, b: f64x2) -> i32; #[link_name = "llvm.x86.avx.vtestc.pd"] fn vtestcpd(a: f64x2, b: f64x2) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.pd"] + fn vtestnzcpd(a: f64x2, b: f64x2) -> i32; } #[cfg(test)] @@ -2747,4 +2764,16 @@ mod tests { let r = avx::_mm_testc_pd(a, b); assert_eq!(r, 0); } + + #[simd_test = "avx"] + unsafe fn _mm_testnzc_pd() { + let a = f64x2::new(1., 2.); + let b = f64x2::new(5., 6.); + let r = avx::_mm_testnzc_pd(a, b); + assert_eq!(r, 0); + let a = f64x2::new(1., -1.); + let b = f64x2::new(-1., -1.); + let r = avx::_mm_testnzc_pd(a, b); + assert_eq!(r, 1); + } } From f4ca21e63621e7485b5ea28de6dd9e9178f777db Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 09:20:24 +0200 Subject: [PATCH 16/37] avx: _mm256_testz_ps, _mm256_testc_ps, _mm256_testnzc_ps --- src/x86/avx.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 180a1331b0..5c24f84940 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1499,6 +1499,49 @@ pub unsafe fn _mm_testnzc_pd(a: f64x2, b: f64x2) -> i32 { vtestnzcpd(a, b) } +/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestps))] +pub unsafe fn _mm256_testz_ps(a: f32x8, b: f32x8) -> i32 { + vtestzps256(a, b) +} + +/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestps))] +pub unsafe fn _mm256_testc_ps(a: f32x8, b: f32x8) -> i32 { + vtestcps256(a, b) +} + +/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestps))] +pub unsafe fn _mm256_testnzc_ps(a: f32x8, b: f32x8) -> i32 { + vtestnzcps256(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -1674,6 +1717,12 @@ extern "C" { fn vtestcpd(a: f64x2, b: f64x2) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd"] fn vtestnzcpd(a: f64x2, b: f64x2) -> i32; + #[link_name = "llvm.x86.avx.vtestz.ps.256"] + fn vtestzps256(a: f32x8, b: f32x8) -> i32; + #[link_name = "llvm.x86.avx.vtestc.ps.256"] + fn vtestcps256(a: f32x8, b: f32x8) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.ps.256"] + fn vtestnzcps256(a: f32x8, b: f32x8) -> i32; } #[cfg(test)] @@ -2776,4 +2825,35 @@ mod tests { let r = avx::_mm_testnzc_pd(a, b); assert_eq!(r, 1); } + + #[simd_test = "avx"] + unsafe fn _mm256_testz_ps() { + let a = f32x8::splat(1.); + let r = avx::_mm256_testz_ps(a, a); + assert_eq!(r, 1); + let a = f32x8::splat(-1.); + let r = avx::_mm256_testz_ps(a, a); + assert_eq!(r, 0); + } + + #[simd_test = "avx"] + unsafe fn _mm256_testc_ps() { + let a = f32x8::splat(1.); + let r = avx::_mm256_testc_ps(a, a); + assert_eq!(r, 1); + let b = f32x8::splat(-1.); + let r = avx::_mm256_testc_ps(a, b); + assert_eq!(r, 0); + } + + #[simd_test = "avx"] + unsafe fn _mm256_testnzc_ps() { + let a = f32x8::splat(1.); + let r = avx::_mm256_testnzc_ps(a, a); + assert_eq!(r, 0); + let a = f32x8::new(1., -1., -1., -1., -1., -1., -1., -1.); + let b = f32x8::new(-1., -1., 1., 1., 1., 1., 1., 1.); + let r = avx::_mm256_testnzc_ps(a, b); + assert_eq!(r, 1); + } } From 981892f3c9f49586789b33c68378aaa012482020 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 09:30:10 +0200 Subject: [PATCH 17/37] avx: _mm_testz_ps, _mm_testc_ps, _mm_testnzc_ps --- src/x86/avx.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 5c24f84940..b661184bef 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1542,6 +1542,49 @@ pub unsafe fn _mm256_testnzc_ps(a: f32x8, b: f32x8) -> i32 { vtestnzcps256(a, b) } +/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// floating-point elements) 
in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestps))] +pub unsafe fn _mm_testz_ps(a: f32x4, b: f32x4) -> i32 { + vtestzps(a, b) +} + +/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestps))] +pub unsafe fn _mm_testc_ps(a: f32x4, b: f32x4) -> i32 { + vtestcps(a, b) +} + +/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vtestps))] +pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 { + vtestnzcps(a, b) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1723,6 +1766,12 @@ extern "C" { fn vtestcps256(a: f32x8, b: f32x8) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps.256"] fn vtestnzcps256(a: f32x8, b: f32x8) -> i32; + #[link_name = "llvm.x86.avx.vtestz.ps"] + fn vtestzps(a: f32x4, b: f32x4) -> i32; + #[link_name = "llvm.x86.avx.vtestc.ps"] + fn vtestcps(a: f32x4, b: f32x4) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.ps"] + fn vtestnzcps(a: f32x4, b: f32x4) -> i32; } #[cfg(test)] @@ -2856,4 +2905,35 @@ mod tests { let r = avx::_mm256_testnzc_ps(a, b); assert_eq!(r, 1); } + + #[simd_test = "avx"] + unsafe fn _mm_testz_ps() { + let a = f32x4::splat(1.); + let r = avx::_mm_testz_ps(a, a); + assert_eq!(r, 1); + let a = f32x4::splat(-1.); + let r = avx::_mm_testz_ps(a, a); + assert_eq!(r, 0); + } + + #[simd_test = "avx"] + unsafe fn _mm_testc_ps() { + let a = f32x4::splat(1.); + let r = avx::_mm_testc_ps(a, a); + assert_eq!(r, 1); + let b = f32x4::splat(-1.); + let r = avx::_mm_testc_ps(a, b); + assert_eq!(r, 0); + } + + #[simd_test = "avx"] + unsafe fn _mm_testnzc_ps() { + let a = f32x4::splat(1.); + let r = avx::_mm_testnzc_ps(a, a); + assert_eq!(r, 0); + let a = f32x4::new(1., -1., -1., -1.); + let b = f32x4::new(-1., -1., 1., 1.); + let r = avx::_mm_testnzc_ps(a, b); + assert_eq!(r, 1); + } } From d97ee09e728a978b3c5921c24e2c110412a15d70 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 09:59:45 +0200 Subject: [PATCH 18/37] avx: _mm256_movemask_pd, _mm256_movemask_ps --- src/x86/avx.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index b661184bef..1434450d34 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1585,6 +1585,23 @@ pub unsafe fn 
_mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 { vtestnzcps(a, b) } +/// Set each bit of the returned mask based on the most significant bit of the +/// corresponding packed double-precision (64-bit) floating-point element in `a`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vmovmskpd))] +pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 { + movmskpd256(a) +} + +/// +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vmovmskps))] +pub unsafe fn _mm256_movemask_ps(a: f32x8) -> i32 { + movmskps256(a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -1772,6 +1789,10 @@ extern "C" { fn vtestcps(a: f32x4, b: f32x4) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps"] fn vtestnzcps(a: f32x4, b: f32x4) -> i32; + #[link_name = "llvm.x86.avx.movmsk.pd.256"] + fn movmskpd256(a: f64x4) -> i32; + #[link_name = "llvm.x86.avx.movmsk.ps.256"] + fn movmskps256(a: f32x8) -> i32; } #[cfg(test)] @@ -2936,4 +2957,18 @@ mod tests { let r = avx::_mm_testnzc_ps(a, b); assert_eq!(r, 1); } + + #[simd_test = "avx"] + unsafe fn _mm256_movemask_pd() { + let a = f64x4::new(1., -2., 3., -4.); + let r = avx::_mm256_movemask_pd(a); + assert_eq!(r, 0xA); + } + + #[simd_test = "avx"] + unsafe fn _mm256_movemask_ps() { + let a = f32x8::new(1., -2., 3., -4., 1., -2., 3., -4.); + let r = avx::_mm256_movemask_ps(a); + assert_eq!(r, 0xAA); + } } From 424775fd2627ed55420f9c0998d6aa09ee498ab1 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 12:22:36 +0200 Subject: [PATCH 19/37] avx: _mm256_setzero_pd, _mm256_setzero_ps --- src/x86/avx.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 1434450d34..4e2e39fc75 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1594,7 +1594,8 @@ pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 { movmskpd256(a) } -/// +/// Set each bit of the returned 
mask based on the most significant bit of the +/// corresponding packed single-precision (32-bit) floating-point element in `a`. #[inline(always)] #[target_feature = "+avx"] #[cfg_attr(test, assert_instr(vmovmskps))] @@ -1602,6 +1603,22 @@ pub unsafe fn _mm256_movemask_ps(a: f32x8) -> i32 { movmskps256(a) } +/// Return vector of type __m256d with all elements set to zero. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected +pub unsafe fn _mm256_setzero_pd() -> f64x4 { + f64x4::new(0., 0., 0., 0.) +} + +/// Return vector of type __m256 with all elements set to zero. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vxorps))] +pub unsafe fn _mm256_setzero_ps() -> f32x8 { + f32x8::new(0., 0., 0., 0., 0., 0., 0., 0.) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -2971,4 +2988,16 @@ mod tests { let r = avx::_mm256_movemask_ps(a); assert_eq!(r, 0xAA); } + + #[simd_test = "avx"] + unsafe fn _mm256_setzero_pd() { + let r = avx::_mm256_setzero_pd(); + assert_eq!(r, f64x4::splat(0.)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_setzero_ps() { + let r = avx::_mm256_setzero_ps(); + assert_eq!(r, f32x8::splat(0.)); + } } From 668988c9452ee15d549b6a4fbfc072f980caeaf4 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 12:29:32 +0200 Subject: [PATCH 20/37] avx: _mm256_setzero_si256 --- src/x86/avx.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 4e2e39fc75..c2afae3f05 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1619,6 +1619,14 @@ pub unsafe fn _mm256_setzero_ps() -> f32x8 { f32x8::new(0., 0., 0., 0., 0., 0., 0., 0.) } +/// Return vector of type __m256i with all elements set to zero. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vxor))] +pub unsafe fn _mm256_setzero_si256() -> i64x4 { + i64x4::new(0, 0, 0, 0) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -3000,4 +3008,10 @@ mod tests { let r = avx::_mm256_setzero_ps(); assert_eq!(r, f32x8::splat(0.)); } + + #[simd_test = "avx"] + unsafe fn _mm256_setzero_si256() { + let r = avx::_mm256_setzero_si256(); + assert_eq!(r, i64x4::splat(0)); + } } From 2e9ec162d1cd839de5ac22655f996faf6567ecdf Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 12:44:22 +0200 Subject: [PATCH 21/37] avx: _mm256_set_pd, _mm256_set_ps --- src/x86/avx.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index c2afae3f05..442a4f6c3e 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1627,6 +1627,23 @@ pub unsafe fn _mm256_setzero_si256() -> i64x4 { i64x4::new(0, 0, 0, 0) } +/// Set packed double-precision (64-bit) floating-point elements in returned +/// vector with the supplied values. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { + f64x4::new(d, c, b, a) +} + +/// Set packed single-precision (32-bit) floating-point elements in returned +/// vector with the supplied values. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, + e: f32, f: f32, g: f32, h: f32) -> f32x8 { + f32x8::new(h, g, f, e, d, c, b, a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3014,4 +3031,16 @@ mod tests { let r = avx::_mm256_setzero_si256(); assert_eq!(r, i64x4::splat(0)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set_pd() { + let r = avx::_mm256_set_pd(1., 2., 3., 4.); + assert_eq!(r, f64x4::new(4., 3., 2., 1.)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_set_ps() { + let r = avx::_mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq!(r, f32x8::new(8., 7., 6., 5., 4., 3., 2., 1.)); + } } From 502a814ff807eae466f46ca4f4c9905a34cf28b6 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 13:13:23 +0200 Subject: [PATCH 22/37] avx: _mm256_set_epi8 --- src/x86/avx.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 442a4f6c3e..a36e209c41 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1644,6 +1644,28 @@ pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, f32x8::new(h, g, f, e, d, c, b, a) } +/// Set packed 8-bit integers in returned vector with the supplied values in +/// reverse order. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_set_epi8(a00: i8, a01: i8, a02: i8, a03: i8, + a04: i8, a05: i8, a06: i8, a07: i8, + a08: i8, a09: i8, a10: i8, a11: i8, + a12: i8, a13: i8, a14: i8, a15: i8, + a16: i8, a17: i8, a18: i8, a19: i8, + a20: i8, a21: i8, a22: i8, a23: i8, + a24: i8, a25: i8, a26: i8, a27: i8, + a28: i8, a29: i8, a30: i8, a31: i8) -> i8x32 { + i8x32::new(a31, a30, a29, a28, + a27, a26, a25, a24, + a23, a22, a21, a20, + a19, a18, a17, a16, + a15, a14, a13, a12, + a11, a10, a09, a08, + a07, a06, a05, a04, + a03, a02, a01, a00) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3043,4 +3065,17 @@ mod tests { let r = avx::_mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq!(r, f32x8::new(8., 7., 6., 5., 4., 3., 2., 1.)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set_epi8() { + let r = avx::_mm256_set_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32); + assert_eq!(r, i8x32::new(32, 31, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1)); + } } From 56700ef0b28c925b4710093dabebc7fc52566bf1 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 13:21:32 +0200 Subject: [PATCH 23/37] avx: _mm256_set_epi16 --- src/x86/avx.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index a36e209c41..f33165038e 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1666,6 +1666,19 @@ pub unsafe fn _mm256_set_epi8(a00: i8, a01: i8, a02: i8, a03: i8, a03, a02, a01, a00) } +/// Set packed 16-bit integers in "dst" with the supplied values. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_set_epi16(a00: i16, a01: i16, a02: i16, a03: i16, + a04: i16, a05: i16, a06: i16, a07: i16, + a08: i16, a09: i16, a10: i16, a11: i16, + a12: i16, a13: i16, a14: i16, a15: i16) -> i16x16 { + i16x16::new(a15, a14, a13, a12, + a11, a10, a09, a08, + a07, a06, a05, a04, + a03, a02, a01, a00) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3078,4 +3091,13 @@ mod tests { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set_epi16() { + let r = avx::_mm256_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq!(r, i16x16::new(16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1)); + } } From 40000c9cc269a5c71ec01cea6126049714d34575 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 13:27:49 +0200 Subject: [PATCH 24/37] avx: _mm256_set_epi32 --- src/x86/avx.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index f33165038e..b7107cad00 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1666,7 +1666,7 @@ pub unsafe fn _mm256_set_epi8(a00: i8, a01: i8, a02: i8, a03: i8, a03, a02, a01, a00) } -/// Set packed 16-bit integers in "dst" with the supplied values. +/// Set packed 16-bit integers in returned vector with the supplied values. #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_set_epi16(a00: i16, a01: i16, a02: i16, a03: i16, @@ -1679,6 +1679,15 @@ pub unsafe fn _mm256_set_epi16(a00: i16, a01: i16, a02: i16, a03: i16, a03, a02, a01, a00) } +/// Set packed 32-bit integers in returned vector with the supplied values. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_set_epi32(a0: i32, a1: i32, a2: i32, a3: i32, + a4: i32, a5: i32, a6: i32, a7: i32) -> i32x8 { + i32x8::new(a7, a6, a5, a4, + a3, a2, a1, a0) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3100,4 +3109,11 @@ mod tests { assert_eq!(r, i16x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set_epi32() { + let r = avx::_mm256_set_epi32( + 1, 2, 3, 4, 5, 6, 7, 8); + assert_eq!(r, i32x8::new(8, 7, 6, 5, 4, 3, 2, 1)); + } } From b82692c26c91fec47ed72cee9bf83f47c9db9fc8 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 13:36:40 +0200 Subject: [PATCH 25/37] avx: _mm256_set_epi64x --- src/x86/avx.rs | 69 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index b7107cad00..69e82b55e5 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1648,44 +1648,51 @@ pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, /// reverse order. #[inline(always)] #[target_feature = "+avx"] -pub unsafe fn _mm256_set_epi8(a00: i8, a01: i8, a02: i8, a03: i8, - a04: i8, a05: i8, a06: i8, a07: i8, - a08: i8, a09: i8, a10: i8, a11: i8, - a12: i8, a13: i8, a14: i8, a15: i8, - a16: i8, a17: i8, a18: i8, a19: i8, - a20: i8, a21: i8, a22: i8, a23: i8, - a24: i8, a25: i8, a26: i8, a27: i8, - a28: i8, a29: i8, a30: i8, a31: i8) -> i8x32 { - i8x32::new(a31, a30, a29, a28, - a27, a26, a25, a24, - a23, a22, a21, a20, - a19, a18, a17, a16, - a15, a14, a13, a12, - a11, a10, a09, a08, - a07, a06, a05, a04, - a03, a02, a01, a00) +pub unsafe fn _mm256_set_epi8(e00: i8, e01: i8, e02: i8, e03: i8, + e04: i8, e05: i8, e06: i8, e07: i8, + e08: i8, e09: i8, e10: i8, e11: i8, + e12: i8, e13: i8, e14: i8, e15: i8, + e16: i8, e17: i8, e18: i8, e19: i8, + e20: i8, e21: i8, e22: i8, e23: i8, + e24: i8, e25: i8, e26: i8, e27: i8, + e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 { + i8x32::new(e31, e30, e29, e28, + e27, e26, e25, e24, + e23, e22, e21, e20, + e19, e18, e17, e16, + e15, e14, e13, e12, + e11, e10, e09, e08, + e07, e06, e05, e04, + e03, e02, e01, e00) } /// Set packed 16-bit integers in returned vector with 
the supplied values. #[inline(always)] #[target_feature = "+avx"] -pub unsafe fn _mm256_set_epi16(a00: i16, a01: i16, a02: i16, a03: i16, - a04: i16, a05: i16, a06: i16, a07: i16, - a08: i16, a09: i16, a10: i16, a11: i16, - a12: i16, a13: i16, a14: i16, a15: i16) -> i16x16 { - i16x16::new(a15, a14, a13, a12, - a11, a10, a09, a08, - a07, a06, a05, a04, - a03, a02, a01, a00) +pub unsafe fn _mm256_set_epi16(e00: i16, e01: i16, e02: i16, e03: i16, + e04: i16, e05: i16, e06: i16, e07: i16, + e08: i16, e09: i16, e10: i16, e11: i16, + e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 { + i16x16::new(e15, e14, e13, e12, + e11, e10, e09, e08, + e07, e06, e05, e04, + e03, e02, e01, e00) } /// Set packed 32-bit integers in returned vector with the supplied values. #[inline(always)] #[target_feature = "+avx"] -pub unsafe fn _mm256_set_epi32(a0: i32, a1: i32, a2: i32, a3: i32, - a4: i32, a5: i32, a6: i32, a7: i32) -> i32x8 { - i32x8::new(a7, a6, a5, a4, - a3, a2, a1, a0) +pub unsafe fn _mm256_set_epi32(e0: i32, e1: i32, e2: i32, e3: i32, + e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 { + i32x8::new(e7, e6, e5, e4, + e3, e2, e1, e0) +} + +/// Set packed 64-bit integers in returned vector with the supplied values. 
+#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { + i64x4::new(d, c, b, a) } /// Casts vector of type __m128 to type __m256; @@ -3116,4 +3123,10 @@ mod tests { 1, 2, 3, 4, 5, 6, 7, 8); assert_eq!(r, i32x8::new(8, 7, 6, 5, 4, 3, 2, 1)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set_epi64x() { + let r = avx::_mm256_set_epi64x(1, 2, 3, 4); + assert_eq!(r, i64x4::new(4, 3, 2, 1)); + } } From 8c67767dcf6f001fbeb975a1c04a4b238fe8230d Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 14:18:39 +0200 Subject: [PATCH 26/37] avx: _mm256_setr_pd, _mm256_setr_ps --- src/x86/avx.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 69e82b55e5..70395c9632 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1695,6 +1695,23 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { i64x4::new(d, c, b, a) } +/// Set packed double-precision (64-bit) floating-point elements in returned +/// vector with the supplied values in reverse order. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { + f64x4::new(a, b, c, d) +} + +/// Set packed single-precision (32-bit) floating-point elements in returned +/// vector with the supplied values in reverse order. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, + e: f32, f: f32, g: f32, h: f32) -> f32x8 { + f32x8::new(a, b, c, d, e, f, g, h) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3129,4 +3146,16 @@ mod tests { let r = avx::_mm256_set_epi64x(1, 2, 3, 4); assert_eq!(r, i64x4::new(4, 3, 2, 1)); } + + #[simd_test = "avx"] + unsafe fn _mm256_setr_pd() { + let r = avx::_mm256_setr_pd(1., 2., 3., 4.); + assert_eq!(r, f64x4::new(1., 2., 3., 4.)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_setr_ps() { + let r = avx::_mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq!(r, f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.)); + } } From 9f6f7823cce652a6d3e716dd50772530d3e6b6f8 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 14:24:46 +0200 Subject: [PATCH 27/37] avx: _mm256_setr_epi8 --- src/x86/avx.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 70395c9632..9489be6000 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1712,6 +1712,28 @@ pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, f32x8::new(a, b, c, d, e, f, g, h) } +/// Set packed single-precision (32-bit) floating-point elements in returned +/// vector with the supplied values in reverse order. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_setr_epi8(e00: i8, e01: i8, e02: i8, e03: i8, + e04: i8, e05: i8, e06: i8, e07: i8, + e08: i8, e09: i8, e10: i8, e11: i8, + e12: i8, e13: i8, e14: i8, e15: i8, + e16: i8, e17: i8, e18: i8, e19: i8, + e20: i8, e21: i8, e22: i8, e23: i8, + e24: i8, e25: i8, e26: i8, e27: i8, + e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 { + i8x32::new(e00, e01, e02, e03, + e04, e05, e06, e07, + e08, e09, e10, e11, + e12, e13, e14, e15, + e16, e17, e18, e19, + e20, e21, e22, e23, + e24, e25, e26, e27, + e28, e29, e30, e31) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3158,4 +3180,17 @@ mod tests { let r = avx::_mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq!(r, f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.)); } + + #[simd_test = "avx"] + unsafe fn _mm256_setr_epi8() { + let r = avx::_mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32); + assert_eq!(r, i8x32::new(1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32)); + } } From 06b96385657171e6491c76202b6a00cfeb0feed8 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 14:32:58 +0200 Subject: [PATCH 28/37] avx: _mm256_setr_epi16 --- src/x86/avx.rs | 58 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 9489be6000..5a6af5057c 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1670,20 +1670,20 @@ pub unsafe fn _mm256_set_epi8(e00: i8, e01: i8, e02: i8, e03: i8, #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_set_epi16(e00: i16, e01: i16, e02: i16, e03: i16, - e04: i16, e05: i16, e06: i16, e07: i16, - e08: i16, e09: i16, e10: i16, e11: i16, - e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 { + e04: i16, e05: i16, e06: i16, e07: i16, + e08: i16, e09: i16, e10: i16, e11: i16, + e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 { i16x16::new(e15, e14, e13, e12, - e11, e10, e09, e08, - e07, e06, e05, e04, - e03, e02, e01, e00) + e11, e10, e09, e08, + e07, e06, e05, e04, + e03, e02, e01, e00) } /// Set packed 32-bit integers in returned vector with the supplied values. 
#[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_set_epi32(e0: i32, e1: i32, e2: i32, e3: i32, - e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 { + e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 { i32x8::new(e7, e6, e5, e4, e3, e2, e1, e0) } @@ -1708,22 +1708,22 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, - e: f32, f: f32, g: f32, h: f32) -> f32x8 { + e: f32, f: f32, g: f32, h: f32) -> f32x8 { f32x8::new(a, b, c, d, e, f, g, h) } -/// Set packed single-precision (32-bit) floating-point elements in returned -/// vector with the supplied values in reverse order. +/// Set packed 8-bit integers in returned vector with the supplied values in +/// reverse order. #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_setr_epi8(e00: i8, e01: i8, e02: i8, e03: i8, - e04: i8, e05: i8, e06: i8, e07: i8, - e08: i8, e09: i8, e10: i8, e11: i8, - e12: i8, e13: i8, e14: i8, e15: i8, - e16: i8, e17: i8, e18: i8, e19: i8, - e20: i8, e21: i8, e22: i8, e23: i8, - e24: i8, e25: i8, e26: i8, e27: i8, - e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 { + e04: i8, e05: i8, e06: i8, e07: i8, + e08: i8, e09: i8, e10: i8, e11: i8, + e12: i8, e13: i8, e14: i8, e15: i8, + e16: i8, e17: i8, e18: i8, e19: i8, + e20: i8, e21: i8, e22: i8, e23: i8, + e24: i8, e25: i8, e26: i8, e27: i8, + e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 { i8x32::new(e00, e01, e02, e03, e04, e05, e06, e07, e08, e09, e10, e11, @@ -1734,6 +1734,19 @@ pub unsafe fn _mm256_setr_epi8(e00: i8, e01: i8, e02: i8, e03: i8, e28, e29, e30, e31) } +/// Set packed 16-bit integers in with the supplied values in reverse order. 
+#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_setr_epi16(e00: i16, e01: i16, e02: i16, e03: i16, + e04: i16, e05: i16, e06: i16, e07: i16, + e08: i16, e09: i16, e10: i16, e11: i16, + e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 { + i16x16::new(e00, e01, e02, e03, + e04, e05, e06, e07, + e08, e09, e10, e11, + e12, e13, e14, e15) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -3153,7 +3166,7 @@ mod tests { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); assert_eq!(r, i16x16::new(16, 15, 14, 13, 12, 11, 10, 9, - 8, 7, 6, 5, 4, 3, 2, 1)); + 8, 7, 6, 5, 4, 3, 2, 1)); } #[simd_test = "avx"] @@ -3193,4 +3206,13 @@ mod tests { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); } + + #[simd_test = "avx"] + unsafe fn _mm256_setr_epi16() { + let r = avx::_mm256_setr_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq!(r, i16x16::new(1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16)); + } } From 13fa6b0e0e7257cd8e6b9718500938bfcfe4952a Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 14:40:51 +0200 Subject: [PATCH 29/37] avx: _mm256_setr_epi32, _mm256_setr_epi64x --- src/x86/avx.rs | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 5a6af5057c..3bb25d5a42 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1734,7 +1734,8 @@ pub unsafe fn _mm256_setr_epi8(e00: i8, e01: i8, e02: i8, e03: i8, e28, e29, e30, e31) } -/// Set packed 16-bit integers in with the supplied values in reverse order. +/// Set packed 16-bit integers in returned vector with the supplied values in +/// reverse order. 
#[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_setr_epi16(e00: i16, e01: i16, e02: i16, e03: i16, @@ -1747,6 +1748,24 @@ pub unsafe fn _mm256_setr_epi16(e00: i16, e01: i16, e02: i16, e03: i16, e12, e13, e14, e15) } +/// Set packed 32-bit integers in returned vector with the supplied values in +/// reverse order. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_setr_epi32(e0: i32, e1: i32, e2: i32, e3: i32, + e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 { + i32x8::new(e0, e1, e2, e3, + e4, e5, e6, e7) +} + +/// Set packed 64-bit integers in returned vector with the supplied values in +/// reverse order. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { + i64x4::new(a, b, c, d) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -3215,4 +3234,17 @@ mod tests { assert_eq!(r, i16x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); } + + #[simd_test = "avx"] + unsafe fn _mm256_setr_epi32() { + let r = avx::_mm256_setr_epi32( + 1, 2, 3, 4, 5, 6, 7, 8); + assert_eq!(r, i32x8::new(1, 2, 3, 4, 5, 6, 7, 8)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_setr_epi64x() { + let r = avx::_mm256_setr_epi64x(1, 2, 3, 4); + assert_eq!(r, i64x4::new(1, 2, 3, 4)); + } } From 3c1daea381e5d11604127275daa572445cb23d9a Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 15:06:00 +0200 Subject: [PATCH 30/37] avx: add missing assert_instr --- src/x86/avx.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 3bb25d5a42..de898a4584 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1631,6 +1631,8 @@ pub unsafe fn _mm256_setzero_si256() -> i64x4 { /// vector with the supplied values. 
#[inline(always)] #[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vunpcklpd))] +#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { f64x4::new(d, c, b, a) } @@ -1691,6 +1693,8 @@ pub unsafe fn _mm256_set_epi32(e0: i32, e1: i32, e2: i32, e3: i32, /// Set packed 64-bit integers in returned vector with the supplied values. #[inline(always)] #[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { i64x4::new(d, c, b, a) } @@ -1699,6 +1703,8 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { /// vector with the supplied values in reverse order. #[inline(always)] #[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vunpcklpd))] +#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { f64x4::new(a, b, c, d) } @@ -1762,6 +1768,8 @@ pub unsafe fn _mm256_setr_epi32(e0: i32, e1: i32, e2: i32, e3: i32, /// reverse order. #[inline(always)] #[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { i64x4::new(a, b, c, d) } From 896561c5a0b998ab7b5a9c239311b7b4bb787d9c Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 15:12:36 +0200 Subject: [PATCH 31/37] avx: _mm256_set1_pd --- src/x86/avx.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index de898a4584..2a37766bfe 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1774,6 +1774,16 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { i64x4::new(a, b, c, d) } +/// Broadcast double-precision (64-bit) floating-point value `a` to all +/// elements of returned vector. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vmovddup))] +#[cfg_attr(test, assert_instr(vinsertf128))] +pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 { + f64x4::new(a, a, a, a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -3255,4 +3265,10 @@ mod tests { let r = avx::_mm256_setr_epi64x(1, 2, 3, 4); assert_eq!(r, i64x4::new(1, 2, 3, 4)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set1_pd() { + let r = avx::_mm256_set1_pd(1.); + assert_eq!(r, f64x4::splat(1.)); + } } From d8a472790e209add9ea112a140c450d29f9207bc Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 15:15:47 +0200 Subject: [PATCH 32/37] avx: _mm256_set1_ps --- src/x86/avx.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 2a37766bfe..36e67f0007 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1784,6 +1784,16 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 { f64x4::new(a, a, a, a) } +/// Broadcast single-precision (32-bit) floating-point value `a` to all +/// elements of returned vector. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vpermilps))] +#[cfg_attr(test, assert_instr(vinsertf128))] +pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 { + f32x8::new(a, a, a, a, a, a, a, a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3271,4 +3281,10 @@ mod tests { let r = avx::_mm256_set1_pd(1.); assert_eq!(r, f64x4::splat(1.)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set1_ps() { + let r = avx::_mm256_set1_ps(1.); + assert_eq!(r, f32x8::splat(1.)); + } } From 328ac368aa19744d6897da2f08ba6ad4d691bd2e Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 15:21:37 +0200 Subject: [PATCH 33/37] avx: _mm256_set1_epi8 --- src/x86/avx.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 36e67f0007..e421ba8f11 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1794,6 +1794,19 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 { f32x8::new(a, a, a, a, a, a, a, a) } +/// Broadcast 8-bit integer `a` to all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastb`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vpshufb))] +#[cfg_attr(test, assert_instr(vinsertf128))] +pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 { + i8x32::new(a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)] @@ -3287,4 +3300,10 @@ mod tests { let r = avx::_mm256_set1_ps(1.); assert_eq!(r, f32x8::splat(1.)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set1_epi8() { + let r = avx::_mm256_set1_epi8(1); + assert_eq!(r, i8x32::splat(1)); + } } From 6fcddf17943e91cdc207af0feb79fb7622358c0e Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 15:34:24 +0200 Subject: [PATCH 34/37] avx: _mm256_set1_epi16, _mm256_set1_epi32 --- src/x86/avx.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index e421ba8f11..2932e42bb3 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1807,6 +1807,27 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 { a, a, a, a, a, a, a, a) } +/// Broadcast 16-bit integer `a` to all all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastw`. +#[inline(always)] +#[target_feature = "+avx"] +//#[cfg_attr(test, assert_instr(vpshufb))] +#[cfg_attr(test, assert_instr(vinsertf128))] +pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 { + i16x16::new(a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a) +} + +/// Broadcast 32-bit integer `a` to all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastd`. +#[inline(always)] +#[target_feature = "+avx"] +//#[cfg_attr(test, assert_instr(vpermilps))] +#[cfg_attr(test, assert_instr(vinsertf128))] +pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 { + i32x8::new(a, a, a, a, a, a, a, a) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. 
#[inline(always)]
@@ -3287,4 +3300,10 @@ mod tests {
         let r = avx::_mm256_set1_ps(1.);
         assert_eq!(r, f32x8::splat(1.));
     }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set1_epi8() {
+        let r = avx::_mm256_set1_epi8(1);
+        assert_eq!(r, i8x32::splat(1));
+    }
 }

From 6fcddf17943e91cdc207af0feb79fb7622358c0e Mon Sep 17 00:00:00 2001
From: gwenn
Date: Sun, 15 Oct 2017 15:34:24 +0200
Subject: [PATCH 34/37] avx: _mm256_set1_epi16, _mm256_set1_epi32

---
 src/x86/avx.rs | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/src/x86/avx.rs b/src/x86/avx.rs
index e421ba8f11..2932e42bb3 100644
--- a/src/x86/avx.rs
+++ b/src/x86/avx.rs
@@ -1807,6 +1807,27 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 {
                a, a, a, a, a, a, a, a)
 }
 
+/// Broadcast 16-bit integer `a` to all elements of returned vector.
+/// This intrinsic may generate the `vpbroadcastw`.
+#[inline(always)]
+#[target_feature = "+avx"]
+//#[cfg_attr(test, assert_instr(vpshufb))]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 {
+    i16x16::new(a, a, a, a, a, a, a, a,
+                a, a, a, a, a, a, a, a)
+}
+
+/// Broadcast 32-bit integer `a` to all elements of returned vector.
+/// This intrinsic may generate the `vpbroadcastd`.
+#[inline(always)]
+#[target_feature = "+avx"]
+//#[cfg_attr(test, assert_instr(vpermilps))]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 {
+    i32x8::new(a, a, a, a, a, a, a, a)
+}
+
 /// Casts vector of type __m128 to type __m256;
 /// the upper 128 bits of the result are undefined.
#[inline(always)] @@ -3339,4 +3349,10 @@ mod tests { let r = avx::_mm256_set1_epi32(1); assert_eq!(r, i32x8::splat(1)); } + + #[simd_test = "avx"] + unsafe fn _mm256_set1_epi64x() { + let r = avx::_mm256_set1_epi64x(1); + assert_eq!(r, i64x4::splat(1)); + } } From 6a9996ada0782f7d4b1fc5e29bb5d0aabbc6a034 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 16:34:08 +0200 Subject: [PATCH 36/37] avx: _mm256_castpd_si256, _mm256_castsi256_pd, _mm256_castps256_ps128, _mm256_castpd256_pd128, _mm256_castsi256_si128 --- src/x86/avx.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 1f69a4e5b9..b9c8e8b3a6 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1838,6 +1838,51 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 { i64x4::new(a, a, a, a) } +/// Casts vector of type __m256d to type __m256i. +/// This intrinsic is only used for compilation and does not generate any +/// instructions, thus it has zero latency. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_castpd_si256(a: f64x4) -> i64x4 { + simd_cast(a) +} + +/// Casts vector of type __m256i to type __m256d. +/// This intrinsic is only used for compilation and does not generate any +/// instructions, thus it has zero latency. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_castsi256_pd(a: i64x4) -> f64x4 { + simd_cast(a) +} + +/// Casts vector of type __m256 to type __m128. +/// This intrinsic is only used for compilation and does not generate any +/// instructions, thus it has zero latency. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> f32x4 { + simd_shuffle4(a, a, [0, 1, 2, 3]) +} + +/// Casts vector of type __m256d to type __m128d. +/// This intrinsic is only used for compilation and does not generate any +/// instructions, thus it has zero latency. 
+#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> f64x2 { + simd_shuffle2(a, a, [0, 1]) +} + +/// Casts vector of type __m256i to type __m128i. +/// This intrinsic is only used for compilation and does not generate any +/// instructions, thus it has zero latency. +#[inline(always)] +#[target_feature = "+avx"] +pub unsafe fn _mm256_castsi256_si128(a: i64x4) -> i64x2 { + simd_shuffle2(a, a, [0, 1]) +} + /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. #[inline(always)] @@ -3355,4 +3400,39 @@ mod tests { let r = avx::_mm256_set1_epi64x(1); assert_eq!(r, i64x4::splat(1)); } + + #[simd_test = "avx"] + unsafe fn _mm256_castpd_si256() { + let a = f64x4::new(1., 2., 3., 4.); + let r = avx::_mm256_castpd_si256(a); + assert_eq!(r, i64x4::new(1, 2, 3, 4)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_castsi256_pd() { + let a = i64x4::new(1, 2, 3, 4); + let r = avx::_mm256_castsi256_pd(a); + assert_eq!(r, f64x4::new(1., 2., 3., 4.)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_castps256_ps128() { + let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let r = avx::_mm256_castps256_ps128(a); + assert_eq!(r, f32x4::new(1., 2., 3., 4.)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_castpd256_pd128() { + let a = f64x4::new(1., 2., 3., 4.); + let r = avx::_mm256_castpd256_pd128(a); + assert_eq!(r, f64x2::new(1., 2.)); + } + + #[simd_test = "avx"] + unsafe fn _mm256_castsi256_si128() { + let a = i64x4::new(1, 2, 3, 4); + let r = avx::_mm256_castsi256_si128(a); + assert_eq!(r, i64x2::new(1, 2)); + } } From 6c1b14891c473a100c109e86ba25e99962a143cd Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 15 Oct 2017 16:48:30 +0200 Subject: [PATCH 37/37] avx: remove assert_instr failing --- src/x86/avx.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/x86/avx.rs b/src/x86/avx.rs index b9c8e8b3a6..6757e00f16 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -1631,7 
+1631,6 @@ pub unsafe fn _mm256_setzero_si256() -> i64x4 { /// vector with the supplied values. #[inline(always)] #[target_feature = "+avx"] -#[cfg_attr(test, assert_instr(vunpcklpd))] #[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { f64x4::new(d, c, b, a) @@ -1693,7 +1692,6 @@ pub unsafe fn _mm256_set_epi32(e0: i32, e1: i32, e2: i32, e3: i32, /// Set packed 64-bit integers in returned vector with the supplied values. #[inline(always)] #[target_feature = "+avx"] -#[cfg_attr(test, assert_instr(vpunpcklqdq))] #[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { i64x4::new(d, c, b, a) @@ -1703,8 +1701,6 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { /// vector with the supplied values in reverse order. #[inline(always)] #[target_feature = "+avx"] -#[cfg_attr(test, assert_instr(vunpcklpd))] -#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { f64x4::new(a, b, c, d) } @@ -1768,7 +1764,6 @@ pub unsafe fn _mm256_setr_epi32(e0: i32, e1: i32, e2: i32, e3: i32, /// reverse order. #[inline(always)] #[target_feature = "+avx"] -#[cfg_attr(test, assert_instr(vpunpcklqdq))] #[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { i64x4::new(a, b, c, d) @@ -1778,8 +1773,6 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { /// elements of returned vector. #[inline(always)] #[target_feature = "+avx"] -#[cfg_attr(test, assert_instr(vmovddup))] -#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 { f64x4::new(a, a, a, a) } @@ -1788,8 +1781,6 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 { /// elements of returned vector. 
#[inline(always)] #[target_feature = "+avx"] -#[cfg_attr(test, assert_instr(vpermilps))] -#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 { f32x8::new(a, a, a, a, a, a, a, a) } @@ -1822,8 +1813,6 @@ pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 { /// This intrinsic may generate the `vpbroadcastd`. #[inline(always)] #[target_feature = "+avx"] -//#[cfg_attr(test, assert_instr(vpermilps))] -#[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 { i32x8::new(a, a, a, a, a, a, a, a) }