@@ -1007,52 +1007,20 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
10071007#[ inline]
10081008#[ target_feature( enable = "sse" ) ]
10091009#[ cfg_attr( test, assert_instr( shufps, mask = 3 ) ) ]
1010- #[ rustc_args_required_const( 2 ) ]
1011- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1012- pub unsafe fn _mm_shuffle_ps ( a : __m128 , b : __m128 , mask : i32 ) -> __m128 {
1013- let mask = ( mask & 0xFF ) as u8 ;
1014-
1015- macro_rules! shuffle_done {
1016- ( $x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
1017- simd_shuffle4( a, b, [ $x01, $x23, $x45, $x67] )
1018- } ;
1019- }
1020- macro_rules! shuffle_x67 {
1021- ( $x01: expr, $x23: expr, $x45: expr) => {
1022- match ( mask >> 6 ) & 0b11 {
1023- 0b00 => shuffle_done!( $x01, $x23, $x45, 4 ) ,
1024- 0b01 => shuffle_done!( $x01, $x23, $x45, 5 ) ,
1025- 0b10 => shuffle_done!( $x01, $x23, $x45, 6 ) ,
1026- _ => shuffle_done!( $x01, $x23, $x45, 7 ) ,
1027- }
1028- } ;
1029- }
1030- macro_rules! shuffle_x45 {
1031- ( $x01: expr, $x23: expr) => {
1032- match ( mask >> 4 ) & 0b11 {
1033- 0b00 => shuffle_x67!( $x01, $x23, 4 ) ,
1034- 0b01 => shuffle_x67!( $x01, $x23, 5 ) ,
1035- 0b10 => shuffle_x67!( $x01, $x23, 6 ) ,
1036- _ => shuffle_x67!( $x01, $x23, 7 ) ,
1037- }
1038- } ;
1039- }
1040- macro_rules! shuffle_x23 {
1041- ( $x01: expr) => {
1042- match ( mask >> 2 ) & 0b11 {
1043- 0b00 => shuffle_x45!( $x01, 0 ) ,
1044- 0b01 => shuffle_x45!( $x01, 1 ) ,
1045- 0b10 => shuffle_x45!( $x01, 2 ) ,
1046- _ => shuffle_x45!( $x01, 3 ) ,
1047- }
1048- } ;
1049- }
1050- match mask & 0b11 {
1051- 0b00 => shuffle_x23 ! ( 0 ) ,
1052- 0b01 => shuffle_x23 ! ( 1 ) ,
1053- 0b10 => shuffle_x23 ! ( 2 ) ,
1054- _ => shuffle_x23 ! ( 3 ) ,
1055- }
1010+ #[ rustc_legacy_const_generics( 2 ) ]
1011+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1012+ pub unsafe fn _mm_shuffle_ps < const mask: i32 > ( a : __m128 , b : __m128 ) -> __m128 {
1013+ assert ! ( mask >= 0 && mask <= 255 ) ;
1014+ simd_shuffle4 (
1015+ a,
1016+ b,
1017+ [
1018+ mask as u32 & 0b11 ,
1019+ ( mask as u32 >> 2 ) & 0b11 ,
1020+ ( ( mask as u32 >> 4 ) & 0b11 ) + 4 ,
1021+ ( ( mask as u32 >> 6 ) & 0b11 ) + 4 ,
1022+ ] ,
1023+ )
10561024}
10571025
10581026/// Unpacks and interleaves single-precision (32-bit) floating-point elements
@@ -1725,6 +1693,14 @@ pub const _MM_HINT_T2: i32 = 1;
17251693#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
17261694pub const _MM_HINT_NTA: i32 = 0 ;
17271695
1696+ /// See [`_mm_prefetch`](fn._mm_prefetch.html).
1697+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1698+ pub const _MM_HINT_ET0: i32 = 7 ;
1699+
1700+ /// See [`_mm_prefetch`](fn._mm_prefetch.html).
1701+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1702+ pub const _MM_HINT_ET1: i32 = 6 ;
1703+
17281704/// Fetch the cache line that contains address `p` using the given `strategy`.
17291705///
17301706/// The `strategy` must be one of:
@@ -1742,6 +1718,10 @@ pub const _MM_HINT_NTA: i32 = 0;
17421718/// but outside of the cache hierarchy. This is used to reduce access latency
17431719/// without polluting the cache.
17441720///
1721+ /// * [`_MM_HINT_ET0`](constant._MM_HINT_ET0.html) and
1722+ /// [`_MM_HINT_ET1`](constant._MM_HINT_ET1.html) are similar to `_MM_HINT_T0`
1723+ /// and `_MM_HINT_T1` but indicate that a write to the address is anticipated.
1724+ ///
17451725/// The actual implementation depends on the particular CPU. This instruction
17461726/// is considered a hint, so the CPU is also free to simply ignore the request.
17471727///
@@ -1769,24 +1749,12 @@ pub const _MM_HINT_NTA: i32 = 0;
17691749#[ cfg_attr( test, assert_instr( prefetcht1, strategy = _MM_HINT_T1) ) ]
17701750#[ cfg_attr( test, assert_instr( prefetcht2, strategy = _MM_HINT_T2) ) ]
17711751#[ cfg_attr( test, assert_instr( prefetchnta, strategy = _MM_HINT_NTA) ) ]
1772- #[ rustc_args_required_const( 1 ) ]
1773- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1774- pub unsafe fn _mm_prefetch ( p : * const i8 , strategy : i32 ) {
1775- // The `strategy` must be a compile-time constant, so we use a short form
1776- // of `constify_imm8!` for now.
1777- // We use the `llvm.prefetch` instrinsic with `rw` = 0 (read), and
1778- // `cache type` = 1 (data cache). `locality` is based on our `strategy`.
1779- macro_rules! pref {
1780- ( $imm8: expr) => {
1781- match $imm8 {
1782- 0 => prefetch( p, 0 , 0 , 1 ) ,
1783- 1 => prefetch( p, 0 , 1 , 1 ) ,
1784- 2 => prefetch( p, 0 , 2 , 1 ) ,
1785- _ => prefetch( p, 0 , 3 , 1 ) ,
1786- }
1787- } ;
1788- }
1789- pref ! ( strategy)
1752+ #[ rustc_legacy_const_generics( 1 ) ]
1753+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1754+ pub unsafe fn _mm_prefetch < const strategy: i32 > ( p : * const i8 ) {
1755+ // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
1756+ // `locality` and `rw` are based on our `strategy`.
1757+ prefetch ( p, ( strategy >> 2 ) & 1 , strategy & 3 , 1 ) ;
17901758}
17911759
17921760/// Returns vector of type __m128 with undefined elements.
@@ -2976,7 +2944,7 @@ mod tests {
29762944 unsafe fn test_mm_shuffle_ps ( ) {
29772945 let a = _mm_setr_ps ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
29782946 let b = _mm_setr_ps ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
2979- let r = _mm_shuffle_ps ( a, b, 0b00_01_01_11 ) ;
2947+ let r = _mm_shuffle_ps :: < 0b00_01_01_11 > ( a, b) ;
29802948 assert_eq_m128 ( r, _mm_setr_ps ( 4.0 , 2.0 , 6.0 , 5.0 ) ) ;
29812949 }
29822950
0 commit comments