diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index 9984af227b..75dbba2d99 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -153,28 +153,28 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8)
 #[inline]
 #[target_feature(enable = "avx2")]
-#[cfg_attr(test, assert_instr(vpalignr, n = 7))]
-#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
-    let n = n as u32;
-    // If `palignr` is shifting the pair of vectors more than the size of two
+pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+    static_assert_imm8!(IMM8);
+    // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
-    if n > 32 {
+    if IMM8 > 32 {
         return _mm256_set1_epi8(0);
     }
-    // If `palignr` is shifting the pair of input vectors more than one lane,
+    // If palignr is shifting the pair of input vectors more than one lane,
     // but less than two lanes, convert to shifting in zeroes.
-    let (a, b, n) = if n > 16 {
-        (_mm256_set1_epi8(0), a, n - 16)
+    let (a, b) = if IMM8 > 16 {
+        (_mm256_set1_epi8(0), a)
     } else {
-        (a, b, n)
+        (a, b)
     };
     let a = a.as_i8x32();
     let b = b.as_i8x32();
-    let r: i8x32 = match n {
+    let r: i8x32 = match IMM8 % 16 {
         0 => simd_shuffle32(
             b,
             a,
@@ -5106,10 +5106,10 @@ mod tests {
             -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32,
         );
-        let r = _mm256_alignr_epi8(a, b, 33);
+        let r = _mm256_alignr_epi8::<33>(a, b);
         assert_eq_m256i(r, _mm256_set1_epi8(0));
-        let r = _mm256_alignr_epi8(a, b, 17);
+        let r = _mm256_alignr_epi8::<17>(a, b);
         #[rustfmt::skip]
         let expected = _mm256_setr_epi8(
             2, 3, 4, 5, 6, 7, 8, 9,
@@ -5119,7 +5119,7 @@ mod tests {
         );
         assert_eq_m256i(r, expected);
-        let r = _mm256_alignr_epi8(a, b, 4);
+        let r = _mm256_alignr_epi8::<4>(a, b);
         #[rustfmt::skip]
         let expected = _mm256_setr_epi8(
             -5, -6, -7, -8, -9, -10, -11, -12,
@@ -5136,10 +5136,10 @@ mod tests {
             -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32,
         );
-        let r = _mm256_alignr_epi8(a, b, 16);
+        let r = _mm256_alignr_epi8::<16>(a, b);
         assert_eq_m256i(r, expected);
-        let r = _mm256_alignr_epi8(a, b, 15);
+        let r = _mm256_alignr_epi8::<15>(a, b);
         #[rustfmt::skip]
         let expected = _mm256_setr_epi8(
             -16, 1, 2, 3, 4, 5, 6, 7,
@@ -5149,7 +5149,7 @@ mod tests {
         );
         assert_eq_m256i(r, expected);
-        let r = _mm256_alignr_epi8(a, b, 0);
+        let r = _mm256_alignr_epi8::<0>(a, b);
         assert_eq_m256i(r, b);
     }
diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index d88d5bad95..d18e41aa81 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -5055,16 +5055,17 @@ pub unsafe fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i)
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_slli_epi16&expand=5296)
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
-#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm256_mask_slli_epi16(src: __m256i, k: __mmask16, a: __m256i, imm8: u32) -> __m256i {
-    macro_rules!
call { - ($imm8:expr) => { - _mm256_slli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16())) +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_slli_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, +) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliw256(a.as_i16x16(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i16x16())) } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5072,17 +5073,14 @@ pub unsafe fn _mm256_mask_slli_epi16(src: __m256i, k: __mmask16, a: __m256i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_slli_epi16&expand=5297) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_slli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliw256(a.as_i16x16(), imm8); let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf.as_i16x16(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5090,16 +5088,17 @@ pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_slli_epi16&expand=5293) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_slli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_slli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8())) +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_slli_epi16( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliw128(a.as_i16x8(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i16x8())) } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
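// Illustration only, not part of the patch: a plain-Rust model of how the
// `mask_`/`maskz_` shift variants above combine the shifted lanes with `src` or
// zero. Bit i of the mask selects lane i; the writemask form falls back to the
// `src` lane, the zeromask form falls back to zero.
fn mask_slli_epi16_model(src: [i16; 16], k: u16, a: [i16; 16], imm8: u32) -> [i16; 16] {
    let mut out = [0i16; 16];
    for i in 0..16 {
        // Counts of 16 or more clear the lane, matching the vpsllw immediate form.
        let shifted = if imm8 > 15 { 0 } else { a[i] << imm8 };
        out[i] = if (k >> i) & 1 == 1 { shifted } else { src[i] };
    }
    out
}

fn maskz_slli_epi16_model(k: u16, a: [i16; 16], imm8: u32) -> [i16; 16] {
    let mut out = [0i16; 16];
    for i in 0..16 {
        let shifted = if imm8 > 15 { 0 } else { a[i] << imm8 };
        out[i] = if (k >> i) & 1 == 1 { shifted } else { 0 };
    }
    out
}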
@@ -5107,17 +5106,14 @@ pub unsafe fn _mm_mask_slli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_slli_epi16&expand=5294) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_slli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliw128(a.as_i16x8(), imm8); let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf.as_i16x8(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -5688,16 +5684,17 @@ pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srai_epi16&expand=5422) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_srai_epi16(src: __m256i, k: __mmask16, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srai_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16())) +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_srai_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, +) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psraiw256(a.as_i16x16(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i16x16())) } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5705,17 +5702,14 @@ pub unsafe fn _mm256_mask_srai_epi16(src: __m256i, k: __mmask16, a: __m256i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srai_epi16&expand=5423) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_srai_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psraiw256(a.as_i16x16(), imm8); let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf.as_i16x16(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5723,16 +5717,17 @@ pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srai_epi16&expand=5419) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srai_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8())) +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_srai_epi16( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psraiw128(a.as_i16x8(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i16x8())) } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5740,17 +5735,14 @@ pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srai_epi16&expand=5420) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srai_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psraiw128(a.as_i16x8(), imm8); let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf.as_i16x8(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
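// Illustration only, not part of the patch: what the srai_* (arithmetic) shifts
// above do to a single 16-bit lane. Unlike the logical shifts, the vacated bits
// are filled with copies of the sign bit, and counts above 15 behave like 15.
fn srai_epi16_lane(x: i16, imm8: u32) -> i16 {
    x >> imm8.min(15)
}
// e.g. srai_epi16_lane(8, 2) == 2 and srai_epi16_lane(-8, 2) == -2, matching
// the test vectors for _mm256_mask_srai_epi16 later in this diff.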
@@ -7136,56 +7128,49 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_shufflelo_epi16(a: __m512i, imm8: i32) -> __m512i { - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_shufflelo_epi16(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i16x32(); - macro_rules! shuffle_done { - ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => { - #[rustfmt::skip] - simd_shuffle32(a, a, [ - 0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7, 8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15, - 16+$x01, 16+$x23, 16+$x45, 16+$x67, 20, 21, 22, 23, 24+$x01, 24+$x23, 24+$x45, 24+$x67, 28, 29, 30, 31, - ]) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let r: i16x32 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; + let r: i16x32 = simd_shuffle32( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + 4, + 5, + 6, + 7, + (IMM8 as u32 & 0b11) + 8, + ((IMM8 as u32 >> 2) & 0b11) + 8, + ((IMM8 as u32 >> 4) & 0b11) + 8, + ((IMM8 as u32 >> 6) & 0b11) + 8, + 12, + 13, + 14, + 15, + (IMM8 as u32 & 0b11) + 16, + ((IMM8 as u32 >> 2) & 0b11) + 16, + ((IMM8 as u32 >> 4) & 0b11) + 16, + ((IMM8 as u32 >> 6) & 0b11) + 16, + 20, + 21, + 22, + 23, + (IMM8 as u32 & 0b11) + 24, + ((IMM8 as u32 >> 2) & 0b11) + 24, + ((IMM8 as u32 >> 4) & 0b11) + 24, + ((IMM8 as u32 >> 6) & 0b11) + 24, + 28, + 29, + 30, + 31, + ], + ); transmute(r) } @@ -7202,7 +7187,7 @@ pub unsafe fn _mm512_mask_shufflelo_epi16( a: __m512i, ) -> __m512i { static_assert_imm8!(IMM8); - let r = _mm512_shufflelo_epi16(a, IMM8); + let r = _mm512_shufflelo_epi16::(a); transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) } @@ -7215,7 +7200,7 @@ pub unsafe fn _mm512_mask_shufflelo_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i) -> __m512i { static_assert_imm8!(IMM8); - let r = _mm512_shufflelo_epi16(a, IMM8); + let r = _mm512_shufflelo_epi16::(a); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) } @@ -7287,56 +7272,49 @@ pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212) #[inline] #[target_feature(enable = "avx512bw")] 
-#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_shufflehi_epi16(a: __m512i, imm8: i32) -> __m512i { - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_shufflehi_epi16(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i16x32(); - macro_rules! shuffle_done { - ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => { - #[rustfmt::skip] - simd_shuffle32(a, a, [ - 0, 1, 2, 3, 4+$x01, 4+$x23, 4+$x45, 4+$x67, 8, 9, 10, 11, 12+$x01, 12+$x23, 12+$x45, 12+$x67, - 16, 17, 18, 19, 20+$x01, 20+$x23, 20+$x45, 20+$x67, 24, 25, 26, 27, 28+$x01, 28+$x23, 28+$x45, 28+$x67, - ]) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let r: i16x32 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; + let r: i16x32 = simd_shuffle32( + a, + a, + [ + 0, + 1, + 2, + 3, + (IMM8 as u32 & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + 8, + 9, + 10, + 11, + (IMM8 as u32 & 0b11) + 12, + ((IMM8 as u32 >> 2) & 0b11) + 12, + ((IMM8 as u32 >> 4) & 0b11) + 12, + ((IMM8 as u32 >> 6) & 0b11) + 12, + 16, + 17, + 18, + 19, + (IMM8 as u32 & 0b11) + 20, + ((IMM8 as u32 >> 2) & 0b11) + 20, + ((IMM8 as u32 >> 4) & 0b11) + 20, + ((IMM8 as u32 >> 6) & 0b11) + 20, + 24, + 25, + 26, + 27, + (IMM8 as u32 & 0b11) + 28, + ((IMM8 as u32 >> 2) & 0b11) + 28, + ((IMM8 as u32 >> 4) & 0b11) + 28, + ((IMM8 as u32 >> 6) & 0b11) + 28, + ], + ); transmute(r) } @@ -7353,7 +7331,7 @@ pub unsafe fn _mm512_mask_shufflehi_epi16( a: __m512i, ) -> __m512i { static_assert_imm8!(IMM8); - let r = _mm512_shufflehi_epi16(a, IMM8); + let r = _mm512_shufflehi_epi16::(a); transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) } @@ -7366,7 +7344,7 @@ pub unsafe fn _mm512_mask_shufflehi_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i) -> __m512i { static_assert_imm8!(IMM8); - let r = _mm512_shufflehi_epi16(a, IMM8); + let r = _mm512_shufflehi_epi16::(a); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) } @@ -8891,49 +8869,82 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bslli_epi128&expand=591) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(1)] -#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))] -pub unsafe fn _mm512_bslli_epi128(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))] +#[rustc_legacy_const_generics(1)] +pub 
unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i8x64(); let zero = _mm512_setzero_si512().as_i8x64(); - #[rustfmt::skip] - macro_rules! call { - ($imm8:expr) => { - simd_shuffle64 ( - zero, - a, - [ - 64 - $imm8, 65 - $imm8, 66 - $imm8, 67 - $imm8, 68 - $imm8, 69 - $imm8, 70 - $imm8, 71 - $imm8, - 72 - $imm8, 73 - $imm8, 74 - $imm8, 75 - $imm8, 76 - $imm8, 77 - $imm8, 78 - $imm8, 79 - $imm8, - 80 - ($imm8+16), 81 - ($imm8+16), 82 - ($imm8+16), 83 - ($imm8+16), 84 - ($imm8+16), 85 - ($imm8+16), 86 - ($imm8+16), 87 - ($imm8+16), - 88 - ($imm8+16), 89 - ($imm8+16), 90 - ($imm8+16), 91 - ($imm8+16), 92 - ($imm8+16), 93 - ($imm8+16), 94 - ($imm8+16), 95 - ($imm8+16), - 96 - ($imm8+32), 97 - ($imm8+32), 98 - ($imm8+32), 99 - ($imm8+32), 100 - ($imm8+32), 101 - ($imm8+32), 102 - ($imm8+32), 103 - ($imm8+32), - 104 - ($imm8+32), 105 - ($imm8+32), 106 - ($imm8+32), 107 - ($imm8+32), 108 - ($imm8+32), 109 - ($imm8+32), 110 - ($imm8+32), 111 - ($imm8+32), - 112 - ($imm8+48), 113 - ($imm8+48), 114 - ($imm8+48), 115 - ($imm8+48), 116 - ($imm8+48), 117 - ($imm8+48), 118 - ($imm8+48), 119 - ($imm8+48), - 120 - ($imm8+48), 121 - ($imm8+48), 122 - ($imm8+48), 123 - ($imm8+48), 124 - ($imm8+48), 125 - ($imm8+48), 126 - ($imm8+48), 127 - ($imm8+48), - ], - ) - }; - } - let r: i8x64 = match imm8 { - 0 => call!(0), - 1 => call!(1), - 2 => call!(2), - 3 => call!(3), - 4 => call!(4), - 5 => call!(5), - 6 => call!(6), - 7 => call!(7), - 8 => call!(8), - 9 => call!(9), - 10 => call!(10), - 11 => call!(11), - 12 => call!(12), - 13 => call!(13), - 14 => call!(14), - 15 => call!(15), - _ => call!(16), - }; + let r: i8x64 = simd_shuffle64( + zero, + a, + [ + 64 - (IMM8 as u32 & 0xff), + 65 - (IMM8 as u32 & 0xff), + 66 - (IMM8 as u32 & 0xff), + 67 - (IMM8 as u32 & 0xff), + 68 - (IMM8 as u32 & 0xff), + 69 - (IMM8 as u32 & 0xff), + 70 - (IMM8 as u32 & 0xff), + 71 - (IMM8 as u32 & 0xff), + 72 - (IMM8 as u32 & 0xff), + 73 - (IMM8 as u32 & 0xff), + 74 - (IMM8 as u32 & 0xff), + 75 - (IMM8 as u32 & 0xff), + 76 - (IMM8 as u32 & 0xff), + 77 - (IMM8 as u32 & 0xff), + 78 - (IMM8 as u32 & 0xff), + 79 - (IMM8 as u32 & 0xff), + 80 - (IMM8 as u32 & 0xff) - 16, + 81 - (IMM8 as u32 & 0xff) - 16, + 82 - (IMM8 as u32 & 0xff) - 16, + 83 - (IMM8 as u32 & 0xff) - 16, + 84 - (IMM8 as u32 & 0xff) - 16, + 85 - (IMM8 as u32 & 0xff) - 16, + 86 - (IMM8 as u32 & 0xff) - 16, + 87 - (IMM8 as u32 & 0xff) - 16, + 88 - (IMM8 as u32 & 0xff) - 16, + 89 - (IMM8 as u32 & 0xff) - 16, + 90 - (IMM8 as u32 & 0xff) - 16, + 91 - (IMM8 as u32 & 0xff) - 16, + 92 - (IMM8 as u32 & 0xff) - 16, + 93 - (IMM8 as u32 & 0xff) - 16, + 94 - (IMM8 as u32 & 0xff) - 16, + 95 - (IMM8 as u32 & 0xff) - 16, + 96 - (IMM8 as u32 & 0xff) - 32, + 97 - (IMM8 as u32 & 0xff) - 32, + 98 - (IMM8 as u32 & 0xff) - 32, + 99 - (IMM8 as u32 & 0xff) - 32, + 100 - (IMM8 as u32 & 0xff) - 32, + 101 - (IMM8 as u32 & 0xff) - 32, + 102 - (IMM8 as u32 & 0xff) - 32, + 103 - (IMM8 as u32 & 0xff) - 32, + 104 - (IMM8 as u32 & 0xff) - 32, + 105 - (IMM8 as u32 & 0xff) - 32, + 106 - (IMM8 as u32 & 0xff) - 32, + 107 - (IMM8 as u32 & 0xff) - 32, + 108 - (IMM8 as u32 & 0xff) - 32, + 109 - (IMM8 as u32 & 0xff) - 32, + 110 - (IMM8 as u32 & 0xff) - 32, + 111 - (IMM8 as u32 & 0xff) - 32, + 112 - (IMM8 as u32 & 0xff) - 48, + 113 - (IMM8 as u32 & 0xff) - 48, + 114 - (IMM8 as u32 & 0xff) - 48, + 115 - (IMM8 as u32 & 0xff) - 48, + 116 - (IMM8 as u32 & 0xff) - 48, + 117 - (IMM8 as u32 & 0xff) - 48, + 118 - (IMM8 as u32 & 0xff) - 48, + 119 - (IMM8 as u32 & 0xff) - 48, + 
120 - (IMM8 as u32 & 0xff) - 48, + 121 - (IMM8 as u32 & 0xff) - 48, + 122 - (IMM8 as u32 & 0xff) - 48, + 123 - (IMM8 as u32 & 0xff) - 48, + 124 - (IMM8 as u32 & 0xff) - 48, + 125 - (IMM8 as u32 & 0xff) - 48, + 126 - (IMM8 as u32 & 0xff) - 48, + 127 - (IMM8 as u32 & 0xff) - 48, + ], + ); transmute(r) } @@ -8942,49 +8953,82 @@ pub unsafe fn _mm512_bslli_epi128(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bsrli_epi128&expand=594) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(1)] -#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))] -pub unsafe fn _mm512_bsrli_epi128(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i8x64(); let zero = _mm512_setzero_si512().as_i8x64(); - #[rustfmt::skip] - macro_rules! call { - ($imm8:expr) => { - simd_shuffle64 ( - a, - zero, - [ - 0 + ($imm8+48), 1 + ($imm8+48), 2 + ($imm8+48), 3 + ($imm8+48), 4 + ($imm8+48), 5 + ($imm8+48), 6 + ($imm8+48), 7 + ($imm8+48), - 8 + ($imm8+48), 9 + ($imm8+48), 10 + ($imm8+48), 11 + ($imm8+48), 12 + ($imm8+48), 13 + ($imm8+48), 14 + ($imm8+48), 15 + ($imm8+48), - 16 + ($imm8+32), 17 + ($imm8+32), 18 + ($imm8+32), 19 + ($imm8+32), 20 + ($imm8+32), 21 + ($imm8+32), 22 + ($imm8+32), 23 + ($imm8+32), - 24 + ($imm8+32), 25 + ($imm8+32), 26 + ($imm8+32), 27 + ($imm8+32), 28 + ($imm8+32), 29 + ($imm8+32), 30 + ($imm8+32), 31 + ($imm8+32), - 32 + ($imm8+16), 33 + ($imm8+16), 34 + ($imm8+16), 35 + ($imm8+16), 36 + ($imm8+16), 37 + ($imm8+16), 38 + ($imm8+16), 39 + ($imm8+16), - 40 + ($imm8+16), 41 + ($imm8+16), 42 + ($imm8+16), 43 + ($imm8+16), 44 + ($imm8+16), 45 + ($imm8+16), 46 + ($imm8+16), 47 + ($imm8+16), - 48 + $imm8, 49 + $imm8, 50 + $imm8, 51 + $imm8, 52 + $imm8, 53 + $imm8, 54 + $imm8, 55 + $imm8, - 56 + $imm8, 57 + $imm8, 58 + $imm8, 59 + $imm8, 60 + $imm8, 61 + $imm8, 62 + $imm8, 63 + $imm8, - ], - ) - }; - } - let r: i8x64 = match imm8 { - 0 => call!(0), - 1 => call!(1), - 2 => call!(2), - 3 => call!(3), - 4 => call!(4), - 5 => call!(5), - 6 => call!(6), - 7 => call!(7), - 8 => call!(8), - 9 => call!(9), - 10 => call!(10), - 11 => call!(11), - 12 => call!(12), - 13 => call!(13), - 14 => call!(14), - 15 => call!(15), - _ => call!(16), - }; + let r: i8x64 = simd_shuffle64( + a, + zero, + [ + 0 + (IMM8 as u32 & 0xff) + 48, + 1 + (IMM8 as u32 & 0xff) + 48, + 2 + (IMM8 as u32 & 0xff) + 48, + 3 + (IMM8 as u32 & 0xff) + 48, + 4 + (IMM8 as u32 & 0xff) + 48, + 5 + (IMM8 as u32 & 0xff) + 48, + 6 + (IMM8 as u32 & 0xff) + 48, + 7 + (IMM8 as u32 & 0xff) + 48, + 8 + (IMM8 as u32 & 0xff) + 48, + 9 + (IMM8 as u32 & 0xff) + 48, + 10 + (IMM8 as u32 & 0xff) + 48, + 11 + (IMM8 as u32 & 0xff) + 48, + 12 + (IMM8 as u32 & 0xff) + 48, + 13 + (IMM8 as u32 & 0xff) + 48, + 14 + (IMM8 as u32 & 0xff) + 48, + 15 + (IMM8 as u32 & 0xff) + 48, + 16 + (IMM8 as u32 & 0xff) + 32, + 17 + (IMM8 as u32 & 0xff) + 32, + 18 + (IMM8 as u32 & 0xff) + 32, + 19 + (IMM8 as u32 & 0xff) + 32, + 20 + (IMM8 as u32 & 0xff) + 32, + 21 + (IMM8 as u32 & 0xff) + 32, + 22 + (IMM8 as u32 & 0xff) + 32, + 23 + (IMM8 as u32 & 0xff) + 32, + 24 + (IMM8 as u32 & 0xff) + 32, + 25 + (IMM8 as u32 & 0xff) + 32, + 26 + (IMM8 as u32 & 0xff) + 32, + 27 + (IMM8 as u32 & 0xff) + 32, + 28 + (IMM8 as u32 & 0xff) + 32, + 29 + (IMM8 as u32 & 0xff) + 32, + 30 + (IMM8 as u32 & 0xff) + 32, + 31 + (IMM8 as u32 & 
0xff) + 32, + 32 + (IMM8 as u32 & 0xff) + 16, + 33 + (IMM8 as u32 & 0xff) + 16, + 34 + (IMM8 as u32 & 0xff) + 16, + 35 + (IMM8 as u32 & 0xff) + 16, + 36 + (IMM8 as u32 & 0xff) + 16, + 37 + (IMM8 as u32 & 0xff) + 16, + 38 + (IMM8 as u32 & 0xff) + 16, + 39 + (IMM8 as u32 & 0xff) + 16, + 40 + (IMM8 as u32 & 0xff) + 16, + 41 + (IMM8 as u32 & 0xff) + 16, + 42 + (IMM8 as u32 & 0xff) + 16, + 43 + (IMM8 as u32 & 0xff) + 16, + 44 + (IMM8 as u32 & 0xff) + 16, + 45 + (IMM8 as u32 & 0xff) + 16, + 46 + (IMM8 as u32 & 0xff) + 16, + 47 + (IMM8 as u32 & 0xff) + 16, + 48 + (IMM8 as u32 & 0xff), + 49 + (IMM8 as u32 & 0xff), + 50 + (IMM8 as u32 & 0xff), + 51 + (IMM8 as u32 & 0xff), + 52 + (IMM8 as u32 & 0xff), + 53 + (IMM8 as u32 & 0xff), + 54 + (IMM8 as u32 & 0xff), + 55 + (IMM8 as u32 & 0xff), + 56 + (IMM8 as u32 & 0xff), + 57 + (IMM8 as u32 & 0xff), + 58 + (IMM8 as u32 & 0xff), + 59 + (IMM8 as u32 & 0xff), + 60 + (IMM8 as u32 & 0xff), + 61 + (IMM8 as u32 & 0xff), + 62 + (IMM8 as u32 & 0xff), + 63 + (IMM8 as u32 & 0xff), + ], + ); transmute(r) } @@ -8993,60 +9037,183 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi8&expand=263) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m512i { // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. - if imm8 > 32 { + if IMM8 > 32 { return _mm512_set1_epi8(0); } // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. - let (a, b, imm8) = if imm8 > 16 { - (_mm512_set1_epi8(0), a, imm8 - 16) + let (a, b) = if IMM8 > 16 { + (_mm512_set1_epi8(0), a) } else { - (a, b, imm8) + (a, b) }; let a = a.as_i8x64(); let b = b.as_i8x64(); - #[rustfmt::skip] - macro_rules! 
shuffle { - ($imm8:expr) => { - simd_shuffle64( - b, - a, - [ - 0 + ($imm8+48), 1 + ($imm8+48), 2 + ($imm8+48), 3 + ($imm8+48), 4 + ($imm8+48), 5 + ($imm8+48), 6 + ($imm8+48), 7 + ($imm8+48), - 8 + ($imm8+48), 9 + ($imm8+48), 10 + ($imm8+48), 11 + ($imm8+48), 12 + ($imm8+48), 13 + ($imm8+48), 14 + ($imm8+48), 15 + ($imm8+48), - 16 + ($imm8+32), 17 + ($imm8+32), 18 + ($imm8+32), 19 + ($imm8+32), 20 + ($imm8+32), 21 + ($imm8+32), 22 + ($imm8+32), 23 + ($imm8+32), - 24 + ($imm8+32), 25 + ($imm8+32), 26 + ($imm8+32), 27 + ($imm8+32), 28 + ($imm8+32), 29 + ($imm8+32), 30 + ($imm8+32), 31 + ($imm8+32), - 32 + ($imm8+16), 33 + ($imm8+16), 34 + ($imm8+16), 35 + ($imm8+16), 36 + ($imm8+16), 37 + ($imm8+16), 38 + ($imm8+16), 39 + ($imm8+16), - 40 + ($imm8+16), 41 + ($imm8+16), 42 + ($imm8+16), 43 + ($imm8+16), 44 + ($imm8+16), 45 + ($imm8+16), 46 + ($imm8+16), 47 + ($imm8+16), - 48 + $imm8, 49 + $imm8, 50 + $imm8, 51 + $imm8, 52 + $imm8, 53 + $imm8, 54 + $imm8, 55 + $imm8, - 56 + $imm8, 57 + $imm8, 58 + $imm8, 59 + $imm8, 60 + $imm8, 61 + $imm8, 62 + $imm8, 63 + $imm8, - ], - ) - }; - } - let r: i8x64 = match imm8 { - 0 => shuffle!(0), - 1 => shuffle!(1), - 2 => shuffle!(2), - 3 => shuffle!(3), - 4 => shuffle!(4), - 5 => shuffle!(5), - 6 => shuffle!(6), - 7 => shuffle!(7), - 8 => shuffle!(8), - 9 => shuffle!(9), - 10 => shuffle!(10), - 11 => shuffle!(11), - 12 => shuffle!(12), - 13 => shuffle!(13), - 14 => shuffle!(14), - 15 => shuffle!(15), - _ => shuffle!(16), + + let r: i8x64 = match IMM8 % 16 { + 0 => simd_shuffle64( + b, + a, + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + ], + ), + 1 => simd_shuffle64( + b, + a, + [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, + ], + ), + 2 => simd_shuffle64( + b, + a, + [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, + ], + ), + 3 => simd_shuffle64( + b, + a, + [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, + 114, + ], + ), + 4 => simd_shuffle64( + b, + a, + [ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, + 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, + 115, + ], + ), + 5 => simd_shuffle64( + b, + a, + [ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, + 115, 116, + ], + ), + 6 => simd_shuffle64( + b, + a, + [ + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, 
+ 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, + 116, 117, + ], + ), + 7 => simd_shuffle64( + b, + a, + [ + 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, + 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, + 116, 117, 118, + ], + ), + 8 => simd_shuffle64( + b, + a, + [ + 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, + 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, + 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, + 116, 117, 118, 119, + ], + ), + 9 => simd_shuffle64( + b, + a, + [ + 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, + 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, + 117, 118, 119, 120, + ], + ), + 10 => simd_shuffle64( + b, + a, + [ + 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, + 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, + 118, 119, 120, 121, + ], + ), + 11 => simd_shuffle64( + b, + a, + [ + 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, + ], + ), + 12 => simd_shuffle64( + b, + a, + [ + 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, + 118, 119, 120, 121, 122, 123, + ], + ), + 13 => simd_shuffle64( + b, + a, + [ + 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 123, 124, + ], + ), + 14 => simd_shuffle64( + b, + a, + [ + 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, + 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, + 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, + ], + ), + 15 => simd_shuffle64( + b, + a, + [ + 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, + ], + ), + _ => b, }; transmute(r) } @@ -9065,7 +9232,7 @@ pub unsafe fn _mm512_mask_alignr_epi8( b: __m512i, ) -> __m512i { static_assert_imm8!(IMM8); - let r = _mm512_alignr_epi8(a, b, IMM8); + let r = _mm512_alignr_epi8::(a, b); transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64())) } @@ -9082,7 +9249,7 @@ pub unsafe fn _mm512_maskz_alignr_epi8( b: __m512i, ) -> __m512i { static_assert_imm8!(IMM8); - let r = _mm512_alignr_epi8(a, b, IMM8); + let r = _mm512_alignr_epi8::(a, b); 
let zero = _mm512_setzero_si512().as_i8x64(); transmute(simd_select_bitmask(k, r.as_i8x64(), zero)) } @@ -9101,7 +9268,7 @@ pub unsafe fn _mm256_mask_alignr_epi8( b: __m256i, ) -> __m256i { static_assert_imm8!(IMM8); - let r = _mm256_alignr_epi8(a, b, IMM8); + let r = _mm256_alignr_epi8::(a, b); transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32())) } @@ -9118,7 +9285,7 @@ pub unsafe fn _mm256_maskz_alignr_epi8( b: __m256i, ) -> __m256i { static_assert_imm8!(IMM8); - let r = _mm256_alignr_epi8(a, b, IMM8); + let r = _mm256_alignr_epi8::(a, b); transmute(simd_select_bitmask( k, r.as_i8x32(), @@ -9387,6 +9554,11 @@ extern "C" { #[link_name = "llvm.x86.avx512.pslli.w.512"] fn vpslliw(a: i16x32, imm8: u32) -> i16x32; + #[link_name = "llvm.x86.avx2.pslli.w"] + fn pslliw256(a: i16x16, imm8: i32) -> i16x16; + #[link_name = "llvm.x86.sse2.pslli.w"] + fn pslliw128(a: i16x8, imm8: i32) -> i16x8; + #[link_name = "llvm.x86.avx512.psllv.w.512"] fn vpsllvw(a: i16x32, b: i16x32) -> i16x32; #[link_name = "llvm.x86.avx512.psllv.w.256"] @@ -9411,6 +9583,11 @@ extern "C" { #[link_name = "llvm.x86.avx512.psrai.w.512"] fn vpsraiw(a: i16x32, imm8: u32) -> i16x32; + #[link_name = "llvm.x86.avx2.psrai.w"] + fn psraiw256(a: i16x16, imm8: i32) -> i16x16; + #[link_name = "llvm.x86.sse2.psrai.w"] + fn psraiw128(a: i16x8, imm8: i32) -> i16x8; + #[link_name = "llvm.x86.avx512.psrav.w.512"] fn vpsravw(a: i16x32, count: i16x32) -> i16x32; #[link_name = "llvm.x86.avx512.psrav.w.256"] @@ -14376,9 +14553,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_mask_slli_epi16() { let a = _mm256_set1_epi16(1 << 15); - let r = _mm256_mask_slli_epi16(a, 0, a, 1); + let r = _mm256_mask_slli_epi16::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_slli_epi16(a, 0b11111111_11111111, a, 1); + let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a); let e = _mm256_set1_epi16(0); assert_eq_m256i(r, e); } @@ -14386,9 +14563,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_maskz_slli_epi16() { let a = _mm256_set1_epi16(1 << 15); - let r = _mm256_maskz_slli_epi16(0, a, 1); + let r = _mm256_maskz_slli_epi16::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_slli_epi16(0b11111111_11111111, a, 1); + let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a); let e = _mm256_set1_epi16(0); assert_eq_m256i(r, e); } @@ -14396,9 +14573,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_mask_slli_epi16() { let a = _mm_set1_epi16(1 << 15); - let r = _mm_mask_slli_epi16(a, 0, a, 1); + let r = _mm_mask_slli_epi16::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_slli_epi16(a, 0b11111111, a, 1); + let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a); let e = _mm_set1_epi16(0); assert_eq_m128i(r, e); } @@ -14406,9 +14583,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_maskz_slli_epi16() { let a = _mm_set1_epi16(1 << 15); - let r = _mm_maskz_slli_epi16(0, a, 1); + let r = _mm_maskz_slli_epi16::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_slli_epi16(0b11111111, a, 1); + let r = _mm_maskz_slli_epi16::<1>(0b11111111, a); let e = _mm_set1_epi16(0); assert_eq_m128i(r, e); } @@ -14848,9 +15025,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_mask_srai_epi16() { let a = _mm256_set1_epi16(8); - let r = _mm256_mask_srai_epi16(a, 0, a, 2); + let r = _mm256_mask_srai_epi16::<2>(a, 0, a); assert_eq_m256i(r, a); - let r = 
_mm256_mask_srai_epi16(a, 0b11111111_11111111, a, 2); + let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a); let e = _mm256_set1_epi16(2); assert_eq_m256i(r, e); } @@ -14858,9 +15035,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_maskz_srai_epi16() { let a = _mm256_set1_epi16(8); - let r = _mm256_maskz_srai_epi16(0, a, 2); + let r = _mm256_maskz_srai_epi16::<2>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_srai_epi16(0b11111111_11111111, a, 2); + let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a); let e = _mm256_set1_epi16(2); assert_eq_m256i(r, e); } @@ -14868,9 +15045,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_mask_srai_epi16() { let a = _mm_set1_epi16(8); - let r = _mm_mask_srai_epi16(a, 0, a, 2); + let r = _mm_mask_srai_epi16::<2>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_srai_epi16(a, 0b11111111, a, 2); + let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a); let e = _mm_set1_epi16(2); assert_eq_m128i(r, e); } @@ -14878,9 +15055,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_maskz_srai_epi16() { let a = _mm_set1_epi16(8); - let r = _mm_maskz_srai_epi16(0, a, 2); + let r = _mm_maskz_srai_epi16::<2>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_srai_epi16(0b11111111, a, 2); + let r = _mm_maskz_srai_epi16::<2>(0b11111111, a); let e = _mm_set1_epi16(2); assert_eq_m128i(r, e); } @@ -16225,7 +16402,7 @@ mod tests { 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, ); - let r = _mm512_shufflelo_epi16(a, 0b00_01_01_11); + let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a); assert_eq_m512i(r, e); } @@ -16322,7 +16499,7 @@ mod tests { 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, ); - let r = _mm512_shufflehi_epi16(a, 0b00_01_01_11); + let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a); assert_eq_m512i(r, e); } @@ -17628,7 +17805,7 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); - let r = _mm512_bslli_epi128(a, 9); + let r = _mm512_bslli_epi128::<9>(a); #[rustfmt::skip] let e = _mm512_set_epi8( 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -17648,7 +17825,7 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); - let r = _mm512_bsrli_epi128(a, 9); + let r = _mm512_bsrli_epi128::<9>(a); #[rustfmt::skip] let e = _mm512_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, @@ -17669,7 +17846,7 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); let b = _mm512_set1_epi8(1); - let r = _mm512_alignr_epi8(a, b, 14); + let r = _mm512_alignr_epi8::<14>(a, b); #[rustfmt::skip] let e = _mm512_set_epi8( 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index a3b52909cf..16313c349a 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -17012,16 +17012,17 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_slli_epi32&expand=5305) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn 
_mm256_mask_slli_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_slli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8())) +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_slli_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psllid256(a.as_i32x8(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17029,17 +17030,14 @@ pub unsafe fn _mm256_mask_slli_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_slli_epi32&expand=5306) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_slli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psllid256(a.as_i32x8(), imm8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17047,16 +17045,17 @@ pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_slli_epi32&expand=5302) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_slli_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_slli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4())) +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_slli_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psllid128(a.as_i32x4(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
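// Illustration only, not part of the patch: the call-site change implied by the
// new const-generic signature above, mirroring the test updates elsewhere in
// this diff. Assumes user code with `use core::arch::x86_64::*;` on a toolchain
// that exposes these AVX-512 VL intrinsics.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mask_shift_example(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // Before this patch the count was a trailing runtime argument:
    //     _mm256_mask_slli_epi32(src, k, a, 5)
    // After it, the count is a const generic; #[rustc_legacy_const_generics(3)]
    // keeps the old positional form compiling by rewriting it to this call.
    _mm256_mask_slli_epi32::<5>(src, k, a)
}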
@@ -17064,17 +17063,14 @@ pub unsafe fn _mm_mask_slli_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_slli_epi32&expand=5303) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_slli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psllid128(a.as_i32x4(), imm8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. @@ -17129,16 +17125,17 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srli_epi32&expand=5517) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_srli_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8())) +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_srli_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrlid256(a.as_i32x8(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17146,17 +17143,14 @@ pub unsafe fn _mm256_mask_srli_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srli_epi32&expand=5518) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrlid256(a.as_i32x8(), imm8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
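// Illustration only, not part of the patch: the srli_* family above is the
// logical counterpart of srai_* — the lane is treated as unsigned and the
// vacated bits are filled with zeros; counts above 31 clear the lane entirely.
fn srli_epi32_lane(x: i32, imm8: u32) -> i32 {
    if imm8 > 31 {
        0
    } else {
        ((x as u32) >> imm8) as i32
    }
}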
@@ -17164,16 +17158,17 @@ pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srli_epi32&expand=5514) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_srli_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4())) +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_srli_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrlid128(a.as_i32x4(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17181,17 +17176,14 @@ pub unsafe fn _mm_mask_srli_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srli_epi32&expand=5515) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srli_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrlid128(a.as_i32x4(), imm8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. @@ -17246,16 +17238,17 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_slli_epi64&expand=5314) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_slli_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_slli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i64x4(), src.as_i64x4())) +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_slli_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliq256(a.as_i64x4(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
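// Illustration only, not part of the patch: the kind of compile-time range
// check that static_assert_imm_u8!(IMM8) performs on the const generic. The
// real macro lives elsewhere in this crate and may be written differently;
// this is just one way to force the check at monomorphization time.
struct AssertImmU8<const IMM: u32>;
impl<const IMM: u32> AssertImmU8<IMM> {
    const VALID: () = assert!(IMM <= 255, "immediate must fit in 8 bits");
}

fn slli_epi64_lane<const IMM8: u32>(x: u64) -> u64 {
    let () = AssertImmU8::<IMM8>::VALID; // evaluated when the function is instantiated
    if IMM8 > 63 {
        0
    } else {
        x << IMM8
    }
}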
@@ -17263,17 +17256,14 @@ pub unsafe fn _mm256_mask_slli_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_slli_epi64&expand=5315) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_slli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliq256(a.as_i64x4(), imm8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf.as_i64x4(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17281,16 +17271,17 @@ pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_slli_epi64&expand=5311) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_slli_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_slli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i64x2(), src.as_i64x2())) +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_slli_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliq128(a.as_i64x2(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17298,17 +17289,14 @@ pub unsafe fn _mm_mask_slli_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_slli_epi64&expand=5312) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_slli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = pslliq128(a.as_i64x2(), imm8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf.as_i64x2(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. 
@@ -17363,16 +17351,17 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srli_epi64&expand=5526) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_srli_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i64x4(), src.as_i64x4())) +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_srli_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrliq256(a.as_i64x4(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17380,17 +17369,14 @@ pub unsafe fn _mm256_mask_srli_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srli_epi64&expand=5527) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrliq256(a.as_i64x4(), imm8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf.as_i64x4(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17398,16 +17384,17 @@ pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srli_epi64&expand=5523) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_srli_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_srli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i64x2(), src.as_i64x2())) +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_srli_epi64<const IMM8: u32>( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrliq128(a.as_i64x2(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17415,17 +17402,14 @@ pub unsafe fn _mm_mask_srli_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srli_epi64&expand=5524) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srli_epi64::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm_u8!(IMM8); + let imm8 = IMM8 as i32; + let r = psrliq128(a.as_i64x2(), imm8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf.as_i64x2(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
@@ -21807,12 +21791,12 @@ pub unsafe fn _mm256_maskz_shuffle_f64x2( #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf32x4, imm8 = 3) + assert_instr(vextractf32x4, IMM8 = 3) )] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_extractf32x4_ps(a: __m512, imm8: i32) -> __m128 { - assert!(imm8 >= 0 && imm8 <= 3); - match imm8 & 0x3 { +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 { + static_assert_imm2!(IMM8); + match IMM8 & 0x3 { 0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]), 1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]), 2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]), @@ -21835,8 +21819,8 @@ pub unsafe fn _mm512_mask_extractf32x4_ps( k: __mmask8, a: __m512, ) -> __m128 { - static_assert_imm8!(IMM8); - let r = _mm512_extractf32x4_ps(a, IMM8); + static_assert_imm2!(IMM8); + let r = _mm512_extractf32x4_ps::<IMM8>(a); transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) } @@ -21851,8 +21835,8 @@ pub unsafe fn _mm512_mask_extractf32x4_ps( )] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 { - static_assert_imm8!(IMM8); - let r = _mm512_extractf32x4_ps(a, IMM8); + static_assert_imm2!(IMM8); + let r = _mm512_extractf32x4_ps::<IMM8>(a); let zero = _mm_setzero_ps().as_f32x4(); transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) } @@ -21864,12 +21848,12 @@ pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m5 #[target_feature(enable = "avx512f,avx512vl")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextract, imm8 = 1) //should be vextractf32x4 + assert_instr(vextract, IMM8 = 1) //should be vextractf32x4 )] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_extractf32x4_ps(a: __m256, imm8: i32) -> __m128 { - assert!(imm8 >= 0 && imm8 <= 1); - match imm8 & 0x1 { +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 { + static_assert_imm1!(IMM8); + match IMM8 & 0x1 { 0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]), _ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]), } @@ -21890,8 +21874,8 @@ pub unsafe fn _mm256_mask_extractf32x4_ps( k: __mmask8, a: __m256, ) -> __m128 { - static_assert_imm8!(IMM8); - let r = _mm256_extractf32x4_ps(a, IMM8); + static_assert_imm1!(IMM8); + let r = _mm256_extractf32x4_ps::<IMM8>(a); transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) } @@ -21906,8 +21890,8 @@ pub unsafe fn _mm256_mask_extractf32x4_ps( )] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 { - static_assert_imm8!(IMM8); - let r = _mm256_extractf32x4_ps(a, IMM8); + static_assert_imm1!(IMM8); + let r = _mm256_extractf32x4_ps::<IMM8>(a); let zero = _mm_setzero_ps().as_f32x4(); transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) } @@ -21945,7 +21929,7 @@ pub unsafe fn _mm512_mask_extracti64x4_epi64( k: __mmask8, a: __m512i, ) -> __m256i { - static_assert_imm8!(IMM8); + static_assert_imm1!(IMM8); let r = _mm512_extracti64x4_epi64(a, IMM8); transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) } @@ -21961,7 +21945,7 @@ pub unsafe fn _mm512_mask_extracti64x4_epi64( )] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i { - static_assert_imm8!(IMM8); + static_assert_imm1!(IMM8); let r = _mm512_extracti64x4_epi64(a, IMM8); let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) @@ -21974,12 +21958,12 @@ pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: _ #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf64x4, imm8 = 1) + assert_instr(vextractf64x4, IMM8 = 1) )] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_extractf64x4_pd(a: __m512d, imm8: i32) -> __m256d { - assert!(imm8 >= 0 && imm8 <= 1); - match imm8 & 0x1 { +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d { + static_assert_imm1!(IMM8); + match IMM8 & 0x1 { 0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]), _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]), } @@ -22000,8 +21984,8 @@ pub unsafe fn _mm512_mask_extractf64x4_pd( k: __mmask8, a: __m512d, ) -> __m256d { - static_assert_imm8!(IMM8); - let r = _mm512_extractf64x4_pd(a, IMM8); + static_assert_imm1!(IMM8); + let r = _mm512_extractf64x4_pd::<IMM8>(a); transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) } @@ -22016,8 +22000,8 @@ pub unsafe fn _mm512_mask_extractf64x4_pd( )] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d { - static_assert_imm8!(IMM8); - let r = _mm512_extractf64x4_pd(a, IMM8); + static_assert_imm1!(IMM8); + let r = _mm512_extractf64x4_pd::<IMM8>(a); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) } @@ -22060,7 +22044,7 @@ pub unsafe fn _mm512_mask_extracti32x4_epi32( k: __mmask8, a: __m512i, ) -> __m128i { - static_assert_imm8!(IMM8); + static_assert_imm2!(IMM8); let r = _mm512_extracti32x4_epi32(a, IMM8); transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) } @@ -22076,7 +22060,7 @@ pub unsafe fn _mm512_mask_extracti32x4_epi32( )] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i { - static_assert_imm8!(IMM8); + static_assert_imm2!(IMM8); let r = _mm512_extracti32x4_epi32(a, IMM8); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) @@ -22118,7 +22102,7 @@ pub unsafe fn _mm256_mask_extracti32x4_epi32( k: __mmask8, a: __m256i, ) -> __m128i { - static_assert_imm8!(IMM8); + static_assert_imm1!(IMM8); let r = _mm256_extracti32x4_epi32(a, IMM8); transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) } @@ -22134,7 +22118,7 @@ pub unsafe fn _mm256_mask_extracti32x4_epi32( )] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i { - static_assert_imm8!(IMM8); + static_assert_imm1!(IMM8); let r = _mm256_extracti32x4_epi32(a, IMM8); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) @@ -22385,13 +22369,13 @@ pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_inserti32x4&expand=3174) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))] //should be vinserti32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 3); +#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i { + static_assert_imm2!(IMM8); let a =
a.as_i32x16(); let b = _mm512_castsi128_si512(b).as_i32x16(); - let ret: i32x16 = match imm8 & 0b11 { + let ret: i32x16 = match IMM8 & 0b11 { 0 => simd_shuffle16( a, b, @@ -22425,8 +22409,8 @@ pub unsafe fn _mm512_mask_inserti32x4( a: __m512i, b: __m128i, ) -> __m512i { - static_assert_imm8!(IMM8); - let r = _mm512_inserti32x4(a, b, IMM8); + static_assert_imm2!(IMM8); + let r = _mm512_inserti32x4::<IMM8>(a, b); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -22442,8 +22426,8 @@ pub unsafe fn _mm512_maskz_inserti32x4( a: __m512i, b: __m128i, ) -> __m512i { - static_assert_imm8!(IMM8); - let r = _mm512_inserti32x4(a, b, IMM8); + static_assert_imm2!(IMM8); + let r = _mm512_inserti32x4::<IMM8>(a, b); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) } @@ -22455,14 +22439,14 @@ pub unsafe fn _mm512_maskz_inserti32x4( #[target_feature(enable = "avx512f,avx512vl")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vinsert, imm8 = 1) //should be vinserti32x4 + assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4 )] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_inserti32x4(a: __m256i, b: __m128i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 1); +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i { + static_assert_imm1!(IMM8); let a = a.as_i32x8(); let b = _mm256_castsi128_si256(b).as_i32x8(); - let ret: i32x8 = match imm8 & 0b1 { + let ret: i32x8 = match IMM8 & 0b1 { 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), }; @@ -22485,8 +22469,8 @@ pub unsafe fn _mm256_mask_inserti32x4( a: __m256i, b: __m128i, ) -> __m256i { - static_assert_imm8!(IMM8); - let r = _mm256_inserti32x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm256_inserti32x4::<IMM8>(a, b); transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) } @@ -22505,8 +22489,8 @@ pub unsafe fn _mm256_maskz_inserti32x4( a: __m256i, b: __m128i, ) -> __m256i { - static_assert_imm8!(IMM8); - let r = _mm256_inserti32x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm256_inserti32x4::<IMM8>(a, b); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) } @@ -22516,12 +22500,12 @@ pub unsafe fn _mm256_maskz_inserti32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_inserti64x4&expand=3186) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))] //should be vinserti64x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 1); +#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i { + static_assert_imm1!(IMM8); let b = _mm512_castsi256_si512(b); - match imm8 & 0b1 { + match IMM8 & 0b1 { 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } @@ -22540,8 +22524,8 @@ pub unsafe fn _mm512_mask_inserti64x4( a: __m512i, b: __m256i, ) -> __m512i { - static_assert_imm8!(IMM8); - let r = _mm512_inserti64x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm512_inserti64x4::<IMM8>(a, b); transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) } @@ -22557,8 +22541,8 @@ pub unsafe fn _mm512_maskz_inserti64x4( a: __m512i,
b: __m256i, ) -> __m512i { - static_assert_imm8!(IMM8); - let r = _mm512_inserti64x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm512_inserti64x4::<IMM8>(a, b); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) } @@ -22568,12 +22552,12 @@ pub unsafe fn _mm512_maskz_inserti64x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_insertf32x4&expand=3155) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128, imm8: i32) -> __m512 { - assert!(imm8 >= 0 && imm8 <= 3); +#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 { + static_assert_imm2!(IMM8); let b = _mm512_castps128_ps512(b); - match imm8 & 0b11 { + match IMM8 & 0b11 { 0 => simd_shuffle16( a, b, @@ -22606,8 +22590,8 @@ pub unsafe fn _mm512_mask_insertf32x4( a: __m512, b: __m128, ) -> __m512 { - static_assert_imm8!(IMM8); - let r = _mm512_insertf32x4(a, b, IMM8); + static_assert_imm2!(IMM8); + let r = _mm512_insertf32x4::<IMM8>(a, b); transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) } @@ -22623,8 +22607,8 @@ pub unsafe fn _mm512_maskz_insertf32x4( a: __m512, b: __m128, ) -> __m512 { - static_assert_imm8!(IMM8); - let r = _mm512_insertf32x4(a, b, IMM8); + static_assert_imm2!(IMM8); + let r = _mm512_insertf32x4::<IMM8>(a, b); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) } @@ -22636,13 +22620,13 @@ pub unsafe fn _mm512_maskz_insertf32x4( #[target_feature(enable = "avx512f,avx512vl")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vinsert, imm8 = 1) //should be vinsertf32x4 + assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4 )] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_insertf32x4(a: __m256, b: __m128, imm8: i32) -> __m256 { - assert!(imm8 >= 0 && imm8 <= 1); +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 { + static_assert_imm1!(IMM8); let b = _mm256_castps128_ps256(b); - match imm8 & 0b1 { + match IMM8 & 0b1 { 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } @@ -22664,8 +22648,8 @@ pub unsafe fn _mm256_mask_insertf32x4( a: __m256, b: __m128, ) -> __m256 { - static_assert_imm8!(IMM8); - let r = _mm256_insertf32x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm256_insertf32x4::<IMM8>(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -22684,8 +22668,8 @@ pub unsafe fn _mm256_maskz_insertf32x4( a: __m256, b: __m128, ) -> __m256 { - static_assert_imm8!(IMM8); - let r = _mm256_insertf32x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm256_insertf32x4::<IMM8>(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) } @@ -22695,12 +22679,12 @@ pub unsafe fn _mm256_maskz_insertf32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_insertf64x4&expand=3167) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d, imm8: i32) -> __m512d { - assert!(imm8 >= 0 && imm8 <= 1); +#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d { + static_assert_imm1!(IMM8); let b = _mm512_castpd256_pd512(b); - match imm8 & 0b1 { + match IMM8 & 0b1 { 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } @@ -22719,8 +22703,8 @@ pub unsafe fn _mm512_mask_insertf64x4( a: __m512d, b: __m256d, ) -> __m512d { - static_assert_imm8!(IMM8); - let r = _mm512_insertf64x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm512_insertf64x4::<IMM8>(a, b); transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) } @@ -22736,8 +22720,8 @@ pub unsafe fn _mm512_maskz_insertf64x4( a: __m512d, b: __m256d, ) -> __m512d { - static_assert_imm8!(IMM8); - let r = _mm512_insertf64x4(a, b, IMM8); + static_assert_imm1!(IMM8); + let r = _mm512_insertf64x4::<IMM8>(a, b); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) } @@ -36549,13 +36533,36 @@ extern "C" { #[link_name = "llvm.x86.avx512.pslli.d.512"] fn vpsllid(a: i32x16, imm8: u32) -> i32x16; + + #[link_name = "llvm.x86.avx2.pslli.d"] + fn psllid256(a: i32x8, imm8: i32) -> i32x8; + #[link_name = "llvm.x86.sse2.pslli.d"] + fn psllid128(a: i32x4, imm8: i32) -> i32x4; + #[link_name = "llvm.x86.avx512.psrli.d.512"] fn vpsrlid(a: i32x16, imm8: u32) -> i32x16; + + #[link_name = "llvm.x86.avx2.psrli.d"] + fn psrlid256(a: i32x8, imm8: i32) -> i32x8; + #[link_name = "llvm.x86.sse2.psrli.d"] + fn psrlid128(a: i32x4, imm8: i32) -> i32x4; + #[link_name = "llvm.x86.avx512.pslli.q.512"] fn vpslliq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx2.pslli.q"] + fn pslliq256(a: i64x4, imm8: i32) -> i64x4; + #[link_name = "llvm.x86.sse2.pslli.q"] + fn pslliq128(a: i64x2, imm8: i32) -> i64x2; + #[link_name = "llvm.x86.avx512.psrli.q.512"] fn vpsrliq(a: i64x8, imm8: u32) -> i64x8; + #[link_name = "llvm.x86.avx2.psrli.q"] + fn psrliq256(a: i64x4, imm8: i32) -> i64x4; + #[link_name = "llvm.x86.sse2.psrli.q"] + fn psrliq128(a: i64x2, imm8: i32) -> i64x2; + #[link_name = "llvm.x86.avx512.psll.d.512"] fn vpslld(a: i32x16, count: i32x4) -> i32x16; #[link_name = "llvm.x86.avx512.psrl.d.512"] @@ -44795,9 +44802,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_slli_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_mask_slli_epi32(a, 0, a, 1); + let r = _mm256_mask_slli_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_slli_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -44805,9 +44812,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_slli_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_maskz_slli_epi32(0, a, 1); + let r = _mm256_maskz_slli_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_slli_epi32(0b11111111, a, 1); + let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -44815,9 +44822,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_slli_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_mask_slli_epi32(a, 0, a, 1); + let r = _mm_mask_slli_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_slli_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a); let e
= _mm_set_epi32(0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -44825,9 +44832,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_slli_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_maskz_slli_epi32(0, a, 1); + let r = _mm_maskz_slli_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_slli_epi32(0b00001111, a, 1); + let r = _mm_maskz_slli_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -44863,9 +44870,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_srli_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); - let r = _mm256_mask_srli_epi32(a, 0, a, 1); + let r = _mm256_mask_srli_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_srli_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -44873,9 +44880,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_srli_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); - let r = _mm256_maskz_srli_epi32(0, a, 1); + let r = _mm256_maskz_srli_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_srli_epi32(0b11111111, a, 1); + let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -44883,9 +44890,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_srli_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); - let r = _mm_mask_srli_epi32(a, 0, a, 1); + let r = _mm_mask_srli_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_srli_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 4, 0, 0, 0); assert_eq_m128i(r, e); } @@ -44893,9 +44900,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_srli_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); - let r = _mm_maskz_srli_epi32(0, a, 1); + let r = _mm_maskz_srli_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_srli_epi32(0b00001111, a, 1); + let r = _mm_maskz_srli_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 4, 0, 0, 0); assert_eq_m128i(r, e); } @@ -46629,7 +46636,7 @@ mod tests { let a = _mm512_setr_ps( 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., ); - let r = _mm512_extractf32x4_ps(a, 0b1); + let r = _mm512_extractf32x4_ps::<1>(a); let e = _mm_setr_ps(5., 6., 7., 8.); assert_eq_m128(r, e); } @@ -46662,7 +46669,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_extractf32x4_ps() { let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); - let r = _mm256_extractf32x4_ps(a, 0b1); + let r = _mm256_extractf32x4_ps::<1>(a); let e = _mm_set_ps(1., 2., 3., 4.); assert_eq_m128(r, e); } @@ -46910,7 +46917,7 @@ mod tests { unsafe fn test_mm512_inserti32x4() { let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let b = _mm_setr_epi32(17, 18, 19, 20); - let r = _mm512_inserti32x4(a, b, 0); + let r = _mm512_inserti32x4::<0>(a, b); let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); assert_eq_m512i(r, e); } @@ -46941,7 +46948,7 @@ mod tests { unsafe fn test_mm256_inserti32x4() { let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_set_epi32(17, 18, 19, 20); - let r = _mm256_inserti32x4(a, b, 1); + 
let r = _mm256_inserti32x4::<1>(a, b); let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8); assert_eq_m256i(r, e); } @@ -46974,7 +46981,7 @@ mod tests { 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., ); let b = _mm_setr_ps(17., 18., 19., 20.); - let r = _mm512_insertf32x4(a, b, 0); + let r = _mm512_insertf32x4::<0>(a, b); let e = _mm512_setr_ps( 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., ); @@ -47015,7 +47022,7 @@ mod tests { unsafe fn test_mm256_insertf32x4() { let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm_set_ps(17., 18., 19., 20.); - let r = _mm256_insertf32x4(a, b, 1); + let r = _mm256_insertf32x4::<1>(a, b); let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.); assert_eq_m256(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index a6ca9303d3..07070f6475 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -7868,9 +7868,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_slli_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_mask_slli_epi64(a, 0, a, 1); + let r = _mm256_mask_slli_epi64::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_slli_epi64(a, 0b00001111, a, 1); + let r = _mm256_mask_slli_epi64::<1>(a, 0b00001111, a); let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7878,9 +7878,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_slli_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_maskz_slli_epi64(0, a, 1); + let r = _mm256_maskz_slli_epi64::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_slli_epi64(0b00001111, a, 1); + let r = _mm256_maskz_slli_epi64::<1>(0b00001111, a); let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7888,9 +7888,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_slli_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_mask_slli_epi64(a, 0, a, 1); + let r = _mm_mask_slli_epi64::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_slli_epi64(a, 0b00000011, a, 1); + let r = _mm_mask_slli_epi64::<1>(a, 0b00000011, a); let e = _mm_set_epi64x(0, 1 << 33); assert_eq_m128i(r, e); } @@ -7898,9 +7898,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_slli_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_maskz_slli_epi64(0, a, 1); + let r = _mm_maskz_slli_epi64::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_slli_epi64(0b00000011, a, 1); + let r = _mm_maskz_slli_epi64::<1>(0b00000011, a); let e = _mm_set_epi64x(0, 1 << 33); assert_eq_m128i(r, e); } @@ -7956,9 +7956,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_srli_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); - let r = _mm256_mask_srli_epi64(a, 0, a, 1); + let r = _mm256_mask_srli_epi64::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_srli_epi64(a, 0b00001111, a, 1); + let r = _mm256_mask_srli_epi64::<1>(a, 0b00001111, a); let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); assert_eq_m256i(r, e); } @@ -7966,9 +7966,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_srli_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); - let r = _mm256_maskz_srli_epi64(0, a, 1); + let r = 
_mm256_maskz_srli_epi64::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_srli_epi64(0b00001111, a, 1); + let r = _mm256_maskz_srli_epi64::<1>(0b00001111, a); let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); assert_eq_m256i(r, e); } @@ -7976,9 +7976,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_srli_epi64() { let a = _mm_set_epi64x(1 << 5, 0); - let r = _mm_mask_srli_epi64(a, 0, a, 1); + let r = _mm_mask_srli_epi64::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_srli_epi64(a, 0b00000011, a, 1); + let r = _mm_mask_srli_epi64::<1>(a, 0b00000011, a); let e = _mm_set_epi64x(1 << 4, 0); assert_eq_m128i(r, e); } @@ -7986,9 +7986,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_srli_epi64() { let a = _mm_set_epi64x(1 << 5, 0); - let r = _mm_maskz_srli_epi64(0, a, 1); + let r = _mm_maskz_srli_epi64::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_srli_epi64(0b00000011, a, 1); + let r = _mm_maskz_srli_epi64::<1>(0b00000011, a); let e = _mm_set_epi64x(1 << 4, 0); assert_eq_m128i(r, e); } @@ -9767,7 +9767,7 @@ mod tests { unsafe fn test_mm512_inserti64x4() { let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm256_setr_epi64x(17, 18, 19, 20); - let r = _mm512_inserti64x4(a, b, 1); + let r = _mm512_inserti64x4::<1>(a, b); let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20); assert_eq_m512i(r, e); } @@ -9798,7 +9798,7 @@ mod tests { unsafe fn test_mm512_insertf64x4() { let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm256_setr_pd(17., 18., 19., 20.); - let r = _mm512_insertf64x4(a, b, 1); + let r = _mm512_insertf64x4::<1>(a, b); let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.); assert_eq_m512d(r, e); } @@ -11153,7 +11153,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_extractf64x4_pd() { let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); - let r = _mm512_extractf64x4_pd(a, 0x1); + let r = _mm512_extractf64x4_pd::<1>(a); let e = _mm256_setr_pd(5., 6., 7., 8.); assert_eq_m256d(r, e); } @@ -11162,9 +11162,9 @@ mod tests { unsafe fn test_mm512_mask_extractf64x4_pd() { let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); let src = _mm256_set1_pd(100.); - let r = _mm512_mask_extractf64x4_pd::<0x1>(src, 0, a); + let r = _mm512_mask_extractf64x4_pd::<1>(src, 0, a); assert_eq_m256d(r, src); - let r = _mm512_mask_extractf64x4_pd::<0x1>(src, 0b11111111, a); + let r = _mm512_mask_extractf64x4_pd::<1>(src, 0b11111111, a); let e = _mm256_setr_pd(5., 6., 7., 8.); assert_eq_m256d(r, e); } @@ -11172,9 +11172,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_extractf64x4_pd() { let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); - let r = _mm512_maskz_extractf64x4_pd::<0x1>(0, a); + let r = _mm512_maskz_extractf64x4_pd::<1>(0, a); assert_eq_m256d(r, _mm256_setzero_pd()); - let r = _mm512_maskz_extractf64x4_pd::<0x1>(0b00000001, a); + let r = _mm512_maskz_extractf64x4_pd::<1>(0b00000001, a); let e = _mm256_setr_pd(5., 0., 0., 0.); assert_eq_m256d(r, e); }
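For downstream code the change is purely syntactic: the immediate moves from the last call argument into a const-generic turbofish, as in the updated tests above, and out-of-range immediates are now rejected at compile time by the static_assert_imm* macros instead of asserting at runtime. Below is a minimal caller-side sketch, not part of the patch; the wrapper name and the runtime feature check are illustrative, and it assumes a nightly toolchain where the AVX-512 intrinsics in std::arch::x86_64 are available (they are still feature-gated).

use std::arch::x86_64::*;

// The immediate is a const generic: `::<1>` selects the upper 256-bit lane
// and is range-checked at compile time.
#[target_feature(enable = "avx512f")]
unsafe fn upper_lane(a: __m512d) -> __m256d {
    _mm512_extractf64x4_pd::<1>(a) // was: _mm512_extractf64x4_pd(a, 1)
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
            let mut out = [0.0f64; 4];
            _mm256_storeu_pd(out.as_mut_ptr(), upper_lane(a));
            // Matches the expectation in test_mm512_extractf64x4_pd above.
            assert_eq!(out, [5., 6., 7., 8.]);
        }
    }
}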