Skip to content

Commit 894e91c

Browse files
authored
Convert _mm_slli_si128 & _mm_srli_si128 to const generics (#1024)
1 parent dab4581 commit 894e91c

File tree

1 file changed

+82
-141
lines changed

1 file changed

+82
-141
lines changed

crates/core_arch/src/x86/sse2.rs

+82 -141
Original file line number | Diff line number | Diff line change
@@ -412,65 +412,48 @@ pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
412412
#[inline]
413413
#[target_feature(enable = "sse2")]
414414
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
415-
#[rustc_args_required_const(1)]
415+
#[rustc_legacy_const_generics(1)]
416416
#[stable(feature = "simd_x86", since = "1.27.0")]
417-
pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
418-
_mm_slli_si128_impl(a, imm8)
417+
pub unsafe fn _mm_slli_si128<const imm8: i32>(a: __m128i) -> __m128i {
418+
static_assert_imm8!(imm8);
419+
_mm_slli_si128_impl::<imm8>(a)
419420
}
420421

421422
/// Implementation detail: converts the immediate argument of the
422423
/// `_mm_slli_si128` intrinsic into a compile-time constant.
423424
#[inline]
424425
#[target_feature(enable = "sse2")]
425-
unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
426-
let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
427-
let a = a.as_i8x16();
428-
macro_rules! shuffle {
429-
($shift:expr) => {
430-
simd_shuffle16::<i8x16, i8x16>(
431-
zero,
432-
a,
433-
[
434-
16 - $shift,
435-
17 - $shift,
436-
18 - $shift,
437-
19 - $shift,
438-
20 - $shift,
439-
21 - $shift,
440-
22 - $shift,
441-
23 - $shift,
442-
24 - $shift,
443-
25 - $shift,
444-
26 - $shift,
445-
27 - $shift,
446-
28 - $shift,
447-
29 - $shift,
448-
30 - $shift,
449-
31 - $shift,
450-
],
451-
)
452-
};
426+
unsafe fn _mm_slli_si128_impl<const imm8: i32>(a: __m128i) -> __m128i {
427+
const fn mask(shift: i32, i: u32) -> u32 {
428+
if (shift as u32) > 15 {
429+
i
430+
} else {
431+
16 - (shift as u32) + i
432+
}
453433
}
454-
let x = match imm8 {
455-
0 => shuffle!(0),
456-
1 => shuffle!(1),
457-
2 => shuffle!(2),
458-
3 => shuffle!(3),
459-
4 => shuffle!(4),
460-
5 => shuffle!(5),
461-
6 => shuffle!(6),
462-
7 => shuffle!(7),
463-
8 => shuffle!(8),
464-
9 => shuffle!(9),
465-
10 => shuffle!(10),
466-
11 => shuffle!(11),
467-
12 => shuffle!(12),
468-
13 => shuffle!(13),
469-
14 => shuffle!(14),
470-
15 => shuffle!(15),
471-
_ => shuffle!(16),
472-
};
473-
transmute(x)
434+
let zero = _mm_set1_epi8(0).as_i8x16();
435+
transmute(simd_shuffle16::<i8x16, i8x16>(
436+
zero,
437+
a.as_i8x16(),
438+
[
439+
mask(imm8, 0),
440+
mask(imm8, 1),
441+
mask(imm8, 2),
442+
mask(imm8, 3),
443+
mask(imm8, 4),
444+
mask(imm8, 5),
445+
mask(imm8, 6),
446+
mask(imm8, 7),
447+
mask(imm8, 8),
448+
mask(imm8, 9),
449+
mask(imm8, 10),
450+
mask(imm8, 11),
451+
mask(imm8, 12),
452+
mask(imm8, 13),
453+
mask(imm8, 14),
454+
mask(imm8, 15),
455+
],
456+
))
474457
}
475458

476459
/// Shifts `a` left by `imm8` bytes while shifting in zeros.
@@ -479,10 +462,11 @@ unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
479462
#[inline]
480463
#[target_feature(enable = "sse2")]
481464
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
482-
#[rustc_args_required_const(1)]
465+
#[rustc_legacy_const_generics(1)]
483466
#[stable(feature = "simd_x86", since = "1.27.0")]
484-
pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
485-
_mm_slli_si128_impl(a, imm8)
467+
pub unsafe fn _mm_bslli_si128<const imm8: i32>(a: __m128i) -> __m128i {
468+
static_assert_imm8!(imm8);
469+
_mm_slli_si128_impl::<imm8>(a)
486470
}
487471

488472
/// Shifts `a` right by `imm8` bytes while shifting in zeros.
@@ -491,10 +475,11 @@ pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
491475
#[inline]
492476
#[target_feature(enable = "sse2")]
493477
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
494-
#[rustc_args_required_const(1)]
478+
#[rustc_legacy_const_generics(1)]
495479
#[stable(feature = "simd_x86", since = "1.27.0")]
496-
pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
497-
_mm_srli_si128_impl(a, imm8)
480+
pub unsafe fn _mm_bsrli_si128<const imm8: i32>(a: __m128i) -> __m128i {
481+
static_assert_imm8!(imm8);
482+
_mm_srli_si128_impl::<imm8>(a)
498483
}
499484

500485
/// Shifts packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
@@ -630,64 +615,48 @@ pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
630615
#[inline]
631616
#[target_feature(enable = "sse2")]
632617
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
633-
#[rustc_args_required_const(1)]
618+
#[rustc_legacy_const_generics(1)]
634619
#[stable(feature = "simd_x86", since = "1.27.0")]
635-
pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
636-
_mm_srli_si128_impl(a, imm8)
620+
pub unsafe fn _mm_srli_si128<const imm8: i32>(a: __m128i) -> __m128i {
621+
static_assert_imm8!(imm8);
622+
_mm_srli_si128_impl::<imm8>(a)
637623
}
638624

639625
/// Implementation detail: converts the immediate argument of the
640626
/// `_mm_srli_si128` intrinsic into a compile-time constant.
641627
#[inline]
642628
#[target_feature(enable = "sse2")]
643-
unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
644-
let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
645-
let a = a.as_i8x16();
646-
macro_rules! shuffle {
647-
($shift:expr) => {
648-
simd_shuffle16(
649-
a,
650-
zero,
651-
[
652-
0 + $shift,
653-
1 + $shift,
654-
2 + $shift,
655-
3 + $shift,
656-
4 + $shift,
657-
5 + $shift,
658-
6 + $shift,
659-
7 + $shift,
660-
8 + $shift,
661-
9 + $shift,
662-
10 + $shift,
663-
11 + $shift,
664-
12 + $shift,
665-
13 + $shift,
666-
14 + $shift,
667-
15 + $shift,
668-
],
669-
)
670-
};
629+
unsafe fn _mm_srli_si128_impl<const imm8: i32>(a: __m128i) -> __m128i {
630+
const fn mask(shift: i32, i: u32) -> u32 {
631+
if (shift as u32) > 15 {
632+
i + 16
633+
} else {
634+
i + (shift as u32)
635+
}
671636
}
672-
let x: i8x16 = match imm8 {
673-
0 => shuffle!(0),
674-
1 => shuffle!(1),
675-
2 => shuffle!(2),
676-
3 => shuffle!(3),
677-
4 => shuffle!(4),
678-
5 => shuffle!(5),
679-
6 => shuffle!(6),
680-
7 => shuffle!(7),
681-
8 => shuffle!(8),
682-
9 => shuffle!(9),
683-
10 => shuffle!(10),
684-
11 => shuffle!(11),
685-
12 => shuffle!(12),
686-
13 => shuffle!(13),
687-
14 => shuffle!(14),
688-
15 => shuffle!(15),
689-
_ => shuffle!(16),
690-
};
637+
let zero = _mm_set1_epi8(0).as_i8x16();
638+
let x: i8x16 = simd_shuffle16(
639+
a.as_i8x16(),
640+
zero,
641+
[
642+
mask(imm8, 0),
643+
mask(imm8, 1),
644+
mask(imm8, 2),
645+
mask(imm8, 3),
646+
mask(imm8, 4),
647+
mask(imm8, 5),
648+
mask(imm8, 6),
649+
mask(imm8, 7),
650+
mask(imm8, 8),
651+
mask(imm8, 9),
652+
mask(imm8, 10),
653+
mask(imm8, 11),
654+
mask(imm8, 12),
655+
mask(imm8, 13),
656+
mask(imm8, 14),
657+
mask(imm8, 15),
658+
],
659+
);
691660
transmute(x)
692661
}
693662

@@ -3375,37 +3344,23 @@ mod tests {
33753344
let a = _mm_setr_epi8(
33763345
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
33773346
);
3378-
let r = _mm_slli_si128(a, 1);
3347+
let r = _mm_slli_si128::<1>(a);
33793348
let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33803349
assert_eq_m128i(r, e);
33813350

33823351
#[rustfmt::skip]
33833352
let a = _mm_setr_epi8(
33843353
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
33853354
);
3386-
let r = _mm_slli_si128(a, 15);
3355+
let r = _mm_slli_si128::<15>(a);
33873356
let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
33883357
assert_eq_m128i(r, e);
33893358

33903359
#[rustfmt::skip]
33913360
let a = _mm_setr_epi8(
33923361
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
33933362
);
3394-
let r = _mm_slli_si128(a, 16);
3395-
assert_eq_m128i(r, _mm_set1_epi8(0));
3396-
3397-
#[rustfmt::skip]
3398-
let a = _mm_setr_epi8(
3399-
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3400-
);
3401-
let r = _mm_slli_si128(a, -1);
3402-
assert_eq_m128i(_mm_set1_epi8(0), r);
3403-
3404-
#[rustfmt::skip]
3405-
let a = _mm_setr_epi8(
3406-
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3407-
);
3408-
let r = _mm_slli_si128(a, -0x80000000);
3363+
let r = _mm_slli_si128::<16>(a);
34093364
assert_eq_m128i(r, _mm_set1_epi8(0));
34103365
}
34113366

@@ -3496,7 +3451,7 @@ mod tests {
34963451
let a = _mm_setr_epi8(
34973452
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
34983453
);
3499-
let r = _mm_srli_si128(a, 1);
3454+
let r = _mm_srli_si128::<1>(a);
35003455
#[rustfmt::skip]
35013456
let e = _mm_setr_epi8(
35023457
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
@@ -3507,29 +3462,15 @@ mod tests {
35073462
let a = _mm_setr_epi8(
35083463
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
35093464
);
3510-
let r = _mm_srli_si128(a, 15);
3465+
let r = _mm_srli_si128::<15>(a);
35113466
let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
35123467
assert_eq_m128i(r, e);
35133468

35143469
#[rustfmt::skip]
35153470
let a = _mm_setr_epi8(
35163471
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
35173472
);
3518-
let r = _mm_srli_si128(a, 16);
3519-
assert_eq_m128i(r, _mm_set1_epi8(0));
3520-
3521-
#[rustfmt::skip]
3522-
let a = _mm_setr_epi8(
3523-
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3524-
);
3525-
let r = _mm_srli_si128(a, -1);
3526-
assert_eq_m128i(r, _mm_set1_epi8(0));
3527-
3528-
#[rustfmt::skip]
3529-
let a = _mm_setr_epi8(
3530-
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3531-
);
3532-
let r = _mm_srli_si128(a, -0x80000000);
3473+
let r = _mm_srli_si128::<16>(a);
35333474
assert_eq_m128i(r, _mm_set1_epi8(0));
35343475
}
35353476

0 commit comments

Comments
 (0)