Skip to content

Commit 641cdf1

Browse files
committed
use a dynamic swizzle for swizzle_to_front
1 parent 5365315 commit 641cdf1

File tree

1 file changed

+42
-278
lines changed
  • roaring/src/bitmap/store/array_store

1 file changed

+42
-278
lines changed

roaring/src/bitmap/store/array_store/vector.rs

Lines changed: 42 additions & 278 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
use super::scalar;
1414
use core::simd::cmp::{SimdPartialEq, SimdPartialOrd};
1515
use core::simd::{
16-
mask16x8, simd_swizzle, u16x8, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
16+
mask16x8, u16x8, u8x16, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, ToBytes,
1717
};
1818

1919
// a one-pass SSE union algorithm
@@ -484,283 +484,47 @@ fn simd_merge_u16(a: Simd<u16, 8>, b: Simd<u16, 8>) -> [Simd<u16, 8>; 2] {
484484
/// Move the values in `val` with the corresponding index in `bitmask`
485485
/// set to the front of the return vector, preserving their order.
486486
///
487-
/// This had to be implemented as a jump table to be portable,
488-
/// as LLVM swizzle intrinsic only supports swizzle by a const
489-
/// value. https://github.com/rust-lang/portable-simd/issues/11
490-
///
491487
/// The lanes of the return vector at index `bitmask.count_ones()` and beyond are unspecified.
492-
///
493-
/// The masks can be constructed with the following snippet
494-
/// ```ignore
495-
/// for n in 0usize..256 {
496-
/// let mut x = n;
497-
/// let mut arr = [0; 8];
498-
/// let mut i = 0;
499-
/// while x > 0 {
500-
/// let lsb = x.trailing_zeros();
501-
/// arr[i] = lsb;
502-
/// x ^= 1 << lsb;
503-
/// i += 1;
504-
/// }
505-
/// }
506-
/// ```
488+
// Dynamic swizzle is only available for `u8`s.
489+
//
490+
// So we need to convert the `u16x8` to `u8x16`, and then swizzle it two lanes at a time.
491+
//
492+
// e.g. if `bitmask` is `0b0101`, then swizzle the first two bytes (the first u16 lane) to the
493+
// first two positions, and the 5th and 6th bytes (the third u16 lane) to the next two positions.
494+
//
495+
// Note however:
496+
// https://github.com/rust-lang/rust/blob/34097a38afc9efdedf776d3f1c84a190ff334886/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs#L12-L15
497+
// > Note that the current implementation is selected during build-time
498+
// > of the standard library, so `cargo build -Zbuild-std` may be necessary
499+
// > to unlock better performance, especially for larger vectors.
500+
// > A planned compiler improvement will enable using `#[target_feature]` instead.
501+
//
502+
// For example, the default `x86_64` target does not enable SSSE3, so this may be
503+
// suboptimal without `-Zbuild-std` on `x86_64` targets.
507504
pub fn swizzle_to_front(val: u16x8, bitmask: u8) -> u16x8 {
508-
match bitmask {
509-
0x00 => simd_swizzle!(val, [0, 0, 0, 0, 0, 0, 0, 0]),
510-
0x01 => simd_swizzle!(val, [0, 0, 0, 0, 0, 0, 0, 0]),
511-
0x02 => simd_swizzle!(val, [1, 0, 0, 0, 0, 0, 0, 0]),
512-
0x03 => simd_swizzle!(val, [0, 1, 0, 0, 0, 0, 0, 0]),
513-
0x04 => simd_swizzle!(val, [2, 0, 0, 0, 0, 0, 0, 0]),
514-
0x05 => simd_swizzle!(val, [0, 2, 0, 0, 0, 0, 0, 0]),
515-
0x06 => simd_swizzle!(val, [1, 2, 0, 0, 0, 0, 0, 0]),
516-
0x07 => simd_swizzle!(val, [0, 1, 2, 0, 0, 0, 0, 0]),
517-
0x08 => simd_swizzle!(val, [3, 0, 0, 0, 0, 0, 0, 0]),
518-
0x09 => simd_swizzle!(val, [0, 3, 0, 0, 0, 0, 0, 0]),
519-
0x0A => simd_swizzle!(val, [1, 3, 0, 0, 0, 0, 0, 0]),
520-
0x0B => simd_swizzle!(val, [0, 1, 3, 0, 0, 0, 0, 0]),
521-
0x0C => simd_swizzle!(val, [2, 3, 0, 0, 0, 0, 0, 0]),
522-
0x0D => simd_swizzle!(val, [0, 2, 3, 0, 0, 0, 0, 0]),
523-
0x0E => simd_swizzle!(val, [1, 2, 3, 0, 0, 0, 0, 0]),
524-
0x0F => simd_swizzle!(val, [0, 1, 2, 3, 0, 0, 0, 0]),
525-
0x10 => simd_swizzle!(val, [4, 0, 0, 0, 0, 0, 0, 0]),
526-
0x11 => simd_swizzle!(val, [0, 4, 0, 0, 0, 0, 0, 0]),
527-
0x12 => simd_swizzle!(val, [1, 4, 0, 0, 0, 0, 0, 0]),
528-
0x13 => simd_swizzle!(val, [0, 1, 4, 0, 0, 0, 0, 0]),
529-
0x14 => simd_swizzle!(val, [2, 4, 0, 0, 0, 0, 0, 0]),
530-
0x15 => simd_swizzle!(val, [0, 2, 4, 0, 0, 0, 0, 0]),
531-
0x16 => simd_swizzle!(val, [1, 2, 4, 0, 0, 0, 0, 0]),
532-
0x17 => simd_swizzle!(val, [0, 1, 2, 4, 0, 0, 0, 0]),
533-
0x18 => simd_swizzle!(val, [3, 4, 0, 0, 0, 0, 0, 0]),
534-
0x19 => simd_swizzle!(val, [0, 3, 4, 0, 0, 0, 0, 0]),
535-
0x1A => simd_swizzle!(val, [1, 3, 4, 0, 0, 0, 0, 0]),
536-
0x1B => simd_swizzle!(val, [0, 1, 3, 4, 0, 0, 0, 0]),
537-
0x1C => simd_swizzle!(val, [2, 3, 4, 0, 0, 0, 0, 0]),
538-
0x1D => simd_swizzle!(val, [0, 2, 3, 4, 0, 0, 0, 0]),
539-
0x1E => simd_swizzle!(val, [1, 2, 3, 4, 0, 0, 0, 0]),
540-
0x1F => simd_swizzle!(val, [0, 1, 2, 3, 4, 0, 0, 0]),
541-
0x20 => simd_swizzle!(val, [5, 0, 0, 0, 0, 0, 0, 0]),
542-
0x21 => simd_swizzle!(val, [0, 5, 0, 0, 0, 0, 0, 0]),
543-
0x22 => simd_swizzle!(val, [1, 5, 0, 0, 0, 0, 0, 0]),
544-
0x23 => simd_swizzle!(val, [0, 1, 5, 0, 0, 0, 0, 0]),
545-
0x24 => simd_swizzle!(val, [2, 5, 0, 0, 0, 0, 0, 0]),
546-
0x25 => simd_swizzle!(val, [0, 2, 5, 0, 0, 0, 0, 0]),
547-
0x26 => simd_swizzle!(val, [1, 2, 5, 0, 0, 0, 0, 0]),
548-
0x27 => simd_swizzle!(val, [0, 1, 2, 5, 0, 0, 0, 0]),
549-
0x28 => simd_swizzle!(val, [3, 5, 0, 0, 0, 0, 0, 0]),
550-
0x29 => simd_swizzle!(val, [0, 3, 5, 0, 0, 0, 0, 0]),
551-
0x2A => simd_swizzle!(val, [1, 3, 5, 0, 0, 0, 0, 0]),
552-
0x2B => simd_swizzle!(val, [0, 1, 3, 5, 0, 0, 0, 0]),
553-
0x2C => simd_swizzle!(val, [2, 3, 5, 0, 0, 0, 0, 0]),
554-
0x2D => simd_swizzle!(val, [0, 2, 3, 5, 0, 0, 0, 0]),
555-
0x2E => simd_swizzle!(val, [1, 2, 3, 5, 0, 0, 0, 0]),
556-
0x2F => simd_swizzle!(val, [0, 1, 2, 3, 5, 0, 0, 0]),
557-
0x30 => simd_swizzle!(val, [4, 5, 0, 0, 0, 0, 0, 0]),
558-
0x31 => simd_swizzle!(val, [0, 4, 5, 0, 0, 0, 0, 0]),
559-
0x32 => simd_swizzle!(val, [1, 4, 5, 0, 0, 0, 0, 0]),
560-
0x33 => simd_swizzle!(val, [0, 1, 4, 5, 0, 0, 0, 0]),
561-
0x34 => simd_swizzle!(val, [2, 4, 5, 0, 0, 0, 0, 0]),
562-
0x35 => simd_swizzle!(val, [0, 2, 4, 5, 0, 0, 0, 0]),
563-
0x36 => simd_swizzle!(val, [1, 2, 4, 5, 0, 0, 0, 0]),
564-
0x37 => simd_swizzle!(val, [0, 1, 2, 4, 5, 0, 0, 0]),
565-
0x38 => simd_swizzle!(val, [3, 4, 5, 0, 0, 0, 0, 0]),
566-
0x39 => simd_swizzle!(val, [0, 3, 4, 5, 0, 0, 0, 0]),
567-
0x3A => simd_swizzle!(val, [1, 3, 4, 5, 0, 0, 0, 0]),
568-
0x3B => simd_swizzle!(val, [0, 1, 3, 4, 5, 0, 0, 0]),
569-
0x3C => simd_swizzle!(val, [2, 3, 4, 5, 0, 0, 0, 0]),
570-
0x3D => simd_swizzle!(val, [0, 2, 3, 4, 5, 0, 0, 0]),
571-
0x3E => simd_swizzle!(val, [1, 2, 3, 4, 5, 0, 0, 0]),
572-
0x3F => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 0, 0]),
573-
0x40 => simd_swizzle!(val, [6, 0, 0, 0, 0, 0, 0, 0]),
574-
0x41 => simd_swizzle!(val, [0, 6, 0, 0, 0, 0, 0, 0]),
575-
0x42 => simd_swizzle!(val, [1, 6, 0, 0, 0, 0, 0, 0]),
576-
0x43 => simd_swizzle!(val, [0, 1, 6, 0, 0, 0, 0, 0]),
577-
0x44 => simd_swizzle!(val, [2, 6, 0, 0, 0, 0, 0, 0]),
578-
0x45 => simd_swizzle!(val, [0, 2, 6, 0, 0, 0, 0, 0]),
579-
0x46 => simd_swizzle!(val, [1, 2, 6, 0, 0, 0, 0, 0]),
580-
0x47 => simd_swizzle!(val, [0, 1, 2, 6, 0, 0, 0, 0]),
581-
0x48 => simd_swizzle!(val, [3, 6, 0, 0, 0, 0, 0, 0]),
582-
0x49 => simd_swizzle!(val, [0, 3, 6, 0, 0, 0, 0, 0]),
583-
0x4A => simd_swizzle!(val, [1, 3, 6, 0, 0, 0, 0, 0]),
584-
0x4B => simd_swizzle!(val, [0, 1, 3, 6, 0, 0, 0, 0]),
585-
0x4C => simd_swizzle!(val, [2, 3, 6, 0, 0, 0, 0, 0]),
586-
0x4D => simd_swizzle!(val, [0, 2, 3, 6, 0, 0, 0, 0]),
587-
0x4E => simd_swizzle!(val, [1, 2, 3, 6, 0, 0, 0, 0]),
588-
0x4F => simd_swizzle!(val, [0, 1, 2, 3, 6, 0, 0, 0]),
589-
0x50 => simd_swizzle!(val, [4, 6, 0, 0, 0, 0, 0, 0]),
590-
0x51 => simd_swizzle!(val, [0, 4, 6, 0, 0, 0, 0, 0]),
591-
0x52 => simd_swizzle!(val, [1, 4, 6, 0, 0, 0, 0, 0]),
592-
0x53 => simd_swizzle!(val, [0, 1, 4, 6, 0, 0, 0, 0]),
593-
0x54 => simd_swizzle!(val, [2, 4, 6, 0, 0, 0, 0, 0]),
594-
0x55 => simd_swizzle!(val, [0, 2, 4, 6, 0, 0, 0, 0]),
595-
0x56 => simd_swizzle!(val, [1, 2, 4, 6, 0, 0, 0, 0]),
596-
0x57 => simd_swizzle!(val, [0, 1, 2, 4, 6, 0, 0, 0]),
597-
0x58 => simd_swizzle!(val, [3, 4, 6, 0, 0, 0, 0, 0]),
598-
0x59 => simd_swizzle!(val, [0, 3, 4, 6, 0, 0, 0, 0]),
599-
0x5A => simd_swizzle!(val, [1, 3, 4, 6, 0, 0, 0, 0]),
600-
0x5B => simd_swizzle!(val, [0, 1, 3, 4, 6, 0, 0, 0]),
601-
0x5C => simd_swizzle!(val, [2, 3, 4, 6, 0, 0, 0, 0]),
602-
0x5D => simd_swizzle!(val, [0, 2, 3, 4, 6, 0, 0, 0]),
603-
0x5E => simd_swizzle!(val, [1, 2, 3, 4, 6, 0, 0, 0]),
604-
0x5F => simd_swizzle!(val, [0, 1, 2, 3, 4, 6, 0, 0]),
605-
0x60 => simd_swizzle!(val, [5, 6, 0, 0, 0, 0, 0, 0]),
606-
0x61 => simd_swizzle!(val, [0, 5, 6, 0, 0, 0, 0, 0]),
607-
0x62 => simd_swizzle!(val, [1, 5, 6, 0, 0, 0, 0, 0]),
608-
0x63 => simd_swizzle!(val, [0, 1, 5, 6, 0, 0, 0, 0]),
609-
0x64 => simd_swizzle!(val, [2, 5, 6, 0, 0, 0, 0, 0]),
610-
0x65 => simd_swizzle!(val, [0, 2, 5, 6, 0, 0, 0, 0]),
611-
0x66 => simd_swizzle!(val, [1, 2, 5, 6, 0, 0, 0, 0]),
612-
0x67 => simd_swizzle!(val, [0, 1, 2, 5, 6, 0, 0, 0]),
613-
0x68 => simd_swizzle!(val, [3, 5, 6, 0, 0, 0, 0, 0]),
614-
0x69 => simd_swizzle!(val, [0, 3, 5, 6, 0, 0, 0, 0]),
615-
0x6A => simd_swizzle!(val, [1, 3, 5, 6, 0, 0, 0, 0]),
616-
0x6B => simd_swizzle!(val, [0, 1, 3, 5, 6, 0, 0, 0]),
617-
0x6C => simd_swizzle!(val, [2, 3, 5, 6, 0, 0, 0, 0]),
618-
0x6D => simd_swizzle!(val, [0, 2, 3, 5, 6, 0, 0, 0]),
619-
0x6E => simd_swizzle!(val, [1, 2, 3, 5, 6, 0, 0, 0]),
620-
0x6F => simd_swizzle!(val, [0, 1, 2, 3, 5, 6, 0, 0]),
621-
0x70 => simd_swizzle!(val, [4, 5, 6, 0, 0, 0, 0, 0]),
622-
0x71 => simd_swizzle!(val, [0, 4, 5, 6, 0, 0, 0, 0]),
623-
0x72 => simd_swizzle!(val, [1, 4, 5, 6, 0, 0, 0, 0]),
624-
0x73 => simd_swizzle!(val, [0, 1, 4, 5, 6, 0, 0, 0]),
625-
0x74 => simd_swizzle!(val, [2, 4, 5, 6, 0, 0, 0, 0]),
626-
0x75 => simd_swizzle!(val, [0, 2, 4, 5, 6, 0, 0, 0]),
627-
0x76 => simd_swizzle!(val, [1, 2, 4, 5, 6, 0, 0, 0]),
628-
0x77 => simd_swizzle!(val, [0, 1, 2, 4, 5, 6, 0, 0]),
629-
0x78 => simd_swizzle!(val, [3, 4, 5, 6, 0, 0, 0, 0]),
630-
0x79 => simd_swizzle!(val, [0, 3, 4, 5, 6, 0, 0, 0]),
631-
0x7A => simd_swizzle!(val, [1, 3, 4, 5, 6, 0, 0, 0]),
632-
0x7B => simd_swizzle!(val, [0, 1, 3, 4, 5, 6, 0, 0]),
633-
0x7C => simd_swizzle!(val, [2, 3, 4, 5, 6, 0, 0, 0]),
634-
0x7D => simd_swizzle!(val, [0, 2, 3, 4, 5, 6, 0, 0]),
635-
0x7E => simd_swizzle!(val, [1, 2, 3, 4, 5, 6, 0, 0]),
636-
0x7F => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 6, 0]),
637-
0x80 => simd_swizzle!(val, [7, 0, 0, 0, 0, 0, 0, 0]),
638-
0x81 => simd_swizzle!(val, [0, 7, 0, 0, 0, 0, 0, 0]),
639-
0x82 => simd_swizzle!(val, [1, 7, 0, 0, 0, 0, 0, 0]),
640-
0x83 => simd_swizzle!(val, [0, 1, 7, 0, 0, 0, 0, 0]),
641-
0x84 => simd_swizzle!(val, [2, 7, 0, 0, 0, 0, 0, 0]),
642-
0x85 => simd_swizzle!(val, [0, 2, 7, 0, 0, 0, 0, 0]),
643-
0x86 => simd_swizzle!(val, [1, 2, 7, 0, 0, 0, 0, 0]),
644-
0x87 => simd_swizzle!(val, [0, 1, 2, 7, 0, 0, 0, 0]),
645-
0x88 => simd_swizzle!(val, [3, 7, 0, 0, 0, 0, 0, 0]),
646-
0x89 => simd_swizzle!(val, [0, 3, 7, 0, 0, 0, 0, 0]),
647-
0x8A => simd_swizzle!(val, [1, 3, 7, 0, 0, 0, 0, 0]),
648-
0x8B => simd_swizzle!(val, [0, 1, 3, 7, 0, 0, 0, 0]),
649-
0x8C => simd_swizzle!(val, [2, 3, 7, 0, 0, 0, 0, 0]),
650-
0x8D => simd_swizzle!(val, [0, 2, 3, 7, 0, 0, 0, 0]),
651-
0x8E => simd_swizzle!(val, [1, 2, 3, 7, 0, 0, 0, 0]),
652-
0x8F => simd_swizzle!(val, [0, 1, 2, 3, 7, 0, 0, 0]),
653-
0x90 => simd_swizzle!(val, [4, 7, 0, 0, 0, 0, 0, 0]),
654-
0x91 => simd_swizzle!(val, [0, 4, 7, 0, 0, 0, 0, 0]),
655-
0x92 => simd_swizzle!(val, [1, 4, 7, 0, 0, 0, 0, 0]),
656-
0x93 => simd_swizzle!(val, [0, 1, 4, 7, 0, 0, 0, 0]),
657-
0x94 => simd_swizzle!(val, [2, 4, 7, 0, 0, 0, 0, 0]),
658-
0x95 => simd_swizzle!(val, [0, 2, 4, 7, 0, 0, 0, 0]),
659-
0x96 => simd_swizzle!(val, [1, 2, 4, 7, 0, 0, 0, 0]),
660-
0x97 => simd_swizzle!(val, [0, 1, 2, 4, 7, 0, 0, 0]),
661-
0x98 => simd_swizzle!(val, [3, 4, 7, 0, 0, 0, 0, 0]),
662-
0x99 => simd_swizzle!(val, [0, 3, 4, 7, 0, 0, 0, 0]),
663-
0x9A => simd_swizzle!(val, [1, 3, 4, 7, 0, 0, 0, 0]),
664-
0x9B => simd_swizzle!(val, [0, 1, 3, 4, 7, 0, 0, 0]),
665-
0x9C => simd_swizzle!(val, [2, 3, 4, 7, 0, 0, 0, 0]),
666-
0x9D => simd_swizzle!(val, [0, 2, 3, 4, 7, 0, 0, 0]),
667-
0x9E => simd_swizzle!(val, [1, 2, 3, 4, 7, 0, 0, 0]),
668-
0x9F => simd_swizzle!(val, [0, 1, 2, 3, 4, 7, 0, 0]),
669-
0xA0 => simd_swizzle!(val, [5, 7, 0, 0, 0, 0, 0, 0]),
670-
0xA1 => simd_swizzle!(val, [0, 5, 7, 0, 0, 0, 0, 0]),
671-
0xA2 => simd_swizzle!(val, [1, 5, 7, 0, 0, 0, 0, 0]),
672-
0xA3 => simd_swizzle!(val, [0, 1, 5, 7, 0, 0, 0, 0]),
673-
0xA4 => simd_swizzle!(val, [2, 5, 7, 0, 0, 0, 0, 0]),
674-
0xA5 => simd_swizzle!(val, [0, 2, 5, 7, 0, 0, 0, 0]),
675-
0xA6 => simd_swizzle!(val, [1, 2, 5, 7, 0, 0, 0, 0]),
676-
0xA7 => simd_swizzle!(val, [0, 1, 2, 5, 7, 0, 0, 0]),
677-
0xA8 => simd_swizzle!(val, [3, 5, 7, 0, 0, 0, 0, 0]),
678-
0xA9 => simd_swizzle!(val, [0, 3, 5, 7, 0, 0, 0, 0]),
679-
0xAA => simd_swizzle!(val, [1, 3, 5, 7, 0, 0, 0, 0]),
680-
0xAB => simd_swizzle!(val, [0, 1, 3, 5, 7, 0, 0, 0]),
681-
0xAC => simd_swizzle!(val, [2, 3, 5, 7, 0, 0, 0, 0]),
682-
0xAD => simd_swizzle!(val, [0, 2, 3, 5, 7, 0, 0, 0]),
683-
0xAE => simd_swizzle!(val, [1, 2, 3, 5, 7, 0, 0, 0]),
684-
0xAF => simd_swizzle!(val, [0, 1, 2, 3, 5, 7, 0, 0]),
685-
0xB0 => simd_swizzle!(val, [4, 5, 7, 0, 0, 0, 0, 0]),
686-
0xB1 => simd_swizzle!(val, [0, 4, 5, 7, 0, 0, 0, 0]),
687-
0xB2 => simd_swizzle!(val, [1, 4, 5, 7, 0, 0, 0, 0]),
688-
0xB3 => simd_swizzle!(val, [0, 1, 4, 5, 7, 0, 0, 0]),
689-
0xB4 => simd_swizzle!(val, [2, 4, 5, 7, 0, 0, 0, 0]),
690-
0xB5 => simd_swizzle!(val, [0, 2, 4, 5, 7, 0, 0, 0]),
691-
0xB6 => simd_swizzle!(val, [1, 2, 4, 5, 7, 0, 0, 0]),
692-
0xB7 => simd_swizzle!(val, [0, 1, 2, 4, 5, 7, 0, 0]),
693-
0xB8 => simd_swizzle!(val, [3, 4, 5, 7, 0, 0, 0, 0]),
694-
0xB9 => simd_swizzle!(val, [0, 3, 4, 5, 7, 0, 0, 0]),
695-
0xBA => simd_swizzle!(val, [1, 3, 4, 5, 7, 0, 0, 0]),
696-
0xBB => simd_swizzle!(val, [0, 1, 3, 4, 5, 7, 0, 0]),
697-
0xBC => simd_swizzle!(val, [2, 3, 4, 5, 7, 0, 0, 0]),
698-
0xBD => simd_swizzle!(val, [0, 2, 3, 4, 5, 7, 0, 0]),
699-
0xBE => simd_swizzle!(val, [1, 2, 3, 4, 5, 7, 0, 0]),
700-
0xBF => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 7, 0]),
701-
0xC0 => simd_swizzle!(val, [6, 7, 0, 0, 0, 0, 0, 0]),
702-
0xC1 => simd_swizzle!(val, [0, 6, 7, 0, 0, 0, 0, 0]),
703-
0xC2 => simd_swizzle!(val, [1, 6, 7, 0, 0, 0, 0, 0]),
704-
0xC3 => simd_swizzle!(val, [0, 1, 6, 7, 0, 0, 0, 0]),
705-
0xC4 => simd_swizzle!(val, [2, 6, 7, 0, 0, 0, 0, 0]),
706-
0xC5 => simd_swizzle!(val, [0, 2, 6, 7, 0, 0, 0, 0]),
707-
0xC6 => simd_swizzle!(val, [1, 2, 6, 7, 0, 0, 0, 0]),
708-
0xC7 => simd_swizzle!(val, [0, 1, 2, 6, 7, 0, 0, 0]),
709-
0xC8 => simd_swizzle!(val, [3, 6, 7, 0, 0, 0, 0, 0]),
710-
0xC9 => simd_swizzle!(val, [0, 3, 6, 7, 0, 0, 0, 0]),
711-
0xCA => simd_swizzle!(val, [1, 3, 6, 7, 0, 0, 0, 0]),
712-
0xCB => simd_swizzle!(val, [0, 1, 3, 6, 7, 0, 0, 0]),
713-
0xCC => simd_swizzle!(val, [2, 3, 6, 7, 0, 0, 0, 0]),
714-
0xCD => simd_swizzle!(val, [0, 2, 3, 6, 7, 0, 0, 0]),
715-
0xCE => simd_swizzle!(val, [1, 2, 3, 6, 7, 0, 0, 0]),
716-
0xCF => simd_swizzle!(val, [0, 1, 2, 3, 6, 7, 0, 0]),
717-
0xD0 => simd_swizzle!(val, [4, 6, 7, 0, 0, 0, 0, 0]),
718-
0xD1 => simd_swizzle!(val, [0, 4, 6, 7, 0, 0, 0, 0]),
719-
0xD2 => simd_swizzle!(val, [1, 4, 6, 7, 0, 0, 0, 0]),
720-
0xD3 => simd_swizzle!(val, [0, 1, 4, 6, 7, 0, 0, 0]),
721-
0xD4 => simd_swizzle!(val, [2, 4, 6, 7, 0, 0, 0, 0]),
722-
0xD5 => simd_swizzle!(val, [0, 2, 4, 6, 7, 0, 0, 0]),
723-
0xD6 => simd_swizzle!(val, [1, 2, 4, 6, 7, 0, 0, 0]),
724-
0xD7 => simd_swizzle!(val, [0, 1, 2, 4, 6, 7, 0, 0]),
725-
0xD8 => simd_swizzle!(val, [3, 4, 6, 7, 0, 0, 0, 0]),
726-
0xD9 => simd_swizzle!(val, [0, 3, 4, 6, 7, 0, 0, 0]),
727-
0xDA => simd_swizzle!(val, [1, 3, 4, 6, 7, 0, 0, 0]),
728-
0xDB => simd_swizzle!(val, [0, 1, 3, 4, 6, 7, 0, 0]),
729-
0xDC => simd_swizzle!(val, [2, 3, 4, 6, 7, 0, 0, 0]),
730-
0xDD => simd_swizzle!(val, [0, 2, 3, 4, 6, 7, 0, 0]),
731-
0xDE => simd_swizzle!(val, [1, 2, 3, 4, 6, 7, 0, 0]),
732-
0xDF => simd_swizzle!(val, [0, 1, 2, 3, 4, 6, 7, 0]),
733-
0xE0 => simd_swizzle!(val, [5, 6, 7, 0, 0, 0, 0, 0]),
734-
0xE1 => simd_swizzle!(val, [0, 5, 6, 7, 0, 0, 0, 0]),
735-
0xE2 => simd_swizzle!(val, [1, 5, 6, 7, 0, 0, 0, 0]),
736-
0xE3 => simd_swizzle!(val, [0, 1, 5, 6, 7, 0, 0, 0]),
737-
0xE4 => simd_swizzle!(val, [2, 5, 6, 7, 0, 0, 0, 0]),
738-
0xE5 => simd_swizzle!(val, [0, 2, 5, 6, 7, 0, 0, 0]),
739-
0xE6 => simd_swizzle!(val, [1, 2, 5, 6, 7, 0, 0, 0]),
740-
0xE7 => simd_swizzle!(val, [0, 1, 2, 5, 6, 7, 0, 0]),
741-
0xE8 => simd_swizzle!(val, [3, 5, 6, 7, 0, 0, 0, 0]),
742-
0xE9 => simd_swizzle!(val, [0, 3, 5, 6, 7, 0, 0, 0]),
743-
0xEA => simd_swizzle!(val, [1, 3, 5, 6, 7, 0, 0, 0]),
744-
0xEB => simd_swizzle!(val, [0, 1, 3, 5, 6, 7, 0, 0]),
745-
0xEC => simd_swizzle!(val, [2, 3, 5, 6, 7, 0, 0, 0]),
746-
0xED => simd_swizzle!(val, [0, 2, 3, 5, 6, 7, 0, 0]),
747-
0xEE => simd_swizzle!(val, [1, 2, 3, 5, 6, 7, 0, 0]),
748-
0xEF => simd_swizzle!(val, [0, 1, 2, 3, 5, 6, 7, 0]),
749-
0xF0 => simd_swizzle!(val, [4, 5, 6, 7, 0, 0, 0, 0]),
750-
0xF1 => simd_swizzle!(val, [0, 4, 5, 6, 7, 0, 0, 0]),
751-
0xF2 => simd_swizzle!(val, [1, 4, 5, 6, 7, 0, 0, 0]),
752-
0xF3 => simd_swizzle!(val, [0, 1, 4, 5, 6, 7, 0, 0]),
753-
0xF4 => simd_swizzle!(val, [2, 4, 5, 6, 7, 0, 0, 0]),
754-
0xF5 => simd_swizzle!(val, [0, 2, 4, 5, 6, 7, 0, 0]),
755-
0xF6 => simd_swizzle!(val, [1, 2, 4, 5, 6, 7, 0, 0]),
756-
0xF7 => simd_swizzle!(val, [0, 1, 2, 4, 5, 6, 7, 0]),
757-
0xF8 => simd_swizzle!(val, [3, 4, 5, 6, 7, 0, 0, 0]),
758-
0xF9 => simd_swizzle!(val, [0, 3, 4, 5, 6, 7, 0, 0]),
759-
0xFA => simd_swizzle!(val, [1, 3, 4, 5, 6, 7, 0, 0]),
760-
0xFB => simd_swizzle!(val, [0, 1, 3, 4, 5, 6, 7, 0]),
761-
0xFC => simd_swizzle!(val, [2, 3, 4, 5, 6, 7, 0, 0]),
762-
0xFD => simd_swizzle!(val, [0, 2, 3, 4, 5, 6, 7, 0]),
763-
0xFE => simd_swizzle!(val, [1, 2, 3, 4, 5, 6, 7, 0]),
764-
0xFF => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 6, 7]),
765-
}
505+
static SWIZZLE_TABLE: [[u8; 16]; 256] = {
506+
let mut table = [[0; 16]; 256];
507+
let mut n = 0usize;
508+
while n < table.len() {
509+
let mut x = n;
510+
let mut i = 0;
511+
while x > 0 {
512+
let lsb = x.trailing_zeros() as u8;
513+
x ^= 1 << lsb;
514+
table[n][i] = lsb * 2; // first byte
515+
table[n][i + 1] = lsb * 2 + 1; // second byte
516+
i += 2;
517+
}
518+
n += 1;
519+
}
520+
table
521+
};
522+
523+
// Our swizzle table retains the order of the bytes in the 16 bit lanes, so we can
524+
// stick with native byte order as long as we convert back with native endianness too.
525+
let val_convert: u8x16 = val.to_ne_bytes();
526+
let swizzle_idxs = u8x16::from_array(SWIZZLE_TABLE[bitmask as usize]);
527+
528+
let swizzled: u8x16 = val_convert.swizzle_dyn(swizzle_idxs);
529+
u16x8::from_ne_bytes(swizzled)
766530
}

0 commit comments

Comments
 (0)