|
13 | 13 | use super::scalar; |
14 | 14 | use core::simd::cmp::{SimdPartialEq, SimdPartialOrd}; |
15 | 15 | use core::simd::{ |
16 | | - mask16x8, simd_swizzle, u16x8, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, |
| 16 | + mask16x8, u16x8, u8x16, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, ToBytes, |
17 | 17 | }; |
18 | 18 |
|
19 | 19 | // a one-pass SSE union algorithm |
@@ -484,283 +484,47 @@ fn simd_merge_u16(a: Simd<u16, 8>, b: Simd<u16, 8>) -> [Simd<u16, 8>; 2] { |
484 | 484 | /// Move the values in `val` with the corresponding index in `bitmask` |
485 | 485 | /// set to the front of the return vector, preserving their order. |
486 | 486 | /// |
487 | | -/// This had to be implemented as a jump table to be portable, |
488 | | -/// as LLVM swizzle intrinsic only supports swizzle by a const |
489 | | -/// value. https://github.com/rust-lang/portable-simd/issues/11 |
490 | | -/// |
491 | 487 | /// The values in the return vector from index `bitmask.count_ones()` onward are unspecified. |
492 | | -/// |
493 | | -/// The masks can be constructed with the following snippet |
494 | | -/// ```ignore |
495 | | -/// for n in 0usize..256 { |
496 | | -/// let mut x = n; |
497 | | -/// let mut arr = [0; 8]; |
498 | | -/// let mut i = 0; |
499 | | -/// while x > 0 { |
500 | | -/// let lsb = x.trailing_zeros(); |
501 | | -/// arr[i] = lsb; |
502 | | -/// x ^= 1 << lsb; |
503 | | -/// i += 1; |
504 | | -/// } |
505 | | -/// } |
506 | | -/// ``` |
| 488 | +// Dynamic swizzle is only available for `u8`s. |
| 489 | +// |
| 490 | +// So we need to convert the `u16x8` to `u8x16`, and then swizzle it two byte lanes at a time. |
| 491 | +// |
| 492 | +// e.g. if `bitmask` is `0b0101`, then swizzle the first two bytes (the first u16 lane) to the |
| 493 | +// first two positions, and the 5th and 6th bytes (the third u16 lane) to the next two positions. |
| 494 | +// |
| 495 | +// Note however: |
| 496 | +// https://github.com/rust-lang/rust/blob/34097a38afc9efdedf776d3f1c84a190ff334886/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs#L12-L15 |
| 497 | +// > Note that the current implementation is selected during build-time |
| 498 | +// > of the standard library, so `cargo build -Zbuild-std` may be necessary |
| 499 | +// > to unlock better performance, especially for larger vectors. |
| 500 | +// > A planned compiler improvement will enable using `#[target_feature]` instead. |
| 501 | +// |
| 502 | +// For example, the default `x86_64` target does not enable SSSE3, so this may be |
| 503 | +// suboptimal without `-Zbuild-std` on `x86_64` targets. |
507 | 504 | pub fn swizzle_to_front(val: u16x8, bitmask: u8) -> u16x8 { |
508 | | - match bitmask { |
509 | | - 0x00 => simd_swizzle!(val, [0, 0, 0, 0, 0, 0, 0, 0]), |
510 | | - 0x01 => simd_swizzle!(val, [0, 0, 0, 0, 0, 0, 0, 0]), |
511 | | - 0x02 => simd_swizzle!(val, [1, 0, 0, 0, 0, 0, 0, 0]), |
512 | | - 0x03 => simd_swizzle!(val, [0, 1, 0, 0, 0, 0, 0, 0]), |
513 | | - 0x04 => simd_swizzle!(val, [2, 0, 0, 0, 0, 0, 0, 0]), |
514 | | - 0x05 => simd_swizzle!(val, [0, 2, 0, 0, 0, 0, 0, 0]), |
515 | | - 0x06 => simd_swizzle!(val, [1, 2, 0, 0, 0, 0, 0, 0]), |
516 | | - 0x07 => simd_swizzle!(val, [0, 1, 2, 0, 0, 0, 0, 0]), |
517 | | - 0x08 => simd_swizzle!(val, [3, 0, 0, 0, 0, 0, 0, 0]), |
518 | | - 0x09 => simd_swizzle!(val, [0, 3, 0, 0, 0, 0, 0, 0]), |
519 | | - 0x0A => simd_swizzle!(val, [1, 3, 0, 0, 0, 0, 0, 0]), |
520 | | - 0x0B => simd_swizzle!(val, [0, 1, 3, 0, 0, 0, 0, 0]), |
521 | | - 0x0C => simd_swizzle!(val, [2, 3, 0, 0, 0, 0, 0, 0]), |
522 | | - 0x0D => simd_swizzle!(val, [0, 2, 3, 0, 0, 0, 0, 0]), |
523 | | - 0x0E => simd_swizzle!(val, [1, 2, 3, 0, 0, 0, 0, 0]), |
524 | | - 0x0F => simd_swizzle!(val, [0, 1, 2, 3, 0, 0, 0, 0]), |
525 | | - 0x10 => simd_swizzle!(val, [4, 0, 0, 0, 0, 0, 0, 0]), |
526 | | - 0x11 => simd_swizzle!(val, [0, 4, 0, 0, 0, 0, 0, 0]), |
527 | | - 0x12 => simd_swizzle!(val, [1, 4, 0, 0, 0, 0, 0, 0]), |
528 | | - 0x13 => simd_swizzle!(val, [0, 1, 4, 0, 0, 0, 0, 0]), |
529 | | - 0x14 => simd_swizzle!(val, [2, 4, 0, 0, 0, 0, 0, 0]), |
530 | | - 0x15 => simd_swizzle!(val, [0, 2, 4, 0, 0, 0, 0, 0]), |
531 | | - 0x16 => simd_swizzle!(val, [1, 2, 4, 0, 0, 0, 0, 0]), |
532 | | - 0x17 => simd_swizzle!(val, [0, 1, 2, 4, 0, 0, 0, 0]), |
533 | | - 0x18 => simd_swizzle!(val, [3, 4, 0, 0, 0, 0, 0, 0]), |
534 | | - 0x19 => simd_swizzle!(val, [0, 3, 4, 0, 0, 0, 0, 0]), |
535 | | - 0x1A => simd_swizzle!(val, [1, 3, 4, 0, 0, 0, 0, 0]), |
536 | | - 0x1B => simd_swizzle!(val, [0, 1, 3, 4, 0, 0, 0, 0]), |
537 | | - 0x1C => simd_swizzle!(val, [2, 3, 4, 0, 0, 0, 0, 0]), |
538 | | - 0x1D => simd_swizzle!(val, [0, 2, 3, 4, 0, 0, 0, 0]), |
539 | | - 0x1E => simd_swizzle!(val, [1, 2, 3, 4, 0, 0, 0, 0]), |
540 | | - 0x1F => simd_swizzle!(val, [0, 1, 2, 3, 4, 0, 0, 0]), |
541 | | - 0x20 => simd_swizzle!(val, [5, 0, 0, 0, 0, 0, 0, 0]), |
542 | | - 0x21 => simd_swizzle!(val, [0, 5, 0, 0, 0, 0, 0, 0]), |
543 | | - 0x22 => simd_swizzle!(val, [1, 5, 0, 0, 0, 0, 0, 0]), |
544 | | - 0x23 => simd_swizzle!(val, [0, 1, 5, 0, 0, 0, 0, 0]), |
545 | | - 0x24 => simd_swizzle!(val, [2, 5, 0, 0, 0, 0, 0, 0]), |
546 | | - 0x25 => simd_swizzle!(val, [0, 2, 5, 0, 0, 0, 0, 0]), |
547 | | - 0x26 => simd_swizzle!(val, [1, 2, 5, 0, 0, 0, 0, 0]), |
548 | | - 0x27 => simd_swizzle!(val, [0, 1, 2, 5, 0, 0, 0, 0]), |
549 | | - 0x28 => simd_swizzle!(val, [3, 5, 0, 0, 0, 0, 0, 0]), |
550 | | - 0x29 => simd_swizzle!(val, [0, 3, 5, 0, 0, 0, 0, 0]), |
551 | | - 0x2A => simd_swizzle!(val, [1, 3, 5, 0, 0, 0, 0, 0]), |
552 | | - 0x2B => simd_swizzle!(val, [0, 1, 3, 5, 0, 0, 0, 0]), |
553 | | - 0x2C => simd_swizzle!(val, [2, 3, 5, 0, 0, 0, 0, 0]), |
554 | | - 0x2D => simd_swizzle!(val, [0, 2, 3, 5, 0, 0, 0, 0]), |
555 | | - 0x2E => simd_swizzle!(val, [1, 2, 3, 5, 0, 0, 0, 0]), |
556 | | - 0x2F => simd_swizzle!(val, [0, 1, 2, 3, 5, 0, 0, 0]), |
557 | | - 0x30 => simd_swizzle!(val, [4, 5, 0, 0, 0, 0, 0, 0]), |
558 | | - 0x31 => simd_swizzle!(val, [0, 4, 5, 0, 0, 0, 0, 0]), |
559 | | - 0x32 => simd_swizzle!(val, [1, 4, 5, 0, 0, 0, 0, 0]), |
560 | | - 0x33 => simd_swizzle!(val, [0, 1, 4, 5, 0, 0, 0, 0]), |
561 | | - 0x34 => simd_swizzle!(val, [2, 4, 5, 0, 0, 0, 0, 0]), |
562 | | - 0x35 => simd_swizzle!(val, [0, 2, 4, 5, 0, 0, 0, 0]), |
563 | | - 0x36 => simd_swizzle!(val, [1, 2, 4, 5, 0, 0, 0, 0]), |
564 | | - 0x37 => simd_swizzle!(val, [0, 1, 2, 4, 5, 0, 0, 0]), |
565 | | - 0x38 => simd_swizzle!(val, [3, 4, 5, 0, 0, 0, 0, 0]), |
566 | | - 0x39 => simd_swizzle!(val, [0, 3, 4, 5, 0, 0, 0, 0]), |
567 | | - 0x3A => simd_swizzle!(val, [1, 3, 4, 5, 0, 0, 0, 0]), |
568 | | - 0x3B => simd_swizzle!(val, [0, 1, 3, 4, 5, 0, 0, 0]), |
569 | | - 0x3C => simd_swizzle!(val, [2, 3, 4, 5, 0, 0, 0, 0]), |
570 | | - 0x3D => simd_swizzle!(val, [0, 2, 3, 4, 5, 0, 0, 0]), |
571 | | - 0x3E => simd_swizzle!(val, [1, 2, 3, 4, 5, 0, 0, 0]), |
572 | | - 0x3F => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 0, 0]), |
573 | | - 0x40 => simd_swizzle!(val, [6, 0, 0, 0, 0, 0, 0, 0]), |
574 | | - 0x41 => simd_swizzle!(val, [0, 6, 0, 0, 0, 0, 0, 0]), |
575 | | - 0x42 => simd_swizzle!(val, [1, 6, 0, 0, 0, 0, 0, 0]), |
576 | | - 0x43 => simd_swizzle!(val, [0, 1, 6, 0, 0, 0, 0, 0]), |
577 | | - 0x44 => simd_swizzle!(val, [2, 6, 0, 0, 0, 0, 0, 0]), |
578 | | - 0x45 => simd_swizzle!(val, [0, 2, 6, 0, 0, 0, 0, 0]), |
579 | | - 0x46 => simd_swizzle!(val, [1, 2, 6, 0, 0, 0, 0, 0]), |
580 | | - 0x47 => simd_swizzle!(val, [0, 1, 2, 6, 0, 0, 0, 0]), |
581 | | - 0x48 => simd_swizzle!(val, [3, 6, 0, 0, 0, 0, 0, 0]), |
582 | | - 0x49 => simd_swizzle!(val, [0, 3, 6, 0, 0, 0, 0, 0]), |
583 | | - 0x4A => simd_swizzle!(val, [1, 3, 6, 0, 0, 0, 0, 0]), |
584 | | - 0x4B => simd_swizzle!(val, [0, 1, 3, 6, 0, 0, 0, 0]), |
585 | | - 0x4C => simd_swizzle!(val, [2, 3, 6, 0, 0, 0, 0, 0]), |
586 | | - 0x4D => simd_swizzle!(val, [0, 2, 3, 6, 0, 0, 0, 0]), |
587 | | - 0x4E => simd_swizzle!(val, [1, 2, 3, 6, 0, 0, 0, 0]), |
588 | | - 0x4F => simd_swizzle!(val, [0, 1, 2, 3, 6, 0, 0, 0]), |
589 | | - 0x50 => simd_swizzle!(val, [4, 6, 0, 0, 0, 0, 0, 0]), |
590 | | - 0x51 => simd_swizzle!(val, [0, 4, 6, 0, 0, 0, 0, 0]), |
591 | | - 0x52 => simd_swizzle!(val, [1, 4, 6, 0, 0, 0, 0, 0]), |
592 | | - 0x53 => simd_swizzle!(val, [0, 1, 4, 6, 0, 0, 0, 0]), |
593 | | - 0x54 => simd_swizzle!(val, [2, 4, 6, 0, 0, 0, 0, 0]), |
594 | | - 0x55 => simd_swizzle!(val, [0, 2, 4, 6, 0, 0, 0, 0]), |
595 | | - 0x56 => simd_swizzle!(val, [1, 2, 4, 6, 0, 0, 0, 0]), |
596 | | - 0x57 => simd_swizzle!(val, [0, 1, 2, 4, 6, 0, 0, 0]), |
597 | | - 0x58 => simd_swizzle!(val, [3, 4, 6, 0, 0, 0, 0, 0]), |
598 | | - 0x59 => simd_swizzle!(val, [0, 3, 4, 6, 0, 0, 0, 0]), |
599 | | - 0x5A => simd_swizzle!(val, [1, 3, 4, 6, 0, 0, 0, 0]), |
600 | | - 0x5B => simd_swizzle!(val, [0, 1, 3, 4, 6, 0, 0, 0]), |
601 | | - 0x5C => simd_swizzle!(val, [2, 3, 4, 6, 0, 0, 0, 0]), |
602 | | - 0x5D => simd_swizzle!(val, [0, 2, 3, 4, 6, 0, 0, 0]), |
603 | | - 0x5E => simd_swizzle!(val, [1, 2, 3, 4, 6, 0, 0, 0]), |
604 | | - 0x5F => simd_swizzle!(val, [0, 1, 2, 3, 4, 6, 0, 0]), |
605 | | - 0x60 => simd_swizzle!(val, [5, 6, 0, 0, 0, 0, 0, 0]), |
606 | | - 0x61 => simd_swizzle!(val, [0, 5, 6, 0, 0, 0, 0, 0]), |
607 | | - 0x62 => simd_swizzle!(val, [1, 5, 6, 0, 0, 0, 0, 0]), |
608 | | - 0x63 => simd_swizzle!(val, [0, 1, 5, 6, 0, 0, 0, 0]), |
609 | | - 0x64 => simd_swizzle!(val, [2, 5, 6, 0, 0, 0, 0, 0]), |
610 | | - 0x65 => simd_swizzle!(val, [0, 2, 5, 6, 0, 0, 0, 0]), |
611 | | - 0x66 => simd_swizzle!(val, [1, 2, 5, 6, 0, 0, 0, 0]), |
612 | | - 0x67 => simd_swizzle!(val, [0, 1, 2, 5, 6, 0, 0, 0]), |
613 | | - 0x68 => simd_swizzle!(val, [3, 5, 6, 0, 0, 0, 0, 0]), |
614 | | - 0x69 => simd_swizzle!(val, [0, 3, 5, 6, 0, 0, 0, 0]), |
615 | | - 0x6A => simd_swizzle!(val, [1, 3, 5, 6, 0, 0, 0, 0]), |
616 | | - 0x6B => simd_swizzle!(val, [0, 1, 3, 5, 6, 0, 0, 0]), |
617 | | - 0x6C => simd_swizzle!(val, [2, 3, 5, 6, 0, 0, 0, 0]), |
618 | | - 0x6D => simd_swizzle!(val, [0, 2, 3, 5, 6, 0, 0, 0]), |
619 | | - 0x6E => simd_swizzle!(val, [1, 2, 3, 5, 6, 0, 0, 0]), |
620 | | - 0x6F => simd_swizzle!(val, [0, 1, 2, 3, 5, 6, 0, 0]), |
621 | | - 0x70 => simd_swizzle!(val, [4, 5, 6, 0, 0, 0, 0, 0]), |
622 | | - 0x71 => simd_swizzle!(val, [0, 4, 5, 6, 0, 0, 0, 0]), |
623 | | - 0x72 => simd_swizzle!(val, [1, 4, 5, 6, 0, 0, 0, 0]), |
624 | | - 0x73 => simd_swizzle!(val, [0, 1, 4, 5, 6, 0, 0, 0]), |
625 | | - 0x74 => simd_swizzle!(val, [2, 4, 5, 6, 0, 0, 0, 0]), |
626 | | - 0x75 => simd_swizzle!(val, [0, 2, 4, 5, 6, 0, 0, 0]), |
627 | | - 0x76 => simd_swizzle!(val, [1, 2, 4, 5, 6, 0, 0, 0]), |
628 | | - 0x77 => simd_swizzle!(val, [0, 1, 2, 4, 5, 6, 0, 0]), |
629 | | - 0x78 => simd_swizzle!(val, [3, 4, 5, 6, 0, 0, 0, 0]), |
630 | | - 0x79 => simd_swizzle!(val, [0, 3, 4, 5, 6, 0, 0, 0]), |
631 | | - 0x7A => simd_swizzle!(val, [1, 3, 4, 5, 6, 0, 0, 0]), |
632 | | - 0x7B => simd_swizzle!(val, [0, 1, 3, 4, 5, 6, 0, 0]), |
633 | | - 0x7C => simd_swizzle!(val, [2, 3, 4, 5, 6, 0, 0, 0]), |
634 | | - 0x7D => simd_swizzle!(val, [0, 2, 3, 4, 5, 6, 0, 0]), |
635 | | - 0x7E => simd_swizzle!(val, [1, 2, 3, 4, 5, 6, 0, 0]), |
636 | | - 0x7F => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 6, 0]), |
637 | | - 0x80 => simd_swizzle!(val, [7, 0, 0, 0, 0, 0, 0, 0]), |
638 | | - 0x81 => simd_swizzle!(val, [0, 7, 0, 0, 0, 0, 0, 0]), |
639 | | - 0x82 => simd_swizzle!(val, [1, 7, 0, 0, 0, 0, 0, 0]), |
640 | | - 0x83 => simd_swizzle!(val, [0, 1, 7, 0, 0, 0, 0, 0]), |
641 | | - 0x84 => simd_swizzle!(val, [2, 7, 0, 0, 0, 0, 0, 0]), |
642 | | - 0x85 => simd_swizzle!(val, [0, 2, 7, 0, 0, 0, 0, 0]), |
643 | | - 0x86 => simd_swizzle!(val, [1, 2, 7, 0, 0, 0, 0, 0]), |
644 | | - 0x87 => simd_swizzle!(val, [0, 1, 2, 7, 0, 0, 0, 0]), |
645 | | - 0x88 => simd_swizzle!(val, [3, 7, 0, 0, 0, 0, 0, 0]), |
646 | | - 0x89 => simd_swizzle!(val, [0, 3, 7, 0, 0, 0, 0, 0]), |
647 | | - 0x8A => simd_swizzle!(val, [1, 3, 7, 0, 0, 0, 0, 0]), |
648 | | - 0x8B => simd_swizzle!(val, [0, 1, 3, 7, 0, 0, 0, 0]), |
649 | | - 0x8C => simd_swizzle!(val, [2, 3, 7, 0, 0, 0, 0, 0]), |
650 | | - 0x8D => simd_swizzle!(val, [0, 2, 3, 7, 0, 0, 0, 0]), |
651 | | - 0x8E => simd_swizzle!(val, [1, 2, 3, 7, 0, 0, 0, 0]), |
652 | | - 0x8F => simd_swizzle!(val, [0, 1, 2, 3, 7, 0, 0, 0]), |
653 | | - 0x90 => simd_swizzle!(val, [4, 7, 0, 0, 0, 0, 0, 0]), |
654 | | - 0x91 => simd_swizzle!(val, [0, 4, 7, 0, 0, 0, 0, 0]), |
655 | | - 0x92 => simd_swizzle!(val, [1, 4, 7, 0, 0, 0, 0, 0]), |
656 | | - 0x93 => simd_swizzle!(val, [0, 1, 4, 7, 0, 0, 0, 0]), |
657 | | - 0x94 => simd_swizzle!(val, [2, 4, 7, 0, 0, 0, 0, 0]), |
658 | | - 0x95 => simd_swizzle!(val, [0, 2, 4, 7, 0, 0, 0, 0]), |
659 | | - 0x96 => simd_swizzle!(val, [1, 2, 4, 7, 0, 0, 0, 0]), |
660 | | - 0x97 => simd_swizzle!(val, [0, 1, 2, 4, 7, 0, 0, 0]), |
661 | | - 0x98 => simd_swizzle!(val, [3, 4, 7, 0, 0, 0, 0, 0]), |
662 | | - 0x99 => simd_swizzle!(val, [0, 3, 4, 7, 0, 0, 0, 0]), |
663 | | - 0x9A => simd_swizzle!(val, [1, 3, 4, 7, 0, 0, 0, 0]), |
664 | | - 0x9B => simd_swizzle!(val, [0, 1, 3, 4, 7, 0, 0, 0]), |
665 | | - 0x9C => simd_swizzle!(val, [2, 3, 4, 7, 0, 0, 0, 0]), |
666 | | - 0x9D => simd_swizzle!(val, [0, 2, 3, 4, 7, 0, 0, 0]), |
667 | | - 0x9E => simd_swizzle!(val, [1, 2, 3, 4, 7, 0, 0, 0]), |
668 | | - 0x9F => simd_swizzle!(val, [0, 1, 2, 3, 4, 7, 0, 0]), |
669 | | - 0xA0 => simd_swizzle!(val, [5, 7, 0, 0, 0, 0, 0, 0]), |
670 | | - 0xA1 => simd_swizzle!(val, [0, 5, 7, 0, 0, 0, 0, 0]), |
671 | | - 0xA2 => simd_swizzle!(val, [1, 5, 7, 0, 0, 0, 0, 0]), |
672 | | - 0xA3 => simd_swizzle!(val, [0, 1, 5, 7, 0, 0, 0, 0]), |
673 | | - 0xA4 => simd_swizzle!(val, [2, 5, 7, 0, 0, 0, 0, 0]), |
674 | | - 0xA5 => simd_swizzle!(val, [0, 2, 5, 7, 0, 0, 0, 0]), |
675 | | - 0xA6 => simd_swizzle!(val, [1, 2, 5, 7, 0, 0, 0, 0]), |
676 | | - 0xA7 => simd_swizzle!(val, [0, 1, 2, 5, 7, 0, 0, 0]), |
677 | | - 0xA8 => simd_swizzle!(val, [3, 5, 7, 0, 0, 0, 0, 0]), |
678 | | - 0xA9 => simd_swizzle!(val, [0, 3, 5, 7, 0, 0, 0, 0]), |
679 | | - 0xAA => simd_swizzle!(val, [1, 3, 5, 7, 0, 0, 0, 0]), |
680 | | - 0xAB => simd_swizzle!(val, [0, 1, 3, 5, 7, 0, 0, 0]), |
681 | | - 0xAC => simd_swizzle!(val, [2, 3, 5, 7, 0, 0, 0, 0]), |
682 | | - 0xAD => simd_swizzle!(val, [0, 2, 3, 5, 7, 0, 0, 0]), |
683 | | - 0xAE => simd_swizzle!(val, [1, 2, 3, 5, 7, 0, 0, 0]), |
684 | | - 0xAF => simd_swizzle!(val, [0, 1, 2, 3, 5, 7, 0, 0]), |
685 | | - 0xB0 => simd_swizzle!(val, [4, 5, 7, 0, 0, 0, 0, 0]), |
686 | | - 0xB1 => simd_swizzle!(val, [0, 4, 5, 7, 0, 0, 0, 0]), |
687 | | - 0xB2 => simd_swizzle!(val, [1, 4, 5, 7, 0, 0, 0, 0]), |
688 | | - 0xB3 => simd_swizzle!(val, [0, 1, 4, 5, 7, 0, 0, 0]), |
689 | | - 0xB4 => simd_swizzle!(val, [2, 4, 5, 7, 0, 0, 0, 0]), |
690 | | - 0xB5 => simd_swizzle!(val, [0, 2, 4, 5, 7, 0, 0, 0]), |
691 | | - 0xB6 => simd_swizzle!(val, [1, 2, 4, 5, 7, 0, 0, 0]), |
692 | | - 0xB7 => simd_swizzle!(val, [0, 1, 2, 4, 5, 7, 0, 0]), |
693 | | - 0xB8 => simd_swizzle!(val, [3, 4, 5, 7, 0, 0, 0, 0]), |
694 | | - 0xB9 => simd_swizzle!(val, [0, 3, 4, 5, 7, 0, 0, 0]), |
695 | | - 0xBA => simd_swizzle!(val, [1, 3, 4, 5, 7, 0, 0, 0]), |
696 | | - 0xBB => simd_swizzle!(val, [0, 1, 3, 4, 5, 7, 0, 0]), |
697 | | - 0xBC => simd_swizzle!(val, [2, 3, 4, 5, 7, 0, 0, 0]), |
698 | | - 0xBD => simd_swizzle!(val, [0, 2, 3, 4, 5, 7, 0, 0]), |
699 | | - 0xBE => simd_swizzle!(val, [1, 2, 3, 4, 5, 7, 0, 0]), |
700 | | - 0xBF => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 7, 0]), |
701 | | - 0xC0 => simd_swizzle!(val, [6, 7, 0, 0, 0, 0, 0, 0]), |
702 | | - 0xC1 => simd_swizzle!(val, [0, 6, 7, 0, 0, 0, 0, 0]), |
703 | | - 0xC2 => simd_swizzle!(val, [1, 6, 7, 0, 0, 0, 0, 0]), |
704 | | - 0xC3 => simd_swizzle!(val, [0, 1, 6, 7, 0, 0, 0, 0]), |
705 | | - 0xC4 => simd_swizzle!(val, [2, 6, 7, 0, 0, 0, 0, 0]), |
706 | | - 0xC5 => simd_swizzle!(val, [0, 2, 6, 7, 0, 0, 0, 0]), |
707 | | - 0xC6 => simd_swizzle!(val, [1, 2, 6, 7, 0, 0, 0, 0]), |
708 | | - 0xC7 => simd_swizzle!(val, [0, 1, 2, 6, 7, 0, 0, 0]), |
709 | | - 0xC8 => simd_swizzle!(val, [3, 6, 7, 0, 0, 0, 0, 0]), |
710 | | - 0xC9 => simd_swizzle!(val, [0, 3, 6, 7, 0, 0, 0, 0]), |
711 | | - 0xCA => simd_swizzle!(val, [1, 3, 6, 7, 0, 0, 0, 0]), |
712 | | - 0xCB => simd_swizzle!(val, [0, 1, 3, 6, 7, 0, 0, 0]), |
713 | | - 0xCC => simd_swizzle!(val, [2, 3, 6, 7, 0, 0, 0, 0]), |
714 | | - 0xCD => simd_swizzle!(val, [0, 2, 3, 6, 7, 0, 0, 0]), |
715 | | - 0xCE => simd_swizzle!(val, [1, 2, 3, 6, 7, 0, 0, 0]), |
716 | | - 0xCF => simd_swizzle!(val, [0, 1, 2, 3, 6, 7, 0, 0]), |
717 | | - 0xD0 => simd_swizzle!(val, [4, 6, 7, 0, 0, 0, 0, 0]), |
718 | | - 0xD1 => simd_swizzle!(val, [0, 4, 6, 7, 0, 0, 0, 0]), |
719 | | - 0xD2 => simd_swizzle!(val, [1, 4, 6, 7, 0, 0, 0, 0]), |
720 | | - 0xD3 => simd_swizzle!(val, [0, 1, 4, 6, 7, 0, 0, 0]), |
721 | | - 0xD4 => simd_swizzle!(val, [2, 4, 6, 7, 0, 0, 0, 0]), |
722 | | - 0xD5 => simd_swizzle!(val, [0, 2, 4, 6, 7, 0, 0, 0]), |
723 | | - 0xD6 => simd_swizzle!(val, [1, 2, 4, 6, 7, 0, 0, 0]), |
724 | | - 0xD7 => simd_swizzle!(val, [0, 1, 2, 4, 6, 7, 0, 0]), |
725 | | - 0xD8 => simd_swizzle!(val, [3, 4, 6, 7, 0, 0, 0, 0]), |
726 | | - 0xD9 => simd_swizzle!(val, [0, 3, 4, 6, 7, 0, 0, 0]), |
727 | | - 0xDA => simd_swizzle!(val, [1, 3, 4, 6, 7, 0, 0, 0]), |
728 | | - 0xDB => simd_swizzle!(val, [0, 1, 3, 4, 6, 7, 0, 0]), |
729 | | - 0xDC => simd_swizzle!(val, [2, 3, 4, 6, 7, 0, 0, 0]), |
730 | | - 0xDD => simd_swizzle!(val, [0, 2, 3, 4, 6, 7, 0, 0]), |
731 | | - 0xDE => simd_swizzle!(val, [1, 2, 3, 4, 6, 7, 0, 0]), |
732 | | - 0xDF => simd_swizzle!(val, [0, 1, 2, 3, 4, 6, 7, 0]), |
733 | | - 0xE0 => simd_swizzle!(val, [5, 6, 7, 0, 0, 0, 0, 0]), |
734 | | - 0xE1 => simd_swizzle!(val, [0, 5, 6, 7, 0, 0, 0, 0]), |
735 | | - 0xE2 => simd_swizzle!(val, [1, 5, 6, 7, 0, 0, 0, 0]), |
736 | | - 0xE3 => simd_swizzle!(val, [0, 1, 5, 6, 7, 0, 0, 0]), |
737 | | - 0xE4 => simd_swizzle!(val, [2, 5, 6, 7, 0, 0, 0, 0]), |
738 | | - 0xE5 => simd_swizzle!(val, [0, 2, 5, 6, 7, 0, 0, 0]), |
739 | | - 0xE6 => simd_swizzle!(val, [1, 2, 5, 6, 7, 0, 0, 0]), |
740 | | - 0xE7 => simd_swizzle!(val, [0, 1, 2, 5, 6, 7, 0, 0]), |
741 | | - 0xE8 => simd_swizzle!(val, [3, 5, 6, 7, 0, 0, 0, 0]), |
742 | | - 0xE9 => simd_swizzle!(val, [0, 3, 5, 6, 7, 0, 0, 0]), |
743 | | - 0xEA => simd_swizzle!(val, [1, 3, 5, 6, 7, 0, 0, 0]), |
744 | | - 0xEB => simd_swizzle!(val, [0, 1, 3, 5, 6, 7, 0, 0]), |
745 | | - 0xEC => simd_swizzle!(val, [2, 3, 5, 6, 7, 0, 0, 0]), |
746 | | - 0xED => simd_swizzle!(val, [0, 2, 3, 5, 6, 7, 0, 0]), |
747 | | - 0xEE => simd_swizzle!(val, [1, 2, 3, 5, 6, 7, 0, 0]), |
748 | | - 0xEF => simd_swizzle!(val, [0, 1, 2, 3, 5, 6, 7, 0]), |
749 | | - 0xF0 => simd_swizzle!(val, [4, 5, 6, 7, 0, 0, 0, 0]), |
750 | | - 0xF1 => simd_swizzle!(val, [0, 4, 5, 6, 7, 0, 0, 0]), |
751 | | - 0xF2 => simd_swizzle!(val, [1, 4, 5, 6, 7, 0, 0, 0]), |
752 | | - 0xF3 => simd_swizzle!(val, [0, 1, 4, 5, 6, 7, 0, 0]), |
753 | | - 0xF4 => simd_swizzle!(val, [2, 4, 5, 6, 7, 0, 0, 0]), |
754 | | - 0xF5 => simd_swizzle!(val, [0, 2, 4, 5, 6, 7, 0, 0]), |
755 | | - 0xF6 => simd_swizzle!(val, [1, 2, 4, 5, 6, 7, 0, 0]), |
756 | | - 0xF7 => simd_swizzle!(val, [0, 1, 2, 4, 5, 6, 7, 0]), |
757 | | - 0xF8 => simd_swizzle!(val, [3, 4, 5, 6, 7, 0, 0, 0]), |
758 | | - 0xF9 => simd_swizzle!(val, [0, 3, 4, 5, 6, 7, 0, 0]), |
759 | | - 0xFA => simd_swizzle!(val, [1, 3, 4, 5, 6, 7, 0, 0]), |
760 | | - 0xFB => simd_swizzle!(val, [0, 1, 3, 4, 5, 6, 7, 0]), |
761 | | - 0xFC => simd_swizzle!(val, [2, 3, 4, 5, 6, 7, 0, 0]), |
762 | | - 0xFD => simd_swizzle!(val, [0, 2, 3, 4, 5, 6, 7, 0]), |
763 | | - 0xFE => simd_swizzle!(val, [1, 2, 3, 4, 5, 6, 7, 0]), |
764 | | - 0xFF => simd_swizzle!(val, [0, 1, 2, 3, 4, 5, 6, 7]), |
765 | | - } |
| 505 | + static SWIZZLE_TABLE: [[u8; 16]; 256] = { |
| 506 | + let mut table = [[0; 16]; 256]; |
| 507 | + let mut n = 0usize; |
| 508 | + while n < table.len() { |
| 509 | + let mut x = n; |
| 510 | + let mut i = 0; |
| 511 | + while x > 0 { |
| 512 | + let lsb = x.trailing_zeros() as u8; |
| 513 | + x ^= 1 << lsb; |
| 514 | + table[n][i] = lsb * 2; // first byte |
| 515 | + table[n][i + 1] = lsb * 2 + 1; // second byte |
| 516 | + i += 2; |
| 517 | + } |
| 518 | + n += 1; |
| 519 | + } |
| 520 | + table |
| 521 | + }; |
| 522 | + |
| 523 | + // Our swizzle table retains the order of the bytes within each 16-bit lane, so we can |
| 524 | + // stick with native byte order as long as we convert back with native endianness too. |
| 525 | + let val_convert: u8x16 = val.to_ne_bytes(); |
| 526 | + let swizzle_idxs = u8x16::from_array(SWIZZLE_TABLE[bitmask as usize]); |
| 527 | + |
| 528 | + let swizzled: u8x16 = val_convert.swizzle_dyn(swizzle_idxs); |
| 529 | + u16x8::from_ne_bytes(swizzled) |
766 | 530 | } |
0 commit comments