Skip to content

Commit 5424140

Browse files
Add SIMD shuffles for SimdType{2,4,8,16,32,64}
This const generic implementation for certain lane sizes offers a more limited interface than LLVM's shufflevector instruction can handle, since normally the length of U (the output vector) can differ from the length of T (the input vectors). It is nevertheless an interface whose capabilities are expected to be expandable in the future.
1 parent 9b6b5d7 commit 5424140

File tree

5 files changed

+64
-1
lines changed

5 files changed

+64
-1
lines changed

crates/core_simd/src/intrinsics.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,12 @@ extern "platform-intrinsic" {
5252
pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
5353
pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
5454
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
55+
56+
// shufflevector
57+
pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
58+
pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
59+
pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
60+
pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
61+
pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
62+
pub(crate) fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
5563
}

crates/core_simd/src/lib.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
11
#![no_std]
2-
#![feature(repr_simd, platform_intrinsics, link_llvm_intrinsics, simd_ffi)]
2+
#![allow(incomplete_features)]
3+
#![feature(
4+
repr_simd,
5+
platform_intrinsics,
6+
link_llvm_intrinsics,
7+
simd_ffi,
8+
const_generics
9+
)]
310
#![warn(missing_docs)]
411
//! Portable SIMD module.
512
613
#[macro_use]
714
mod macros;
15+
#[macro_use]
16+
mod permute;
817

918
mod fmt;
1019
mod intrinsics;

crates/core_simd/src/macros.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ macro_rules! impl_vector {
148148
Self::splat(value)
149149
}
150150
}
151+
152+
impl_shuffle_2pow_lanes!{ $name }
151153
}
152154
}
153155

crates/core_simd/src/permute.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Generates an inherent `shuffle` method on `$name<$n>` that forwards to a shuffle intrinsic.
//
// Arguments:
// - `$name`: identifier of the vector type the method is implemented on.
// - `$fn`: identifier of the function in `crate::intrinsics` that performs the shuffle.
// - `$n`: literal used both as the type's generic argument and as the length of `IDX`.
macro_rules! impl_shuffle_lane {
2+
{ $name:ident, $fn:ident, $n:literal } => {
3+
impl $name<$n> {
4+
/// A const SIMD shuffle that takes 2 SIMD vectors and produces another vector, using
5+
/// the indices in the const parameter. The first or "self" vector will have its lanes
6+
/// indexed from 0, and the second vector will have its first lane indexed at the lane
7+
/// count of the vectors (the length of `IDX`). Indices must be in-bounds of either vector at compile time.
8+
///
9+
/// Some SIMD shuffle instructions can be quite slow, so avoiding them by loading data
10+
/// into the desired patterns in advance is preferred, but shuffles are still faster
11+
/// than storing and reloading from memory.
12+
#[inline]
13+
pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self {
14+
// NOTE(review): `unsafe` is required because `$fn` is declared in an `extern` block;
// soundness presumably relies on out-of-range `IDX` values being rejected at compile
// time — confirm.
unsafe { crate::intrinsics::$fn(self, second, IDX) }
15+
}
16+
}
17+
}
18+
}
19+
20+
// Invokes `impl_shuffle_lane!` for `$name` once per power-of-two length from 2 to 64,
// pairing each length N with the `simd_shuffleN` intrinsic of the same width.
macro_rules! impl_shuffle_2pow_lanes {
21+
{ $name:ident } => {
22+
impl_shuffle_lane!{ $name, simd_shuffle2, 2 }
23+
impl_shuffle_lane!{ $name, simd_shuffle4, 4 }
24+
impl_shuffle_lane!{ $name, simd_shuffle8, 8 }
25+
impl_shuffle_lane!{ $name, simd_shuffle16, 16 }
26+
impl_shuffle_lane!{ $name, simd_shuffle32, 32 }
27+
impl_shuffle_lane!{ $name, simd_shuffle64, 64 }
28+
}
29+
}

crates/core_simd/tests/permute.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
use core_simd::SimdU32;
2+
3+
#[cfg(target_arch = "wasm32")]
4+
use wasm_bindgen_test::*;
5+
6+
#[cfg(target_arch = "wasm32")]
7+
wasm_bindgen_test_configure!(run_in_browser);
8+
9+
// Checks a 4-lane shuffle whose indices draw lanes from both operands.
#[test]
10+
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
11+
fn simple_shuffle() {
12+
let a = SimdU32::from_array([2, 4, 1, 9]);
13+
let b = a;
14+
// Indices 0..=3 select lanes of `a` and 4..=7 select lanes of `b`, so
// 3, 1 pick a[3], a[1] = 9, 4 and 4, 6 pick b[0], b[2] = 2, 1.
assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(b).to_array(), [9, 4, 2, 1]);
15+
}

0 commit comments

Comments
 (0)