Skip to content

Commit 3e97256

Browse files
committed
Add checks for aarch64 and sse2 for simd_bitmask
1 parent 0950cf0 commit 3e97256

File tree

1 file changed

+83
-23
lines changed

1 file changed

+83
-23
lines changed

tests/assembly/simd-bitmask.rs

Lines changed: 83 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1-
//@ revisions: x86-avx2 x86-avx512
1+
//@ revisions: x86 x86-avx2 x86-avx512 aarch64
2+
//@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
3+
//@ [x86] needs-llvm-components: x86
24
//@ [x86-avx2] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
35
//@ [x86-avx2] compile-flags: -C target-feature=+avx2
46
//@ [x86-avx2] needs-llvm-components: x86
57
//@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
68
//@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
79
//@ [x86-avx512] needs-llvm-components: x86
10+
//@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
11+
//@ [aarch64] needs-llvm-components: aarch64
12+
//@ [aarch64] min-llvm-version: 18.0
813
//@ assembly-output: emit-asm
914
//@ compile-flags: --crate-type=lib -O
1015

@@ -19,20 +24,17 @@ pub trait Sized {}
1924
#[lang = "copy"]
2025
trait Copy {}
2126

22-
#[repr(simd)]
23-
pub struct i8x16([i8; 16]);
24-
2527
#[repr(simd)]
2628
pub struct m8x16([i8; 16]);
2729

2830
#[repr(simd)]
29-
pub struct i8x64([i8; 64]);
31+
pub struct m8x64([i8; 64]);
3032

3133
#[repr(simd)]
32-
pub struct m8x64([i8; 64]);
34+
pub struct m32x4([i32; 4]);
3335

3436
#[repr(simd)]
35-
pub struct f64x8([f64; 8]);
37+
pub struct m64x2([i64; 2]);
3638

3739
#[repr(simd)]
3840
pub struct m64x4([i64; 4]);
@@ -41,44 +43,102 @@ extern "rust-intrinsic" {
4143
fn simd_bitmask<V, B>(mask: V) -> B;
4244
}
4345

44-
// CHECK-LABEL: bitmask_i8x16
46+
// CHECK-LABEL: bitmask_m8x16
4547
#[no_mangle]
46-
pub unsafe extern "C" fn bitmask_i8x16(mask: m8x16) -> u16 {
47-
// simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
48-
// note that there is no byte shift, llvm uses a word shift to move the least significant bit
48+
pub unsafe extern "C" fn bitmask_m8x16(mask: m8x16) -> u16 {
49+
// The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
50+
// Note that x86 has no byte shift; LLVM uses a word shift to move the least significant bit
4951
// of each byte into the right position.
5052
//
53+
// x86-NOT: psllw
54+
// x86: movmskb eax, xmm0
55+
//
5156
// x86-avx2-NOT: vpsllw
5257
// x86-avx2: vpmovmskb eax, xmm0
5358
//
5459
// x86-avx512-NOT: vpsllw xmm0
5560
// x86-avx512: vpmovmskb eax, xmm0
61+
//
62+
// aarch64: adrp
63+
// aarch64-NEXT: cmlt
64+
// aarch64-NEXT: ldr
65+
// aarch64-NEXT: and
66+
// aarch64-NEXT: ext
67+
// aarch64-NEXT: zip1
68+
// aarch64-NEXT: addv
69+
// aarch64-NEXT: fmov
5670
simd_bitmask(mask)
5771
}
5872

59-
// CHECK-LABEL: bitmask_i8x64
73+
// CHECK-LABEL: bitmask_m8x64
6074
#[no_mangle]
61-
pub unsafe extern "C" fn bitmask_i8x64(mask: m8x64) -> u64 {
62-
// simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
63-
// note that there is no byte shift, llvm uses a word shift to move the least significant bit
75+
pub unsafe extern "C" fn bitmask_m8x64(mask: m8x64) -> u64 {
76+
// The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
77+
// Note that x86 has no byte shift; LLVM uses a word shift to move the least significant bit
6478
// of each byte into the right position.
6579
//
66-
// x86-avx2-NOT: vpsllw
67-
// x86-avx2: vpmovmskb
68-
// x86-avx2: shl
69-
// x86-avx2-NEXT: vpmovmskb
70-
// x86-avx2-NEXT: or
80+
// The parameter is a 512-bit vector, which in the C ABI is only valid for AVX-512 targets.
7181
//
7282
// x86-avx512-NOT: vpsllw
7383
// x86-avx512: vpmovb2m k0, zmm0
7484
// x86-avx512: kmovq rax, k0
7585
simd_bitmask(mask)
7686
}
7787

78-
// CHECK-LABEL: bitmask_f64x4
88+
// CHECK-LABEL: bitmask_m32x4
89+
#[no_mangle]
90+
pub unsafe extern "C" fn bitmask_m32x4(mask: m32x4) -> u8 {
91+
// The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
92+
//
93+
// x86-NOT: psllq
94+
// x86: movmskps eax, xmm0
95+
//
96+
// x86-avx2-NOT: vpsllq
97+
// x86-avx2: vmovmskps eax, xmm0
98+
//
99+
// x86-avx512-NOT: vpsllq
100+
// x86-avx512: vmovmskps eax, xmm0
101+
//
102+
// aarch64: adrp
103+
// aarch64-NEXT: cmlt
104+
// aarch64-NEXT: ldr
105+
// aarch64-NEXT: and
106+
// aarch64-NEXT: addv
107+
// aarch64-NEXT: fmov
108+
// aarch64-NEXT: and
109+
simd_bitmask(mask)
110+
}
111+
112+
// CHECK-LABEL: bitmask_m64x2
113+
#[no_mangle]
114+
pub unsafe extern "C" fn bitmask_m64x2(mask: m64x2) -> u8 {
115+
// The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
116+
//
117+
// x86-NOT: psllq
118+
// x86: movmskpd eax, xmm0
119+
//
120+
// x86-avx2-NOT: vpsllq
121+
// x86-avx2: vmovmskpd eax, xmm0
122+
//
123+
// x86-avx512-NOT: vpsllq
124+
// x86-avx512: vmovmskpd eax, xmm0
125+
//
126+
// aarch64: adrp
127+
// aarch64-NEXT: cmlt
128+
// aarch64-NEXT: ldr
129+
// aarch64-NEXT: and
130+
// aarch64-NEXT: addp
131+
// aarch64-NEXT: fmov
132+
// aarch64-NEXT: and
133+
simd_bitmask(mask)
134+
}
135+
136+
// CHECK-LABEL: bitmask_m64x4
79137
#[no_mangle]
80-
pub unsafe extern "C" fn bitmask_f64x4(mask: m64x4) -> u8 {
81-
// simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
138+
pub unsafe extern "C" fn bitmask_m64x4(mask: m64x4) -> u8 {
139+
// The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
140+
//
141+
// The parameter is a 256-bit vector, which in the C ABI is only valid for AVX/AVX-512 targets.
82142
//
83143
// x86-avx2-NOT: vpsllq
84144
// x86-avx2: vmovmskpd eax, ymm0

0 commit comments

Comments
 (0)