- //@ revisions: x86-avx2 x86-avx512
+ //@ revisions: x86 x86-avx2 x86-avx512 aarch64
+ //@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
+ //@ [x86] needs-llvm-components: x86
//@ [x86-avx2] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
//@ [x86-avx2] compile-flags: -C target-feature=+avx2
//@ [x86-avx2] needs-llvm-components: x86
//@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
//@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
//@ [x86-avx512] needs-llvm-components: x86
+ //@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
+ //@ [aarch64] needs-llvm-components: aarch64
+ //@ [aarch64] min-llvm-version: 18.0
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -O
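Taken together, the directives for one revision amount to an ordinary rustc invocation on this file. As a rough sketch only (compiletest adds further bookkeeping flags, and the file name here is just illustrative), the x86-avx512 revision compiles roughly as:

rustc --crate-type=lib -O --emit=asm \
    --target=x86_64-unknown-linux-gnu \
    -C llvm-args=-x86-asm-syntax=intel \
    -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq \
    simd-bitmask.rs

The revision-specific FileCheck prefixes (x86, x86-avx2, x86-avx512, aarch64) then select which of the check comments below are matched against the emitted assembly.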
@@ -19,20 +24,17 @@ pub trait Sized {}
#[lang = "copy"]
trait Copy {}

- #[repr(simd)]
- pub struct i8x16([i8; 16]);
-
#[repr(simd)]
pub struct m8x16([i8; 16]);

#[repr(simd)]
- pub struct i8x64([i8; 64]);
+ pub struct m8x64([i8; 64]);

#[repr(simd)]
- pub struct m8x64([i8; 64]);
+ pub struct m32x4([i32; 4]);

#[repr(simd)]
- pub struct f64x8([f64; 8]);
+ pub struct m64x2([i64; 2]);

#[repr(simd)]
pub struct m64x4([i64; 4]);
@@ -41,44 +43,102 @@ extern "rust-intrinsic" {
    fn simd_bitmask<V, B>(mask: V) -> B;
}

- // CHECK-LABEL: bitmask_i8x16
+ // CHECK-LABEL: bitmask_m8x16
#[no_mangle]
- pub unsafe extern "C" fn bitmask_i8x16(mask: m8x16) -> u16 {
-     // simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
-     // note that there is no byte shift, llvm uses a word shift to move the least significant bit
+ pub unsafe extern "C" fn bitmask_m8x16(mask: m8x16) -> u16 {
+     // The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
+     // Note that x86 has no byte shift; llvm uses a word shift to move the least significant bit
    // of each byte into the right position.
    //
+     // x86-NOT: psllw
+     // x86: movmskb eax, xmm0
+     //
    // x86-avx2-NOT: vpsllw
    // x86-avx2: vpmovmskb eax, xmm0
    //
    // x86-avx512-NOT: vpsllw xmm0
    // x86-avx512: vpmovmskb eax, xmm0
+     //
+     // aarch64: adrp
+     // aarch64-NEXT: cmlt
+     // aarch64-NEXT: ldr
+     // aarch64-NEXT: and
+     // aarch64-NEXT: ext
+     // aarch64-NEXT: zip1
+     // aarch64-NEXT: addv
+     // aarch64-NEXT: fmov
    simd_bitmask(mask)
}

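The "most significant bit" comment is easier to follow against a plain scalar model of what simd_bitmask computes. The sketch below is an illustration only, not the intrinsic's implementation; the helper name and the assumption that lane 0 maps to the least significant result bit (the ordering movmskb produces on these little-endian targets) are introduced just for the example.

// Hypothetical scalar model of simd_bitmask for a 16-lane byte mask (illustration only).
// Each lane contributes one result bit, taken from that lane's most significant bit,
// which is why a set mask lane must be all-ones (-1) and no extra shift is required.
fn bitmask_scalar_model(lanes: [i8; 16]) -> u16 {
    let mut out = 0u16;
    for (i, lane) in lanes.iter().enumerate() {
        out |= (((*lane as u8) >> 7) as u16) << i; // MSB of lane i becomes bit i
    }
    out
}

fn main() {
    let mut lanes = [0i8; 16];
    lanes[0] = -1; // lane 0 set
    lanes[3] = -1; // lane 3 set
    assert_eq!(bitmask_scalar_model(lanes), 0b1001);
}

Because a mask lane is either all-zeros or all-ones, the sign bit already carries the whole answer, which is exactly why a single movmskb/vpmovmskb with no preceding psllw is enough.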
- // CHECK-LABEL: bitmask_i8x64
+ // CHECK-LABEL: bitmask_m8x64
#[no_mangle]
- pub unsafe extern "C" fn bitmask_i8x64(mask: m8x64) -> u64 {
-     // simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
-     // note that there is no byte shift, llvm uses a word shift to move the least significant bit
+ pub unsafe extern "C" fn bitmask_m8x64(mask: m8x64) -> u64 {
+     // The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
+     // Note that x86 has no byte shift; llvm uses a word shift to move the least significant bit
    // of each byte into the right position.
    //
-     // x86-avx2-NOT: vpsllw
-     // x86-avx2: vpmovmskb
-     // x86-avx2: shl
-     // x86-avx2-NEXT: vpmovmskb
-     // x86-avx2-NEXT: or
+     // The parameter is a 512-bit vector which in the C ABI is only valid for avx512 targets.
    //
    // x86-avx512-NOT: vpsllw
    // x86-avx512: vpmovb2m k0, zmm0
    // x86-avx512: kmovq rax, k0
    simd_bitmask(mask)
}

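For the 64-lane case the avx512 checks expect vpmovb2m followed by kmovq. A rough equivalent expressed with the core::arch intrinsics is sketched below; it assumes an x86_64 target with AVX-512BW and a toolchain on which these AVX-512 intrinsics are usable (they spent a long time behind a nightly feature gate), and the wrapper name is made up for the example.

// Rough sketch under the assumptions above. _mm512_movepi8_mask collects the sign bit
// of each of the 64 bytes into a mask register (the vpmovb2m the test looks for), and
// returning it as a plain u64 corresponds to the kmovq move into a general-purpose register.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw")]
unsafe fn movemask_512(v: core::arch::x86_64::__m512i) -> u64 {
    core::arch::x86_64::_mm512_movepi8_mask(v)
}

The 512-bit argument also illustrates the ABI note in the comment: the value only arrives in a single zmm register when the avx512 features are enabled, so the old avx2 checks for this function no longer apply.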
- // CHECK-LABEL: bitmask_f64x4
+ // CHECK-LABEL: bitmask_m32x4
+ #[no_mangle]
+ pub unsafe extern "C" fn bitmask_m32x4(mask: m32x4) -> u8 {
+     // The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
+     //
+     // x86-NOT: psllq
+     // x86: movmskps eax, xmm0
+     //
+     // x86-avx2-NOT: vpsllq
+     // x86-avx2: vmovmskps eax, xmm0
+     //
+     // x86-avx512-NOT: vpsllq
+     // x86-avx512: vmovmskps eax, xmm0
+     //
+     // aarch64: adrp
+     // aarch64-NEXT: cmlt
+     // aarch64-NEXT: ldr
+     // aarch64-NEXT: and
+     // aarch64-NEXT: addv
+     // aarch64-NEXT: fmov
+     // aarch64-NEXT: and
+     simd_bitmask(mask)
+ }
+
+ // CHECK-LABEL: bitmask_m64x2
+ #[no_mangle]
+ pub unsafe extern "C" fn bitmask_m64x2(mask: m64x2) -> u8 {
+     // The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
+     //
+     // x86-NOT: psllq
+     // x86: movmskpd eax, xmm0
+     //
+     // x86-avx2-NOT: vpsllq
+     // x86-avx2: vmovmskpd eax, xmm0
+     //
+     // x86-avx512-NOT: vpsllq
+     // x86-avx512: vmovmskpd eax, xmm0
+     //
+     // aarch64: adrp
+     // aarch64-NEXT: cmlt
+     // aarch64-NEXT: ldr
+     // aarch64-NEXT: and
+     // aarch64-NEXT: addp
+     // aarch64-NEXT: fmov
+     // aarch64-NEXT: and
+     simd_bitmask(mask)
+ }
+
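AArch64 has no movmsk-style instruction, so the aarch64 check lines above spell out the usual NEON lowering instead: cmlt turns each set lane into all-ones, the adrp/ldr pair loads a small constant vector of per-lane bit weights, the and keeps the weights of the set lanes, and addv (addp in the two-lane case) sums what is left horizontally. The scalar sketch below models that idea for the four-lane case; the weight table and names are assumptions made for the illustration, not code taken from the compiler or the test.

// Scalar model of the NEON-style lowering suggested by the aarch64 checks (illustration only).
fn bitmask4_neon_model(lanes: [i32; 4]) -> u8 {
    let weights = [1u8, 2, 4, 8]; // per-lane bit weights, loaded from a constant pool in real code
    let mut acc = 0u8;
    for (i, lane) in lanes.iter().enumerate() {
        let all_ones: u8 = if *lane < 0 { 0xFF } else { 0x00 }; // cmlt: negative lane -> all-ones
        acc += all_ones & weights[i]; // and with the weights, then a horizontal add (addv)
    }
    acc
}

fn main() {
    // Lanes 1 and 3 set: only weights 2 and 8 survive the mask, so the sum is 0b1010.
    assert_eq!(bitmask4_neon_model([0, -1, 0, -1]), 0b1010);
}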
+ // CHECK-LABEL: bitmask_m64x4
#[no_mangle]
- pub unsafe extern "C" fn bitmask_f64x4(mask: m64x4) -> u8 {
-     // simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
+ pub unsafe extern "C" fn bitmask_m64x4(mask: m64x4) -> u8 {
+     // The simd_bitmask intrinsic already uses the most significant bit, so no shift is necessary.
+     //
+     // The parameter is a 256-bit vector which in the C ABI is only valid for avx/avx512 targets.
    //
    // x86-avx2-NOT: vpsllq
    // x86-avx2: vmovmskpd eax, ymm0