@@ -29,17 +29,29 @@ pub struct i8x16([i8; 16]);
29
29
pub struct m8x16 ( [ i8 ; 16 ] ) ;
30
30
31
31
#[ repr( simd) ]
32
- pub struct f32x8 ( [ f32 ; 8 ] ) ;
32
+ pub struct f32x4 ( [ f32 ; 4 ] ) ;
33
33
34
34
#[ repr( simd) ]
35
- pub struct m32x8 ( [ i32 ; 8 ] ) ;
35
+ pub struct m32x4 ( [ i32 ; 4 ] ) ;
36
+
37
+ #[ repr( simd) ]
38
+ pub struct f64x2 ( [ f64 ; 2 ] ) ;
39
+
40
+ #[ repr( simd) ]
41
+ pub struct m64x2 ( [ i64 ; 2 ] ) ;
36
42
37
43
#[ repr( simd) ]
38
44
pub struct f64x4 ( [ f64 ; 4 ] ) ;
39
45
40
46
#[ repr( simd) ]
41
47
pub struct m64x4 ( [ i64 ; 4 ] ) ;
42
48
49
+ #[ repr( simd) ]
50
+ pub struct f64x8 ( [ f64 ; 8 ] ) ;
51
+
52
+ #[ repr( simd) ]
53
+ pub struct m64x8 ( [ i64 ; 8 ] ) ;
54
+
43
55
extern "rust-intrinsic" {
44
56
fn simd_select < M , V > ( mask : M , a : V , b : V ) -> V ;
45
57
}
@@ -60,40 +72,59 @@ pub unsafe extern "C" fn select_i8x16(mask: m8x16, a: i8x16, b: i8x16) -> i8x16
60
72
simd_select ( mask, a, b)
61
73
}
62
74
63
- // CHECK-LABEL: select_f32x8
75
+ // CHECK-LABEL: select_f32x4
64
76
#[ no_mangle]
65
- pub unsafe extern "C" fn select_f32x8 ( mask : m32x8 , a : f32x8 , b : f32x8 ) -> f32x8 {
66
- // x86-avx2: vpslld ymm0, ymm0 , 31
67
- // x86-avx2-NEXT: vblendvps ymm0, ymm2, ymm1, ymm0
77
+ pub unsafe extern "C" fn select_f32x4 ( mask : m32x4 , a : f32x4 , b : f32x4 ) -> f32x4 {
78
+ // x86-avx2: vpslld xmm0, xmm0 , 31
79
+ // x86-avx2-NEXT: vblendvps xmm0, xmm2, xmm1, xmm0
68
80
//
69
- // x86-avx512: vpslld ymm0, ymm0 , 31
70
- // x86-avx512-NEXT: vpmovd2m k1, ymm0
71
- // x86-avx512-NEXT: vblendmps ymm0 {k1}, ymm2, ymm1
81
+ // x86-avx512: vpslld xmm0, xmm0 , 31
82
+ // x86-avx512-NEXT: vpmovd2m k1, xmm0
83
+ // x86-avx512-NEXT: vblendmps xmm0 {k1}, xmm2, xmm1
72
84
//
73
85
// aarch64: shl v0.4s, v0.4s, #31
74
- // aarch64-NEXT: shl v1.4s, v1.4s, #31
75
86
// aarch64-NEXT: cmlt v0.4s, v0.4s, #0
76
- // aarch64-NEXT: cmlt v1.4s, v1.4s, #0
77
- // aarch64-NEXT: bsl v0.16b, v2.16b, v4.16b
78
- // aarch64-NEXT: bsl v1.16b, v3.16b, v5.16b
87
+ // aarch64-NEXT: bsl v0.16b, v1.16b, v2.16b
88
+ simd_select ( mask, a, b)
89
+ }
90
+
91
+ // CHECK-LABEL: select_f64x2
92
+ #[ no_mangle]
93
+ pub unsafe extern "C" fn select_f64x2 ( mask : m64x2 , a : f64x2 , b : f64x2 ) -> f64x2 {
94
+ // x86-avx2: vpsllq xmm0, xmm0, 63
95
+ // x86-avx2-NEXT: vblendvpd xmm0, xmm2, xmm1, xmm0
96
+ //
97
+ // x86-avx512: vpsllq xmm0, xmm0, 63
98
+ // x86-avx512-NEXT: vpmovq2m k1, xmm0
99
+ // x86-avx512-NEXT: vblendmpd xmm0 {k1}, xmm2, xmm1
100
+ //
101
+ // aarch64: shl v0.2d, v0.2d, #63
102
+ // aarch64-NEXT: cmlt v0.2d, v0.2d, #0
103
+ // aarch64-NEXT: bsl v0.16b, v1.16b, v2.16b
79
104
simd_select ( mask, a, b)
80
105
}
81
106
82
107
// CHECK-LABEL: select_f64x4
83
108
#[ no_mangle]
84
109
pub unsafe extern "C" fn select_f64x4 ( mask : m64x4 , a : f64x4 , b : f64x4 ) -> f64x4 {
110
+ // The parameter is a 256-bit vector, which in the C ABI is only valid for AVX targets.
111
+ //
85
112
// x86-avx2: vpsllq ymm0, ymm0, 63
86
113
// x86-avx2-NEXT: vblendvpd ymm0, ymm2, ymm1, ymm0
87
114
//
88
115
// x86-avx512: vpsllq ymm0, ymm0, 63
89
116
// x86-avx512-NEXT: vpmovq2m k1, ymm0
90
117
// x86-avx512-NEXT: vblendmpd ymm0 {k1}, ymm2, ymm1
118
+ simd_select ( mask, a, b)
119
+ }
120
+
121
+ // CHECK-LABEL: select_f64x8
122
+ #[ no_mangle]
123
+ pub unsafe extern "C" fn select_f64x8 ( mask : m64x8 , a : f64x8 , b : f64x8 ) -> f64x8 {
124
+ // The parameter is a 512 bit vector which in the C abi is only valid for avx512 targets.
91
125
//
92
- // aarch64: shl v0.2d, v0.2d, #63
93
- // aarch64-NEXT: shl v1.2d, v1.2d, #63
94
- // aarch64-NEXT: cmlt v0.2d, v0.2d, #0
95
- // aarch64-NEXT: cmlt v1.2d, v1.2d, #0
96
- // aarch64-NEXT: bsl v0.16b, v2.16b, v4.16b
97
- // aarch64-NEXT: bsl v1.16b, v3.16b, v5.16b
126
+ // x86-avx512: vpsllq zmm0, zmm0, 63
127
+ // x86-avx512-NEXT: vpmovq2m k1, zmm0
128
+ // x86-avx512-NEXT: vblendmpd zmm0 {k1}, zmm2, zmm1
98
129
simd_select ( mask, a, b)
99
130
}
0 commit comments