@@ -155,8 +155,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
155
155
; X86-AVX512F-NEXT: movzbl {{[0-9]+}}(%esp), %eax
156
156
; X86-AVX512F-NEXT: kmovw %eax, %k1
157
157
; X86-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
158
- ; X86-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
159
- ; X86-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
158
+ ; X86-AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
160
159
; X86-AVX512F-NEXT: retl
161
160
;
162
161
; X86-AVX512BW-LABEL: combine_vpermt2var_8f64_identity_mask:
@@ -165,26 +164,23 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
165
164
; X86-AVX512BW-NEXT: movzbl {{[0-9]+}}(%esp), %eax
166
165
; X86-AVX512BW-NEXT: kmovd %eax, %k1
167
166
; X86-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
168
- ; X86-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
169
- ; X86-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
167
+ ; X86-AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
170
168
; X86-AVX512BW-NEXT: retl
171
169
;
172
170
; X64-AVX512F-LABEL: combine_vpermt2var_8f64_identity_mask:
173
171
; X64-AVX512F: # %bb.0:
174
172
; X64-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
175
173
; X64-AVX512F-NEXT: kmovw %edi, %k1
176
174
; X64-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
177
- ; X64-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
178
- ; X64-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
175
+ ; X64-AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
179
176
; X64-AVX512F-NEXT: retq
180
177
;
181
178
; X64-AVX512BW-LABEL: combine_vpermt2var_8f64_identity_mask:
182
179
; X64-AVX512BW: # %bb.0:
183
180
; X64-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
184
181
; X64-AVX512BW-NEXT: kmovd %edi, %k1
185
182
; X64-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
186
- ; X64-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
187
- ; X64-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
183
+ ; X64-AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
188
184
; X64-AVX512BW-NEXT: retq
189
185
%res0 = call <8 x double > @llvm.x86.avx512.maskz.vpermt2var.pd.512 (<8 x i64 > <i64 7 , i64 6 , i64 5 , i64 4 , i64 3 , i64 2 , i64 1 , i64 0 >, <8 x double > %x0 , <8 x double > %x1 , i8 %m )
190
186
%res1 = call <8 x double > @llvm.x86.avx512.maskz.vpermt2var.pd.512 (<8 x i64 > <i64 7 , i64 14 , i64 5 , i64 12 , i64 3 , i64 10 , i64 1 , i64 8 >, <8 x double > %res0 , <8 x double > %res0 , i8 %m )
@@ -259,8 +255,7 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
259
255
; X86-AVX512F-NEXT: movzbl {{[0-9]+}}(%esp), %eax
260
256
; X86-AVX512F-NEXT: kmovw %eax, %k1
261
257
; X86-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
262
- ; X86-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
263
- ; X86-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
258
+ ; X86-AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
264
259
; X86-AVX512F-NEXT: retl
265
260
;
266
261
; X86-AVX512BW-LABEL: combine_vpermt2var_8i64_identity_mask:
@@ -269,26 +264,23 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
269
264
; X86-AVX512BW-NEXT: movzbl {{[0-9]+}}(%esp), %eax
270
265
; X86-AVX512BW-NEXT: kmovd %eax, %k1
271
266
; X86-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
272
- ; X86-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
273
- ; X86-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
267
+ ; X86-AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
274
268
; X86-AVX512BW-NEXT: retl
275
269
;
276
270
; X64-AVX512F-LABEL: combine_vpermt2var_8i64_identity_mask:
277
271
; X64-AVX512F: # %bb.0:
278
272
; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
279
273
; X64-AVX512F-NEXT: kmovw %edi, %k1
280
274
; X64-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
281
- ; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
282
- ; X64-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
275
+ ; X64-AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
283
276
; X64-AVX512F-NEXT: retq
284
277
;
285
278
; X64-AVX512BW-LABEL: combine_vpermt2var_8i64_identity_mask:
286
279
; X64-AVX512BW: # %bb.0:
287
280
; X64-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
288
281
; X64-AVX512BW-NEXT: kmovd %edi, %k1
289
282
; X64-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
290
- ; X64-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
291
- ; X64-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
283
+ ; X64-AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
292
284
; X64-AVX512BW-NEXT: retq
293
285
%res0 = call <8 x i64 > @llvm.x86.avx512.maskz.vpermt2var.q.512 (<8 x i64 > <i64 7 , i64 6 , i64 5 , i64 4 , i64 3 , i64 2 , i64 1 , i64 0 >, <8 x i64 > %x0 , <8 x i64 > %x1 , i8 %m )
294
286
%res1 = call <8 x i64 > @llvm.x86.avx512.maskz.vpermt2var.q.512 (<8 x i64 > <i64 7 , i64 14 , i64 5 , i64 12 , i64 3 , i64 10 , i64 1 , i64 8 >, <8 x i64 > %res0 , <8 x i64 > %res0 , i8 %m )
@@ -309,26 +301,23 @@ define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <1
309
301
; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
310
302
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
311
303
; X86-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
312
- ; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
313
- ; X86-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
304
+ ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
314
305
; X86-NEXT: retl
315
306
;
316
307
; X64-AVX512F-LABEL: combine_vpermt2var_16f32_identity_mask:
317
308
; X64-AVX512F: # %bb.0:
318
309
; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
319
310
; X64-AVX512F-NEXT: kmovw %edi, %k1
320
311
; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
321
- ; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
322
- ; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
312
+ ; X64-AVX512F-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
323
313
; X64-AVX512F-NEXT: retq
324
314
;
325
315
; X64-AVX512BW-LABEL: combine_vpermt2var_16f32_identity_mask:
326
316
; X64-AVX512BW: # %bb.0:
327
317
; X64-AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
328
318
; X64-AVX512BW-NEXT: kmovd %edi, %k1
329
319
; X64-AVX512BW-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
330
- ; X64-AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
331
- ; X64-AVX512BW-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
320
+ ; X64-AVX512BW-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
332
321
; X64-AVX512BW-NEXT: retq
333
322
%res0 = call <16 x float > @llvm.x86.avx512.maskz.vpermt2var.ps.512 (<16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >, <16 x float > %x0 , <16 x float > %x1 , i16 %m )
334
323
%res1 = call <16 x float > @llvm.x86.avx512.maskz.vpermt2var.ps.512 (<16 x i32 > <i32 15 , i32 30 , i32 13 , i32 28 , i32 11 , i32 26 , i32 9 , i32 24 , i32 7 , i32 22 , i32 5 , i32 20 , i32 3 , i32 18 , i32 1 , i32 16 >, <16 x float > %res0 , <16 x float > %res0 , i16 %m )
@@ -598,26 +587,23 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
598
587
; X86-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
599
588
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
600
589
; X86-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
601
- ; X86-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
602
- ; X86-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
590
+ ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
603
591
; X86-NEXT: retl
604
592
;
605
593
; X64-AVX512F-LABEL: combine_vpermt2var_16i32_identity_mask:
606
594
; X64-AVX512F: # %bb.0:
607
595
; X64-AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
608
596
; X64-AVX512F-NEXT: kmovw %edi, %k1
609
597
; X64-AVX512F-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
610
- ; X64-AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
611
- ; X64-AVX512F-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
598
+ ; X64-AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
612
599
; X64-AVX512F-NEXT: retq
613
600
;
614
601
; X64-AVX512BW-LABEL: combine_vpermt2var_16i32_identity_mask:
615
602
; X64-AVX512BW: # %bb.0:
616
603
; X64-AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
617
604
; X64-AVX512BW-NEXT: kmovd %edi, %k1
618
605
; X64-AVX512BW-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
619
- ; X64-AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
620
- ; X64-AVX512BW-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
606
+ ; X64-AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
621
607
; X64-AVX512BW-NEXT: retq
622
608
%res0 = call <16 x i32 > @llvm.x86.avx512.maskz.vpermt2var.d.512 (<16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >, <16 x i32 > %x0 , <16 x i32 > %x1 , i16 %m )
623
609
%res1 = call <16 x i32 > @llvm.x86.avx512.maskz.vpermt2var.d.512 (<16 x i32 > <i32 15 , i32 30 , i32 13 , i32 28 , i32 11 , i32 26 , i32 9 , i32 24 , i32 7 , i32 22 , i32 5 , i32 20 , i32 3 , i32 18 , i32 1 , i32 16 >, <16 x i32 > %res0 , <16 x i32 > %res0 , i16 %m )
0 commit comments