@@ -257,12 +257,21 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
257
257
}
258
258
259
259
define <32 x i16 > @concat_packsswd_int_2x256 (<8 x i32 > %a0 , <8 x i32 > %a1 , <8 x i32 > %a2 , <8 x i32 > %a3 ) {
260
- ; AVX512-LABEL: concat_packsswd_int_2x256:
261
- ; AVX512: # %bb.0:
262
- ; AVX512-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
263
- ; AVX512-NEXT: vpackssdw %ymm3, %ymm2, %ymm1
264
- ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
265
- ; AVX512-NEXT: retq
260
+ ; AVX512F-LABEL: concat_packsswd_int_2x256:
261
+ ; AVX512F: # %bb.0:
262
+ ; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
263
+ ; AVX512F-NEXT: vpackssdw %ymm3, %ymm2, %ymm1
264
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
265
+ ; AVX512F-NEXT: retq
266
+ ;
267
+ ; AVX512BW-LABEL: concat_packsswd_int_2x256:
268
+ ; AVX512BW: # %bb.0:
269
+ ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
270
+ ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
271
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
272
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
273
+ ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
274
+ ; AVX512BW-NEXT: retq
266
275
%lo = tail call <16 x i16 > @llvm.x86.avx2.packssdw (<8 x i32 > %a0 , <8 x i32 > %a1 )
267
276
%hi = tail call <16 x i16 > @llvm.x86.avx2.packssdw (<8 x i32 > %a2 , <8 x i32 > %a3 )
268
277
%res = shufflevector <16 x i16 > %lo , <16 x i16 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
@@ -271,12 +280,21 @@ define <32 x i16> @concat_packsswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x
271
280
; External declaration of the AVX2 pack intrinsic that @concat_packsswd_int_2x256
; calls once for its %lo half and once for its %hi half. Each call takes two
; <8 x i32> operands and produces a single <16 x i16> result.
declare <16 x i16 > @llvm.x86.avx2.packssdw (<8 x i32 >, <8 x i32 >)
272
281
273
282
define <32 x i16 > @concat_packuswd_int_2x256 (<8 x i32 > %a0 , <8 x i32 > %a1 , <8 x i32 > %a2 , <8 x i32 > %a3 ) {
274
- ; AVX512-LABEL: concat_packuswd_int_2x256:
275
- ; AVX512: # %bb.0:
276
- ; AVX512-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
277
- ; AVX512-NEXT: vpackusdw %ymm3, %ymm2, %ymm1
278
- ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
279
- ; AVX512-NEXT: retq
283
+ ; AVX512F-LABEL: concat_packuswd_int_2x256:
284
+ ; AVX512F: # %bb.0:
285
+ ; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
286
+ ; AVX512F-NEXT: vpackusdw %ymm3, %ymm2, %ymm1
287
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
288
+ ; AVX512F-NEXT: retq
289
+ ;
290
+ ; AVX512BW-LABEL: concat_packuswd_int_2x256:
291
+ ; AVX512BW: # %bb.0:
292
+ ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
293
+ ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
294
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
295
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
296
+ ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
297
+ ; AVX512BW-NEXT: retq
280
298
%lo = tail call <16 x i16 > @llvm.x86.avx2.packusdw (<8 x i32 > %a0 , <8 x i32 > %a1 )
281
299
%hi = tail call <16 x i16 > @llvm.x86.avx2.packusdw (<8 x i32 > %a2 , <8 x i32 > %a3 )
282
300
%res = shufflevector <16 x i16 > %lo , <16 x i16 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
0 commit comments