Skip to content

Commit 3c24326

Browse files
committed
[X86] Remove combineVectorSignBitsTruncation and leave TRUNCATE -> PACKSS/PACKUS to legalization/lowering
Don't prematurely fold TRUNCATE nodes to PACKSS/PACKUS target nodes - we miss out on generic TRUNCATE folds. Helps some regressions from D152928 and #63946. Fixes #63710.
1 parent dd7ba38 commit 3c24326

11 files changed

+87
-170
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -20014,12 +20014,14 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
2001420014
}
2001520015

2001620016
// Sub-128-bit truncation - widen to 128-bit src and pack in the lower half.
20017+
// On pre-AVX512, pack the src in both halves to help value tracking.
2001720018
if (SrcSizeInBits <= 128) {
2001820019
InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
2001920020
OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
2002020021
In = widenSubVector(In, false, Subtarget, DAG, DL, 128);
20021-
In = DAG.getBitcast(InVT, In);
20022-
SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
20022+
SDValue LHS = DAG.getBitcast(InVT, In);
20023+
SDValue RHS = Subtarget.hasAVX512() ? DAG.getUNDEF(InVT) : LHS;
20024+
SDValue Res = DAG.getNode(Opcode, DL, OutVT, LHS, RHS);
2002320025
Res = extractSubVector(Res, 0, DAG, DL, SrcSizeInBits / 2);
2002420026
Res = DAG.getBitcast(PackedVT, Res);
2002520027
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
@@ -50844,46 +50846,6 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
5084450846
return SDValue();
5084550847
}
5084650848

50847-
/// This function transforms vector truncation of 'extended sign-bits' or
50848-
/// 'extended zero-bits' values.
50849-
/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
50850-
/// TODO: Remove this and just use LowerTruncateVecPackWithSignBits.
50851-
static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
50852-
SelectionDAG &DAG,
50853-
const X86Subtarget &Subtarget) {
50854-
// Requires SSE2.
50855-
if (!Subtarget.hasSSE2())
50856-
return SDValue();
50857-
50858-
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
50859-
return SDValue();
50860-
50861-
SDValue In = N->getOperand(0);
50862-
if (!In.getValueType().isSimple())
50863-
return SDValue();
50864-
50865-
MVT VT = N->getValueType(0).getSimpleVT();
50866-
MVT InVT = In.getValueType().getSimpleVT();
50867-
50868-
// AVX512 has fast truncate, but if the input is already going to be split,
50869-
// there's no harm in trying pack.
50870-
if (Subtarget.hasAVX512() &&
50871-
!(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
50872-
InVT.is512BitVector())) {
50873-
// PACK should still be worth it for 128-bit vectors if the sources were
50874-
// originally concatenated from subvectors.
50875-
if (VT.getSizeInBits() > 128 || !isFreeToSplitVector(In.getNode(), DAG))
50876-
return SDValue();
50877-
}
50878-
50879-
unsigned PackOpcode;
50880-
if (SDValue Src =
50881-
matchTruncateWithPACK(PackOpcode, VT, In, DL, DAG, Subtarget))
50882-
return truncateVectorWithPACK(PackOpcode, VT, Src, DL, DAG, Subtarget);
50883-
50884-
return SDValue();
50885-
}
50886-
5088750849
// Try to form a MULHU or MULHS node by looking for
5088850850
// (trunc (srl (mul ext, ext), 16))
5088950851
// TODO: This is X86 specific because we want to be able to handle wide types
@@ -51140,10 +51102,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
5114051102
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
5114151103
}
5114251104

51143-
// Try to truncate extended sign/zero bits with PACKSS/PACKUS.
51144-
if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
51145-
return V;
51146-
5114751105
return SDValue();
5114851106
}
5114951107

llvm/test/CodeGen/X86/movmsk-cmp.ll

Lines changed: 18 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -422,25 +422,15 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
422422
}
423423

424424
define i1 @allones_v32i16_sign(<32 x i16> %arg) {
425-
; SSE2-LABEL: allones_v32i16_sign:
426-
; SSE2: # %bb.0:
427-
; SSE2-NEXT: pand %xmm3, %xmm1
428-
; SSE2-NEXT: pand %xmm2, %xmm0
429-
; SSE2-NEXT: packsswb %xmm1, %xmm0
430-
; SSE2-NEXT: pmovmskb %xmm0, %eax
431-
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
432-
; SSE2-NEXT: sete %al
433-
; SSE2-NEXT: retq
434-
;
435-
; SSE41-LABEL: allones_v32i16_sign:
436-
; SSE41: # %bb.0:
437-
; SSE41-NEXT: pmaxsw %xmm3, %xmm1
438-
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
439-
; SSE41-NEXT: packsswb %xmm1, %xmm0
440-
; SSE41-NEXT: pmovmskb %xmm0, %eax
441-
; SSE41-NEXT: cmpl $65535, %eax # imm = 0xFFFF
442-
; SSE41-NEXT: sete %al
443-
; SSE41-NEXT: retq
425+
; SSE-LABEL: allones_v32i16_sign:
426+
; SSE: # %bb.0:
427+
; SSE-NEXT: packsswb %xmm1, %xmm0
428+
; SSE-NEXT: packsswb %xmm3, %xmm2
429+
; SSE-NEXT: pand %xmm0, %xmm2
430+
; SSE-NEXT: pmovmskb %xmm2, %eax
431+
; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
432+
; SSE-NEXT: sete %al
433+
; SSE-NEXT: retq
444434
;
445435
; AVX1-LABEL: allones_v32i16_sign:
446436
; AVX1: # %bb.0:
@@ -496,25 +486,15 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
496486
}
497487

498488
define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
499-
; SSE2-LABEL: allzeros_v32i16_sign:
500-
; SSE2: # %bb.0:
501-
; SSE2-NEXT: por %xmm3, %xmm1
502-
; SSE2-NEXT: por %xmm2, %xmm0
503-
; SSE2-NEXT: packsswb %xmm1, %xmm0
504-
; SSE2-NEXT: pmovmskb %xmm0, %eax
505-
; SSE2-NEXT: testl %eax, %eax
506-
; SSE2-NEXT: sete %al
507-
; SSE2-NEXT: retq
508-
;
509-
; SSE41-LABEL: allzeros_v32i16_sign:
510-
; SSE41: # %bb.0:
511-
; SSE41-NEXT: pminsw %xmm3, %xmm1
512-
; SSE41-NEXT: pminsw %xmm2, %xmm0
513-
; SSE41-NEXT: packsswb %xmm1, %xmm0
514-
; SSE41-NEXT: pmovmskb %xmm0, %eax
515-
; SSE41-NEXT: testl %eax, %eax
516-
; SSE41-NEXT: sete %al
517-
; SSE41-NEXT: retq
489+
; SSE-LABEL: allzeros_v32i16_sign:
490+
; SSE: # %bb.0:
491+
; SSE-NEXT: packsswb %xmm3, %xmm2
492+
; SSE-NEXT: packsswb %xmm1, %xmm0
493+
; SSE-NEXT: por %xmm2, %xmm0
494+
; SSE-NEXT: pmovmskb %xmm0, %eax
495+
; SSE-NEXT: testl %eax, %eax
496+
; SSE-NEXT: sete %al
497+
; SSE-NEXT: retq
518498
;
519499
; AVX1-LABEL: allzeros_v32i16_sign:
520500
; AVX1: # %bb.0:

llvm/test/CodeGen/X86/pmulh.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -310,41 +310,41 @@ define <16 x i16> @and_mulhuw_v16i16(<16 x i32> %a, <16 x i32> %b) {
310310
; SSE2-LABEL: and_mulhuw_v16i16:
311311
; SSE2: # %bb.0:
312312
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767,32767,32767]
313-
; SSE2-NEXT: pand %xmm8, %xmm1
314-
; SSE2-NEXT: pand %xmm8, %xmm0
315-
; SSE2-NEXT: packssdw %xmm1, %xmm0
316313
; SSE2-NEXT: pand %xmm8, %xmm3
317314
; SSE2-NEXT: pand %xmm8, %xmm2
318315
; SSE2-NEXT: packssdw %xmm3, %xmm2
319-
; SSE2-NEXT: pand %xmm8, %xmm5
320-
; SSE2-NEXT: pand %xmm8, %xmm4
321-
; SSE2-NEXT: packssdw %xmm5, %xmm4
322-
; SSE2-NEXT: pmulhw %xmm4, %xmm0
316+
; SSE2-NEXT: pand %xmm8, %xmm1
317+
; SSE2-NEXT: pand %xmm8, %xmm0
318+
; SSE2-NEXT: packssdw %xmm1, %xmm0
323319
; SSE2-NEXT: pand %xmm8, %xmm7
324-
; SSE2-NEXT: pand %xmm6, %xmm8
325-
; SSE2-NEXT: packssdw %xmm7, %xmm8
326-
; SSE2-NEXT: pmulhw %xmm2, %xmm8
327-
; SSE2-NEXT: movdqa %xmm8, %xmm1
320+
; SSE2-NEXT: pand %xmm8, %xmm6
321+
; SSE2-NEXT: packssdw %xmm7, %xmm6
322+
; SSE2-NEXT: pmulhw %xmm2, %xmm6
323+
; SSE2-NEXT: pand %xmm8, %xmm5
324+
; SSE2-NEXT: pand %xmm4, %xmm8
325+
; SSE2-NEXT: packssdw %xmm5, %xmm8
326+
; SSE2-NEXT: pmulhw %xmm8, %xmm0
327+
; SSE2-NEXT: movdqa %xmm6, %xmm1
328328
; SSE2-NEXT: retq
329329
;
330330
; SSE41-LABEL: and_mulhuw_v16i16:
331331
; SSE41: # %bb.0:
332332
; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767,32767,32767]
333-
; SSE41-NEXT: pand %xmm8, %xmm1
334-
; SSE41-NEXT: pand %xmm8, %xmm0
335-
; SSE41-NEXT: packusdw %xmm1, %xmm0
336333
; SSE41-NEXT: pand %xmm8, %xmm3
337334
; SSE41-NEXT: pand %xmm8, %xmm2
338335
; SSE41-NEXT: packusdw %xmm3, %xmm2
339-
; SSE41-NEXT: pand %xmm8, %xmm5
340-
; SSE41-NEXT: pand %xmm8, %xmm4
341-
; SSE41-NEXT: packusdw %xmm5, %xmm4
342-
; SSE41-NEXT: pmulhw %xmm4, %xmm0
336+
; SSE41-NEXT: pand %xmm8, %xmm1
337+
; SSE41-NEXT: pand %xmm8, %xmm0
338+
; SSE41-NEXT: packusdw %xmm1, %xmm0
343339
; SSE41-NEXT: pand %xmm8, %xmm7
344-
; SSE41-NEXT: pand %xmm6, %xmm8
345-
; SSE41-NEXT: packusdw %xmm7, %xmm8
346-
; SSE41-NEXT: pmulhw %xmm2, %xmm8
347-
; SSE41-NEXT: movdqa %xmm8, %xmm1
340+
; SSE41-NEXT: pand %xmm8, %xmm6
341+
; SSE41-NEXT: packusdw %xmm7, %xmm6
342+
; SSE41-NEXT: pmulhw %xmm2, %xmm6
343+
; SSE41-NEXT: pand %xmm8, %xmm5
344+
; SSE41-NEXT: pand %xmm4, %xmm8
345+
; SSE41-NEXT: packusdw %xmm5, %xmm8
346+
; SSE41-NEXT: pmulhw %xmm8, %xmm0
347+
; SSE41-NEXT: movdqa %xmm6, %xmm1
348348
; SSE41-NEXT: retq
349349
;
350350
; AVX2-LABEL: and_mulhuw_v16i16:
@@ -408,39 +408,39 @@ define <16 x i16> @sext_mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
408408
define <16 x i16> @ashr_mulhuw_v16i16(<16 x i32> %a, <16 x i32> %b) {
409409
; SSE2-LABEL: ashr_mulhuw_v16i16:
410410
; SSE2: # %bb.0:
411-
; SSE2-NEXT: psrad $16, %xmm7
412-
; SSE2-NEXT: psrad $16, %xmm6
413-
; SSE2-NEXT: packssdw %xmm7, %xmm6
414-
; SSE2-NEXT: psrad $16, %xmm3
415-
; SSE2-NEXT: psrad $16, %xmm2
416-
; SSE2-NEXT: packssdw %xmm3, %xmm2
417-
; SSE2-NEXT: pmulhw %xmm6, %xmm2
418411
; SSE2-NEXT: psrad $16, %xmm5
419412
; SSE2-NEXT: psrad $16, %xmm4
420413
; SSE2-NEXT: packssdw %xmm5, %xmm4
421414
; SSE2-NEXT: psrad $16, %xmm1
422415
; SSE2-NEXT: psrad $16, %xmm0
423416
; SSE2-NEXT: packssdw %xmm1, %xmm0
424417
; SSE2-NEXT: pmulhw %xmm4, %xmm0
418+
; SSE2-NEXT: psrad $16, %xmm7
419+
; SSE2-NEXT: psrad $16, %xmm6
420+
; SSE2-NEXT: packssdw %xmm7, %xmm6
421+
; SSE2-NEXT: psrad $16, %xmm3
422+
; SSE2-NEXT: psrad $16, %xmm2
423+
; SSE2-NEXT: packssdw %xmm3, %xmm2
424+
; SSE2-NEXT: pmulhw %xmm6, %xmm2
425425
; SSE2-NEXT: movdqa %xmm2, %xmm1
426426
; SSE2-NEXT: retq
427427
;
428428
; SSE41-LABEL: ashr_mulhuw_v16i16:
429429
; SSE41: # %bb.0:
430-
; SSE41-NEXT: psrld $16, %xmm1
431-
; SSE41-NEXT: psrld $16, %xmm0
432-
; SSE41-NEXT: packusdw %xmm1, %xmm0
433430
; SSE41-NEXT: psrld $16, %xmm3
434431
; SSE41-NEXT: psrld $16, %xmm2
435432
; SSE41-NEXT: packusdw %xmm3, %xmm2
436-
; SSE41-NEXT: psrld $16, %xmm5
437-
; SSE41-NEXT: psrld $16, %xmm4
438-
; SSE41-NEXT: packusdw %xmm5, %xmm4
439-
; SSE41-NEXT: pmulhw %xmm4, %xmm0
433+
; SSE41-NEXT: psrld $16, %xmm1
434+
; SSE41-NEXT: psrld $16, %xmm0
435+
; SSE41-NEXT: packusdw %xmm1, %xmm0
440436
; SSE41-NEXT: psrld $16, %xmm7
441437
; SSE41-NEXT: psrld $16, %xmm6
442438
; SSE41-NEXT: packusdw %xmm7, %xmm6
443439
; SSE41-NEXT: pmulhw %xmm2, %xmm6
440+
; SSE41-NEXT: psrld $16, %xmm5
441+
; SSE41-NEXT: psrld $16, %xmm4
442+
; SSE41-NEXT: packusdw %xmm5, %xmm4
443+
; SSE41-NEXT: pmulhw %xmm4, %xmm0
444444
; SSE41-NEXT: movdqa %xmm6, %xmm1
445445
; SSE41-NEXT: retq
446446
;

llvm/test/CodeGen/X86/psubus.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,13 +1872,13 @@ define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind {
18721872
; SSE41-LABEL: psubus_16i32_max:
18731873
; SSE41: # %bb.0: # %vector.ph
18741874
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1875-
; SSE41-NEXT: pminud %xmm6, %xmm5
1876-
; SSE41-NEXT: pminud %xmm6, %xmm4
1877-
; SSE41-NEXT: packusdw %xmm5, %xmm4
18781875
; SSE41-NEXT: pminud %xmm6, %xmm3
18791876
; SSE41-NEXT: pminud %xmm6, %xmm2
18801877
; SSE41-NEXT: packusdw %xmm3, %xmm2
18811878
; SSE41-NEXT: psubusw %xmm2, %xmm0
1879+
; SSE41-NEXT: pminud %xmm6, %xmm5
1880+
; SSE41-NEXT: pminud %xmm6, %xmm4
1881+
; SSE41-NEXT: packusdw %xmm5, %xmm4
18821882
; SSE41-NEXT: psubusw %xmm4, %xmm1
18831883
; SSE41-NEXT: retq
18841884
;

llvm/test/CodeGen/X86/sext-vsetcc.ll

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -638,32 +638,17 @@ define <8 x i32> @PR63946(<8 x i32> %a0, <8 x i32> %b0) nounwind {
638638
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm2
639639
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
640640
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm3, %ymm2
641-
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
642-
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm4, %ymm4
643-
; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm9
644-
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm5, %ymm5
645-
; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm10
646-
; AVX2-NEXT: vpor %xmm3, %xmm10, %xmm3
647-
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm6, %ymm6
648-
; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm10
649-
; AVX2-NEXT: vpor %xmm10, %xmm9, %xmm9
650-
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm7, %ymm7
651-
; AVX2-NEXT: vextracti128 $1, %ymm7, %xmm10
652-
; AVX2-NEXT: vpackssdw %xmm10, %xmm7, %xmm7
641+
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm4, %ymm3
642+
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm5, %ymm4
643+
; AVX2-NEXT: vpor %ymm4, %ymm2, %ymm2
644+
; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
645+
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm6, %ymm2
646+
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
647+
; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
648+
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm7, %ymm2
653649
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm8, %ymm0
654-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm8
655-
; AVX2-NEXT: vpackssdw %xmm8, %xmm0, %xmm0
656-
; AVX2-NEXT: vpor %xmm0, %xmm7, %xmm0
657-
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm7
658-
; AVX2-NEXT: vpor %xmm3, %xmm7, %xmm3
659-
; AVX2-NEXT: vpor %xmm3, %xmm9, %xmm3
660-
; AVX2-NEXT: vpor %xmm5, %xmm2, %xmm2
661-
; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1
662-
; AVX2-NEXT: vpor %xmm6, %xmm4, %xmm2
663-
; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1
664-
; AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
665-
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
666-
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
650+
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
651+
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
667652
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
668653
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
669654
; AVX2-NEXT: retq

llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -319,13 +319,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
319319
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
320320
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
321321
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
322-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
323-
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
324322
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
325-
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
326-
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
327-
; AVX1-NEXT: vpextrb $8, %xmm0, %edx
328-
; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
323+
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
324+
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
325+
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
326+
; AVX1-NEXT: vmovd %xmm0, %eax
327+
; AVX1-NEXT: vpextrb $4, %xmm0, %edx
328+
; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
329329
; AVX1-NEXT: # kill: def $al killed $al killed $eax
330330
; AVX1-NEXT: # kill: def $dl killed $dl killed $edx
331331
; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -375,11 +375,12 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
375375
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
376376
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
377377
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
378-
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
379-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
380-
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
381-
; AVX2-NEXT: vpextrb $8, %xmm0, %edx
382-
; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
378+
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm1
379+
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
380+
; AVX2-NEXT: vmovd %xmm0, %eax
381+
; AVX2-NEXT: notl %eax
382+
; AVX2-NEXT: vpextrb $8, %xmm1, %edx
383+
; AVX2-NEXT: vpextrb $0, %xmm2, %ecx
383384
; AVX2-NEXT: # kill: def $al killed $al killed $eax
384385
; AVX2-NEXT: # kill: def $dl killed $dl killed $edx
385386
; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx

llvm/test/CodeGen/X86/vector-reduce-and-bool.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,9 +1211,8 @@ define i8 @icmp0_v8i1(<8 x i8>) nounwind {
12111211
; AVX1OR2: # %bb.0:
12121212
; AVX1OR2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
12131213
; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0
1214-
; AVX1OR2-NEXT: vpsraw $15, %xmm0, %xmm0
12151214
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1216-
; AVX1OR2-NEXT: testl %eax, %eax
1215+
; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA
12171216
; AVX1OR2-NEXT: sete %al
12181217
; AVX1OR2-NEXT: retq
12191218
;

llvm/test/CodeGen/X86/vector-trunc-packus.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4092,7 +4092,7 @@ define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) "min-legal-vector-width"
40924092
; AVX2-FAST-NEXT: vpminsd %xmm1, %xmm0, %xmm0
40934093
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
40944094
; AVX2-FAST-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
4095-
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
4095+
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
40964096
; AVX2-FAST-NEXT: retq
40974097
;
40984098
; AVX512F-LABEL: trunc_packus_v4i32_v4i8:

llvm/test/CodeGen/X86/vector-trunc-ssat.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3827,7 +3827,7 @@ define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) {
38273827
; AVX2-FAST-NEXT: vpminsd %xmm1, %xmm0, %xmm0
38283828
; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
38293829
; AVX2-FAST-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
3830-
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
3830+
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
38313831
; AVX2-FAST-NEXT: retq
38323832
;
38333833
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8:

llvm/test/CodeGen/X86/vector-trunc-usat.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2940,7 +2940,7 @@ define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) {
29402940
; AVX2-FAST: # %bb.0:
29412941
; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
29422942
; AVX2-FAST-NEXT: vpminud %xmm1, %xmm0, %xmm0
2943-
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
2943+
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
29442944
; AVX2-FAST-NEXT: retq
29452945
;
29462946
; AVX512F-LABEL: trunc_usat_v4i32_v4i8:

0 commit comments

Comments
 (0)