Skip to content

Commit 7bfcf8c

Browse files
RKSimon authored and agozillon committed
[X86] X86FixupVectorConstants - load+zero vector constants that can be stored in a truncated form (#80428)
Further develops the vsextload support added in #79815 / b5d35fe - reduces the size of the vector constant by storing it in the constant pool in a truncated form and zero-extending it as part of the load.
1 parent 035b64e commit 7bfcf8c

File tree

68 files changed

+1297
-717
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+1297
-717
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp

+12-6
Original file line numberDiff line numberDiff line change
@@ -1318,7 +1318,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
13181318
CASE_PMOVZX(PMOVZXBW, r)
13191319
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
13201320
[[fallthrough]];
1321-
CASE_PMOVZX(PMOVZXBW, m)
1321+
CASE_MASK_PMOVZX(PMOVZXBW, m)
1322+
CASE_MASKZ_PMOVZX(PMOVZXBW, m)
13221323
DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false,
13231324
ShuffleMask);
13241325
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1327,7 +1328,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
13271328
CASE_PMOVZX(PMOVZXBD, r)
13281329
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
13291330
[[fallthrough]];
1330-
CASE_PMOVZX(PMOVZXBD, m)
1331+
CASE_MASK_PMOVZX(PMOVZXBD, m)
1332+
CASE_MASKZ_PMOVZX(PMOVZXBD, m)
13311333
DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false,
13321334
ShuffleMask);
13331335
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1336,7 +1338,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
13361338
CASE_PMOVZX(PMOVZXBQ, r)
13371339
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
13381340
[[fallthrough]];
1339-
CASE_PMOVZX(PMOVZXBQ, m)
1341+
CASE_MASK_PMOVZX(PMOVZXBQ, m)
1342+
CASE_MASKZ_PMOVZX(PMOVZXBQ, m)
13401343
DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false,
13411344
ShuffleMask);
13421345
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1345,7 +1348,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
13451348
CASE_PMOVZX(PMOVZXWD, r)
13461349
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
13471350
[[fallthrough]];
1348-
CASE_PMOVZX(PMOVZXWD, m)
1351+
CASE_MASK_PMOVZX(PMOVZXWD, m)
1352+
CASE_MASKZ_PMOVZX(PMOVZXWD, m)
13491353
DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false,
13501354
ShuffleMask);
13511355
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1354,7 +1358,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
13541358
CASE_PMOVZX(PMOVZXWQ, r)
13551359
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
13561360
[[fallthrough]];
1357-
CASE_PMOVZX(PMOVZXWQ, m)
1361+
CASE_MASK_PMOVZX(PMOVZXWQ, m)
1362+
CASE_MASKZ_PMOVZX(PMOVZXWQ, m)
13581363
DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false,
13591364
ShuffleMask);
13601365
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1363,7 +1368,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
13631368
CASE_PMOVZX(PMOVZXDQ, r)
13641369
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
13651370
[[fallthrough]];
1366-
CASE_PMOVZX(PMOVZXDQ, m)
1371+
CASE_MASK_PMOVZX(PMOVZXDQ, m)
1372+
CASE_MASKZ_PMOVZX(PMOVZXDQ, m)
13671373
DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false,
13681374
ShuffleMask);
13691375
DestName = getRegName(MI->getOperand(0).getReg());

llvm/lib/Target/X86/X86FixupVectorConstants.cpp

+46-6
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,10 @@ static Constant *rebuildSExtCst(const Constant *C, unsigned NumElts,
299299
unsigned SrcEltBitWidth) {
300300
return rebuildExtCst(C, true, NumElts, SrcEltBitWidth);
301301
}
302+
// Zero-extension counterpart of rebuildSExtCst: forwards C, NumElts and
// SrcEltBitWidth to rebuildExtCst, passing `false` where rebuildSExtCst
// passes `true` (presumably the "is sign-extend" selector -- confirm against
// rebuildExtCst's definition, which is not shown in this hunk).
static Constant *rebuildZExtCst(const Constant *C, unsigned NumElts,
303+
unsigned SrcEltBitWidth) {
304+
return rebuildExtCst(C, false, NumElts, SrcEltBitWidth);
305+
}
302306

303307
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
304308
MachineBasicBlock &MBB,
@@ -416,13 +420,19 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
416420
case X86::MOVDQUrm: {
417421
FixupEntry Fixups[] = {
418422
{HasSSE41 ? X86::PMOVSXBQrm : 0, 2, 8, rebuildSExtCst},
423+
{HasSSE41 ? X86::PMOVZXBQrm : 0, 2, 8, rebuildZExtCst},
419424
{X86::MOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
420425
{HasSSE41 ? X86::PMOVSXBDrm : 0, 4, 8, rebuildSExtCst},
426+
{HasSSE41 ? X86::PMOVZXBDrm : 0, 4, 8, rebuildZExtCst},
421427
{HasSSE41 ? X86::PMOVSXWQrm : 0, 2, 16, rebuildSExtCst},
428+
{HasSSE41 ? X86::PMOVZXWQrm : 0, 2, 16, rebuildZExtCst},
422429
{X86::MOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
423430
{HasSSE41 ? X86::PMOVSXBWrm : 0, 8, 8, rebuildSExtCst},
431+
{HasSSE41 ? X86::PMOVZXBWrm : 0, 8, 8, rebuildZExtCst},
424432
{HasSSE41 ? X86::PMOVSXWDrm : 0, 4, 16, rebuildSExtCst},
425-
{HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst}};
433+
{HasSSE41 ? X86::PMOVZXWDrm : 0, 4, 16, rebuildZExtCst},
434+
{HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst},
435+
{HasSSE41 ? X86::PMOVZXDQrm : 0, 2, 32, rebuildZExtCst}};
426436
return FixupConstant(Fixups, 1);
427437
}
428438
case X86::VMOVDQArm:
@@ -431,17 +441,23 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
431441
{HasAVX2 ? X86::VPBROADCASTBrm : 0, 1, 8, rebuildSplatCst},
432442
{HasAVX2 ? X86::VPBROADCASTWrm : 0, 1, 16, rebuildSplatCst},
433443
{X86::VPMOVSXBQrm, 2, 8, rebuildSExtCst},
444+
{X86::VPMOVZXBQrm, 2, 8, rebuildZExtCst},
434445
{X86::VMOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
435446
{HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
436447
rebuildSplatCst},
437448
{X86::VPMOVSXBDrm, 4, 8, rebuildSExtCst},
449+
{X86::VPMOVZXBDrm, 4, 8, rebuildZExtCst},
438450
{X86::VPMOVSXWQrm, 2, 16, rebuildSExtCst},
451+
{X86::VPMOVZXWQrm, 2, 16, rebuildZExtCst},
439452
{X86::VMOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
440453
{HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
441454
rebuildSplatCst},
442455
{X86::VPMOVSXBWrm, 8, 8, rebuildSExtCst},
456+
{X86::VPMOVZXBWrm, 8, 8, rebuildZExtCst},
443457
{X86::VPMOVSXWDrm, 4, 16, rebuildSExtCst},
444-
{X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst}};
458+
{X86::VPMOVZXWDrm, 4, 16, rebuildZExtCst},
459+
{X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst},
460+
{X86::VPMOVZXDQrm, 2, 32, rebuildZExtCst}};
445461
return FixupConstant(Fixups, 1);
446462
}
447463
case X86::VMOVDQAYrm:
@@ -452,15 +468,21 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
452468
{HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
453469
rebuildSplatCst},
454470
{HasAVX2 ? X86::VPMOVSXBQYrm : 0, 4, 8, rebuildSExtCst},
471+
{HasAVX2 ? X86::VPMOVZXBQYrm : 0, 4, 8, rebuildZExtCst},
455472
{HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
456473
rebuildSplatCst},
457474
{HasAVX2 ? X86::VPMOVSXBDYrm : 0, 8, 8, rebuildSExtCst},
475+
{HasAVX2 ? X86::VPMOVZXBDYrm : 0, 8, 8, rebuildZExtCst},
458476
{HasAVX2 ? X86::VPMOVSXWQYrm : 0, 4, 16, rebuildSExtCst},
477+
{HasAVX2 ? X86::VPMOVZXWQYrm : 0, 4, 16, rebuildZExtCst},
459478
{HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
460479
rebuildSplatCst},
461480
{HasAVX2 ? X86::VPMOVSXBWYrm : 0, 16, 8, rebuildSExtCst},
481+
{HasAVX2 ? X86::VPMOVZXBWYrm : 0, 16, 8, rebuildZExtCst},
462482
{HasAVX2 ? X86::VPMOVSXWDYrm : 0, 8, 16, rebuildSExtCst},
463-
{HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst}};
483+
{HasAVX2 ? X86::VPMOVZXWDYrm : 0, 8, 16, rebuildZExtCst},
484+
{HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst},
485+
{HasAVX2 ? X86::VPMOVZXDQYrm : 0, 4, 32, rebuildZExtCst}};
464486
return FixupConstant(Fixups, 1);
465487
}
466488
case X86::VMOVDQA32Z128rm:
@@ -471,15 +493,21 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
471493
{HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1, 8, rebuildSplatCst},
472494
{HasBWI ? X86::VPBROADCASTWZ128rm : 0, 1, 16, rebuildSplatCst},
473495
{X86::VPMOVSXBQZ128rm, 2, 8, rebuildSExtCst},
496+
{X86::VPMOVZXBQZ128rm, 2, 8, rebuildZExtCst},
474497
{X86::VMOVDI2PDIZrm, 1, 32, rebuildZeroUpperCst},
475498
{X86::VPBROADCASTDZ128rm, 1, 32, rebuildSplatCst},
476499
{X86::VPMOVSXBDZ128rm, 4, 8, rebuildSExtCst},
500+
{X86::VPMOVZXBDZ128rm, 4, 8, rebuildZExtCst},
477501
{X86::VPMOVSXWQZ128rm, 2, 16, rebuildSExtCst},
502+
{X86::VPMOVZXWQZ128rm, 2, 16, rebuildZExtCst},
478503
{X86::VMOVQI2PQIZrm, 1, 64, rebuildZeroUpperCst},
479504
{X86::VPBROADCASTQZ128rm, 1, 64, rebuildSplatCst},
480505
{HasBWI ? X86::VPMOVSXBWZ128rm : 0, 8, 8, rebuildSExtCst},
506+
{HasBWI ? X86::VPMOVZXBWZ128rm : 0, 8, 8, rebuildZExtCst},
481507
{X86::VPMOVSXWDZ128rm, 4, 16, rebuildSExtCst},
482-
{X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst}};
508+
{X86::VPMOVZXWDZ128rm, 4, 16, rebuildZExtCst},
509+
{X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst},
510+
{X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
483511
return FixupConstant(Fixups, 1);
484512
}
485513
case X86::VMOVDQA32Z256rm:
@@ -491,13 +519,19 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
491519
{HasBWI ? X86::VPBROADCASTWZ256rm : 0, 1, 16, rebuildSplatCst},
492520
{X86::VPBROADCASTDZ256rm, 1, 32, rebuildSplatCst},
493521
{X86::VPMOVSXBQZ256rm, 4, 8, rebuildSExtCst},
522+
{X86::VPMOVZXBQZ256rm, 4, 8, rebuildZExtCst},
494523
{X86::VPBROADCASTQZ256rm, 1, 64, rebuildSplatCst},
495524
{X86::VPMOVSXBDZ256rm, 8, 8, rebuildSExtCst},
525+
{X86::VPMOVZXBDZ256rm, 8, 8, rebuildZExtCst},
496526
{X86::VPMOVSXWQZ256rm, 4, 16, rebuildSExtCst},
527+
{X86::VPMOVZXWQZ256rm, 4, 16, rebuildZExtCst},
497528
{X86::VBROADCASTI32X4Z256rm, 1, 128, rebuildSplatCst},
498529
{HasBWI ? X86::VPMOVSXBWZ256rm : 0, 16, 8, rebuildSExtCst},
530+
{HasBWI ? X86::VPMOVZXBWZ256rm : 0, 16, 8, rebuildZExtCst},
499531
{X86::VPMOVSXWDZ256rm, 8, 16, rebuildSExtCst},
500-
{X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst}};
532+
{X86::VPMOVZXWDZ256rm, 8, 16, rebuildZExtCst},
533+
{X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst},
534+
{X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
501535
return FixupConstant(Fixups, 1);
502536
}
503537
case X86::VMOVDQA32Zrm:
@@ -510,13 +544,19 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
510544
{X86::VPBROADCASTDZrm, 1, 32, rebuildSplatCst},
511545
{X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
512546
{X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
547+
{X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
513548
{X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
514549
{X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
550+
{X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
515551
{X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
552+
{X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
516553
{X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
517554
{HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
555+
{HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
518556
{X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
519-
{X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst}};
557+
{X86::VPMOVZXWDZrm, 16, 16, rebuildZExtCst},
558+
{X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst},
559+
{X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
520560
return FixupConstant(Fixups, 1);
521561
}
522562
}

llvm/lib/Target/X86/X86MCInstLower.cpp

+65-15
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,18 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
13881388
return MBBI;
13891389
}
13901390

1391+
// Maps the register class recorded in an operand descriptor to a vector
// width: 128 for VR128/VR128X, 256 for VR256/VR256X, 512 for VR512.
// Any other register class reaches llvm_unreachable.
static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1392+
if (Info.RegClass == X86::VR128RegClassID ||
1393+
Info.RegClass == X86::VR128XRegClassID)
1394+
return 128;
1395+
if (Info.RegClass == X86::VR256RegClassID ||
1396+
Info.RegClass == X86::VR256XRegClassID)
1397+
return 256;
1398+
if (Info.RegClass == X86::VR512RegClassID)
1399+
return 512;
1400+
llvm_unreachable("Unknown register class!");
1401+
}
1402+
13911403
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
13921404
unsigned SrcOp2Idx, ArrayRef<int> Mask) {
13931405
std::string Comment;
@@ -1582,8 +1594,8 @@ static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
15821594
}
15831595
}
15841596

1585-
static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1586-
int SrcEltBits, int DstEltBits) {
1597+
static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1598+
int SrcEltBits, int DstEltBits, bool IsSext) {
15871599
auto *C = X86::getConstantFromPool(*MI, 1);
15881600
if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
15891601
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
@@ -1598,7 +1610,8 @@ static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
15981610
if (i != 0)
15991611
CS << ",";
16001612
if (CDS->getElementType()->isIntegerTy()) {
1601-
APInt Elt = CDS->getElementAsAPInt(i).sext(DstEltBits);
1613+
APInt Elt = CDS->getElementAsAPInt(i);
1614+
Elt = IsSext ? Elt.sext(DstEltBits) : Elt.zext(DstEltBits);
16021615
printConstant(Elt, CS);
16031616
} else
16041617
CS << "?";
@@ -1611,6 +1624,36 @@ static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
16111624

16121625
return false;
16131626
}
1627+
// Sign-extending wrapper: calls printExtend with IsSext = true.
// printExtend's bool result is deliberately discarded, so when it reports
// false this function adds no fallback comment of its own.
static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1628+
int SrcEltBits, int DstEltBits) {
1629+
printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, true);
1630+
}
1631+
// Emits an asm comment describing a zero-extending vector load.
//
// First tries printExtend with IsSext = false; if that returns true, nothing
// more is done. Otherwise builds a shuffle-style comment of the form
//   <dst> = mem[0],zero,...,mem[1],zero,...
// where each destination element is one "mem[I]" entry followed by
// (DstEltBits / SrcEltBits - 1) "zero" entries.
//
// MI          - instruction being annotated; operand 0 is used as the
//               destination register.
// OutStreamer - receives the comment via AddComment.
// SrcEltBits  - element width of the source data.
// DstEltBits  - element width after extension; must divide the destination
//               register width and be a multiple of SrcEltBits (asserted).
static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1632+
int SrcEltBits, int DstEltBits) {
1633+
if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, false))
1634+
return;
1635+
1636+
// We didn't find a constant load, fallback to a shuffle mask decode.
1637+
std::string Comment;
1638+
raw_string_ostream CS(Comment);
1639+
1640+
const MachineOperand &DstOp = MI->getOperand(0);
1641+
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
1642+
1643+
// Width comes from the register class of operand 0 in the instruction
// descriptor (128, 256 or 512).
unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
1644+
assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 &&
1645+
"Illegal extension ratio");
1646+
// NumElts: number of destination elements; Scale: how many source-sized
// slots each destination element occupies.
unsigned NumElts = Width / DstEltBits;
1647+
unsigned Scale = DstEltBits / SrcEltBits;
1648+
for (unsigned I = 0; I != NumElts; ++I) {
1649+
if (I != 0)
1650+
CS << ",";
1651+
CS << "mem[" << I << "]";
1652+
// Starts at 1: the first slot of each element is the loaded value itself.
for (unsigned S = 1; S != Scale; ++S)
1653+
CS << ",zero";
1654+
}
1655+
OutStreamer.AddComment(CS.str());
1656+
}
16141657

16151658
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
16161659
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
@@ -1688,18 +1731,6 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
16881731
}
16891732
}
16901733

1691-
static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1692-
if (Info.RegClass == X86::VR128RegClassID ||
1693-
Info.RegClass == X86::VR128XRegClassID)
1694-
return 128;
1695-
if (Info.RegClass == X86::VR256RegClassID ||
1696-
Info.RegClass == X86::VR256XRegClassID)
1697-
return 256;
1698-
if (Info.RegClass == X86::VR512RegClassID)
1699-
return 512;
1700-
llvm_unreachable("Unknown register class!");
1701-
}
1702-
17031734
static void addConstantComments(const MachineInstr *MI,
17041735
MCStreamer &OutStreamer) {
17051736
switch (MI->getOpcode()) {
@@ -2039,6 +2070,25 @@ static void addConstantComments(const MachineInstr *MI,
20392070
CASE_MOVX_RM(SX, WQ)
20402071
printSignExtend(MI, OutStreamer, 16, 64);
20412072
break;
2073+
2074+
CASE_MOVX_RM(ZX, BD)
2075+
printZeroExtend(MI, OutStreamer, 8, 32);
2076+
break;
2077+
CASE_MOVX_RM(ZX, BQ)
2078+
printZeroExtend(MI, OutStreamer, 8, 64);
2079+
break;
2080+
CASE_MOVX_RM(ZX, BW)
2081+
printZeroExtend(MI, OutStreamer, 8, 16);
2082+
break;
2083+
CASE_MOVX_RM(ZX, DQ)
2084+
printZeroExtend(MI, OutStreamer, 32, 64);
2085+
break;
2086+
CASE_MOVX_RM(ZX, WD)
2087+
printZeroExtend(MI, OutStreamer, 16, 32);
2088+
break;
2089+
CASE_MOVX_RM(ZX, WQ)
2090+
printZeroExtend(MI, OutStreamer, 16, 64);
2091+
break;
20422092
}
20432093
}
20442094

llvm/test/CodeGen/X86/avx2-vector-shifts.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,14 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
6060
; X86-LABEL: test_vpslld_var:
6161
; X86: # %bb.0:
6262
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
63-
; X86-NEXT: vpmovsxwd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
63+
; X86-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
6464
; X86-NEXT: vpslld %xmm0, %ymm1, %ymm0
6565
; X86-NEXT: retl
6666
;
6767
; X64-LABEL: test_vpslld_var:
6868
; X64: # %bb.0:
6969
; X64-NEXT: vmovd %edi, %xmm0
70-
; X64-NEXT: vpmovsxwd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
70+
; X64-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
7171
; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
7272
; X64-NEXT: retq
7373
%amt = insertelement <8 x i32> undef, i32 %shift, i32 0

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
108108
; AVX1-NEXT: vmovd %edi, %xmm0
109109
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
110110
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
111-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
111+
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
112112
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
113113
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
114114
; AVX1-NEXT: retq
@@ -117,7 +117,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
117117
; AVX2: # %bb.0:
118118
; AVX2-NEXT: vmovd %edi, %xmm0
119119
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
120-
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
120+
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
121121
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
122122
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
123123
; AVX2-NEXT: retq
@@ -268,7 +268,7 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
268268
; AVX2: # %bb.0:
269269
; AVX2-NEXT: vmovd %edi, %xmm0
270270
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
271-
; AVX2-NEXT: vpmovsxwd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
271+
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
272272
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
273273
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
274274
; AVX2-NEXT: retq
@@ -445,7 +445,7 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
445445
; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8]
446446
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
447447
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
448-
; AVX2-NEXT: vpmovsxwq {{.*#+}} ymm2 = [16,32,64,128]
448+
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = [16,32,64,128]
449449
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
450450
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
451451
; AVX2-NEXT: retq
@@ -505,10 +505,10 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
505505
; AVX2: # %bb.0:
506506
; AVX2-NEXT: vmovd %edi, %xmm0
507507
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
508-
; AVX2-NEXT: vpmovsxwd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
508+
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
509509
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
510510
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
511-
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
511+
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
512512
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
513513
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
514514
; AVX2-NEXT: retq

0 commit comments

Comments
 (0)