Skip to content

Commit 860f9e5

Browse files
authored
[NFC][X86] Reorder the registers to reduce unnecessary iterations (#70222)
* Introduce field `PositionOrder` for class `Register` and `RegisterTuples`.
* If register A's `PositionOrder` < register B's `PositionOrder`, then A is placed before B in the enum in X86GenRegisterInfo.inc.
* The new order of registers in the enum for X86 will be:
  1. Registers before AVX512
  2. AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  3. AMX registers (TMM)
  4. APX registers (R16-R31)
* Add a new target hook `getNumSupportedRegs()` to return the number of registers for the function (may overestimate).
* Replace `getNumRegs()` with `getNumSupportedRegs()` in LiveVariables to eliminate iterations on unsupported registers.

This patch can reduce the 0.3% instruction count regression for sqlite3 during compile-stage (O3) by not iterating on APX registers for #67702.
1 parent e2550b7 commit 860f9e5

File tree

13 files changed

+166
-60
lines changed

13 files changed

+166
-60
lines changed

llvm/include/llvm/CodeGen/LiveVariables.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ class LiveVariables : public MachineFunctionPass {
147147
bool HandlePhysRegKill(Register Reg, MachineInstr *MI);
148148

149149
/// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask.
150-
void HandleRegMask(const MachineOperand&);
150+
void HandleRegMask(const MachineOperand &, unsigned);
151151

152152
void HandlePhysRegUse(Register Reg, MachineInstr &MI);
153153
void HandlePhysRegDef(Register Reg, MachineInstr *MI,
@@ -170,7 +170,8 @@ class LiveVariables : public MachineFunctionPass {
170170
/// is coming from.
171171
void analyzePHINodes(const MachineFunction& Fn);
172172

173-
void runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs);
173+
void runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs,
174+
unsigned NumRegs);
174175

175176
void runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs);
176177
public:

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
266266
virtual ~TargetRegisterInfo();
267267

268268
public:
269+
/// Return the number of registers for the function (may be an overestimate).
270+
virtual unsigned getNumSupportedRegs(const MachineFunction &) const {
271+
return getNumRegs();
272+
}
273+
269274
// Register numbers can represent physical registers, virtual registers, and
270275
// sometimes stack slots. The unsigned values are divided into these ranges:
271276
//

llvm/include/llvm/TableGen/Record.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,6 +2154,11 @@ struct LessRecordRegister {
21542154
};
21552155

21562156
bool operator()(const Record *Rec1, const Record *Rec2) const {
2157+
int64_t LHSPositionOrder = Rec1->getValueAsInt("PositionOrder");
2158+
int64_t RHSPositionOrder = Rec2->getValueAsInt("PositionOrder");
2159+
if (LHSPositionOrder != RHSPositionOrder)
2160+
return LHSPositionOrder < RHSPositionOrder;
2161+
21572162
RecordParts LHSParts(StringRef(Rec1->getName()));
21582163
RecordParts RHSParts(StringRef(Rec2->getName()));
21592164

llvm/include/llvm/Target/Target.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,10 @@ class Register<string n, list<string> altNames = []> {
205205
// isConstant - This register always holds a constant value (e.g. the zero
206206
// register in architectures such as MIPS)
207207
bit isConstant = false;
208+
209+
/// PositionOrder - Instructs TableGen to place newly added registers at a later
210+
/// position, so that iterating over them can be avoided on unsupported targets.
211+
int PositionOrder = 0;
208212
}
209213

210214
// RegisterWithSubRegs - This can be used to define instances of Register which
@@ -417,6 +421,10 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs,
417421

418422
// List of asm names for the generated tuple registers.
419423
list<string> RegAsmNames = RegNames;
424+
425+
// PositionOrder - Instructs TableGen to place newly added registers at a later
426+
// position, so that iterating over them can be avoided on unsupported targets.
427+
int PositionOrder = 0;
420428
}
421429

422430
// RegisterCategory - This class is a list of RegisterClasses that belong to a

llvm/lib/CodeGen/LiveVariables.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -406,11 +406,11 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
406406
return true;
407407
}
408408

409-
void LiveVariables::HandleRegMask(const MachineOperand &MO) {
409+
void LiveVariables::HandleRegMask(const MachineOperand &MO, unsigned NumRegs) {
410410
// Call HandlePhysRegKill() for all live registers clobbered by Mask.
411411
// Clobbered registers are always dead, so there is no need to use
412412
// HandlePhysRegDef().
413-
for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
413+
for (unsigned Reg = 1; Reg != NumRegs; ++Reg) {
414414
// Skip dead regs.
415415
if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
416416
continue;
@@ -421,7 +421,8 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
421421
// This avoids needless implicit operands.
422422
unsigned Super = Reg;
423423
for (MCPhysReg SR : TRI->superregs(Reg))
424-
if ((PhysRegDef[SR] || PhysRegUse[SR]) && MO.clobbersPhysReg(SR))
424+
if (SR < NumRegs && (PhysRegDef[SR] || PhysRegUse[SR]) &&
425+
MO.clobbersPhysReg(SR))
425426
Super = SR;
426427
HandlePhysRegKill(Super, nullptr);
427428
}
@@ -478,7 +479,8 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
478479
}
479480

480481
void LiveVariables::runOnInstr(MachineInstr &MI,
481-
SmallVectorImpl<unsigned> &Defs) {
482+
SmallVectorImpl<unsigned> &Defs,
483+
unsigned NumRegs) {
482484
assert(!MI.isDebugOrPseudoInstr());
483485
// Process all of the operands of the instruction...
484486
unsigned NumOperandsToProcess = MI.getNumOperands();
@@ -527,7 +529,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
527529

528530
// Process all masked registers. (Call clobbers).
529531
for (unsigned Mask : RegMasks)
530-
HandleRegMask(MI.getOperand(Mask));
532+
HandleRegMask(MI.getOperand(Mask), NumRegs);
531533

532534
// Process all defs.
533535
for (unsigned MOReg : DefRegs) {
@@ -539,7 +541,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
539541
UpdatePhysRegDefs(MI, Defs);
540542
}
541543

542-
void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
544+
void LiveVariables::runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs) {
543545
// Mark live-in registers as live-in.
544546
SmallVector<unsigned, 4> Defs;
545547
for (const auto &LI : MBB->liveins()) {
@@ -556,7 +558,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
556558
continue;
557559
DistanceMap.insert(std::make_pair(&MI, Dist++));
558560

559-
runOnInstr(MI, Defs);
561+
runOnInstr(MI, Defs, NumRegs);
560562
}
561563

562564
// Handle any virtual assignments from PHI nodes which might be at the
@@ -597,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
597599
MRI = &mf.getRegInfo();
598600
TRI = MF->getSubtarget().getRegisterInfo();
599601

600-
const unsigned NumRegs = TRI->getNumRegs();
602+
const unsigned NumRegs = TRI->getNumSupportedRegs(mf);
601603
PhysRegDef.assign(NumRegs, nullptr);
602604
PhysRegUse.assign(NumRegs, nullptr);
603605
PHIVarInfo.resize(MF->getNumBlockIDs());

llvm/lib/Target/X86/AsmParser/X86Operand.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -357,28 +357,28 @@ struct X86Operand final : public MCParsedAsmOperand {
357357
}
358358

359359
bool isMem64_RC128X() const {
360-
return isMem64() && isMemIndexReg(X86::XMM0, X86::XMM31);
360+
return isMem64() && X86II::isXMMReg(Mem.IndexReg);
361361
}
362362
bool isMem128_RC128X() const {
363-
return isMem128() && isMemIndexReg(X86::XMM0, X86::XMM31);
363+
return isMem128() && X86II::isXMMReg(Mem.IndexReg);
364364
}
365365
bool isMem128_RC256X() const {
366-
return isMem128() && isMemIndexReg(X86::YMM0, X86::YMM31);
366+
return isMem128() && X86II::isYMMReg(Mem.IndexReg);
367367
}
368368
bool isMem256_RC128X() const {
369-
return isMem256() && isMemIndexReg(X86::XMM0, X86::XMM31);
369+
return isMem256() && X86II::isXMMReg(Mem.IndexReg);
370370
}
371371
bool isMem256_RC256X() const {
372-
return isMem256() && isMemIndexReg(X86::YMM0, X86::YMM31);
372+
return isMem256() && X86II::isYMMReg(Mem.IndexReg);
373373
}
374374
bool isMem256_RC512() const {
375-
return isMem256() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
375+
return isMem256() && X86II::isZMMReg(Mem.IndexReg);
376376
}
377377
bool isMem512_RC256X() const {
378-
return isMem512() && isMemIndexReg(X86::YMM0, X86::YMM31);
378+
return isMem512() && X86II::isYMMReg(Mem.IndexReg);
379379
}
380380
bool isMem512_RC512() const {
381-
return isMem512() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
381+
return isMem512() && X86II::isZMMReg(Mem.IndexReg);
382382
}
383383
bool isMem512_GR16() const {
384384
if (!isMem512())

llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,11 +1182,39 @@ namespace X86II {
11821182
}
11831183
}
11841184

1185+
/// \returns true if the register is an XMM.
1186+
inline bool isXMMReg(unsigned RegNo) {
1187+
assert(X86::XMM15 - X86::XMM0 == 15 &&
1188+
"XMM0-15 registers are not continuous");
1189+
assert(X86::XMM31 - X86::XMM16 == 15 &&
1190+
"XMM16-31 registers are not continuous");
1191+
return (RegNo >= X86::XMM0 && RegNo <= X86::XMM15) ||
1192+
(RegNo >= X86::XMM16 && RegNo <= X86::XMM31);
1193+
}
1194+
1195+
/// \returns true if the register is a YMM.
1196+
inline bool isYMMReg(unsigned RegNo) {
1197+
assert(X86::YMM15 - X86::YMM0 == 15 &&
1198+
"YMM0-15 registers are not continuous");
1199+
assert(X86::YMM31 - X86::YMM16 == 15 &&
1200+
"YMM16-31 registers are not continuous");
1201+
return (RegNo >= X86::YMM0 && RegNo <= X86::YMM15) ||
1202+
(RegNo >= X86::YMM16 && RegNo <= X86::YMM31);
1203+
}
1204+
1205+
/// \returns true if the register is a ZMM.
1206+
inline bool isZMMReg(unsigned RegNo) {
1207+
assert(X86::ZMM31 - X86::ZMM0 == 31 && "ZMM registers are not continuous");
1208+
return RegNo >= X86::ZMM0 && RegNo <= X86::ZMM31;
1209+
}
1210+
11851211
/// \returns true if the MachineOperand is an x86-64 extended (r8 or
11861212
/// higher) register, e.g. r8, xmm8, xmm13, etc.
11871213
inline bool isX86_64ExtendedReg(unsigned RegNo) {
1188-
if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM31) ||
1189-
(RegNo >= X86::YMM8 && RegNo <= X86::YMM31) ||
1214+
if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM15) ||
1215+
(RegNo >= X86::XMM16 && RegNo <= X86::XMM31) ||
1216+
(RegNo >= X86::YMM8 && RegNo <= X86::YMM15) ||
1217+
(RegNo >= X86::YMM16 && RegNo <= X86::YMM31) ||
11901218
(RegNo >= X86::ZMM8 && RegNo <= X86::ZMM31))
11911219
return true;
11921220

llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,11 @@ using namespace llvm;
234234
CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
235235

236236
static unsigned getVectorRegSize(unsigned RegNo) {
237-
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
237+
if (X86II::isZMMReg(RegNo))
238238
return 512;
239-
if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
239+
if (X86II::isYMMReg(RegNo))
240240
return 256;
241-
if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
241+
if (X86II::isXMMReg(RegNo))
242242
return 128;
243243
if (X86::MM0 <= RegNo && RegNo <= X86::MM7)
244244
return 64;

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -604,8 +604,9 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
604604
}
605605
}
606606
if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
607-
for (unsigned n = 16; n != 32; ++n) {
608-
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
607+
for (unsigned n = 0; n != 16; ++n) {
608+
for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
609+
++AI)
609610
Reserved.set(*AI);
610611
}
611612
}
@@ -616,6 +617,26 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
616617
return Reserved;
617618
}
618619

620+
unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
621+
// All existing Intel CPUs that support AMX support AVX512 and all existing
622+
// Intel CPUs that support APX support AMX. AVX512 implies AVX.
623+
//
624+
// We enumerate the registers in X86GenRegisterInfo.inc in this order:
625+
//
626+
// Registers before AVX512,
627+
// AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
628+
// AMX registers (TMM)
629+
// APX registers (R16-R31)
630+
//
631+
// and try to return the minimum number of registers supported by the target.
632+
633+
assert((X86::R15WH + 1 == X86 ::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
634+
(X86::K6_K7 + 1 == X86::TMMCFG) &&
635+
(X86::TMM7 + 1 == X86::NUM_TARGET_REGS) &&
636+
"Register number may be incorrect");
637+
return X86::NUM_TARGET_REGS;
638+
}
639+
619640
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
620641
MCRegister Reg) const {
621642
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();

llvm/lib/Target/X86/X86RegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
5151
public:
5252
explicit X86RegisterInfo(const Triple &TT);
5353

54+
/// Return the number of registers for the function.
55+
unsigned getNumSupportedRegs(const MachineFunction &MF) const override;
56+
5457
// FIXME: This should be tablegen'd like getDwarfRegNum is
5558
int getSEHRegNum(unsigned i) const;
5659

0 commit comments

Comments
 (0)