Skip to content

Commit 7b9b674

Browse files
MacDuetstellar
authored and committed
[AArch64][SME] Reduce ptrue count when filling p-regs from z-regs (llvm#125523)
Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
1 parent 1951944 commit 7b9b674

File tree

2 files changed

+34
-74
lines changed

2 files changed

+34
-74
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 32 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4175,7 +4175,10 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
41754175
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
41764176
/// registers in \p UsedRegs.
41774177
static Register tryScavengeRegister(LiveRegUnits const &UsedRegs,
4178-
BitVector const &ScavengeableRegs) {
4178+
BitVector const &ScavengeableRegs,
4179+
Register PreferredReg) {
4180+
if (PreferredReg != AArch64::NoRegister && UsedRegs.available(PreferredReg))
4181+
return PreferredReg;
41794182
for (auto Reg : ScavengeableRegs.set_bits()) {
41804183
if (UsedRegs.available(Reg))
41814184
return Reg;
@@ -4212,11 +4215,12 @@ struct ScopedScavengeOrSpill {
42124215
Register SpillCandidate, const TargetRegisterClass &RC,
42134216
LiveRegUnits const &UsedRegs,
42144217
BitVector const &AllocatableRegs,
4215-
std::optional<int> *MaybeSpillFI)
4218+
std::optional<int> *MaybeSpillFI,
4219+
Register PreferredReg = AArch64::NoRegister)
42164220
: MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
42174221
*MF.getSubtarget().getInstrInfo())),
42184222
TRI(*MF.getSubtarget().getRegisterInfo()) {
4219-
FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
4223+
FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs, PreferredReg);
42204224
if (FreeReg != AArch64::NoRegister)
42214225
return;
42224226
assert(MaybeSpillFI && "Expected emergency spill slot FI information "
@@ -4331,12 +4335,10 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
43314335
/// spilling if necessary). If the status flags are in use at the point of
43324336
/// expansion they are preserved (by moving them to/from a GPR). This may cause
43334337
/// an additional spill if no GPR is free at the expansion point.
4334-
static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
4335-
MachineInstr &MI,
4336-
const TargetRegisterInfo &TRI,
4337-
LiveRegUnits const &UsedRegs,
4338-
ScavengeableRegs const &SR,
4339-
EmergencyStackSlots &SpillSlots) {
4338+
static bool expandFillPPRFromZPRSlotPseudo(
4339+
MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
4340+
LiveRegUnits const &UsedRegs, ScavengeableRegs const &SR,
4341+
MachineInstr *&LastPTrue, EmergencyStackSlots &SpillSlots) {
43404342
MachineFunction &MF = *MBB.getParent();
43414343
auto *TII =
43424344
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -4347,7 +4349,9 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
43474349

43484350
ScopedScavengeOrSpill PredReg(
43494351
MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
4350-
isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);
4352+
isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI,
4353+
/*PreferredReg=*/
4354+
LastPTrue ? LastPTrue->getOperand(0).getReg() : AArch64::NoRegister);
43514355

43524356
// Elide NZCV spills if we know it is not used.
43534357
bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
@@ -4371,9 +4375,17 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
43714375
.addImm(AArch64SysReg::NZCV)
43724376
.addReg(AArch64::NZCV, RegState::Implicit)
43734377
.getInstr());
4374-
MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
4375-
.addReg(*PredReg, RegState::Define)
4376-
.addImm(31));
4378+
4379+
// Reuse previous ptrue if we know it has not been clobbered.
4380+
if (LastPTrue) {
4381+
assert(*PredReg == LastPTrue->getOperand(0).getReg());
4382+
LastPTrue->moveBefore(&MI);
4383+
} else {
4384+
LastPTrue = BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
4385+
.addReg(*PredReg, RegState::Define)
4386+
.addImm(31);
4387+
}
4388+
MachineInstrs.push_back(LastPTrue);
43774389
MachineInstrs.push_back(
43784390
BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
43794391
.addReg(MI.getOperand(0).getReg(), RegState::Define)
@@ -4402,19 +4414,24 @@ static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
44024414
LiveRegUnits UsedRegs(TRI);
44034415
UsedRegs.addLiveOuts(MBB);
44044416
bool HasPPRSpills = false;
4417+
MachineInstr *LastPTrue = nullptr;
44054418
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
44064419
UsedRegs.stepBackward(MI);
44074420
switch (MI.getOpcode()) {
44084421
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4422+
if (LastPTrue &&
4423+
MI.definesRegister(LastPTrue->getOperand(0).getReg(), &TRI))
4424+
LastPTrue = nullptr;
44094425
HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR,
4410-
SpillSlots);
4426+
LastPTrue, SpillSlots);
44114427
MI.eraseFromParent();
44124428
break;
44134429
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
44144430
expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots);
44154431
MI.eraseFromParent();
4416-
break;
4432+
[[fallthrough]];
44174433
default:
4434+
LastPTrue = nullptr;
44184435
break;
44194436
}
44204437
}

0 commit comments

Comments
 (0)