Skip to content

Commit 9829598

Browse files
[AArch64][SME2] Extend getRegAllocationHints for ZPRStridedOrContiguousReg (#119865)
ZPR2StridedOrContiguous loads used by a FORM_TRANSPOSED_REG_TUPLE pseudo should attempt to assign a strided register to avoid unnecessary copies, even though this may overlap with the list of SVE callee-saved registers.
1 parent 2210da3 commit 9829598

File tree

3 files changed

+856
-44
lines changed

3 files changed

+856
-44
lines changed

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1099,6 +1099,36 @@ bool AArch64RegisterInfo::getRegAllocationHints(
10991099
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
11001100
const MachineRegisterInfo &MRI = MF.getRegInfo();
11011101

1102+
// The SVE calling convention preserves registers Z8-Z23. As a result, there
1103+
// are no ZPR2Strided or ZPR4Strided registers that do not overlap with the
1104+
// callee-saved registers and so by default these will be pushed to the back
1105+
// of the allocation order for the ZPRStridedOrContiguous classes.
1106+
// If VirtReg is used by a FORM_TRANSPOSED_REG_TUPLE_X2/X4 pseudo
1107+
// instruction, we want to favour reducing copy
1108+
// instructions over reducing the number of clobbered callee-save registers,
1109+
// so we add the strided registers as a hint.
1110+
unsigned RegID = MRI.getRegClass(VirtReg)->getID();
1111+
// Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
1112+
if ((RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
1113+
RegID == AArch64::ZPR4StridedOrContiguousRegClassID) &&
1114+
any_of(MRI.use_nodbg_instructions(VirtReg), [](const MachineInstr &Use) {
1115+
return Use.getOpcode() ==
1116+
AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
1117+
Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
1118+
})) {
1119+
const TargetRegisterClass *StridedRC =
1120+
RegID == AArch64::ZPR2StridedOrContiguousRegClassID
1121+
? &AArch64::ZPR2StridedRegClass
1122+
: &AArch64::ZPR4StridedRegClass;
1123+
1124+
for (MCPhysReg Reg : Order)
1125+
if (StridedRC->contains(Reg))
1126+
Hints.push_back(Reg);
1127+
1128+
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
1129+
VRM);
1130+
}
1131+
11021132
for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
11031133
if (MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
11041134
MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)

0 commit comments

Comments
 (0)