Skip to content

Commit 1fec092

Browse files
authored
[AArch64][GlobalISel] Allow selecting FPR index loads. (#143835)
We can, through legalization of certain operations, end up generating G_INDEXED_LOAD into FPR registers that require extensions. SExt and ZExt will always opt for GPR, but anyext/noext can currently be set to FPR registers in regbankselect. As writing a subregister will set higher bits in the same register to 0, we can successfully handle zext and anyext on FPR registers, which is what this patch attempts to add.
1 parent cb4f329 commit 1fec092

File tree

3 files changed

+401
-12
lines changed

3 files changed

+401
-12
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5553,9 +5553,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55535553
unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
55545554
bool IsPre = ExtLd.isPre();
55555555
bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5556-
bool InsertIntoXReg = false;
5556+
unsigned InsertIntoSubReg = 0;
55575557
bool IsDst64 = Ty.getSizeInBits() == 64;
55585558

5559+
// ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5560+
// long as they are scalar.
5561+
bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5562+
if ((IsSExt && IsFPR) || Ty.isVector())
5563+
return false;
5564+
55595565
unsigned Opc = 0;
55605566
LLT NewLdDstTy;
55615567
LLT s32 = LLT::scalar(32);
@@ -5568,9 +5574,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55685574
else
55695575
Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
55705576
NewLdDstTy = IsDst64 ? s64 : s32;
5577+
} else if (IsFPR) {
5578+
Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5579+
InsertIntoSubReg = AArch64::bsub;
5580+
NewLdDstTy = LLT::scalar(MemSizeBits);
55715581
} else {
55725582
Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5573-
InsertIntoXReg = IsDst64;
5583+
InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
55745584
NewLdDstTy = s32;
55755585
}
55765586
} else if (MemSizeBits == 16) {
@@ -5580,27 +5590,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55805590
else
55815591
Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
55825592
NewLdDstTy = IsDst64 ? s64 : s32;
5593+
} else if (IsFPR) {
5594+
Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5595+
InsertIntoSubReg = AArch64::hsub;
5596+
NewLdDstTy = LLT::scalar(MemSizeBits);
55835597
} else {
55845598
Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5585-
InsertIntoXReg = IsDst64;
5599+
InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
55865600
NewLdDstTy = s32;
55875601
}
55885602
} else if (MemSizeBits == 32) {
55895603
if (IsSExt) {
55905604
Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
55915605
NewLdDstTy = s64;
5606+
} else if (IsFPR) {
5607+
Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5608+
InsertIntoSubReg = AArch64::ssub;
5609+
NewLdDstTy = LLT::scalar(MemSizeBits);
55925610
} else {
55935611
Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5594-
InsertIntoXReg = IsDst64;
5612+
InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
55955613
NewLdDstTy = s32;
55965614
}
55975615
} else {
55985616
llvm_unreachable("Unexpected size for indexed load");
55995617
}
56005618

5601-
if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5602-
return false; // We should be on gpr.
5603-
56045619
auto Cst = getIConstantVRegVal(Offset, MRI);
56055620
if (!Cst)
56065621
return false; // Shouldn't happen, but just in case.
@@ -5610,15 +5625,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
56105625
LdMI.cloneMemRefs(ExtLd);
56115626
constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
56125627
// Make sure to select the load with the MemTy as the dest type, and then
5613-
// insert into X reg if needed.
5614-
if (InsertIntoXReg) {
5628+
// insert into a larger reg if needed.
5629+
if (InsertIntoSubReg) {
56155630
// Generate a SUBREG_TO_REG.
56165631
auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
56175632
.addImm(0)
56185633
.addUse(LdMI.getReg(1))
5619-
.addImm(AArch64::sub_32);
5620-
RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5621-
MRI);
5634+
.addImm(InsertIntoSubReg);
5635+
RBI.constrainGenericRegister(
5636+
SubToReg.getReg(0),
5637+
*getRegClassForTypeOnBank(MRI.getType(Dst),
5638+
*RBI.getRegBank(Dst, MRI, TRI)),
5639+
MRI);
56225640
} else {
56235641
auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
56245642
selectCopy(*Copy, TII, MRI, TRI, RBI);

0 commit comments

Comments
 (0)