Skip to content

Commit 100828d

Browse files
AMDGPU/GlobalISel: refactor build readfirstlane helpers
Refactor helpers that build readfirstlane for input registers. Required by upcoming patches that need to build readfirstlane for output registers.
1 parent 3bc86bf commit 100828d

File tree

2 files changed

+73
-38
lines changed

2 files changed

+73
-38
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -700,58 +700,75 @@ static LLT getHalfSizedType(LLT Ty) {
700700

701701
// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
702702
// source value into a scalar register.
703-
Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
704-
MachineRegisterInfo &MRI,
705-
Register Src) const {
703+
Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc(MachineIRBuilder &B,
704+
Register Src) const {
705+
MachineRegisterInfo &MRI = *B.getMRI();
706706
LLT Ty = MRI.getType(Src);
707707
const RegisterBank *Bank = getRegBank(Src, MRI, *TRI);
708708

709-
if (Bank == &AMDGPU::SGPRRegBank)
710-
return Src;
711-
712-
unsigned Bits = Ty.getSizeInBits();
713-
assert(Bits % 32 == 0);
714-
715709
if (Bank != &AMDGPU::VGPRRegBank) {
716710
// We need to copy from AGPR to VGPR
717711
Src = B.buildCopy(Ty, Src).getReg(0);
718712
MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
719713
}
720714

715+
Register Dst = MRI.createGenericVirtualRegister(Ty);
716+
MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
717+
buildReadFirstLaneForType(B, Dst, Src);
718+
return Dst;
719+
}
720+
721+
void AMDGPURegisterBankInfo::buildReadFirstLaneB32(MachineIRBuilder &B,
722+
Register SgprDst,
723+
Register VgprSrc) const {
724+
MachineRegisterInfo &MRI = *B.getMRI();
725+
B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {SgprDst}, {VgprSrc});
726+
MRI.setRegClass(VgprSrc, &AMDGPU::VGPR_32RegClass);
727+
MRI.setRegClass(SgprDst, &AMDGPU::SReg_32RegClass);
728+
}
729+
730+
void AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32(
731+
MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
732+
unsigned NumElts) const {
733+
MachineRegisterInfo &MRI = *B.getMRI();
721734
LLT S32 = LLT::scalar(32);
722-
unsigned NumParts = Bits / 32;
723-
SmallVector<Register, 8> SrcParts;
724-
SmallVector<Register, 8> DstParts;
735+
SmallVector<Register, 8> VgprSrcParts;
736+
SmallVector<Register, 8> SgprDstParts;
725737

726-
if (Bits == 32) {
727-
SrcParts.push_back(Src);
728-
} else {
729-
auto Unmerge = B.buildUnmerge(S32, Src);
730-
for (unsigned i = 0; i < NumParts; ++i)
731-
SrcParts.push_back(Unmerge.getReg(i));
738+
for (unsigned i = 0; i < NumElts; ++i) {
739+
VgprSrcParts.push_back(MRI.createGenericVirtualRegister(S32));
740+
SgprDstParts.push_back(MRI.createGenericVirtualRegister(S32));
732741
}
733742

734-
for (unsigned i = 0; i < NumParts; ++i) {
735-
Register SrcPart = SrcParts[i];
736-
Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
737-
MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
743+
B.buildUnmerge(VgprSrcParts, VgprSrc);
744+
for (unsigned i = 0; i < NumElts; ++i) {
745+
buildReadFirstLaneB32(B, SgprDstParts[i], VgprSrcParts[i]);
746+
}
747+
B.buildMergeLikeInstr(SgprDst, SgprDstParts);
748+
}
738749

739-
const TargetRegisterClass *Constrained =
740-
constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
741-
(void)Constrained;
742-
assert(Constrained && "Failed to constrain readfirstlane src reg");
750+
void AMDGPURegisterBankInfo::buildReadFirstLaneForType(MachineIRBuilder &B,
751+
Register SgprDst,
752+
Register VgprSrc) const {
753+
MachineRegisterInfo &MRI = *B.getMRI();
754+
LLT S32 = LLT::scalar(32);
755+
LLT S64 = LLT::scalar(64);
756+
LLT Ty = MRI.getType(SgprDst);
743757

744-
B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
758+
if (Ty == S32 || Ty == LLT::pointer(3, 32)) {
759+
return buildReadFirstLaneB32(B, SgprDst, VgprSrc);
760+
}
745761

746-
DstParts.push_back(DstPart);
762+
if (Ty == S64 || Ty == LLT::pointer(0, 64) || Ty == LLT::pointer(1, 64)) {
763+
return buildReadFirstLaneSequenceOfB32(B, SgprDst, VgprSrc, 2);
747764
}
748765

749-
if (Bits == 32)
750-
return DstParts[0];
766+
if (Ty.isVector() && Ty.getElementType() == S32) {
767+
return buildReadFirstLaneSequenceOfB32(B, SgprDst, VgprSrc,
768+
Ty.getNumElements());
769+
}
751770

752-
Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753-
MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
754-
return Dst;
771+
llvm_unreachable("Type not supported");
755772
}
756773

757774
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
@@ -888,7 +905,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
888905
B.setMBB(*LoopBB);
889906
}
890907

891-
Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg);
908+
Register CurrentLaneReg = buildReadFirstLaneSrc(B, OpReg);
892909

893910
// Build the comparison(s).
894911
unsigned OpSize = OpTy.getSizeInBits();
@@ -1020,7 +1037,7 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
10201037
if (Bank == &AMDGPU::SGPRRegBank)
10211038
return;
10221039

1023-
Reg = buildReadFirstLane(B, MRI, Reg);
1040+
Reg = buildReadFirstLaneSrc(B, Reg);
10241041
MI.getOperand(OpIdx).setReg(Reg);
10251042
}
10261043

@@ -1603,7 +1620,7 @@ bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
16031620
MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
16041621

16051622
if (!DstOnValu) {
1606-
DstHi = buildReadFirstLane(B, MRI, DstHi);
1623+
DstHi = buildReadFirstLaneSrc(B, DstHi);
16071624
} else {
16081625
MulHiInVgpr = true;
16091626
}

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,17 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
5757
iterator_range<MachineBasicBlock::iterator> Range,
5858
SmallSet<Register, 4> &SGPROperandRegs) const;
5959

60-
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI,
61-
Register Src) const;
60+
Register buildReadFirstLaneSrc(MachineIRBuilder &B, Register Src) const;
61+
62+
void buildReadFirstLaneForType(MachineIRBuilder &B, Register SgprDst,
63+
Register VgprSrc) const;
64+
65+
void buildReadFirstLaneB32(MachineIRBuilder &B, Register SgprDst,
66+
Register VgprSrc) const;
67+
68+
void buildReadFirstLaneSequenceOfB32(MachineIRBuilder &B, Register SgprDst,
69+
Register VgprSrc,
70+
unsigned NumElts) const;
6271

6372
bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI,
6473
ArrayRef<unsigned> OpIndices) const;
@@ -116,6 +125,12 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
116125
const MachineRegisterInfo &MRI,
117126
const TargetRegisterInfo &TRI) const;
118127

128+
// Return a value mapping for an operand that is the same as its already
129+
// assigned reg bank, or that corresponds to its assigned register class + LLT
130+
const ValueMapping *
131+
getPreAssignedOpMapping(Register Reg, const MachineRegisterInfo &MRI,
132+
const TargetRegisterInfo &TRI) const;
133+
119134
// Return a value mapping for an operand that is required to be an AGPR.
120135
const ValueMapping *getAGPROpMapping(Register Reg,
121136
const MachineRegisterInfo &MRI,
@@ -155,6 +170,9 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
155170

156171
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
157172
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
173+
const InstructionMapping &
174+
getDefaultMappingVOPWithPreassignedDef(const MachineInstr &MI) const;
175+
158176
const InstructionMapping &getDefaultMappingAllVGPR(
159177
const MachineInstr &MI) const;
160178

0 commit comments

Comments
 (0)