Skip to content

Commit c762f62

Browse files
AMDGPU/GlobalISel: refactor build readfirstlane helpers
Refactor helpers that build readfirstlane for input registers. Required by upcoming patches that need to build readfirstlane for output registers.
1 parent d1a69e4 commit c762f62

File tree

2 files changed

+71
-38
lines changed

2 files changed

+71
-38
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -700,58 +700,69 @@ static LLT getHalfSizedType(LLT Ty) {
700700

701701
// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
702702
// source value into a scalar register.
703-
Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
704-
MachineRegisterInfo &MRI,
705-
Register Src) const {
703+
Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc(MachineIRBuilder &B,
704+
Register Src) const {
705+
MachineRegisterInfo &MRI = *B.getMRI();
706706
LLT Ty = MRI.getType(Src);
707707
const RegisterBank *Bank = getRegBank(Src, MRI, *TRI);
708708

709-
if (Bank == &AMDGPU::SGPRRegBank)
710-
return Src;
711-
712-
unsigned Bits = Ty.getSizeInBits();
713-
assert(Bits % 32 == 0);
714-
715709
if (Bank != &AMDGPU::VGPRRegBank) {
716710
// We need to copy from AGPR to VGPR
717711
Src = B.buildCopy(Ty, Src).getReg(0);
718712
MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
719713
}
720714

715+
return buildReadFirstLaneForType(B, Ty, Src).getReg(0);
716+
}
717+
718+
MachineInstrBuilder AMDGPURegisterBankInfo::buildReadFirstLaneB32(
719+
MachineIRBuilder &B, const DstOp &SgprDst, const SrcOp &VgprSrc) const {
720+
MachineRegisterInfo &MRI = *B.getMRI();
721+
auto RFL = B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {SgprDst}, {VgprSrc});
722+
MRI.setRegClass(RFL.getReg(0), &AMDGPU::SReg_32RegClass);
723+
MRI.setRegClass(RFL.getReg(1), &AMDGPU::VGPR_32RegClass);
724+
return RFL;
725+
}
726+
727+
MachineInstrBuilder AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32(
728+
MachineIRBuilder &B, const DstOp &SgprDst, const SrcOp &VgprSrc,
729+
unsigned NumElts) const {
730+
MachineRegisterInfo &MRI = *B.getMRI();
721731
LLT S32 = LLT::scalar(32);
722-
unsigned NumParts = Bits / 32;
723-
SmallVector<Register, 8> SrcParts;
724-
SmallVector<Register, 8> DstParts;
732+
SmallVector<Register, 8> SgprDstParts;
725733

726-
if (Bits == 32) {
727-
SrcParts.push_back(Src);
728-
} else {
729-
auto Unmerge = B.buildUnmerge(S32, Src);
730-
for (unsigned i = 0; i < NumParts; ++i)
731-
SrcParts.push_back(Unmerge.getReg(i));
734+
auto Unmerge = B.buildUnmerge(S32, VgprSrc);
735+
for (unsigned i = 0; i < NumElts; ++i) {
736+
SgprDstParts.push_back(
737+
buildReadFirstLaneB32(B, S32, Unmerge.getReg(i)).getReg(0));
732738
}
733739

734-
for (unsigned i = 0; i < NumParts; ++i) {
735-
Register SrcPart = SrcParts[i];
736-
Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
737-
MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
740+
auto Merge = B.buildMergeLikeInstr(SgprDst, SgprDstParts);
741+
MRI.setRegBank(Merge.getReg(0), AMDGPU::SGPRRegBank);
742+
return Merge;
743+
}
738744

739-
const TargetRegisterClass *Constrained =
740-
constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
741-
(void)Constrained;
742-
assert(Constrained && "Failed to constrain readfirstlane src reg");
745+
MachineInstrBuilder AMDGPURegisterBankInfo::buildReadFirstLaneForType(
746+
MachineIRBuilder &B, const DstOp &SgprDst, const SrcOp &VgprSrc) const {
747+
MachineRegisterInfo &MRI = *B.getMRI();
748+
LLT S32 = LLT::scalar(32);
749+
LLT S64 = LLT::scalar(64);
750+
LLT Ty = SgprDst.getLLTTy(MRI);
743751

744-
B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
752+
if (Ty == S32 || (Ty.isPointer() && Ty.getSizeInBits() == 32)) {
753+
return buildReadFirstLaneB32(B, SgprDst, VgprSrc);
754+
}
745755

746-
DstParts.push_back(DstPart);
756+
if (Ty == S64 || (Ty.isPointer() && Ty.getSizeInBits() == 64)) {
757+
return buildReadFirstLaneSequenceOfB32(B, SgprDst, VgprSrc, 2);
747758
}
748759

749-
if (Bits == 32)
750-
return DstParts[0];
760+
if (Ty.isVector() && Ty.getElementType() == S32) {
761+
return buildReadFirstLaneSequenceOfB32(B, SgprDst, VgprSrc,
762+
Ty.getNumElements());
763+
}
751764

752-
Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753-
MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
754-
return Dst;
765+
llvm_unreachable("Type not supported");
755766
}
756767

757768
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
@@ -888,7 +899,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
888899
B.setMBB(*LoopBB);
889900
}
890901

891-
Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg);
902+
Register CurrentLaneReg = buildReadFirstLaneSrc(B, OpReg);
892903

893904
// Build the comparison(s).
894905
unsigned OpSize = OpTy.getSizeInBits();
@@ -1020,7 +1031,7 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
10201031
if (Bank == &AMDGPU::SGPRRegBank)
10211032
return;
10221033

1023-
Reg = buildReadFirstLane(B, MRI, Reg);
1034+
Reg = buildReadFirstLaneSrc(B, Reg);
10241035
MI.getOperand(OpIdx).setReg(Reg);
10251036
}
10261037

@@ -1603,7 +1614,7 @@ bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
16031614
MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
16041615

16051616
if (!DstOnValu) {
1606-
DstHi = buildReadFirstLane(B, MRI, DstHi);
1617+
DstHi = buildReadFirstLaneSrc(B, DstHi);
16071618
} else {
16081619
MulHiInVgpr = true;
16091620
}

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
1515

1616
#include "llvm/ADT/SmallSet.h"
17+
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
1718
#include "llvm/CodeGen/MachineBasicBlock.h"
1819
#include "llvm/CodeGen/Register.h"
1920
#include "llvm/CodeGen/RegisterBankInfo.h"
@@ -57,8 +58,20 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
5758
iterator_range<MachineBasicBlock::iterator> Range,
5859
SmallSet<Register, 4> &SGPROperandRegs) const;
5960

60-
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI,
61-
Register Src) const;
61+
Register buildReadFirstLaneSrc(MachineIRBuilder &B, Register Src) const;
62+
63+
MachineInstrBuilder buildReadFirstLaneForType(MachineIRBuilder &B,
64+
const DstOp &SgprDst,
65+
const SrcOp &VgprSrc) const;
66+
67+
MachineInstrBuilder buildReadFirstLaneB32(MachineIRBuilder &B,
68+
const DstOp &SgprDst,
69+
const SrcOp &VgprSrc) const;
70+
71+
MachineInstrBuilder buildReadFirstLaneSequenceOfB32(MachineIRBuilder &B,
72+
const DstOp &SgprDst,
73+
const SrcOp &VgprSrc,
74+
unsigned NumElts) const;
6275

6376
bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI,
6477
ArrayRef<unsigned> OpIndices) const;
@@ -116,6 +129,12 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
116129
const MachineRegisterInfo &MRI,
117130
const TargetRegisterInfo &TRI) const;
118131

132+
// Return a value mapping for an operand that is the same as the already assigned
133+
// reg bank, or that corresponds to the assigned register class + LLT.
134+
const ValueMapping *
135+
getPreAssignedOpMapping(Register Reg, const MachineRegisterInfo &MRI,
136+
const TargetRegisterInfo &TRI) const;
137+
119138
// Return a value mapping for an operand that is required to be an AGPR.
120139
const ValueMapping *getAGPROpMapping(Register Reg,
121140
const MachineRegisterInfo &MRI,
@@ -155,6 +174,9 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
155174

156175
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
157176
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
177+
const InstructionMapping &
178+
getDefaultMappingVOPWithPreassignedDef(const MachineInstr &MI) const;
179+
158180
const InstructionMapping &getDefaultMappingAllVGPR(
159181
const MachineInstr &MI) const;
160182

0 commit comments

Comments
 (0)