Skip to content

Commit d86b68a

Browse files
authored
MCExpr-ify SIProgramInfo (llvm#88257)
Convert members in SIProgramInfo affected by variables provided by AMDGPUResourceUsageAnalysis into MCExprs.
1 parent a7ee81e commit d86b68a

17 files changed

+1040
-218
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 288 additions & 133 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
7878

7979
void initTargetStreamer(Module &M);
8080

81+
static uint64_t getMCExprValue(const MCExpr *Value, MCContext &Ctx);
82+
8183
public:
8284
explicit AMDGPUAsmPrinter(TargetMachine &TM,
8385
std::unique_ptr<MCStreamer> Streamer);

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include "SIMachineFunctionInfo.h"
2020
#include "SIProgramInfo.h"
2121
#include "llvm/IR/Module.h"
22+
#include "llvm/MC/MCContext.h"
23+
#include "llvm/MC/MCExpr.h"
2224
using namespace llvm;
2325

2426
static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
@@ -462,6 +464,16 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
462464
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
463465
const Function &F = MF.getFunction();
464466

467+
auto GetMCExprValue = [&MF](const MCExpr *Value) {
468+
int64_t Val;
469+
if (!Value->evaluateAsAbsolute(Val)) {
470+
MCContext &Ctx = MF.getContext();
471+
Ctx.reportError(SMLoc(), "could not resolve expression when required.");
472+
Val = 0;
473+
}
474+
return static_cast<uint64_t>(Val);
475+
};
476+
465477
auto Kern = HSAMetadataDoc->getMapNode();
466478

467479
Align MaxKernArgAlign;
@@ -470,10 +482,11 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
470482
Kern[".group_segment_fixed_size"] =
471483
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
472484
Kern[".private_segment_fixed_size"] =
473-
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
474-
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
475-
Kern[".uses_dynamic_stack"] =
476-
Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
485+
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.ScratchSize));
486+
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
487+
Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(
488+
static_cast<bool>(GetMCExprValue(ProgramInfo.DynamicCallStack)));
489+
}
477490

478491
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
479492
Kern[".workgroup_processor_mode"] =
@@ -484,12 +497,15 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
484497
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
485498
Kern[".wavefront_size"] =
486499
Kern.getDocument()->getNode(STM.getWavefrontSize());
487-
Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
488-
Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
500+
Kern[".sgpr_count"] =
501+
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumSGPR));
502+
Kern[".vgpr_count"] =
503+
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumVGPR));
489504

490505
// Only add AGPR count to metadata for supported devices
491506
if (STM.hasMAIInsts()) {
492-
Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
507+
Kern[".agpr_count"] =
508+
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumAccVGPR));
493509
}
494510

495511
Kern[".max_flat_workgroup_size"] =

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -674,29 +674,8 @@ bool GCNSubtarget::useVGPRIndexMode() const {
674674
bool GCNSubtarget::useAA() const { return UseAA; }
675675

676676
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
677-
if (getGeneration() >= AMDGPUSubtarget::GFX10)
678-
return getMaxWavesPerEU();
679-
680-
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
681-
if (SGPRs <= 80)
682-
return 10;
683-
if (SGPRs <= 88)
684-
return 9;
685-
if (SGPRs <= 100)
686-
return 8;
687-
return 7;
688-
}
689-
if (SGPRs <= 48)
690-
return 10;
691-
if (SGPRs <= 56)
692-
return 9;
693-
if (SGPRs <= 64)
694-
return 8;
695-
if (SGPRs <= 72)
696-
return 7;
697-
if (SGPRs <= 80)
698-
return 6;
699-
return 5;
677+
return AMDGPU::IsaInfo::getOccupancyWithNumSGPRs(SGPRs, getMaxWavesPerEU(),
678+
getGeneration());
700679
}
701680

702681
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned NumVGPRs) const {

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8399,12 +8399,16 @@ bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
83998399
AGVK VK = StringSwitch<AGVK>(TokenId)
84008400
.Case("max", AGVK::AGVK_Max)
84018401
.Case("or", AGVK::AGVK_Or)
8402+
.Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8403+
.Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8404+
.Case("alignto", AGVK::AGVK_AlignTo)
8405+
.Case("occupancy", AGVK::AGVK_Occupancy)
84028406
.Default(AGVK::AGVK_None);
84038407

84048408
if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
84058409
SmallVector<const MCExpr *, 4> Exprs;
84068410
uint64_t CommaCount = 0;
8407-
lex(); // Eat 'max'/'or'
8411+
lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
84088412
lex(); // Eat '('
84098413
while (true) {
84108414
if (trySkipToken(AsmToken::RParen)) {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "AMDGPUMCExpr.h"
10+
#include "GCNSubtarget.h"
11+
#include "Utils/AMDGPUBaseInfo.h"
12+
#include "llvm/IR/Function.h"
1013
#include "llvm/MC/MCContext.h"
1114
#include "llvm/MC/MCStreamer.h"
1215
#include "llvm/MC/MCSymbol.h"
@@ -16,6 +19,7 @@
1619
#include <optional>
1720

1821
using namespace llvm;
22+
using namespace llvm::AMDGPU;
1923

2024
AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind,
2125
ArrayRef<const MCExpr *> Args,
@@ -61,6 +65,18 @@ void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
6165
case AGVK_Max:
6266
OS << "max(";
6367
break;
68+
case AGVK_ExtraSGPRs:
69+
OS << "extrasgprs(";
70+
break;
71+
case AGVK_TotalNumVGPRs:
72+
OS << "totalnumvgprs(";
73+
break;
74+
case AGVK_AlignTo:
75+
OS << "alignto(";
76+
break;
77+
case AGVK_Occupancy:
78+
OS << "occupancy(";
79+
break;
6480
}
6581
for (auto It = Args.begin(); It != Args.end(); ++It) {
6682
(*It)->print(OS, MAI, /*InParens=*/false);
@@ -82,10 +98,151 @@ static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
8298
}
8399
}
84100

101+
bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs(MCValue &Res,
102+
const MCAsmLayout *Layout,
103+
const MCFixup *Fixup) const {
104+
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
105+
MCValue MCVal;
106+
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
107+
!MCVal.isAbsolute())
108+
return false;
109+
110+
ConstantValue = MCVal.getConstant();
111+
return true;
112+
};
113+
114+
assert(Args.size() == 3 &&
115+
"AMDGPUVariadic Argument count incorrect for ExtraSGPRs");
116+
const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
117+
uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
118+
119+
bool Success = TryGetMCExprValue(Args[2], XNACKUsed);
120+
121+
assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
122+
if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
123+
!TryGetMCExprValue(Args[1], FlatScrUsed))
124+
return false;
125+
126+
uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
127+
STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed);
128+
Res = MCValue::get(ExtraSGPRs);
129+
return true;
130+
}
131+
132+
bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR(MCValue &Res,
133+
const MCAsmLayout *Layout,
134+
const MCFixup *Fixup) const {
135+
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
136+
MCValue MCVal;
137+
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
138+
!MCVal.isAbsolute())
139+
return false;
140+
141+
ConstantValue = MCVal.getConstant();
142+
return true;
143+
};
144+
assert(Args.size() == 2 &&
145+
"AMDGPUVariadic Argument count incorrect for TotalNumVGPRs");
146+
const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
147+
uint64_t NumAGPR = 0, NumVGPR = 0;
148+
149+
bool Has90AInsts = AMDGPU::isGFX90A(*STI);
150+
151+
if (!TryGetMCExprValue(Args[0], NumAGPR) ||
152+
!TryGetMCExprValue(Args[1], NumVGPR))
153+
return false;
154+
155+
uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR
156+
: std::max(NumVGPR, NumAGPR);
157+
Res = MCValue::get(TotalNum);
158+
return true;
159+
}
160+
161+
bool AMDGPUVariadicMCExpr::evaluateAlignTo(MCValue &Res,
162+
const MCAsmLayout *Layout,
163+
const MCFixup *Fixup) const {
164+
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
165+
MCValue MCVal;
166+
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
167+
!MCVal.isAbsolute())
168+
return false;
169+
170+
ConstantValue = MCVal.getConstant();
171+
return true;
172+
};
173+
174+
assert(Args.size() == 2 &&
175+
"AMDGPUVariadic Argument count incorrect for AlignTo");
176+
uint64_t Value = 0, Align = 0;
177+
if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
178+
return false;
179+
180+
Res = MCValue::get(alignTo(Value, Align));
181+
return true;
182+
}
183+
184+
bool AMDGPUVariadicMCExpr::evaluateOccupancy(MCValue &Res,
185+
const MCAsmLayout *Layout,
186+
const MCFixup *Fixup) const {
187+
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
188+
MCValue MCVal;
189+
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
190+
!MCVal.isAbsolute())
191+
return false;
192+
193+
ConstantValue = MCVal.getConstant();
194+
return true;
195+
};
196+
assert(Args.size() == 7 &&
197+
"AMDGPUVariadic Argument count incorrect for Occupancy");
198+
uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
199+
NumSGPRs, NumVGPRs;
200+
201+
bool Success = true;
202+
Success &= TryGetMCExprValue(Args[0], MaxWaves);
203+
Success &= TryGetMCExprValue(Args[1], Granule);
204+
Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
205+
Success &= TryGetMCExprValue(Args[3], Generation);
206+
Success &= TryGetMCExprValue(Args[4], InitOccupancy);
207+
208+
assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");
209+
210+
if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
211+
!TryGetMCExprValue(Args[6], NumVGPRs))
212+
return false;
213+
214+
unsigned Occupancy = InitOccupancy;
215+
if (NumSGPRs)
216+
Occupancy = std::min(
217+
Occupancy, IsaInfo::getOccupancyWithNumSGPRs(
218+
NumSGPRs, MaxWaves,
219+
static_cast<AMDGPUSubtarget::Generation>(Generation)));
220+
if (NumVGPRs)
221+
Occupancy = std::min(Occupancy,
222+
IsaInfo::getNumWavesPerEUWithNumVGPRs(
223+
NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
224+
225+
Res = MCValue::get(Occupancy);
226+
return true;
227+
}
228+
85229
bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl(
86230
MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
87231
std::optional<int64_t> Total;
88232

233+
switch (Kind) {
234+
default:
235+
break;
236+
case AGVK_ExtraSGPRs:
237+
return evaluateExtraSGPRs(Res, Layout, Fixup);
238+
case AGVK_AlignTo:
239+
return evaluateAlignTo(Res, Layout, Fixup);
240+
case AGVK_TotalNumVGPRs:
241+
return evaluateTotalNumVGPR(Res, Layout, Fixup);
242+
case AGVK_Occupancy:
243+
return evaluateOccupancy(Res, Layout, Fixup);
244+
}
245+
89246
for (const MCExpr *Arg : Args) {
90247
MCValue ArgRes;
91248
if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) ||
@@ -113,3 +270,47 @@ MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
113270
}
114271
return nullptr;
115272
}
273+
274+
/// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
275+
/// are unresolvable but needed for further MCExprs). Derived from
276+
/// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
277+
///
278+
const AMDGPUVariadicMCExpr *
279+
AMDGPUVariadicMCExpr::createExtraSGPRs(const MCExpr *VCCUsed,
280+
const MCExpr *FlatScrUsed,
281+
bool XNACKUsed, MCContext &Ctx) {
282+
283+
return create(AGVK_ExtraSGPRs,
284+
{VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)},
285+
Ctx);
286+
}
287+
288+
const AMDGPUVariadicMCExpr *AMDGPUVariadicMCExpr::createTotalNumVGPR(
289+
const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx) {
290+
return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
291+
}
292+
293+
/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
294+
///
295+
/// Remove dependency on GCNSubtarget and depend only only the necessary values
296+
/// for said occupancy computation. Should match computeOccupancy implementation
297+
/// without passing \p STM on.
298+
const AMDGPUVariadicMCExpr *
299+
AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
300+
const MCExpr *NumVGPRs,
301+
const GCNSubtarget &STM, MCContext &Ctx) {
302+
unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
303+
unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
304+
unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
305+
unsigned Generation = STM.getGeneration();
306+
307+
auto CreateExpr = [&Ctx](unsigned Value) {
308+
return MCConstantExpr::create(Value, Ctx);
309+
};
310+
311+
return create(AGVK_Occupancy,
312+
{CreateExpr(MaxWaves), CreateExpr(Granule),
313+
CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
314+
CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
315+
Ctx);
316+
}

0 commit comments

Comments
 (0)