7
7
// ===----------------------------------------------------------------------===//
8
8
9
9
#include " AMDGPUMCExpr.h"
10
+ #include " GCNSubtarget.h"
11
+ #include " Utils/AMDGPUBaseInfo.h"
12
+ #include " llvm/IR/Function.h"
10
13
#include " llvm/MC/MCContext.h"
11
14
#include " llvm/MC/MCStreamer.h"
12
15
#include " llvm/MC/MCSymbol.h"
16
19
#include < optional>
17
20
18
21
using namespace llvm ;
22
+ using namespace llvm ::AMDGPU;
19
23
20
24
AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr (VariadicKind Kind,
21
25
ArrayRef<const MCExpr *> Args,
@@ -61,6 +65,18 @@ void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
61
65
case AGVK_Max:
62
66
OS << " max(" ;
63
67
break ;
68
+ case AGVK_ExtraSGPRs:
69
+ OS << " extrasgprs(" ;
70
+ break ;
71
+ case AGVK_TotalNumVGPRs:
72
+ OS << " totalnumvgprs(" ;
73
+ break ;
74
+ case AGVK_AlignTo:
75
+ OS << " alignto(" ;
76
+ break ;
77
+ case AGVK_Occupancy:
78
+ OS << " occupancy(" ;
79
+ break ;
64
80
}
65
81
for (auto It = Args.begin (); It != Args.end (); ++It) {
66
82
(*It)->print (OS, MAI, /* InParens=*/ false );
@@ -82,10 +98,151 @@ static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
82
98
}
83
99
}
84
100
101
+ bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs (MCValue &Res,
102
+ const MCAsmLayout *Layout,
103
+ const MCFixup *Fixup) const {
104
+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
105
+ MCValue MCVal;
106
+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
107
+ !MCVal.isAbsolute ())
108
+ return false ;
109
+
110
+ ConstantValue = MCVal.getConstant ();
111
+ return true ;
112
+ };
113
+
114
+ assert (Args.size () == 3 &&
115
+ " AMDGPUVariadic Argument count incorrect for ExtraSGPRs" );
116
+ const MCSubtargetInfo *STI = Ctx.getSubtargetInfo ();
117
+ uint64_t VCCUsed = 0 , FlatScrUsed = 0 , XNACKUsed = 0 ;
118
+
119
+ bool Success = TryGetMCExprValue (Args[2 ], XNACKUsed);
120
+
121
+ assert (Success && " Arguments 3 for ExtraSGPRs should be a known constant" );
122
+ if (!Success || !TryGetMCExprValue (Args[0 ], VCCUsed) ||
123
+ !TryGetMCExprValue (Args[1 ], FlatScrUsed))
124
+ return false ;
125
+
126
+ uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs (
127
+ STI, (bool )VCCUsed, (bool )FlatScrUsed, (bool )XNACKUsed);
128
+ Res = MCValue::get (ExtraSGPRs);
129
+ return true ;
130
+ }
131
+
132
+ bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR (MCValue &Res,
133
+ const MCAsmLayout *Layout,
134
+ const MCFixup *Fixup) const {
135
+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
136
+ MCValue MCVal;
137
+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
138
+ !MCVal.isAbsolute ())
139
+ return false ;
140
+
141
+ ConstantValue = MCVal.getConstant ();
142
+ return true ;
143
+ };
144
+ assert (Args.size () == 2 &&
145
+ " AMDGPUVariadic Argument count incorrect for TotalNumVGPRs" );
146
+ const MCSubtargetInfo *STI = Ctx.getSubtargetInfo ();
147
+ uint64_t NumAGPR = 0 , NumVGPR = 0 ;
148
+
149
+ bool Has90AInsts = AMDGPU::isGFX90A (*STI);
150
+
151
+ if (!TryGetMCExprValue (Args[0 ], NumAGPR) ||
152
+ !TryGetMCExprValue (Args[1 ], NumVGPR))
153
+ return false ;
154
+
155
+ uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo (NumVGPR, 4 ) + NumAGPR
156
+ : std::max (NumVGPR, NumAGPR);
157
+ Res = MCValue::get (TotalNum);
158
+ return true ;
159
+ }
160
+
161
+ bool AMDGPUVariadicMCExpr::evaluateAlignTo (MCValue &Res,
162
+ const MCAsmLayout *Layout,
163
+ const MCFixup *Fixup) const {
164
+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
165
+ MCValue MCVal;
166
+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
167
+ !MCVal.isAbsolute ())
168
+ return false ;
169
+
170
+ ConstantValue = MCVal.getConstant ();
171
+ return true ;
172
+ };
173
+
174
+ assert (Args.size () == 2 &&
175
+ " AMDGPUVariadic Argument count incorrect for AlignTo" );
176
+ uint64_t Value = 0 , Align = 0 ;
177
+ if (!TryGetMCExprValue (Args[0 ], Value) || !TryGetMCExprValue (Args[1 ], Align))
178
+ return false ;
179
+
180
+ Res = MCValue::get (alignTo (Value, Align));
181
+ return true ;
182
+ }
183
+
184
+ bool AMDGPUVariadicMCExpr::evaluateOccupancy (MCValue &Res,
185
+ const MCAsmLayout *Layout,
186
+ const MCFixup *Fixup) const {
187
+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
188
+ MCValue MCVal;
189
+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
190
+ !MCVal.isAbsolute ())
191
+ return false ;
192
+
193
+ ConstantValue = MCVal.getConstant ();
194
+ return true ;
195
+ };
196
+ assert (Args.size () == 7 &&
197
+ " AMDGPUVariadic Argument count incorrect for Occupancy" );
198
+ uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
199
+ NumSGPRs, NumVGPRs;
200
+
201
+ bool Success = true ;
202
+ Success &= TryGetMCExprValue (Args[0 ], MaxWaves);
203
+ Success &= TryGetMCExprValue (Args[1 ], Granule);
204
+ Success &= TryGetMCExprValue (Args[2 ], TargetTotalNumVGPRs);
205
+ Success &= TryGetMCExprValue (Args[3 ], Generation);
206
+ Success &= TryGetMCExprValue (Args[4 ], InitOccupancy);
207
+
208
+ assert (Success && " Arguments 1 to 5 for Occupancy should be known constants" );
209
+
210
+ if (!Success || !TryGetMCExprValue (Args[5 ], NumSGPRs) ||
211
+ !TryGetMCExprValue (Args[6 ], NumVGPRs))
212
+ return false ;
213
+
214
+ unsigned Occupancy = InitOccupancy;
215
+ if (NumSGPRs)
216
+ Occupancy = std::min (
217
+ Occupancy, IsaInfo::getOccupancyWithNumSGPRs (
218
+ NumSGPRs, MaxWaves,
219
+ static_cast <AMDGPUSubtarget::Generation>(Generation)));
220
+ if (NumVGPRs)
221
+ Occupancy = std::min (Occupancy,
222
+ IsaInfo::getNumWavesPerEUWithNumVGPRs (
223
+ NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
224
+
225
+ Res = MCValue::get (Occupancy);
226
+ return true ;
227
+ }
228
+
85
229
bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl (
86
230
MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
87
231
std::optional<int64_t > Total;
88
232
233
+ switch (Kind) {
234
+ default :
235
+ break ;
236
+ case AGVK_ExtraSGPRs:
237
+ return evaluateExtraSGPRs (Res, Layout, Fixup);
238
+ case AGVK_AlignTo:
239
+ return evaluateAlignTo (Res, Layout, Fixup);
240
+ case AGVK_TotalNumVGPRs:
241
+ return evaluateTotalNumVGPR (Res, Layout, Fixup);
242
+ case AGVK_Occupancy:
243
+ return evaluateOccupancy (Res, Layout, Fixup);
244
+ }
245
+
89
246
for (const MCExpr *Arg : Args) {
90
247
MCValue ArgRes;
91
248
if (!Arg->evaluateAsRelocatable (ArgRes, Layout, Fixup) ||
@@ -113,3 +270,47 @@ MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
113
270
}
114
271
return nullptr ;
115
272
}
273
+
274
+ // / Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
275
+ // / are unresolvable but needed for further MCExprs). Derived from
276
+ // / implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
277
+ // /
278
+ const AMDGPUVariadicMCExpr *
279
+ AMDGPUVariadicMCExpr::createExtraSGPRs (const MCExpr *VCCUsed,
280
+ const MCExpr *FlatScrUsed,
281
+ bool XNACKUsed, MCContext &Ctx) {
282
+
283
+ return create (AGVK_ExtraSGPRs,
284
+ {VCCUsed, FlatScrUsed, MCConstantExpr::create (XNACKUsed, Ctx)},
285
+ Ctx);
286
+ }
287
+
288
+ const AMDGPUVariadicMCExpr *AMDGPUVariadicMCExpr::createTotalNumVGPR (
289
+ const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx) {
290
+ return create (AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
291
+ }
292
+
293
+ // / Mimics GCNSubtarget::computeOccupancy for MCExpr.
294
+ // /
295
+ // / Remove dependency on GCNSubtarget and depend only only the necessary values
296
+ // / for said occupancy computation. Should match computeOccupancy implementation
297
+ // / without passing \p STM on.
298
+ const AMDGPUVariadicMCExpr *
299
+ AMDGPUVariadicMCExpr::createOccupancy (unsigned InitOcc, const MCExpr *NumSGPRs,
300
+ const MCExpr *NumVGPRs,
301
+ const GCNSubtarget &STM, MCContext &Ctx) {
302
+ unsigned MaxWaves = IsaInfo::getMaxWavesPerEU (&STM);
303
+ unsigned Granule = IsaInfo::getVGPRAllocGranule (&STM);
304
+ unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs (&STM);
305
+ unsigned Generation = STM.getGeneration ();
306
+
307
+ auto CreateExpr = [&Ctx](unsigned Value) {
308
+ return MCConstantExpr::create (Value, Ctx);
309
+ };
310
+
311
+ return create (AGVK_Occupancy,
312
+ {CreateExpr (MaxWaves), CreateExpr (Granule),
313
+ CreateExpr (TargetTotalNumVGPRs), CreateExpr (Generation),
314
+ CreateExpr (InitOcc), NumSGPRs, NumVGPRs},
315
+ Ctx);
316
+ }
0 commit comments