Skip to content

Commit a699ccb

Browse files
authored
MCExpr-ify amd_kernel_code_t (#91587)
Redefines the amd_kernel_code_t struct as AMDGPUMCKernelCodeT, using MCExprs for the members that are derived from SIProgramInfo MCExpr members.
1 parent cdcd653 commit a699ccb

14 files changed

+831
-179
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "AMDGPU.h"
2020
#include "AMDGPUHSAMetadataStreamer.h"
2121
#include "AMDGPUResourceUsageAnalysis.h"
22-
#include "AMDKernelCodeT.h"
2322
#include "GCNSubtarget.h"
2423
#include "MCTargetDesc/AMDGPUInstPrinter.h"
2524
#include "MCTargetDesc/AMDGPUMCExpr.h"
@@ -29,6 +28,7 @@
2928
#include "SIMachineFunctionInfo.h"
3029
#include "TargetInfo/AMDGPUTargetInfo.h"
3130
#include "Utils/AMDGPUBaseInfo.h"
31+
#include "Utils/AMDKernelCodeTUtils.h"
3232
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3333
#include "llvm/BinaryFormat/ELF.h"
3434
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
205205
if (STM.isMesaKernel(F) &&
206206
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
207207
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
208-
amd_kernel_code_t KernelCode;
208+
AMDGPUMCKernelCodeT KernelCode;
209209
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
210+
KernelCode.validate(&STM, MF->getContext());
210211
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
211212
}
212213

@@ -1317,7 +1318,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
13171318
}
13181319
}
13191320

1320-
void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1321+
void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
13211322
const SIProgramInfo &CurrentProgramInfo,
13221323
const MachineFunction &MF) const {
13231324
const Function &F = MF.getFunction();
@@ -1328,24 +1329,22 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
13281329
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
13291330
MCContext &Ctx = MF.getContext();
13301331

1331-
AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
1332+
Out.initDefault(&STM, Ctx, /*InitMCExpr=*/false);
13321333

1333-
Out.compute_pgm_resource_registers =
1334-
CurrentProgramInfo.getComputePGMRSrc1(STM) |
1335-
(CurrentProgramInfo.getComputePGMRSrc2() << 32);
1334+
Out.compute_pgm_resource1_registers =
1335+
CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
1336+
Out.compute_pgm_resource2_registers =
1337+
CurrentProgramInfo.getComputePGMRSrc2(Ctx);
13361338
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
13371339

1338-
if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
1339-
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
1340+
Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;
13401341

1341-
AMD_HSA_BITS_SET(Out.code_properties,
1342-
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
1342+
AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
13431343
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
13441344

13451345
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
13461346
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
1347-
Out.code_properties |=
1348-
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
1347+
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
13491348
}
13501349

13511350
if (UserSGPRInfo.hasDispatchPtr())
@@ -1371,10 +1370,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
13711370

13721371
Align MaxKernArgAlign;
13731372
Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1374-
Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
1375-
Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
1376-
Out.workitem_private_segment_byte_size =
1377-
getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
1373+
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1374+
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1375+
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
13781376
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
13791377

13801378
// kernarg_segment_alignment is specified as log of the alignment.

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
#include "SIProgramInfo.h"
1818
#include "llvm/CodeGen/AsmPrinter.h"
1919

20-
struct amd_kernel_code_t;
21-
2220
namespace llvm {
2321

2422
class AMDGPUMachineFunction;
@@ -29,6 +27,7 @@ class MCOperand;
2927

3028
namespace AMDGPU {
3129
struct MCKernelDescriptor;
30+
struct AMDGPUMCKernelCodeT;
3231
namespace HSAMD {
3332
class MetadataStreamer;
3433
}
@@ -50,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
5049
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
5150

5251
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
53-
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
52+
void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
53+
const SIProgramInfo &KernelInfo,
5454
const MachineFunction &MF) const;
5555

5656
/// Emit register usage information so that the GPU driver

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 11 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,7 +1340,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
13401340
bool ParseDirectiveAMDGCNTarget();
13411341
bool ParseDirectiveAMDHSACodeObjectVersion();
13421342
bool ParseDirectiveAMDHSAKernel();
1343-
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1343+
bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
13441344
bool ParseDirectiveAMDKernelCodeT();
13451345
// TODO: Possibly make subtargetHasRegister const.
13461346
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5863,7 +5863,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
58635863
}
58645864

58655865
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5866-
amd_kernel_code_t &Header) {
5866+
AMDGPUMCKernelCodeT &C) {
58675867
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
58685868
// assembly for backwards compatibility.
58695869
if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5873,25 +5873,13 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
58735873

58745874
SmallString<40> ErrStr;
58755875
raw_svector_ostream Err(ErrStr);
5876-
if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5876+
if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
58775877
return TokError(Err.str());
58785878
}
58795879
Lex();
58805880

5881-
if (ID == "enable_dx10_clamp") {
5882-
if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5883-
isGFX12Plus())
5884-
return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5885-
}
5886-
5887-
if (ID == "enable_ieee_mode") {
5888-
if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5889-
isGFX12Plus())
5890-
return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5891-
}
5892-
58935881
if (ID == "enable_wavefront_size32") {
5894-
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5882+
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
58955883
if (!isGFX10Plus())
58965884
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
58975885
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5903,41 +5891,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
59035891
}
59045892

59055893
if (ID == "wavefront_size") {
5906-
if (Header.wavefront_size == 5) {
5894+
if (C.wavefront_size == 5) {
59075895
if (!isGFX10Plus())
59085896
return TokError("wavefront_size=5 is only allowed on GFX10+");
59095897
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
59105898
return TokError("wavefront_size=5 requires +WavefrontSize32");
5911-
} else if (Header.wavefront_size == 6) {
5899+
} else if (C.wavefront_size == 6) {
59125900
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
59135901
return TokError("wavefront_size=6 requires +WavefrontSize64");
59145902
}
59155903
}
59165904

5917-
if (ID == "enable_wgp_mode") {
5918-
if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5919-
!isGFX10Plus())
5920-
return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5921-
}
5922-
5923-
if (ID == "enable_mem_ordered") {
5924-
if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5925-
!isGFX10Plus())
5926-
return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5927-
}
5928-
5929-
if (ID == "enable_fwd_progress") {
5930-
if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5931-
!isGFX10Plus())
5932-
return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5933-
}
5934-
59355905
return false;
59365906
}
59375907

59385908
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5939-
amd_kernel_code_t Header;
5940-
AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5909+
AMDGPUMCKernelCodeT KernelCode;
5910+
KernelCode.initDefault(&getSTI(), getContext());
59415911

59425912
while (true) {
59435913
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -5951,11 +5921,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
59515921
if (ID == ".end_amd_kernel_code_t")
59525922
break;
59535923

5954-
if (ParseAMDKernelCodeTValue(ID, Header))
5924+
if (ParseAMDKernelCodeTValue(ID, KernelCode))
59555925
return true;
59565926
}
59575927

5958-
getTargetStreamer().EmitAMDKernelCodeT(Header);
5928+
KernelCode.validate(&getSTI(), getContext());
5929+
getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
59595930

59605931
return false;
59615932
}

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include "AMDGPUTargetStreamer.h"
1414
#include "AMDGPUMCKernelDescriptor.h"
1515
#include "AMDGPUPTNote.h"
16-
#include "AMDKernelCodeT.h"
1716
#include "Utils/AMDGPUBaseInfo.h"
1817
#include "Utils/AMDKernelCodeTUtils.h"
1918
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
@@ -240,10 +239,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
240239
OS << "\t.amdhsa_code_object_version " << COV << '\n';
241240
}
242241

243-
void
244-
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
242+
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
245243
OS << "\t.amd_kernel_code_t\n";
246-
dumpAmdKernelCode(&Header, OS, "\t\t");
244+
Header.EmitKernelCodeT(OS, getContext());
247245
OS << "\t.end_amd_kernel_code_t\n";
248246
}
249247

@@ -789,12 +787,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
789787

790788
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
791789

792-
void
793-
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
794-
790+
void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
795791
MCStreamer &OS = getStreamer();
796792
OS.pushSection();
797-
OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
793+
Header.EmitKernelCodeT(OS, getContext());
798794
OS.popSection();
799795
}
800796

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
#include "Utils/AMDGPUPALMetadata.h"
1414
#include "llvm/MC/MCStreamer.h"
1515

16-
struct amd_kernel_code_t;
17-
1816
namespace llvm {
1917

2018
class MCELFStreamer;
@@ -23,6 +21,7 @@ class formatted_raw_ostream;
2321

2422
namespace AMDGPU {
2523

24+
struct AMDGPUMCKernelCodeT;
2625
struct MCKernelDescriptor;
2726
namespace HSAMD {
2827
struct Metadata;
@@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
5453
CodeObjectVersion = COV;
5554
}
5655

57-
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
56+
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {};
5857

5958
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
6059

@@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
130129

131130
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
132131

133-
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
132+
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
134133

135134
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
136135

@@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
186185

187186
void EmitDirectiveAMDGCNTarget() override;
188187

189-
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
188+
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
190189

191190
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
192191

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 };
11111111
#define C_00B84C_LDS_SIZE 0xFF007FFF
11121112
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
11131113
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
1114-
#define C_00B84C_EXCP_EN
1114+
#define C_00B84C_EXCP_EN 0x80FFFFFF
11151115

11161116
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
11171117
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "AMDGPUAsmUtils.h"
1212
#include "AMDKernelCodeT.h"
1313
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14+
#include "Utils/AMDKernelCodeTUtils.h"
1415
#include "llvm/ADT/StringExtras.h"
1516
#include "llvm/BinaryFormat/ELF.h"
1617
#include "llvm/IR/Attributes.h"
@@ -1218,39 +1219,37 @@ unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
12181219
}
12191220
} // end namespace IsaInfo
12201221

1221-
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
1222+
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
12221223
const MCSubtargetInfo *STI) {
12231224
IsaVersion Version = getIsaVersion(STI->getCPU());
1224-
1225-
memset(&Header, 0, sizeof(Header));
1226-
1227-
Header.amd_kernel_code_version_major = 1;
1228-
Header.amd_kernel_code_version_minor = 2;
1229-
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1230-
Header.amd_machine_version_major = Version.Major;
1231-
Header.amd_machine_version_minor = Version.Minor;
1232-
Header.amd_machine_version_stepping = Version.Stepping;
1233-
Header.kernel_code_entry_byte_offset = sizeof(Header);
1234-
Header.wavefront_size = 6;
1225+
KernelCode.amd_kernel_code_version_major = 1;
1226+
KernelCode.amd_kernel_code_version_minor = 2;
1227+
KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1228+
KernelCode.amd_machine_version_major = Version.Major;
1229+
KernelCode.amd_machine_version_minor = Version.Minor;
1230+
KernelCode.amd_machine_version_stepping = Version.Stepping;
1231+
KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
1232+
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1233+
KernelCode.wavefront_size = 5;
1234+
KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1235+
} else {
1236+
KernelCode.wavefront_size = 6;
1237+
}
12351238

12361239
// If the code object does not support indirect functions, then the value must
12371240
// be 0xffffffff.
1238-
Header.call_convention = -1;
1241+
KernelCode.call_convention = -1;
12391242

12401243
// These alignment values are specified in powers of two, so alignment =
12411244
// 2^n. The minimum alignment is 2^4 = 16.
1242-
Header.kernarg_segment_alignment = 4;
1243-
Header.group_segment_alignment = 4;
1244-
Header.private_segment_alignment = 4;
1245+
KernelCode.kernarg_segment_alignment = 4;
1246+
KernelCode.group_segment_alignment = 4;
1247+
KernelCode.private_segment_alignment = 4;
12451248

12461249
if (Version.Major >= 10) {
1247-
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1248-
Header.wavefront_size = 5;
1249-
Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1250-
}
1251-
Header.compute_pgm_resource_registers |=
1252-
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1253-
S_00B848_MEM_ORDERED(1);
1250+
KernelCode.compute_pgm_resource_registers |=
1251+
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1252+
S_00B848_MEM_ORDERED(1);
12541253
}
12551254
}
12561255

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class raw_ostream;
3737

3838
namespace AMDGPU {
3939

40+
struct AMDGPUMCKernelCodeT;
4041
struct IsaVersion;
4142

4243
/// Generic target versions emitted by this version of LLVM.
@@ -860,7 +861,7 @@ unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
860861
LLVM_READONLY
861862
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
862863

863-
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
864+
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
864865
const MCSubtargetInfo *STI);
865866

866867
bool isGroupSegment(const GlobalValue *GV);

0 commit comments

Comments
 (0)