Skip to content

Commit 5093413

Browse files
authored
[mlir][gpu][NVPTX] Enable NVIDIA GPU JIT compilation path (#66220)
This patch adds an NVPTX compilation path that enables JIT compilation on NVIDIA targets. The following modifications were performed: 1. Adding a format field to the GPU object attribute, allowing the translation attribute to use the correct runtime function to load the module. Likewise, a dictionary attribute was added to add any possible extra options. 2. Adding the `createObject` method to `GPUTargetAttrInterface`; this method returns a GPU object from a binary string. 3. Adding the function `mgpuModuleLoadJIT`, which is only available for NVIDIA GPUs, as there is no equivalent for AMD. 4. Adding the CMake flag `MLIR_GPU_COMPILATION_TEST_FORMAT` to specify the format to use during testing.
1 parent 6f4a528 commit 5093413

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+375
-135
lines changed

mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,21 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
3333

3434
If serialization fails then the method should return `std::nullopt`.
3535

36-
The `module` argument must be a GPU Module Op. The `options` argument is
37-
meant to be used for passing additional options that are not in the
36+
The `module` parameter must be a GPU Module Op. The `options` parameter
37+
is meant to be used for passing additional options that are not in the
3838
attribute.
3939
}],
4040
"std::optional<SmallVector<char, 0>>", "serializeToObject",
41-
(ins "Operation*":$module, "const gpu::TargetOptions&":$options)>
41+
(ins "Operation*":$module, "const gpu::TargetOptions&":$options)>,
42+
InterfaceMethod<[{
43+
Creates a GPU object attribute from a binary string.
44+
45+
The `object` parameter is a binary string. The `options` parameter is
46+
meant to be used for passing additional options that are not in the
47+
attribute.
48+
}], "Attribute", "createObject",
49+
(ins "const SmallVector<char, 0>&":$object,
50+
"const gpu::TargetOptions&":$options)>
4251
];
4352
}
4453

mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,62 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
2020
// GPU object attribute.
2121
//===----------------------------------------------------------------------===//
2222

23+
// For documentation on these enum cases, see the `GPU_ObjectAttr` docs.
24+
def GPU_ObjectOffload : I32EnumAttrCase<"Offload", 1, "offload">;
25+
def GPU_ObjectISA : I32EnumAttrCase<"Assembly", 2, "assembly">;
26+
def GPU_ObjectBinary : I32EnumAttrCase<"Binary", 3, "bin">;
27+
def GPU_ObjectFatbin : I32EnumAttrCase<"Fatbin", 4, "fatbin">;
28+
def GPU_CompilationTargetEnum : GPU_I32Enum<
29+
"CompilationTarget", "GPU compilation format", [
30+
GPU_ObjectOffload,
31+
GPU_ObjectISA,
32+
GPU_ObjectBinary,
33+
GPU_ObjectFatbin
34+
]>;
35+
2336
def GPU_ObjectAttr : GPU_Attr<"Object", "object"> {
2437
let description = [{
25-
A GPU object attribute pairs a GPU target with a binary string,
26-
encapsulating the information of how the object was generated with the
27-
object itself.
38+
A GPU object attribute glues together a GPU target, the object kind, a
39+
binary string with the object, and the object properties, encapsulating how
40+
the object was generated and its properties with the object itself.
41+
42+
There are four object formats:
43+
1. `Offload`: represents generic objects not described by the other three
44+
formats, and its meaning is target-dependent. For example, on the NVPTX and
45+
AMDGPU targets, this format is associated with LLVM bitcode.
46+
2. `Assembly`: represents GPU assembly code. For example, in the NVPTX
47+
target, assembly is PTX code, which can be JITted at runtime.
48+
3. `Binary`: represents executable code for a single GPU architecture. For
49+
example, PTX code that was compiled for a specific compute capability. Note
50+
that this format is likely to throw an error if there is an architecture
51+
mismatch between the compiled and running architecture.
52+
4. `Fatbin`: represents a GPU fat binary with executable code for multiple
53+
architectures. This format is the default; thus, it gets elided in assembly
54+
code.
2855

29-
The target attribute must implement the `TargetAttrInterface` interface.
56+
Object properties are specified through the `properties` dictionary
57+
attribute and can be used to define additional information.
58+
The target attribute must implement or promise the `TargetAttrInterface`
59+
interface.
3060

3161
```
32-
#gpu.object<#nvvm.target, "...">
62+
#gpu.object<#rocdl.target, offload = "..."> // An offload object.
63+
#gpu.object<#nvvm.target, properties = {O = 3 : i32}, assembly = "..."> // An assembly object with additional properties.
64+
#gpu.object<#rocdl.target, bin = "..."> // A binary object.
65+
#gpu.object<#nvvm.target, "..."> // A fatbin object.
3366
```
3467
}];
35-
let parameters = (ins "Attribute":$target, "StringAttr":$object);
36-
let assemblyFormat = [{`<` $target `,` $object `>`}];
68+
let parameters = (ins
69+
"Attribute":$target,
70+
DefaultValuedParameter<"CompilationTarget", "CompilationTarget::Fatbin">:$format,
71+
"StringAttr":$object,
72+
OptionalParameter<"DictionaryAttr">:$properties
73+
);
74+
let assemblyFormat = [{ `<`
75+
$target `,` (`properties` `=` $properties ^ `,`)?
76+
custom<Object>($format, $object)
77+
`>`
78+
}];
3779
let genVerifyDecl = 1;
3880
}
3981

mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ namespace LLVM {
2525
class ModuleTranslation;
2626
}
2727
namespace gpu {
28+
enum class CompilationTarget : uint32_t;
29+
2830
/// This class indicates that the attribute associated with this trait is a GPU
2931
/// offloading translation attribute. These kinds of attributes must implement
3032
/// an interface for handling the translation of GPU offloading operations like
@@ -42,27 +44,15 @@ class OffloadingTranslationAttrTrait
4244
/// ensure type safeness. Targets are free to ignore these options.
4345
class TargetOptions {
4446
public:
45-
/// The target representation of the compilation process.
46-
typedef enum {
47-
offload = 1, /// The process should produce an offloading representation.
48-
/// For the NVVM & ROCDL targets this option produces LLVM IR.
49-
assembly = 2, /// The process should produce assembly code.
50-
binary = 4, /// The process should produce a binary.
51-
fatbinary = 8, /// The process should produce a fat binary.
52-
binOrFatbin =
53-
binary |
54-
fatbinary, /// The process should produce a binary or fatbinary. It's up
55-
/// to the target to decide which.
56-
} CompilationTarget;
57-
5847
/// Constructor initializing the toolkit path, the list of files to link to,
5948
/// extra command line options, the compilation target and a callback for
6049
/// obtaining the parent symbol table. The default compilation target is
61-
/// `binOrFatbin`.
62-
TargetOptions(StringRef toolkitPath = {},
63-
ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {},
64-
CompilationTarget compilationTarget = binOrFatbin,
65-
function_ref<SymbolTable *()> getSymbolTableCallback = {});
50+
/// `Fatbin`.
51+
TargetOptions(
52+
StringRef toolkitPath = {}, ArrayRef<std::string> linkFiles = {},
53+
StringRef cmdOptions = {},
54+
CompilationTarget compilationTarget = getDefaultCompilationTarget(),
55+
function_ref<SymbolTable *()> getSymbolTableCallback = {});
6656

6757
/// Returns the typeID.
6858
TypeID getTypeID() const;
@@ -90,13 +80,17 @@ class TargetOptions {
9080
/// table.
9181
SymbolTable *getSymbolTable() const;
9282

83+
/// Returns the default compilation target: `CompilationTarget::Fatbin`.
84+
static CompilationTarget getDefaultCompilationTarget();
85+
9386
protected:
9487
/// Derived classes must use this constructor to initialize `typeID` to the
9588
/// appropriate value: i.e. `TargetOptions(TypeID::get<DerivedClass>())`.
96-
TargetOptions(TypeID typeID, StringRef toolkitPath = {},
97-
ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {},
98-
CompilationTarget compilationTarget = binOrFatbin,
99-
function_ref<SymbolTable *()> getSymbolTableCallback = {});
89+
TargetOptions(
90+
TypeID typeID, StringRef toolkitPath = {},
91+
ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {},
92+
CompilationTarget compilationTarget = getDefaultCompilationTarget(),
93+
function_ref<SymbolTable *()> getSymbolTableCallback = {});
10094

10195
/// Path to the target toolkit.
10296
std::string toolkitPath;
@@ -108,7 +102,7 @@ class TargetOptions {
108102
/// process.
109103
std::string cmdOptions;
110104

111-
/// Compilation process target representation.
105+
/// Compilation process target format.
112106
CompilationTarget compilationTarget;
113107

114108
/// Callback for obtaining the parent symbol table of all the GPU modules

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ def GpuModuleToBinaryPass
6868
2. `assembly`, `isa`: produces assembly code.
6969
3. `binary`, `bin`: produces binaries.
7070
4. `fatbinary`, `fatbin`: produces fatbinaries.
71-
5. `binOrFatbin`: produces bins or fatbins, the target decides which.
7271
}];
7372
let options = [
7473
Option<"offloadingHandler", "handler", "Attribute", "nullptr",
@@ -79,7 +78,7 @@ def GpuModuleToBinaryPass
7978
"Extra files to link to.">,
8079
Option<"cmdOptions", "opts", "std::string", [{""}],
8180
"Command line options to pass to the tools.">,
82-
Option<"compilationTarget", "format", "std::string", [{"binOrFatbin"}],
81+
Option<"compilationTarget", "format", "std::string", [{"fatbin"}],
8382
"The target representation of the compilation process.">
8483
];
8584
}

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,23 @@ struct SparseCompilerOptions
144144
desc("GPU target architecture")};
145145
PassOptions::Option<std::string> gpuFeatures{*this, "gpu-features",
146146
desc("GPU target features")};
147+
/// For NVIDIA GPUs there are 3 compilation format options:
148+
/// 1. `isa`: the compiler generates PTX and the driver JITs the PTX.
149+
/// 2. `bin`: generates a CUBIN object for `chip=gpuChip`.
150+
/// 3. `fatbin`: generates a fat binary with a CUBIN object for `gpuChip` and
151+
/// also embeds the PTX in the fat binary.
152+
/// Notes:
153+
/// Option 1 adds a significant runtime performance hit; however, tests are
154+
/// more likely to pass with this option.
155+
/// Option 2 is better for execution time as there is no JIT; however, the
156+
/// program will fail if there's an architecture mismatch between `gpuChip`
157+
/// and the GPU running the program.
158+
/// Option 3 is the best compromise between options 1 and 2 as it can JIT in
159+
/// case of an architecture mismatch between `gpuChip` and the running
160+
/// architecture. However, it's only possible to JIT to a higher CC than
161+
/// `gpuChip`.
162+
PassOptions::Option<std::string> gpuFormat{
163+
*this, "gpu-format", desc("GPU compilation format"), init("fatbin")};
147164

148165
/// This option is used to enable GPU library generation.
149166
PassOptions::Option<bool> enableGPULibgen{

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,7 +1959,8 @@ void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,
19591959
//===----------------------------------------------------------------------===//
19601960

19611961
LogicalResult ObjectAttr::verify(function_ref<InFlightDiagnostic()> emitError,
1962-
Attribute target, StringAttr object) {
1962+
Attribute target, CompilationTarget format,
1963+
StringAttr object, DictionaryAttr properties) {
19631964
if (!target)
19641965
return emitError() << "the target attribute cannot be null";
19651966
if (target.hasPromiseOrImplementsInterface<TargetAttrInterface>())
@@ -1968,6 +1969,40 @@ LogicalResult ObjectAttr::verify(function_ref<InFlightDiagnostic()> emitError,
19681969
"`gpu::TargetAttrInterface`";
19691970
}
19701971

1972+
namespace {
1973+
LogicalResult parseObject(AsmParser &odsParser, CompilationTarget &format,
1974+
StringAttr &object) {
1975+
std::optional<CompilationTarget> formatResult;
1976+
StringRef enumKeyword;
1977+
auto loc = odsParser.getCurrentLocation();
1978+
if (failed(odsParser.parseOptionalKeyword(&enumKeyword)))
1979+
formatResult = CompilationTarget::Fatbin;
1980+
if (!formatResult &&
1981+
(formatResult =
1982+
gpu::symbolizeEnum<gpu::CompilationTarget>(enumKeyword)) &&
1983+
odsParser.parseEqual())
1984+
return odsParser.emitError(loc, "expected an equal sign");
1985+
if (!formatResult)
1986+
return odsParser.emitError(loc, "expected keyword for GPU object format");
1987+
FailureOr<StringAttr> objectResult =
1988+
FieldParser<StringAttr>::parse(odsParser);
1989+
if (failed(objectResult))
1990+
return odsParser.emitError(odsParser.getCurrentLocation(),
1991+
"failed to parse GPU_ObjectAttr parameter "
1992+
"'object' which is to be a `StringAttr`");
1993+
format = *formatResult;
1994+
object = *objectResult;
1995+
return success();
1996+
}
1997+
1998+
void printObject(AsmPrinter &odsParser, CompilationTarget format,
1999+
StringAttr object) {
2000+
if (format != CompilationTarget::Fatbin)
2001+
odsParser << stringifyEnum(format) << " = ";
2002+
odsParser << object;
2003+
}
2004+
} // namespace
2005+
19712006
//===----------------------------------------------------------------------===//
19722007
// GPU select object attribute
19732008
//===----------------------------------------------------------------------===//
@@ -2020,6 +2055,14 @@ SymbolTable *TargetOptions::getSymbolTable() const {
20202055
return getSymbolTableCallback ? getSymbolTableCallback() : nullptr;
20212056
}
20222057

2058+
CompilationTarget TargetOptions::getCompilationTarget() const {
2059+
return compilationTarget;
2060+
}
2061+
2062+
CompilationTarget TargetOptions::getDefaultCompilationTarget() {
2063+
return CompilationTarget::Fatbin;
2064+
}
2065+
20232066
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
20242067
TargetOptions::tokenizeCmdOptions() const {
20252068
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> options;
@@ -2043,10 +2086,6 @@ TargetOptions::tokenizeCmdOptions() const {
20432086
return options;
20442087
}
20452088

2046-
TargetOptions::CompilationTarget TargetOptions::getCompilationTarget() const {
2047-
return compilationTarget;
2048-
}
2049-
20502089
MLIR_DEFINE_EXPLICIT_TYPE_ID(::mlir::gpu::TargetOptions)
20512090

20522091
#include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"

mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ void GpuModuleToBinaryPass::getDependentDialects(
5757

5858
void GpuModuleToBinaryPass::runOnOperation() {
5959
RewritePatternSet patterns(&getContext());
60-
int targetFormat = llvm::StringSwitch<int>(compilationTarget)
61-
.Cases("offloading", "llvm", TargetOptions::offload)
62-
.Cases("assembly", "isa", TargetOptions::assembly)
63-
.Cases("binary", "bin", TargetOptions::binary)
64-
.Cases("fatbinary", "fatbin", TargetOptions::fatbinary)
65-
.Case("binOrFatbin", TargetOptions::binOrFatbin)
66-
.Default(-1);
67-
if (targetFormat == -1)
60+
auto targetFormat =
61+
llvm::StringSwitch<std::optional<CompilationTarget>>(compilationTarget)
62+
.Cases("offloading", "llvm", CompilationTarget::Offload)
63+
.Cases("assembly", "isa", CompilationTarget::Assembly)
64+
.Cases("binary", "bin", CompilationTarget::Binary)
65+
.Cases("fatbinary", "fatbin", CompilationTarget::Fatbin)
66+
.Default(std::nullopt);
67+
if (!targetFormat)
6868
getOperation()->emitError() << "Invalid format specified.";
6969

7070
// Lazy symbol table builder callback.
@@ -82,10 +82,8 @@ void GpuModuleToBinaryPass::runOnOperation() {
8282
return &parentTable.value();
8383
};
8484

85-
TargetOptions targetOptions(
86-
toolkitPath, linkFiles, cmdOptions,
87-
static_cast<TargetOptions::CompilationTarget>(targetFormat),
88-
lazyTableBuilder);
85+
TargetOptions targetOptions(toolkitPath, linkFiles, cmdOptions, *targetFormat,
86+
lazyTableBuilder);
8987
if (failed(transformGpuModulesToBinaries(
9088
getOperation(),
9189
offloadingHandler ? dyn_cast<OffloadingLLVMTranslationAttrInterface>(
@@ -107,17 +105,19 @@ LogicalResult moduleSerializer(GPUModuleOp op,
107105
auto target = dyn_cast<gpu::TargetAttrInterface>(targetAttr);
108106
assert(target &&
109107
"Target attribute doesn't implements `TargetAttrInterface`.");
110-
std::optional<SmallVector<char, 0>> object =
108+
std::optional<SmallVector<char, 0>> serializedModule =
111109
target.serializeToObject(op, targetOptions);
112-
113-
if (!object) {
110+
if (!serializedModule) {
114111
op.emitError("An error happened while serializing the module.");
115112
return failure();
116113
}
117114

118-
objects.push_back(builder.getAttr<gpu::ObjectAttr>(
119-
target,
120-
builder.getStringAttr(StringRef(object->data(), object->size()))));
115+
Attribute object = target.createObject(*serializedModule, targetOptions);
116+
if (!object) {
117+
op.emitError("An error happened while creating the object.");
118+
return failure();
119+
}
120+
objects.push_back(object);
121121
}
122122
builder.setInsertionPointAfter(op);
123123
builder.create<gpu::BinaryOp>(op.getLoc(), op.getName(), handler,

mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,9 @@ void mlir::sparse_tensor::buildSparseCompiler(
8484
nvvmTargetOptions.features = options.gpuFeatures;
8585
pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
8686
pm.addPass(createGpuToLLVMConversionPass());
87-
pm.addPass(createGpuModuleToBinaryPass());
87+
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
88+
gpuModuleToBinaryPassOptions.compilationTarget = options.gpuFormat;
89+
pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
8890
}
8991

9092
pm.addPass(createReconcileUnrealizedCastsPass());

mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,27 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT CUmodule mgpuModuleLoad(void *data) {
126126
return module;
127127
}
128128

129+
extern "C" MLIR_CUDA_WRAPPERS_EXPORT CUmodule mgpuModuleLoadJIT(void *data,
130+
int optLevel) {
131+
ScopedContext scopedContext;
132+
CUmodule module = nullptr;
133+
char jitErrorBuffer[4096] = {0};
134+
CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
135+
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
136+
CU_JIT_OPTIMIZATION_LEVEL};
137+
void *jitOptionsVals[] = {jitErrorBuffer,
138+
reinterpret_cast<void *>(sizeof(jitErrorBuffer)),
139+
reinterpret_cast<void *>(optLevel)};
140+
141+
CUresult result =
142+
cuModuleLoadDataEx(&module, data, 3, jitOptions, jitOptionsVals);
143+
if (result) {
144+
fprintf(stderr, "JIT compilation failed with: '%s'\n", jitErrorBuffer);
145+
CUDA_REPORT_IF_ERROR(result);
146+
}
147+
return module;
148+
}
149+
129150
extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuModuleUnload(CUmodule module) {
130151
CUDA_REPORT_IF_ERROR(cuModuleUnload(module));
131152
}

mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ extern "C" hipModule_t mgpuModuleLoad(void *data) {
3838
return module;
3939
}
4040

41+
extern "C" hipModule_t mgpuModuleLoadJIT(void *data, int optLevel) {
42+
assert(false && "This function is not available in HIP.");
43+
return nullptr;
44+
}
45+
4146
extern "C" void mgpuModuleUnload(hipModule_t module) {
4247
HIP_REPORT_IF_ERROR(hipModuleUnload(module));
4348
}

0 commit comments

Comments
 (0)