-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[mlir][gpu][NVPTX] Enable NVIDIA GPU JIT compilation path #66220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
608af55
05afac0
4ad818f
a9c9e7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,18 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" | |
// GPU object attribute. | ||
//===----------------------------------------------------------------------===// | ||
|
||
def GPU_ObjectOffload : I32EnumAttrCase<"Offload", 1, "offload">; | ||
def GPU_ObjectISA : I32EnumAttrCase<"Assembly", 2, "assembly">; | ||
def GPU_ObjectBinary : I32EnumAttrCase<"Binary", 3, "bin">; | ||
def GPU_ObjectFatbin : I32EnumAttrCase<"Fatbin", 4, "fatbin">; | ||
def GPU_CompilationTargetEnum : GPU_I32Enum< | ||
"CompilationTarget", "GPU object format", [ | ||
GPU_ObjectOffload, | ||
GPU_ObjectISA, | ||
GPU_ObjectBinary, | ||
GPU_ObjectFatbin | ||
]>; | ||
|
||
def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { | ||
let description = [{ | ||
A GPU object attribute pairs a GPU target with a binary string, | ||
|
@@ -32,8 +44,17 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { | |
#gpu.object<#nvvm.target, "..."> | ||
``` | ||
}]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update the doc please |
||
let parameters = (ins "Attribute":$target, "StringAttr":$object); | ||
let assemblyFormat = [{`<` $target `,` $object `>`}]; | ||
let parameters = (ins | ||
"Attribute":$target, | ||
DefaultValuedParameter<"CompilationTarget", "CompilationTarget::Fatbin">:$format, | ||
"StringAttr":$object, | ||
OptionalParameter<"DictionaryAttr">:$properties | ||
); | ||
let assemblyFormat = [{ `<` | ||
$target `,` (`properties` `=` $properties ^ `,`)? | ||
custom<Object>($format, $object) | ||
`>` | ||
}]; | ||
let genVerifyDecl = 1; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,8 @@ namespace LLVM { | |
class ModuleTranslation; | ||
} | ||
namespace gpu { | ||
enum class CompilationTarget : uint32_t; | ||
|
||
/// This class indicates that the attribute associated with this trait is a GPU | ||
/// offloading translation attribute. These kinds of attributes must implement | ||
/// an interface for handling the translation of GPU offloading operations like | ||
|
@@ -42,27 +44,15 @@ class OffloadingTranslationAttrTrait | |
/// ensure type safeness. Targets are free to ignore these options. | ||
class TargetOptions { | ||
public: | ||
/// The target representation of the compilation process. | ||
typedef enum { | ||
offload = 1, /// The process should produce an offloading representation. | ||
/// For the NVVM & ROCDL targets this option produces LLVM IR. | ||
assembly = 2, /// The process should produce assembly code. | ||
binary = 4, /// The process should produce a binary. | ||
fatbinary = 8, /// The process should produce a fat binary. | ||
binOrFatbin = | ||
binary | | ||
fatbinary, /// The process should produce a binary or fatbinary. It's up | ||
/// to the target to decide which. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (this is the doc that may have been lost moving to ODS, cf the other comment above) |
||
} CompilationTarget; | ||
|
||
/// Constructor initializing the toolkit path, the list of files to link to, | ||
/// extra command line options, the compilation target and a callback for | ||
/// obtaining the parent symbol table. The default compilation target is | ||
/// `binOrFatbin`. | ||
TargetOptions(StringRef toolkitPath = {}, | ||
ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {}, | ||
CompilationTarget compilationTarget = binOrFatbin, | ||
function_ref<SymbolTable *()> getSymbolTableCallback = {}); | ||
TargetOptions( | ||
StringRef toolkitPath = {}, ArrayRef<std::string> linkFiles = {}, | ||
StringRef cmdOptions = {}, | ||
CompilationTarget compilationTarget = getDefaultCompilationTarget(), | ||
function_ref<SymbolTable *()> getSymbolTableCallback = {}); | ||
|
||
/// Returns the typeID. | ||
TypeID getTypeID() const; | ||
|
@@ -90,13 +80,17 @@ class TargetOptions { | |
/// table. | ||
SymbolTable *getSymbolTable() const; | ||
|
||
/// Returns the default compilation target: `CompilationTarget::Fatbin`. | ||
static CompilationTarget getDefaultCompilationTarget(); | ||
|
||
protected: | ||
/// Derived classes must use this constructor to initialize `typeID` to the | ||
/// appropriate value: i.e. `TargetOptions(TypeID::get<DerivedClass>())`. | ||
TargetOptions(TypeID typeID, StringRef toolkitPath = {}, | ||
ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {}, | ||
CompilationTarget compilationTarget = binOrFatbin, | ||
function_ref<SymbolTable *()> getSymbolTableCallback = {}); | ||
TargetOptions( | ||
TypeID typeID, StringRef toolkitPath = {}, | ||
ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {}, | ||
CompilationTarget compilationTarget = getDefaultCompilationTarget(), | ||
function_ref<SymbolTable *()> getSymbolTableCallback = {}); | ||
|
||
/// Path to the target toolkit. | ||
std::string toolkitPath; | ||
|
@@ -108,7 +102,7 @@ class TargetOptions { | |
/// process. | ||
std::string cmdOptions; | ||
|
||
/// Compilation process target representation. | ||
/// Compilation process target format. | ||
CompilationTarget compilationTarget; | ||
|
||
/// Callback for obtaining the parent symbol table of all the GPU modules | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -144,6 +144,22 @@ struct SparseCompilerOptions | |||||
desc("GPU target architecture")}; | ||||||
PassOptions::Option<std::string> gpuFeatures{*this, "gpu-features", | ||||||
desc("GPU target features")}; | ||||||
/// For NVIDIA GPUs there are 3 compilation format options: | ||||||
/// 1. `isa`: the compiler generates PTX and the runtime JITs the PTX. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
/// 2. `bin`: generates a CUBIN object for `chip=gpuChip`. | ||||||
/// 3. `fatbin`: generates a fat binary with a CUBIN object for `gpuChip` and | ||||||
/// also embeds the PTX in the fat binary. | ||||||
/// Notes: | ||||||
/// Option 1 adds a significant runtime performance hit; however, tests are | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for adding this detailed explanation. |
||||||
/// more likely to pass with this option. | ||||||
/// Option 2 is better for execution time as there is no JIT; however, the | ||||||
/// program will fail if there's an architecture mismatch between `gpuChip` and the | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can you please spell out "architecture" (unless this is NVidia convention to write it that way) |
||||||
/// GPU running the program. | ||||||
/// Option 3 is the best compromise between options 1 & 2 as it can JIT in | ||||||
fabianmcg marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
/// case of an architecture mismatch; however, it's only possible to JIT to a | ||||||
/// higher compute capability (CC) than `gpuChip`. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the CC target when using option 1? To some extent there shouldn't be any difference between 1 and 3? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's never specified; that's why. If there's an architecture mismatch, then options 1 and 3 have the same performance hit; however, if the compiled architecture matches the running architecture, then option 3 behaves like option 2 and there's no performance hit. |
||||||
PassOptions::Option<std::string> gpuFormat{ | ||||||
*this, "gpu-format", desc("GPU compilation format"), init("isa")}; | ||||||
|
||||||
/// This option is used to enable GPU library generation. | ||||||
PassOptions::Option<bool> enableGPULibgen{ | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This deserves some doc.
(I'm not totally sure right now what "offload" does in this list actually)
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added the docs in the `ObjectAttr` docs. The `offload` format is meant to be a generic format; for NVPTX & AMDGPU it generates LLVM bitcode. Execution from this format is not enabled in trunk; however, downstream users could use it.