-
Notifications
You must be signed in to change notification settings - Fork 787
[SYCL][ROCm] Use offload-arch instead of mcpu for AMD arch #4239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a6218a7
669c331
1edd3a5
40bd9df
ed8b330
0a01af1
7331029
fad9c90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3960,8 +3960,9 @@ class OffloadingActionBuilder final { | |
/// List of static archives to extract FPGA dependency info from | ||
ActionList FPGAArchiveInputs; | ||
|
||
/// List of CUDA architectures to use in this compilation with NVPTX targets. | ||
SmallVector<CudaArch, 8> GpuArchList; | ||
/// List of GPU architectures to use in this compilation with NVPTX/AMDGCN | ||
/// targets. | ||
SmallVector<std::pair<llvm::Triple, std::string>, 8> GpuArchList; | ||
|
||
/// Build the last steps for CUDA after all BC files have been linked. | ||
JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { | ||
|
@@ -3998,13 +3999,17 @@ class OffloadingActionBuilder final { | |
const Driver::InputList &Inputs) | ||
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {} | ||
|
||
void withBoundArchForToolChain(const ToolChain* TC, | ||
void withBoundArchForToolChain(const ToolChain *TC, | ||
llvm::function_ref<void(const char *)> Op) { | ||
if (TC->getTriple().isNVPTX()) | ||
for (CudaArch A : GpuArchList) | ||
Op(CudaArchToString(A)); | ||
else | ||
Op(nullptr); | ||
for (auto &A : GpuArchList) { | ||
if (TC->getTriple() == A.first) { | ||
Op(Args.MakeArgString(A.second.c_str())); | ||
return; | ||
} | ||
} | ||
|
||
// no bound arch for this toolchain | ||
Op(nullptr); | ||
} | ||
|
||
ActionBuilderReturnCode | ||
|
@@ -4058,8 +4063,8 @@ class OffloadingActionBuilder final { | |
} | ||
const auto *TC = ToolChains.front(); | ||
const char *BoundArch = nullptr; | ||
if (TC->getTriple().isNVPTX()) | ||
BoundArch = CudaArchToString(GpuArchList.front()); | ||
if (TC->getTriple().isNVPTX() || TC->getTriple().isAMDGCN()) | ||
BoundArch = GpuArchList.front().second.c_str(); | ||
DA.add(*DeviceCompilerInput, *TC, BoundArch, Action::OFK_SYCL); | ||
// Clear the input file, it is already a dependence to a host | ||
// action. | ||
|
@@ -4642,39 +4647,94 @@ class OffloadingActionBuilder final { | |
} | ||
} | ||
|
||
/// Initialize the GPU architecture list from arguments - this populates `GpuArchList` from | ||
/// `--cuda-gpu-arch` flags. Only relevant if compiling to CUDA. Return true if any | ||
/// initialization errors are found. | ||
/// Initialize the GPU architecture list from arguments - this populates | ||
/// `GpuArchList` from `--offload-arch` flags. Only relevant if compiling to | ||
/// CUDA or AMDGCN. Return true if any initialization errors are found. | ||
/// FIXME: "offload-arch" and the BoundArch mechanism should also be | ||
/// used in the SYCLToolChain for SPIR-V AOT to track the offload | ||
/// architecture instead of the Triple sub-arch it currently uses. | ||
bool initializeGpuArchMap() { | ||
const OptTable &Opts = C.getDriver().getOpts(); | ||
for (auto *A : Args) { | ||
unsigned Index; | ||
llvm::Triple *TargetBE = nullptr; | ||
|
||
if (A->getOption().matches(options::OPT_Xsycl_backend_EQ)) | ||
auto GetTripleIt = [&, this](llvm::StringRef Triple) { | ||
llvm::Triple TargetTriple{Triple}; | ||
auto TripleIt = llvm::find_if(SYCLTripleList, [&](auto &SYCLTriple) { | ||
return SYCLTriple == TargetTriple; | ||
}); | ||
return TripleIt != SYCLTripleList.end() ? &*TripleIt : nullptr; | ||
}; | ||
|
||
if (A->getOption().matches(options::OPT_Xsycl_backend_EQ)) { | ||
TargetBE = GetTripleIt(A->getValue(0)); | ||
// Passing device args: -Xsycl-target-backend=<triple> -opt=val. | ||
if (llvm::Triple(A->getValue(0)).isNVPTX()) | ||
if (TargetBE) | ||
Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); | ||
else | ||
continue; | ||
else if (A->getOption().matches(options::OPT_Xsycl_backend)) | ||
} else if (A->getOption().matches(options::OPT_Xsycl_backend)) { | ||
if (SYCLTripleList.size() > 1) { | ||
C.getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) | ||
<< A->getSpelling(); | ||
continue; | ||
} | ||
// Passing device args: -Xsycl-target-backend -opt=val. | ||
TargetBE = &SYCLTripleList.front(); | ||
Index = Args.getBaseArgs().MakeIndex(A->getValue(0)); | ||
else | ||
} else | ||
continue; | ||
|
||
A->claim(); | ||
auto ParsedArg = Opts.ParseOneArg(Args, Index); | ||
|
||
// TODO: Support --no-cuda-gpu-arch, --{,no-}cuda-gpu-arch=all. | ||
if (ParsedArg && | ||
ParsedArg->getOption().matches(options::OPT_offload_arch_EQ)) { | ||
llvm::StringRef ArchStr = ParsedArg->getValue(0); | ||
if (TargetBE->isNVPTX()) { | ||
// The CUDA arch enum also covers AMDGCN archs, so reject non-NVIDIA ones. | ||
CudaArch Arch = StringToCudaArch(ArchStr); | ||
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) { | ||
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) | ||
<< ArchStr; | ||
continue; | ||
} | ||
ArchStr = CudaArchToString(Arch); | ||
} else if (TargetBE->isAMDGCN()) { | ||
llvm::StringMap<bool> Features; | ||
auto Arch = | ||
parseTargetID(getHIPOffloadTargetTriple(), ArchStr, &Features); | ||
if (!Arch) { | ||
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; | ||
continue; | ||
} | ||
auto CanId = getCanonicalTargetID(Arch.getValue(), Features); | ||
ArchStr = Args.MakeArgStringRef(CanId); | ||
} | ||
ParsedArg->claim(); | ||
GpuArchList.push_back(StringToCudaArch(ParsedArg->getValue(0))); | ||
GpuArchList.emplace_back(*TargetBE, ArchStr); | ||
} | ||
} | ||
|
||
// If there are no CUDA architectures provided then default to SM_50. | ||
if (GpuArchList.empty()) { | ||
GpuArchList.push_back(CudaArch::SM_50); | ||
// Handle default architectures | ||
for (auto &Triple : SYCLTripleList) { | ||
// For NVIDIA use SM_50 as a default | ||
if (Triple.isNVPTX() && llvm::none_of(GpuArchList, [&](auto &P) { | ||
return P.first.isNVPTX(); | ||
})) { | ||
llvm::StringRef DefaultArch = CudaArchToString(CudaArch::SM_50); | ||
GpuArchList.emplace_back(Triple, DefaultArch); | ||
} | ||
|
||
// For AMD require the architecture to be set by the user | ||
if (Triple.isAMDGCN() && llvm::none_of(GpuArchList, [&](auto &P) { | ||
return P.first.isAMDGCN(); | ||
})) { | ||
C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about adding a default AMD GPU arch? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The reason I didn't go for a default AMD GPU arch is that, as I understand it, we need to specify the exact GPU architecture for AMD, so a default would only work for a very specific type of GPU. This means that in a lot of cases users would still need to specify the architecture manually, so I think it is better to force the architecture to always be set manually and have a clear diagnostic than to have a default architecture that rarely works and a more confusing error message from HIP. This is different with NVIDIA because |
||
return true; | ||
} | ||
} | ||
|
||
return false; | ||
|
Uh oh!
There was an error while loading. Please reload this page.