diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..e8391d7491a3a 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -806,7 +806,8 @@ class ToolChain { /// Get paths for device libraries. virtual llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args) const; + getDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadingKind) const; /// Add the system specific linker arguments to use /// for the given HIP runtime library type. diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..95a0e6a48d659 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1614,7 +1614,8 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector -ToolChain::getDeviceLibs(const ArgList &DriverArgs) const { +ToolChain::getDeviceLibs(const ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { return {}; } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 7fc34f451f183..ad68979338cfa 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -31,6 +31,68 @@ using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; +RocmInstallationDetector::CommonBitcodeLibsPreferences:: + CommonBitcodeLibsPreferences(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + StringRef GPUArch, + const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT) + : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion( + tools::getAMDGPUCodeObjectVersion(D, DriverArgs))) { + const auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); + const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); + + IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP; + + const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); + Wave64 = + !HasWave32 || DriverArgs.hasFlag(options::OPT_mwavefrontsize64, + options::OPT_mno_wavefrontsize64, false); + + const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP || + DeviceOffloadingKind == Action::OFK_HIP; + + // Default to enabling f32 denormals on subtargets where fma is fast with + // denormals + const bool DefaultDAZ = + (Kind == llvm::AMDGPU::GK_NONE) + ? false + : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && + (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32)); + // TODO: There are way too many flags that change this. Do we need to + // check them all? + DAZ = IsKnownOffloading + ? DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, + DefaultDAZ) + : DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || DefaultDAZ; + + FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only) || + DriverArgs.hasFlag(options::OPT_ffinite_math_only, + options::OPT_fno_finite_math_only, false); + + UnsafeMathOpt = + DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations) || + DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, + options::OPT_fno_unsafe_math_optimizations, false); + + FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math) || + DriverArgs.hasFlag(options::OPT_ffast_math, + options::OPT_fno_fast_math, false); + + const bool DefaultSqrt = IsKnownOffloading ? true : false; + CorrectSqrt = + DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) || + DriverArgs.hasFlag( + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt); + // GPU Sanitizer currently only supports ASan and is enabled through host + // ASan. + GPUSan = (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, + options::OPT_fno_gpu_sanitize, true) && + NeedsASanRT); +} + void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { assert(!Path.empty()); @@ -884,33 +946,14 @@ void ROCMToolChain::addClangTargetOptions( ABIVer)) return; - bool Wave64 = isWave64(DriverArgs, Kind); - // TODO: There are way too many flags that change this. Do we need to check - // them all? - bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || - getDefaultDenormsAreZeroForTarget(Kind); - bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); - - bool UnsafeMathOpt = - DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); - bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); - bool CorrectSqrt = - DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); - - // GPU Sanitizer currently only supports ASan and is enabled through host - // ASan. - bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, true) && - getSanitizerArgs(DriverArgs).needsAsanRt(); - // Add the OpenCL specific bitcode library. llvm::SmallVector BCLibs; BCLibs.emplace_back(RocmInstallation->getOpenCLPath().str()); // Add the generic set of libraries. BCLibs.append(RocmInstallation->getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, - FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false)); + DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind, + getSanitizerArgs(DriverArgs).needsAsanRt())); for (auto [BCFile, Internalize] : BCLibs) { if (Internalize) @@ -947,35 +990,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs( llvm::SmallVector RocmInstallationDetector::getCommonBitcodeLibs( - const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, - bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, - bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan, - bool isOpenMP) const { + const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, + StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT) const { llvm::SmallVector BCLibs; + CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch, + DeviceOffloadingKind, NeedsASanRT}; + auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib, bool Internalize = true) { BCLib.ShouldInternalize = Internalize; BCLibs.emplace_back(BCLib); }; auto AddSanBCLibs = [&]() { - if (GPUSan) + if (Pref.GPUSan) AddBCLib(getAsanRTLPath(), false); }; AddSanBCLibs(); AddBCLib(getOCMLPath()); - if (!isOpenMP) + if (!Pref.IsOpenMP) AddBCLib(getOCKLPath()); - else if (GPUSan && isOpenMP) + else if (Pref.GPUSan && Pref.IsOpenMP) AddBCLib(getOCKLPath(), false); - AddBCLib(getDenormalsAreZeroPath(DAZ)); - AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)); - AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); - AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); - AddBCLib(getWavefrontSize64Path(Wave64)); + AddBCLib(getDenormalsAreZeroPath(Pref.DAZ)); + AddBCLib(getUnsafeMathPath(Pref.UnsafeMathOpt || Pref.FastRelaxedMath)); + AddBCLib(getFiniteOnlyPath(Pref.FiniteOnly || Pref.FastRelaxedMath)); + AddBCLib(getCorrectlyRoundedSqrtPath(Pref.CorrectSqrt)); + AddBCLib(getWavefrontSize64Path(Pref.Wave64)); AddBCLib(LibDeviceFile); - auto ABIVerPath = getABIVersionPath(ABIVer); + auto ABIVerPath = getABIVersionPath(Pref.ABIVer); if (!ABIVerPath.empty()) AddBCLib(ABIVerPath); @@ -983,9 +1028,9 @@ RocmInstallationDetector::getCommonBitcodeLibs( } llvm::SmallVector -ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, - const std::string &GPUArch, - bool isOpenMP) const { +ROCMToolChain::getCommonDeviceLibNames( + const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch, + Action::OffloadKind DeviceOffloadingKind) const { auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); @@ -996,33 +1041,9 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, ABIVer)) return {}; - // If --hip-device-lib is not set, add the default bitcode libraries. - // TODO: There are way too many flags that change this. Do we need to check - // them all? - bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, - options::OPT_fno_gpu_flush_denormals_to_zero, - getDefaultDenormsAreZeroForTarget(Kind)); - bool FiniteOnly = DriverArgs.hasFlag( - options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); - bool UnsafeMathOpt = - DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, false); - bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, - options::OPT_fno_fast_math, false); - bool CorrectSqrt = DriverArgs.hasFlag( - options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, - options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); - bool Wave64 = isWave64(DriverArgs, Kind); - - // GPU Sanitizer currently only supports ASan and is enabled through host - // ASan. - bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, true) && - getSanitizerArgs(DriverArgs).needsAsanRt(); - return RocmInstallation->getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, - FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP); + DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind, + getSanitizerArgs(DriverArgs).needsAsanRt()); } bool AMDGPUToolChain::shouldSkipSanitizeOption( diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 08bd4fa556f78..513c77df786fe 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -147,7 +147,7 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { llvm::SmallVector getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch, - bool isOpenMP = false) const; + Action::OffloadKind DeviceOffloadingKind) const; SanitizerMask getSupportedSanitizers() const override { return SanitizerKind::Address; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 7ffa3f0f558f3..2b41d54a9eb73 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -44,7 +44,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( true)) return; - for (auto BCFile : getDeviceLibs(DriverArgs)) { + for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -132,7 +132,9 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, } llvm::SmallVector -AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const { +AMDGPUOpenMPToolChain::getDeviceLibs( + const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadingKind) const { if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) return {}; @@ -140,8 +142,8 @@ AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const { getTriple(), Args.getLastArgValue(options::OPT_march_EQ)); SmallVector BCLibs; - for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(), - /*IsOpenMP=*/true)) + for (auto BCLib : + getCommonDeviceLibNames(Args, GpuArch.str(), DeviceOffloadingKind)) BCLibs.emplace_back(BCLib); return BCLibs; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index 0536c9f7f564c..cbafdf57fa466 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -58,7 +58,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final const llvm::opt::ArgList &Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadKind) const override; const ToolChain &HostTC; }; diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 5fe0f85ef09af..b4c6da0d73d13 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -264,7 +264,7 @@ void HIPAMDToolChain::addClangTargetOptions( return; // No DeviceLibs for SPIR-V. } - for (auto BCFile : getDeviceLibs(DriverArgs)) { + for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -355,7 +355,8 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, } llvm::SmallVector -HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, + Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector BCLibs; if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true) || @@ -397,7 +398,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { assert(!GpuArch.empty() && "Must have an explicit GPU arch."); // Add common device libraries like ocml etc. - for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str())) + for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(), + DeviceOffloadingKind)) BCLibs.emplace_back(N); // Add instrument lib. diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index 3630b11cd8b1a..bcc3ebb0f33de 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -80,7 +80,8 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args, + Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index 53649ca40d99b..643a67f743957 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -149,7 +149,8 @@ void HIPSPVToolChain::addClangTargetOptions( CC1Args.append( {"-fvisibility=hidden", "-fapply-global-visibility-to-externs"}); - for (const BitCodeLibraryInfo &BCFile : getDeviceLibs(DriverArgs)) + for (const BitCodeLibraryInfo &BCFile : + getDeviceLibs(DriverArgs, DeviceOffloadingKind)) CC1Args.append( {"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)}); } @@ -200,7 +201,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, } llvm::SmallVector -HIPSPVToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPSPVToolChain::getDeviceLibs( + const llvm::opt::ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector BCLibs; if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index ecd82e7052e48..caf6924151446 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h index 2a09da0114898..ebd544367a5a8 100644 --- a/clang/lib/Driver/ToolChains/ROCm.h +++ b/clang/lib/Driver/ToolChains/ROCm.h @@ -11,6 +11,7 @@ #include "clang/Basic/Cuda.h" #include "clang/Basic/LLVM.h" +#include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" @@ -18,6 +19,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/VersionTuple.h" +#include "llvm/TargetParser/TargetParser.h" #include "llvm/TargetParser/Triple.h" namespace clang { @@ -77,6 +79,24 @@ class RocmInstallationDetector { SPACKReleaseStr(SPACKReleaseStr.str()) {} }; + struct CommonBitcodeLibsPreferences { + CommonBitcodeLibsPreferences(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + StringRef GPUArch, + const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT); + + DeviceLibABIVersion ABIVer; + bool IsOpenMP; + bool Wave64; + bool DAZ; + bool FiniteOnly; + bool UnsafeMathOpt; + bool FastRelaxedMath; + bool CorrectSqrt; + bool GPUSan; + }; + const Driver &D; bool HasHIPRuntime = false; bool HasDeviceLibrary = false; @@ -175,11 +195,11 @@ class RocmInstallationDetector { /// Get file paths of default bitcode libraries common to AMDGPU based /// toolchains. - llvm::SmallVector getCommonBitcodeLibs( - const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, - bool Wave64, bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, - bool FastRelaxedMath, bool CorrectSqrt, DeviceLibABIVersion ABIVer, - bool GPUSan, bool isOpenMP) const; + llvm::SmallVector + getCommonBitcodeLibs(const llvm::opt::ArgList &DriverArgs, + StringRef LibDeviceFile, StringRef GPUArch, + const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT) const; /// Check file paths of default bitcode libraries common to AMDGPU based /// toolchains. \returns false if there are invalid or missing files. bool checkCommonBitcodeLibs(StringRef GPUArch, StringRef LibDeviceFile,