diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 331dfbb3f4b67..6272b7d20112f 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -108,6 +108,7 @@ add_clang_library(clangBasic Targets/MSP430.cpp Targets/Mips.cpp Targets/NVPTX.cpp + Targets/NativeCPU.cpp Targets/OSTargets.cpp Targets/PNaCl.cpp Targets/PPC.cpp diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index aa7ef2de597aa..0699ec686e4e6 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -546,41 +546,6 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (Opts.FakeAddressSpaceMap) AddrSpaceMap = &FakeAddrSpaceMap; - - if ((Opts.SYCLIsDevice || Opts.OpenCL) && Opts.SYCLIsNativeCPU) { - // For SYCL Native CPU we use the NVPTXAddrSpaceMap because - // we need builtins to be mangled with AS information. - // This is also enabled in OpenCL mode so that mangling - // matches when building libclc. - - static const unsigned SYCLNativeCPUASMap[] = { - 0, // Default - 1, // opencl_global - 3, // opencl_local - 4, // opencl_constant - 0, // opencl_private - 0, // opencl_generic - 1, // opencl_global_device - 1, // opencl_global_host - 1, // cuda_device - 4, // cuda_constant - 3, // cuda_shared - 1, // sycl_global - 1, // sycl_global_device - 1, // sycl_global_host - 3, // sycl_local - 0, // sycl_private - 0, // ptr32_sptr - 0, // ptr32_uptr - 0, // ptr64 - 0, // hlsl_groupshared - 0, // hlsl_constant - 20, // wasm_funcref - }; - - AddrSpaceMap = &SYCLNativeCPUASMap; - UseAddrSpaceMapMangling = true; - } } bool TargetInfo::initFeatureMap( diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index e26fb39d91fde..7e95e4a45c549 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -28,6 +28,7 @@ #include "Targets/MSP430.h" #include "Targets/Mips.h" #include "Targets/NVPTX.h" +#include "Targets/NativeCPU.h" #include "Targets/OSTargets.h" #include "Targets/PNaCl.h" #include "Targets/PPC.h" @@ -117,6 +118,13 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, default: return nullptr; + case llvm::Triple::UnknownArch: + // native_cpu is only known to Clang, not to LLVM. + if (Triple.str() == "native_cpu") + return std::make_unique(Triple, Opts); + + return nullptr; + case llvm::Triple::arc: return std::make_unique(Triple, Opts); diff --git a/clang/lib/Basic/Targets/NativeCPU.cpp b/clang/lib/Basic/Targets/NativeCPU.cpp new file mode 100644 index 0000000000000..685cf9093645f --- /dev/null +++ b/clang/lib/Basic/Targets/NativeCPU.cpp @@ -0,0 +1,109 @@ +//===--- NativeCPU.cpp - Implement NativeCPU target feature support -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements NativeCPU TargetInfo objects. +// +//===----------------------------------------------------------------------===// + +#include "NativeCPU.h" +#include + +using namespace clang; +using namespace clang::targets; + +static const LangASMap NativeCPUASMap = { + 0, // Default + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 0, // opencl_private + 0, // opencl_generic + 1, // opencl_global_device + 1, // opencl_global_host + 1, // cuda_device + 4, // cuda_constant + 3, // cuda_shared + 1, // sycl_global + 1, // sycl_global_device + 1, // sycl_global_host + 3, // sycl_local + 0, // sycl_private + 0, // ptr32_sptr + 0, // ptr32_uptr + 0, // ptr64 + 0, // hlsl_groupshared + 0, // hlsl_constant + 20, // wasm_funcref +}; + +NativeCPUTargetInfo::NativeCPUTargetInfo(const llvm::Triple &, + const TargetOptions &Opts) + : TargetInfo(llvm::Triple()) { + AddrSpaceMap = &NativeCPUASMap; + UseAddrSpaceMapMangling = true; + HasLegalHalfType = true; + HasFloat16 = true; + resetDataLayout("e"); + + llvm::Triple HostTriple([&] { + // Take the default target triple if no other host triple is specified so + // that system headers work. + if (Opts.HostTriple.empty()) + return llvm::sys::getDefaultTargetTriple(); + + return Opts.HostTriple; + }()); + if (HostTriple.getArch() != llvm::Triple::UnknownArch) { + HostTarget = AllocateTarget(HostTriple, Opts); + + // Copy properties from host target. + BoolWidth = HostTarget->getBoolWidth(); + BoolAlign = HostTarget->getBoolAlign(); + IntWidth = HostTarget->getIntWidth(); + IntAlign = HostTarget->getIntAlign(); + HalfWidth = HostTarget->getHalfWidth(); + HalfAlign = HostTarget->getHalfAlign(); + FloatWidth = HostTarget->getFloatWidth(); + FloatAlign = HostTarget->getFloatAlign(); + DoubleWidth = HostTarget->getDoubleWidth(); + DoubleAlign = HostTarget->getDoubleAlign(); + LongWidth = HostTarget->getLongWidth(); + LongAlign = HostTarget->getLongAlign(); + LongLongWidth = HostTarget->getLongLongWidth(); + LongLongAlign = HostTarget->getLongLongAlign(); + PointerWidth = HostTarget->getPointerWidth(LangAS::Default); + PointerAlign = HostTarget->getPointerAlign(LangAS::Default); + MinGlobalAlign = HostTarget->getMinGlobalAlign(/*TypeSize=*/0, + /*HasNonWeakDef=*/true); + NewAlign = HostTarget->getNewAlign(); + DefaultAlignForAttributeAligned = + HostTarget->getDefaultAlignForAttributeAligned(); + SizeType = HostTarget->getSizeType(); + PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default); + IntMaxType = HostTarget->getIntMaxType(); + WCharType = HostTarget->getWCharType(); + WIntType = HostTarget->getWIntType(); + Char16Type = HostTarget->getChar16Type(); + Char32Type = HostTarget->getChar32Type(); + Int64Type = HostTarget->getInt64Type(); + SigAtomicType = HostTarget->getSigAtomicType(); + ProcessIDType = HostTarget->getProcessIDType(); + + UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); + UseZeroLengthBitfieldAlignment = + HostTarget->useZeroLengthBitfieldAlignment(); + UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); + ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); + + // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and + // we need those macros to be identical on host and device, because (among + // other things) they affect which standard library classes are defined, + // and we need all classes to be defined on both the host and device. + MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); + } +} diff --git a/clang/lib/Basic/Targets/NativeCPU.h b/clang/lib/Basic/Targets/NativeCPU.h new file mode 100644 index 0000000000000..44106cd8d0282 --- /dev/null +++ b/clang/lib/Basic/Targets/NativeCPU.h @@ -0,0 +1,72 @@ +//===--- NativeCPU.h - Declare NativeCPU target feature support -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares NativeCPU TargetInfo objects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_NATIVECPU_H +#define LLVM_CLANG_LIB_BASIC_TARGETS_NATIVECPU_H + +#include "Targets.h" + +namespace clang { +namespace targets { + +class LLVM_LIBRARY_VISIBILITY NativeCPUTargetInfo final : public TargetInfo { + std::unique_ptr HostTarget; + +public: + NativeCPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override { + DefineStd(Builder, "NativeCPU", Opts); + } + + SmallVector getTargetBuiltins() const override { + return {}; + } + + BuiltinVaListKind getBuiltinVaListKind() const override { + if (HostTarget) + return HostTarget->getBuiltinVaListKind(); + + return TargetInfo::VoidPtrBuiltinVaList; + } + + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &info) const override { + return true; + } + + std::string_view getClobbers() const override { return ""; } + + void setSupportedOpenCLOpts() override { supportAllOpenCLOpts(); } + + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { + if (HostTarget) + return HostTarget->checkCallingConvention(CC); + + return TargetInfo::checkCallingConvention(CC); + } + +protected: + ArrayRef getGCCRegNames() const override { return {}; } + + ArrayRef getGCCRegAliases() const override { + return {}; + } + + bool hasBitIntType() const override { return true; } +}; + +} // namespace targets +} // namespace clang + +#endif // LLVM_CLANG_LIB_BASIC_TARGETS_NATIVECPU_H diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index a07e81892372a..1a91cbaa998df 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Driver/Compilation.h" +#include "ToolChains/SYCL.h" #include "clang/Basic/LLVM.h" #include "clang/Driver/Action.h" #include "clang/Driver/Driver.h" @@ -127,7 +128,8 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, if (DeviceOffloadKind == Action::OFK_OpenMP || DeviceOffloadKind == Action::OFK_SYCL) { const ToolChain *HostTC = getSingleOffloadToolChain(); - bool SameTripleAsHost = (TC->getTriple() == HostTC->getTriple()); + bool SameTripleAsHost = (TC->getTriple() == HostTC->getTriple()) || + isSYCLNativeCPU(TC->getTriple()); OffloadArgs = TC->TranslateOffloadTargetArgs( *TranslatedArgs, SameTripleAsHost, AllocatedArgs, DeviceOffloadKind); } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c65c235435bae..b222313be9d30 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -968,6 +968,10 @@ static bool isValidSYCLTriple(llvm::Triple T) { !T.hasEnvironment()) return true; + // 'native_cpu' is valid for Native CPU. + if (isSYCLNativeCPU(T)) + return true; + // Check for invalid SYCL device triple values. // Non-SPIR/SPIRV arch. if (!T.isSPIROrSPIRV()) @@ -1392,12 +1396,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, } Arch = Device->data(); UserTargetName = "amdgcn-amd-amdhsa"; - } else if (Val == "native_cpu") { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - llvm::Triple HostTriple = HostTC->getTriple(); - SYCLTriples.insert(HostTriple.normalize()); - continue; } llvm::Triple DeviceTriple(getSYCLDeviceTriple(UserTargetName)); @@ -5667,9 +5665,7 @@ class OffloadingActionBuilder final { auto IsAMDGCN = TargetTriple.isAMDGCN(); auto IsSPIR = TargetTriple.isSPIROrSPIRV(); bool IsSpirvAOT = TargetTriple.isSPIRAOT(); - const bool IsSYCLNativeCPU = - TC->getAuxTriple() && - driver::isSYCLNativeCPU(TargetTriple, *TC->getAuxTriple()); + bool IsSYCLNativeCPU = isSYCLNativeCPU(TargetTriple); for (const auto &Input : ListIndex) { if (TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga && types::isFPGA(Input->getType())) { @@ -6733,12 +6729,6 @@ class OffloadingActionBuilder final { C.getDriver().getSYCLDeviceTriple("amdgcn-amd-amdhsa"), ValidDevice->data()); UserTargetName = "amdgcn-amd-amdhsa"; - } else if (Val == "native_cpu") { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - llvm::Triple TT = HostTC->getTriple(); - SYCLTripleList.push_back(TT); - continue; } llvm::Triple TT( @@ -7277,10 +7267,6 @@ class OffloadingActionBuilder final { /// Offload deps output is then forwarded to active device action builders so /// they can add it to the device linker inputs. void addDeviceLinkDependenciesFromHost(ActionList &LinkerInputs) { - if (isSYCLNativeCPU(C.getArgs())) { - // SYCL Native CPU doesn't need deps from clang-offload-deps. - return; - } // Link image for reading dependencies from it. auto *LA = C.MakeAction(LinkerInputs, types::TY_Host_Dependencies_Image); @@ -9684,9 +9670,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( Action::OffloadKind DependentOffloadKind; if (UI.DependentOffloadKind == Action::OFK_SYCL && TargetDeviceOffloadKind == Action::OFK_None && - !(isSYCLNativeCPU(Args) && - isSYCLNativeCPU(C.getDefaultToolChain().getTriple(), - TC->getTriple()) && + !(isSYCLNativeCPU(C.getDefaultToolChain().getTriple()) && UA->getDependentActionsInfo().size() > 1)) DependentOffloadKind = Action::OFK_Host; else @@ -10581,9 +10565,9 @@ const ToolChain &Driver::getOffloadToolChain( *HostTC, Args, Kind); break; default: - if (Kind == Action::OFK_SYCL && isSYCLNativeCPU(Args)) - TC = std::make_unique(*this, Target, - *HostTC, Args); + if (Kind == Action::OFK_SYCL && isSYCLNativeCPU(Target)) + TC = std::make_unique(*this, Target, *HostTC, + Args); break; } } diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index f03f2e99dad63..714fd9d37aea7 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -177,7 +177,8 @@ bool OffloadTargetInfo::isOffloadKindCompatible( } bool OffloadTargetInfo::isTripleValid() const { - return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch; + return !Triple.str().empty() && (Triple.getArch() != Triple::UnknownArch || + Triple.str() == "native_cpu---"); } bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 73f0c38e0e294..43fbf18006ea5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5470,8 +5470,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = getToolChain(); const llvm::Triple &RawTriple = TC.getTriple(); - const llvm::Triple &Triple = TC.getEffectiveTriple(); - const std::string &TripleStr = Triple.getTriple(); + llvm::Triple Triple = TC.getEffectiveTriple(); + std::string TripleStr = Triple.getTriple(); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); @@ -5513,7 +5513,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, bool IsUsingLTO = LTOMode != LTOK_None; bool IsFPGASYCLOffloadDevice = IsSYCLDevice && Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga; - const bool IsSYCLNativeCPU = isSYCLNativeCPU(TC); const bool IsSYCLCUDACompat = isSYCLCudaCompatEnabled(Args); // Perform the SYCL host compilation using an external compiler if the user @@ -5573,6 +5572,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsCuda || IsHIP || IsSYCL) IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment(); + // Adjust for SYCL NativeCPU compilations. When compiling in device mode, the + // first compilation uses the NativeCPU target for LLVM IR generation, the + // second compilation uses the host target for machine code generation. + const bool IsSYCLNativeCPU = isSYCLNativeCPU(Triple); + if (IsSYCL && IsSYCLDevice && IsSYCLNativeCPU && AuxTriple && + isa(JA)) { + Triple = *AuxTriple; + TripleStr = Triple.getTriple(); + } + // C++ is not supported for IAMCU. if (IsIAMCU && types::isCXX(Input.getType())) D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU"; @@ -6112,6 +6121,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D); } if (IsSYCLDevice && IsSYCLNativeCPU) { + // NativeCPU generates an initial LLVM module for an unknown target, then + // compiles that for host. Avoid generating a warning for that. + CmdArgs.push_back("-Wno-override-module"); CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-sycl-native-cpu-backend"); } @@ -10439,10 +10451,6 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, TargetTripleOpt = ("llvm_" + TargetTripleOpt).str(); } - const bool IsSYCLNativeCPU = isSYCLNativeCPU(TC); - if (IsSYCLNativeCPU) { - TargetTripleOpt = "native_cpu"; - } WrapperArgs.push_back( C.getArgs().MakeArgString(Twine("-target=") + TargetTripleOpt)); @@ -11180,7 +11188,7 @@ static bool shouldEmitOnlyKernelsAsEntryPoints(const ToolChain &TC, if (TCArgs.hasFlag(options::OPT_fno_sycl_remove_unused_external_funcs, options::OPT_fsycl_remove_unused_external_funcs, false)) return false; - if (isSYCLNativeCPU(TC)) + if (isSYCLNativeCPU(Triple)) return true; // When supporting dynamic linking, non-kernels in a device image can be // called. @@ -11239,7 +11247,7 @@ static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (!Triple.isAMDGCN()) addArgs(PostLinkArgs, TCArgs, {"-emit-param-info"}); // Enable program metadata - if (Triple.isNVPTX() || Triple.isAMDGCN() || isSYCLNativeCPU(TC)) + if (Triple.isNVPTX() || Triple.isAMDGCN() || isSYCLNativeCPU(Triple)) addArgs(PostLinkArgs, TCArgs, {"-emit-program-metadata"}); if (OutputType != types::TY_LLVM_BC) { assert(OutputType == types::TY_Tempfiletable); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index c30311958e621..39086088f731d 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -626,11 +626,6 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, const SYCLDeviceLibsList SYCLNativeCpuDeviceLibs = { {"libsycl-nativecpu_utils", "internal"}}; - const bool isNativeCPU = - (driver::isSYCLNativeCPU(Args) && - driver::isSYCLNativeCPU(C.getDefaultToolChain().getTriple(), - TargetTriple)); - bool IsWindowsMSVCEnv = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); @@ -788,7 +783,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, addLibraries(SYCLDeviceTsanLibs); #endif - if (isNativeCPU) + if (isSYCLNativeCPU(TargetTriple)) addLibraries(SYCLNativeCpuDeviceLibs); return LibraryList; @@ -950,7 +945,6 @@ const char *SYCL::Linker::constructLLVMLinkCommand( // instead of the original object. if (JA.isDeviceOffloading(Action::OFK_SYCL)) { bool IsRDC = !shouldDoPerObjectFileLinking(C); - const bool IsSYCLNativeCPU = isSYCLNativeCPU(this->getToolChain()); auto isNoRDCDeviceCodeLink = [&](const InputInfo &II) { if (IsRDC) return false; @@ -964,6 +958,8 @@ const char *SYCL::Linker::constructLLVMLinkCommand( const ToolChain *HostTC = C.getSingleOffloadToolChain(); const bool IsNVPTX = this->getToolChain().getTriple().isNVPTX(); const bool IsAMDGCN = this->getToolChain().getTriple().isAMDGCN(); + const bool IsSYCLNativeCPU = + isSYCLNativeCPU(this->getToolChain().getTriple()); const bool IsFPGA = this->getToolChain().getTriple().isSPIR() && this->getToolChain().getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga; @@ -1115,7 +1111,8 @@ void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA, assert((getToolChain().getTriple().isSPIROrSPIRV() || getToolChain().getTriple().isNVPTX() || - getToolChain().getTriple().isAMDGCN() || isSYCLNativeCPU(Args)) && + getToolChain().getTriple().isAMDGCN() || + isSYCLNativeCPU(getToolChain().getTriple())) && "Unsupported target"); std::string SubArchName = @@ -1717,9 +1714,9 @@ static ArrayRef getUnsupportedOpts() { } // Currently supported options by SYCL NativeCPU device compilation -static inline bool SupportedByNativeCPU(const SYCLToolChain &TC, +static inline bool SupportedByNativeCPU(const llvm::Triple &Triple, const OptSpecifier &Opt) { - if (!TC.IsSYCLNativeCPU) + if (!isSYCLNativeCPU(Triple)) return false; switch (Opt.getID()) { @@ -1736,7 +1733,6 @@ static inline bool SupportedByNativeCPU(const SYCLToolChain &TC, SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) : ToolChain(D, Triple, Args), HostTC(HostTC), - IsSYCLNativeCPU(Triple == HostTC.getTriple()), SYCLInstallation(D, Triple, Args) { // Lookup binaries into the driver directory, this is used to discover any // dependent SYCL offload compilation tools. @@ -1746,7 +1742,7 @@ SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, for (OptSpecifier Opt : getUnsupportedOpts()) { if (const Arg *A = Args.getLastArg(Opt)) { // Native CPU can support options unsupported by other targets. - if (SupportedByNativeCPU(*this, Opt)) + if (SupportedByNativeCPU(getTriple(), Opt)) continue; // All sanitizer options are not currently supported, except // AddressSanitizer and MemorySanitizer and ThreadSanitizer @@ -1790,7 +1786,7 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, for (OptSpecifier UnsupportedOpt : getUnsupportedOpts()) { if (Opt.matches(UnsupportedOpt)) { // NativeCPU should allow most normal cpu options. - if (SupportedByNativeCPU(*this, Opt.getID())) + if (SupportedByNativeCPU(getTriple(), Opt.getID())) continue; if (Opt.getID() == options::OPT_fsanitize_EQ && A->getValues().size() == 1) { @@ -2185,7 +2181,7 @@ Tool *SYCLToolChain::buildBackendCompiler() const { } Tool *SYCLToolChain::buildLinker() const { - assert(getTriple().isSPIROrSPIRV() || IsSYCLNativeCPU); + assert(getTriple().isSPIROrSPIRV() || isSYCLNativeCPU(getTriple())); return new tools::SYCL::Linker(*this); } diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 66a1313fbce34..1fea680a216ee 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -269,6 +269,11 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool { } // end namespace SYCL } // end namespace tools +inline bool isSYCLNativeCPU(const llvm::Triple &Triple) { + return Triple.getArch() == llvm::Triple::UnknownArch && + Triple.str() == "native_cpu"; +} + namespace toolchains { class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { @@ -309,12 +314,12 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { bool useIntegratedAs() const override { return true; } bool isPICDefault() const override { - if (this->IsSYCLNativeCPU) + if (isSYCLNativeCPU(this->getTriple())) return this->HostTC.isPICDefault(); return false; } llvm::codegenoptions::DebugInfoFormat getDefaultDebugFormat() const override { - if (this->IsSYCLNativeCPU || + if (isSYCLNativeCPU(this->getTriple()) && this->HostTC.getTriple().isWindowsMSVCEnvironment()) return this->HostTC.getDefaultDebugFormat(); return ToolChain::getDefaultDebugFormat(); @@ -337,9 +342,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { SanitizerMask getSupportedSanitizers() const override; - const bool IsSYCLNativeCPU; - - protected: Tool *buildBackendCompiler() const override; Tool *buildLinker() const override; @@ -353,24 +355,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { }; } // end namespace toolchains - -inline bool isSYCLNativeCPU(const llvm::opt::ArgList &Args) { - if (auto SYCLTargets = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { - if (SYCLTargets->containsValue("native_cpu")) - return true; - } - return false; -} - -inline bool isSYCLNativeCPU(const llvm::Triple &HostT, - const llvm::Triple &DevT) { - return HostT == DevT; -} - -inline bool isSYCLNativeCPU(const ToolChain &TC) { - const llvm::Triple *const AuxTriple = TC.getAuxTriple(); - return AuxTriple && isSYCLNativeCPU(TC.getTriple(), *AuxTriple); -} } // end namespace driver } // end namespace clang diff --git a/clang/test/CodeGenSYCL/native_cpu_as.cpp b/clang/test/CodeGenSYCL/native_cpu_as.cpp index 9dfaa3d43b8b3..62974e6d4f41c 100644 --- a/clang/test/CodeGenSYCL/native_cpu_as.cpp +++ b/clang/test/CodeGenSYCL/native_cpu_as.cpp @@ -1,11 +1,11 @@ // This test is temporarily disabled for SYCL Native CPU on Windows // UNSUPPORTED: system-windows // Checks that name mangling matches between SYCL Native CPU and OpenCL when -fsycl-is-native-cpu is set -// RUN: %clang_cc1 -DCPP -fsycl-is-device -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_sycl.ll %s -// RUN: FileCheck -input-file=%t_sycl.ll %s +// RUN: %clang_cc1 -triple=native_cpu -DCPP -fsycl-is-device -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_sycl.ll %s +// RUN: FileCheck -input-file=%t_sycl.ll %s -// RUN: %clang_cc1 -x cl -DOCL -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_ocl.ll %s -// RUN: FileCheck -input-file=%t_ocl.ll %s +// RUN: %clang_cc1 -triple=native_cpu -x cl -DOCL -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_ocl.ll %s +// RUN: FileCheck -input-file=%t_ocl.ll %s #ifdef CPP #define AS_LOCAL __attribute((address_space(3))) @@ -13,7 +13,7 @@ #define AS_PRIVATE __attribute((address_space(0))) #define ATTRS [[intel::device_indirectly_callable]] #define ATTRS2 SYCL_EXTERNAL -#else +#else #ifdef OCL #define AS_LOCAL __local #define AS_GLOBAL __global @@ -29,8 +29,6 @@ ATTRS void func(AS_LOCAL int *p1, AS_GLOBAL int *p2, AS_PRIVATE int *p3){ int private_var; use_private(&private_var); } -// CHECK: define dso_local void @_Z4funcPU3AS3iPU3AS1iPi( -// CHECK: call void @_Z11use_privatePi( - - +// CHECK: define void @_Z4funcPU3AS3iPU3AS1iPi( +// CHECK: call void @_Z11use_privatePi( diff --git a/clang/test/Driver/sycl-native-cpu-fsycl.cpp b/clang/test/Driver/sycl-native-cpu-fsycl.cpp index d4dafdf89b610..9ef386d86c717 100644 --- a/clang/test/Driver/sycl-native-cpu-fsycl.cpp +++ b/clang/test/Driver/sycl-native-cpu-fsycl.cpp @@ -5,8 +5,8 @@ //RUN: %clang -fsycl --sysroot=%S/Inputs/SYCL -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -target aarch64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS-AARCH64 //Link together multiple TUs. -//RUN: touch %t_1.o -//RUN: touch %t_2.o +//RUN: touch %t_1.o +//RUN: touch %t_2.o //RUN: %clang -fsycl -fsycl-targets=native_cpu --sysroot=%S/Inputs/SYCL -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc %t_1.o %t_2.o -ccc-print-bindings 2>&1 | FileCheck %s --check-prefix=CHECK_BINDINGS_MULTI_TU //CHECK_ACTIONS: +- 0: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (host-sycl) @@ -14,7 +14,7 @@ //CHECK_ACTIONS: | +- 2: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (device-sycl) //CHECK_ACTIONS: | +- 3: preprocessor, {2}, c++-cpp-output, (device-sycl) //CHECK_ACTIONS: |- 4: compiler, {3}, ir, (device-sycl) -//CHECK_ACTIONS: +- 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (x86_64-unknown-linux-gnu)" {4}, c++-cpp-output +//CHECK_ACTIONS: +- 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (native_cpu)" {4}, c++-cpp-output //CHECK_ACTIONS: +- 6: compiler, {5}, ir, (host-sycl) //CHECK_ACTIONS: +- 7: backend, {6}, assembler, (host-sycl) //CHECK_ACTIONS:+- 8: assembler, {7}, object, (host-sycl) @@ -27,7 +27,7 @@ //this is where we compile the device code to a shared lib, and we link the host shared lib and the device shared lib //CHECK_ACTIONS:| +- [[VAL81:.*]]: backend, {[[NCPULINK]]}, assembler, (device-sycl) //CHECK_ACTIONS:| +- [[VAL82:.*]]: assembler, {[[VAL81]]}, object, (device-sycl) -//CHECK_ACTIONS:|- [[VAL822:.*]]: offload, "device-sycl (x86_64-unknown-linux-gnu)" {[[VAL82]]}, object +//CHECK_ACTIONS:|- [[VAL822:.*]]: offload, "device-sycl (native_cpu)" {[[VAL82]]}, object //call sycl-post-link and clang-offload-wrapper //CHECK_ACTIONS:| +- [[VAL83:.*]]: sycl-post-link, {[[LINKALL]]}, tempfiletable, (device-sycl) //CHECK_ACTIONS:| +- [[VAL84:.*]]: clang-offload-wrapper, {[[VAL83]]}, object, (device-sycl) @@ -44,24 +44,26 @@ //CHECK_BINDINGS:# "{{.*}}" - "offload wrapper", inputs: ["[[TABLEFILE]].table"], output: "[[WRAPPEROBJ:.*]].o" //CHECK_BINDINGS:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[HOSTOBJ]].o", "[[KERNELOBJ]].o", "[[WRAPPEROBJ]].o"], output: "a.{{.*}}" -//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__" +//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__" //CHECK_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}} //CHECK_INVO:{{.*}}clang{{.*}}"-x" "ir" //CHECK_INVO:{{.*}}sycl-post-link{{.*}}"-emit-program-metadata" // checks that the device and host triple is correct in the generated actions when it is set explicitly -//CHECK_ACTIONS-AARCH64: +- 5: offload, "host-sycl (aarch64-unknown-linux-gnu)" {1}, "device-sycl (aarch64-unknown-linux-gnu)" {4}, c++-cpp-output -//CHECK_ACTIONS-AARCH64:|- 16: offload, "device-sycl (aarch64-unknown-linux-gnu)" {15}, object -//CHECK_ACTIONS-AARCH64:|- 19: offload, "device-sycl (aarch64-unknown-linux-gnu)" {18}, object +//CHECK_ACTIONS-AARCH64: +- 5: offload, "host-sycl (aarch64-unknown-linux-gnu)" {1}, "device-sycl (native_cpu)" {4}, c++-cpp-output +//CHECK_ACTIONS-AARCH64:|- 16: offload, "device-sycl (native_cpu)" {15}, object +//CHECK_ACTIONS-AARCH64:|- 19: offload, "device-sycl (native_cpu)" {18}, object // checks that bindings are correct when linking together multiple TUs on native cpu -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE1HOST:.*]].o", "[[FILE1DEV:.*]].o"] -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE2HOST:.*]].o", "[[FILE2DEV:.*]].o"] -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE1DEV]].o"], output: "[[FILE1SPV:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE2DEV]].o"], output: "[[FILE2SPV:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL::Linker", inputs: ["[[FILE1SPV]].bc", "[[FILE2SPV]].bc"], output: "[[LINK1:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL::Linker", inputs: ["[[LINK1]].bc", "{{.*}}.bc"], output: "[[LINK2:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "clang", inputs: ["{{.*}}.bc"], output: "[[KERNELO:.*]].o" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL post link", inputs: ["[[LINK2]].bc"], output: "[[POSTL:.*]].table" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload wrapper", inputs: ["[[POSTL]].table"], output: "[[WRAP:.*]].o" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[FILE1HOST]].o", "[[FILE2HOST]].o", "[[KERNELO]].o", "[[WRAP]].o"], output: "{{.*}}" +//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE1HOST:.*\.o]]", "{{.*\.o}}"] +//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE2HOST:.*\.o]]", "{{.*\.o}}"] +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["{{.*\.o}}", "[[FILE1DEV:.*\.o]]"] +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE1DEV]]"], output: "[[FILE1SPV:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["{{.*\.o}}", "[[FILE2DEV:.*\.o]]"] +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE2DEV]]"], output: "[[FILE2SPV:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "SYCL::Linker", inputs: ["[[FILE1SPV]]", "[[FILE2SPV]]"], output: "[[LINK1:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "SYCL::Linker", inputs: ["[[LINK1]]", "{{.*\.bc}}"], output: "[[LINK2:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "clang", inputs: ["{{.*}}.bc"], output: "[[KERNELO:.*\.o]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "SYCL post link", inputs: ["[[LINK2]]"], output: "[[POSTL:.*\.table]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "offload wrapper", inputs: ["[[POSTL]]"], output: "[[WRAP:.*\.o]]" +//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[FILE1HOST]]", "[[FILE2HOST]]", "[[KERNELO]]", "[[WRAP]]"], output: "{{.*}}" diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 8664d8023e199..dccb20cec1664 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -1,26 +1,31 @@ // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -### 2>&1 | FileCheck %s -// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -target aarch64-unknown-linux-gnu %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64 +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64 -// checks that the host and device triple are the same, and that the sycl-native-cpu LLVM option is set -// CHECK: clang{{.*}}"-triple" "[[TRIPLE:.*]]"{{.*}}"-aux-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// checks that the host triple is native_cpu, the device triple is set, and that the sycl-native-cpu LLVM option is set +// CHECK: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "{{.*}}" "-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" // checks that the target triples are set correctly when the target is set explicitly -// CHECK-AARCH64: clang{{.*}}"-triple" "aarch64-unknown-linux-gnu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// CHECK-AARCH64: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" -// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s // CHECK-LINUX: {{.*}}"-fsycl-is-device"{{.*}}"-dwarf-version=[[DVERSION:.*]]" "-debugger-tuning=gdb" // CHECK-LINUX-DAG: {{.*}}"-fsycl-is-host"{{.*}}"-dwarf-version=[[DVERSION]]" "-debugger-tuning=gdb" // CHECK-LINUX-NOT: codeview -// RUN: %clang -### -target x86_64-windows-msvc -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-WIN %s +// RUN: %clang -### --target=x86_64-windows-msvc -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-WIN %s // CHECK-WIN: {{.*}}"-fsycl-is-device"{{.*}}"-gcodeview" // CHECK-WIN-DAG: {{.*}}"-fsycl-is-host"{{.*}}"-gcodeview" // CHECK-WIN-NOT: dwarf // checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled -// RUN: %clang -fsycl -fsycl-targets=native_cpu -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s +// Also check that we pass the expected backend options. +// RUN: %clang -fsycl -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu -march=armv9.4-a -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s +// CHECK-OPTS: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:[^"]*]]" +// CHECK-OPTS: clang{{.*}}"-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-device" // CHECK-OPTS-NOT: -sycl-opt +// CHECK-OPTS-SAME: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend" +// CHECK-OPTS-SAME: "-target-feature" "+v9.4a" // RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s // CHECK-NONATIVECPU-NOT: "-D" "__SYCL_NATIVE_CPU__" diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c index a3abba9292d9e..cc94c73188c75 100644 --- a/clang/test/Driver/sycl.c +++ b/clang/test/Driver/sycl.c @@ -145,6 +145,7 @@ // RUN: | FileCheck -check-prefix=DEBUG-WIN %s // RUN: %clang_cl -### -fsycl -Zi -c %s 2>&1 \ // RUN: | FileCheck -check-prefix=DEBUG-WIN %s -// DEBUG-WIN: {{.*}}"-fsycl-is-device"{{.*}}"-gcodeview" +// DEBUG-WIN: {{.*}}"-fsycl-is-device" +// DEBUG-WIN-NOT: "-gcodeview" // DEBUG-WIN: {{.*}}"-fsycl-is-host"{{.*}}"-gcodeview" // DEBUG-WIN-NOT: dwarf-version diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 6d9baaa166f09..eede563e9255b 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -646,7 +646,7 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args, SmallVector &PostLinkArgs, const llvm::Triple Triple) { const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); - bool SYCLNativeCPU = (HostTriple == Triple); + bool SYCLNativeCPU = Triple.str() == "native_cpu"; bool SpecConstsSupported = (!Triple.isNVPTX() && !Triple.isAMDGCN() && !Triple.isSPIRAOT() && !SYCLNativeCPU); if (SpecConstsSupported) @@ -1506,7 +1506,10 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, if (!ClangPath) return ClangPath.takeError(); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + if (Triple.str() == "native_cpu") + Triple = llvm::Triple(Args.getLastArgValue(OPT_host_triple_EQ)); + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); // Create a new file to write the linked device image to. Assume that the // input filename already has the device and architecture. @@ -1659,6 +1662,9 @@ Expected linkDevice(ArrayRef InputFiles, case Triple::loongarch64: return generic::clang(InputFiles, Args); default: + if (Triple.str() == "native_cpu" && IsSYCLKind) + return generic::clang(InputFiles, Args); + return createStringError(Triple.getArchName() + " linking is not supported"); } diff --git a/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp b/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp index f4ed4488f3cac..42593433544a9 100644 --- a/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp +++ b/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp @@ -120,7 +120,8 @@ int main(int argc, const char **argv) { .Case("fpga", true) .Default(false); - bool TripleIsValid = Triple(Triples[I]).getArch() != Triple::UnknownArch; + bool TripleIsValid = Triple(Triples[I]).getArch() != Triple::UnknownArch || + Triples[I] == "native_cpu"; if (!KindIsValid || !TripleIsValid) { SmallVector Buf; @@ -211,7 +212,9 @@ int main(int argc, const char **argv) { // global variable llvm.used to represent a reference to a symbol. But for // other targets we have to create a real reference since llvm.used may // not be representable in the object file. - if (Kinds[I] == "sycl" || Triple(Triples[I]).isSPIR()) { + if (Triples[I] == "native_cpu") { + // SYCL Native CPU doesn't need deps from clang-offload-deps. + } else if (Kinds[I] == "sycl" || Triple(Triples[I]).isSPIR()) { auto *GV = new GlobalVariable( Mod, ArrayTy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ArrayTy, Used), "llvm.used"); diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 5ace69e7c33c9..8f1044f8144a9 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -39,7 +39,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS libspirv/lib/amdgcn/SOURCES; libspirv/lib/ptx-nvidiacl/SOURCES; libspirv/lib/r600/SOURCES; - libspirv/lib/native_cpu-unknown-linux/SOURCES; + libspirv/lib/native_cpu/SOURCES; ) set( LIBCLC_MIN_LLVM 3.9.0 ) @@ -49,9 +49,6 @@ set( LIBCLC_TARGETS_TO_BUILD "all" option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF ) -set( LIBCLC_NATIVECPU_FLAGS_X86_64 "" - CACHE STRING "Semicolon-separated list of compiler flags for x86_64 libclc target.") - if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) # Out-of-tree configuration set( LIBCLC_STANDALONE_BUILD TRUE ) @@ -189,10 +186,10 @@ else() endforeach() endif() -option( LIBCLC_NATIVECPU_HOST_TARGET "Build libclc for Native CPU using the host triple." Off) +option( LIBCLC_NATIVECPU_HOST_TARGET "Build libclc for Native CPU." Off) if( LIBCLC_NATIVECPU_HOST_TARGET ) - list(APPEND LIBCLC_TARGETS_TO_BUILD ${LLVM_TARGET_TRIPLE}) + list(APPEND LIBCLC_TARGETS_TO_BUILD native_cpu) endif() list( SORT LIBCLC_TARGETS_TO_BUILD ) @@ -224,9 +221,7 @@ set( nvptx--nvidiacl_devices none ) set( nvptx64--nvidiacl_devices none ) set( spirv-mesa3d-_devices none ) set( spirv64-mesa3d-_devices none ) -# TODO: Does this need to be set for each possible triple? -set( x86_64-unknown-linux-gnu_devices none ) -set( aarch64-unknown-linux-gnu_devices none ) +set( native_cpu_devices none ) # Setup aliases set( cedar_aliases palm sumo sumo2 redwood juniper ) @@ -304,11 +299,9 @@ else(LIBCLC_STANDALONE_BUILD) endif(LIBCLC_STANDALONE_BUILD) file( TO_CMAKE_PATH ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/clc LIBCLC_LIBRARY_OUTPUT_INTDIR ) -set(NATIVECPU_SUPPORTED_ARCH "x86_64;aarch64") - foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) message( STATUS "libclc target '${t}' is enabled" ) - string( REPLACE "-" ";" TRIPLE ${t} ) + string( REPLACE "-" ";" TRIPLE ${t}-- ) list( GET TRIPLE 0 ARCH ) list( GET TRIPLE 1 VENDOR ) list( GET TRIPLE 2 OS ) @@ -351,18 +344,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( libspirv_dirs ${opencl_dirs} ) - set(IS_NATIVE_CPU_ARCH FALSE) - if( ARCH IN_LIST NATIVECPU_SUPPORTED_ARCH ) - set(IS_NATIVE_CPU_ARCH TRUE) - endif() - - if( IS_NATIVE_CPU_ARCH AND OS STREQUAL linux) - LIST( APPEND libspirv_dirs native_cpu-unknown-linux ) - elseif( IS_NATIVE_CPU_ARCH AND NOT OS STREQUAL linux ) - message(WARNING "libclc is being built for an unsupported ARCH/OS" - " configuration, some SYCL programs may fail to build.") - endif() - set( clc_lib_files ) set( clc_gen_files clc-convert.cl ) @@ -431,11 +412,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # AMDGCN needs libclc to be compiled to high bc version since all atomic # clang builtins need to be accessible list( APPEND build_flags -mcpu=gfx942 -mllvm --amdgpu-oclc-reflect-enable=false ) - elseif( IS_NATIVE_CPU_ARCH ) + elseif( ARCH STREQUAL native_cpu ) list( APPEND build_flags -Xclang -fsycl-is-native-cpu ) - if( ARCH STREQUAL x86_64 ) - list( APPEND build_flags ${LIBCLC_NATIVECPU_FLAGS_X86_64}) - endif() endif() endif() @@ -474,7 +452,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( has_distinct_generic_addrspace FALSE ) elseif( ARCH STREQUAL amdgcn ) set( opt_flags -O3 --amdgpu-oclc-reflect-enable=false ) - elseif( IS_NATIVE_CPU_ARCH ) + elseif( ARCH STREQUAL native_cpu ) set( opt_flags -O3 ) set( has_distinct_generic_addrspace FALSE ) else() @@ -496,9 +474,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) "+__opencl_c_3d_image_writes," "+__opencl_c_images," "+cl_khr_3d_image_writes") - if( ARCH STREQUAL "aarch64") - string( APPEND CL_3_0_EXTENSIONS ",+cl_clang_storage_class_specifiers,+__opencl_c_fp64,+cl_khr_int64_base_atomics" ) - endif() if( supports_generic_addrspace ) string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" ) if( has_distinct_generic_addrspace ) diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/SOURCES b/libclc/libspirv/lib/native_cpu/SOURCES similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/SOURCES rename to libclc/libspirv/lib/native_cpu/SOURCES diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/cl_khr_int64_extended_atomics/minmax_helpers.ll b/libclc/libspirv/lib/native_cpu/cl_khr_int64_extended_atomics/minmax_helpers.ll similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/cl_khr_int64_extended_atomics/minmax_helpers.ll rename to libclc/libspirv/lib/native_cpu/cl_khr_int64_extended_atomics/minmax_helpers.ll diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/integer/popcount.cl b/libclc/libspirv/lib/native_cpu/integer/popcount.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/integer/popcount.cl rename to libclc/libspirv/lib/native_cpu/integer/popcount.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/clc_sqrt.cl b/libclc/libspirv/lib/native_cpu/math/clc_sqrt.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/clc_sqrt.cl rename to libclc/libspirv/lib/native_cpu/math/clc_sqrt.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/fma.cl b/libclc/libspirv/lib/native_cpu/math/fma.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/fma.cl rename to libclc/libspirv/lib/native_cpu/math/fma.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/helpers.h b/libclc/libspirv/lib/native_cpu/math/helpers.h similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/helpers.h rename to libclc/libspirv/lib/native_cpu/math/helpers.h diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_cos.cl b/libclc/libspirv/lib/native_cpu/math/native_cos.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_cos.cl rename to libclc/libspirv/lib/native_cpu/math/native_cos.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp.cl b/libclc/libspirv/lib/native_cpu/math/native_exp.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp.cl rename to libclc/libspirv/lib/native_cpu/math/native_exp.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp2.cl b/libclc/libspirv/lib/native_cpu/math/native_exp2.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp2.cl rename to libclc/libspirv/lib/native_cpu/math/native_exp2.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log.cl b/libclc/libspirv/lib/native_cpu/math/native_log.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log.cl rename to libclc/libspirv/lib/native_cpu/math/native_log.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log10.cl b/libclc/libspirv/lib/native_cpu/math/native_log10.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log10.cl rename to libclc/libspirv/lib/native_cpu/math/native_log10.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log2.cl b/libclc/libspirv/lib/native_cpu/math/native_log2.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log2.cl rename to libclc/libspirv/lib/native_cpu/math/native_log2.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sin.cl b/libclc/libspirv/lib/native_cpu/math/native_sin.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sin.cl rename to libclc/libspirv/lib/native_cpu/math/native_sin.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sqrt.cl b/libclc/libspirv/lib/native_cpu/math/native_sqrt.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sqrt.cl rename to libclc/libspirv/lib/native_cpu/math/native_sqrt.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/round.cl b/libclc/libspirv/lib/native_cpu/math/round.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/round.cl rename to libclc/libspirv/lib/native_cpu/math/round.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/sqrt.cl b/libclc/libspirv/lib/native_cpu/math/sqrt.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/sqrt.cl rename to libclc/libspirv/lib/native_cpu/math/sqrt.cl diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 01e3b13bcb9c6..d14aab811222e 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -309,8 +309,20 @@ DefShuffleINTEL_All(double, f64, double); DefShuffleINTEL_All(float, f32, float); DefShuffleINTEL_All(_Float16, f16, _Float16); -// Vector versions of shuffle are generated by the FixABIBuiltinsSYCLNativeCPU -// pass +#define DefineShuffleVec(T, N, Sfx, MuxType) \ + using vt##T##N = sycl::vec::vector_t; \ + using vt##MuxType##N = sycl::vec::vector_t; \ + DefShuffleINTEL_All(vt##T##N, v##N##Sfx, vt##MuxType##N) + +#define DefineShuffleVec2to16(Type, Sfx, MuxType) \ + DefineShuffleVec(Type, 2, Sfx, MuxType); \ + DefineShuffleVec(Type, 4, Sfx, MuxType); \ + DefineShuffleVec(Type, 8, Sfx, MuxType); \ + DefineShuffleVec(Type, 16, Sfx, MuxType) + +DefineShuffleVec2to16(int32_t, i32, int32_t); +DefineShuffleVec2to16(uint32_t, i32, int32_t); +DefineShuffleVec2to16(float, f32, float); #define Define2ArgForward(Type, Name, Callee) \ DEVICE_EXTERNAL Type Name(Type a, Type b) noexcept { return Callee(a, b); } \ diff --git a/llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h deleted file mode 100644 index 9eea9a87fced2..0000000000000 --- a/llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h +++ /dev/null @@ -1,29 +0,0 @@ -//===---- FixABIMuxBuiltins.h - Fixup ABI issues with called mux builtins ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Creates calls to shuffle up/down/xor mux builtins taking into account ABI of the -// SYCL functions. For now this only is used for vector variants. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" - - -namespace llvm { - -class FixABIMuxBuiltinsPass final - : public llvm::PassInfoMixin { - public: - llvm::PreservedAnalyses run(llvm::Module &, llvm::ModuleAnalysisManager &); -}; - -} // namespace llvm - diff --git a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt index f499e8f768504..f176fcb48e164 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt +++ b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils PrepareSYCLNativeCPU.cpp RenameKernelSYCLNativeCPU.cpp ConvertToMuxBuiltinsSYCLNativeCPU.cpp - FixABIMuxBuiltinsSYCLNativeCPU.cpp FAtomicsNativeCPU.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp deleted file mode 100644 index b3ff7372b8d0f..0000000000000 --- a/llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp +++ /dev/null @@ -1,226 +0,0 @@ -//===-- FixABIMuxBuiltinsSYCLNativeCPU.cpp - Fixup mux ABI issues ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Creates calls to shuffle up/down/xor mux builtins taking into account ABI of -// the SYCL functions. For now this only is used for vector variants. -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include - -#define DEBUG_TYPE "fix-abi-mux-builtins" - -using namespace llvm; - -PreservedAnalyses FixABIMuxBuiltinsPass::run(Module &M, - ModuleAnalysisManager &AM) { - bool Changed = false; - - // Decide if a function needs updated and if so what parameters need changing, - // as well as the return value - auto FunctionNeedsFixing = - [](Function &F, - llvm::SmallVectorImpl> &Updates, - llvm::Type *&RetVal, std::string &MuxFuncNameToCall) { - if (!F.isDeclaration()) { - return false; - } - if (!F.getName().contains("__spirv_SubgroupShuffle")) { - return false; - } - Updates.clear(); - auto LIDvPos = F.getName().find("ELIDv"); - llvm::StringRef NameToMatch; - if (LIDvPos != llvm::StringRef::npos) { - // Add sizeof ELIDv to get num characters to match against - NameToMatch = F.getName().take_front(LIDvPos + 5); - } else { - return false; - } - - unsigned int StartIdx = 0; - unsigned int EndIdx = 1; - if (NameToMatch == "_Z32__spirv_SubgroupShuffleDownINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_down_"; - } else if (NameToMatch == "_Z30__spirv_SubgroupShuffleUpINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_up_"; - } else if (NameToMatch == "_Z28__spirv_SubgroupShuffleINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_"; - EndIdx = 0; - } else if (NameToMatch == "_Z31__spirv_SubgroupShuffleXorINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_xor_"; - EndIdx = 0; - } else { - return false; - } - - // We need to create the body for this. First we need to find out what - // the first arguments should be - llvm::StringRef RemainingName = - F.getName().drop_front(NameToMatch.size()); - std::string MuxFuncTypeStr = "UNKNOWN"; - - unsigned int VecWidth = 0; - if (RemainingName.consumeInteger(10, VecWidth)) { - return false; - } - if (!RemainingName.consume_front("_")) { - return false; - } - - char TypeCh = RemainingName[0]; - Type *BaseType = nullptr; - switch (TypeCh) { - case 'a': - case 'h': - BaseType = llvm::Type::getInt8Ty(F.getContext()); - MuxFuncTypeStr = "i8"; - break; - case 's': - case 't': - BaseType = llvm::Type::getInt16Ty(F.getContext()); - MuxFuncTypeStr = "i16"; - break; - - case 'i': - case 'j': - BaseType = llvm::Type::getInt32Ty(F.getContext()); - MuxFuncTypeStr = "i32"; - break; - case 'l': - case 'm': - BaseType = llvm::Type::getInt64Ty(F.getContext()); - MuxFuncTypeStr = "i64"; - break; - case 'f': - BaseType = llvm::Type::getFloatTy(F.getContext()); - MuxFuncTypeStr = "f32"; - break; - case 'd': - BaseType = llvm::Type::getDoubleTy(F.getContext()); - MuxFuncTypeStr = "f64"; - break; - default: - return false; - } - auto *VecType = llvm::FixedVectorType::get(BaseType, VecWidth); - RetVal = VecType; - - // Work out the mux function to call's type extension based on v##N##Sfx - MuxFuncNameToCall += "v"; - MuxFuncNameToCall += std::to_string(VecWidth); - MuxFuncNameToCall += MuxFuncTypeStr; - - unsigned int CurrentIndex = 0; - for (auto &Arg : F.args()) { - if (Arg.hasStructRetAttr()) { - StartIdx++; - EndIdx++; - } else { - if (CurrentIndex >= StartIdx && CurrentIndex <= EndIdx) { - if (Arg.getType() != VecType) { - Updates.push_back(std::pair( - CurrentIndex, VecType)); - } - } - } - CurrentIndex++; - } - return true; - }; - - llvm::SmallVector FuncsToProcess; - for (auto &F : M.functions()) { - FuncsToProcess.push_back(&F); - } - - for (auto *F : FuncsToProcess) { - llvm::SmallVector, 4> ArgUpdates; - llvm::Type *RetType = nullptr; - std::string MuxFuncNameToCall; - if (!FunctionNeedsFixing(*F, ArgUpdates, RetType, MuxFuncNameToCall)) { - continue; - } - if (!F->isDeclaration()) { - continue; - } - Changed = true; - IRBuilder<> IR(BasicBlock::Create(F->getContext(), "", F)); - - llvm::SmallVector Args; - unsigned int ArgIndex = 0; - unsigned int UpdateIndex = 0; - - for (auto &Arg : F->args()) { - if (!Arg.hasStructRetAttr()) { - if (UpdateIndex < ArgUpdates.size() && - std::get<0>(ArgUpdates[UpdateIndex]) == ArgIndex) { - Args.push_back(std::get<1>(ArgUpdates[UpdateIndex])); - UpdateIndex++; - } else { - Args.push_back(Arg.getType()); - } - } - ArgIndex++; - } - - FunctionType *FT = FunctionType::get(RetType, Args, false); - Function *NewFunc = - Function::Create(FT, F->getLinkage(), MuxFuncNameToCall, M); - llvm::SmallVector CallArgs; - auto NewFuncArgItr = NewFunc->args().begin(); - Argument *SretPtr = nullptr; - for (auto &Arg : F->args()) { - if (Arg.hasStructRetAttr()) { - SretPtr = &Arg; - } else { - if (Arg.getType() != (*NewFuncArgItr).getType()) { - if (Arg.getType()->isPointerTy()) { - Value *ArgLoad = IR.CreateLoad((*NewFuncArgItr).getType(), &Arg); - CallArgs.push_back(ArgLoad); - } else { - Value *ArgCast = IR.CreateBitCast(&Arg, (*NewFuncArgItr).getType()); - CallArgs.push_back(ArgCast); - } - } else { - CallArgs.push_back(&Arg); - } - NewFuncArgItr++; - } - } - - Value *Res = IR.CreateCall(NewFunc, CallArgs); - // If the return type is different to the initial function, then bitcast it - // unless it's void in which case we'd expect an StructRet parameter which - // needs stored to. - if (F->getReturnType() != RetType) { - if (F->getReturnType()->isVoidTy()) { - // If we don't have an StructRet parameter then something is wrong with - // the initial function - if (!SretPtr) { - llvm_unreachable( - "No struct ret pointer for Sub group shuffle function"); - } - - IR.CreateStore(Res, SretPtr); - } else { - Res = IR.CreateBitCast(Res, F->getReturnType()); - } - } - if (F->getReturnType()->isVoidTy()) { - IR.CreateRetVoid(); - } else { - IR.CreateRet(Res); - } - } - - return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index b30b6c41c2b99..f36d29da74b6b 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" -#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/SpecConstants.h" @@ -75,7 +74,6 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( #ifdef NATIVECPU_USE_OCK MPM.addPass(compiler::utils::PrepareBarriersPass()); MPM.addPass(compiler::utils::TransferKernelMetadataPass()); - MPM.addPass(FixABIMuxBuiltinsPass()); // Always enable vectorizer, unless explictly disabled or -O0 is set. if (OptLevel != OptimizationLevel::O0 && !SYCLNativeCPUNoVecz) { MAM.registerPass([] { return vecz::TargetInfoAnalysis(); }); diff --git a/sycl/test/check_device_code/native_cpu/shuffle_abi.cpp b/sycl/test/check_device_code/native_cpu/shuffle_abi.cpp deleted file mode 100644 index 8a94745f08100..0000000000000 --- a/sycl/test/check_device_code/native_cpu/shuffle_abi.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// REQUIRES: native_cpu_ock && linux - -// This doesn't test every possible case since it is quite slow to compile. -// long and double are not tested as it seems to generate loops in the code -// rather than vector versions. - -// RUN: %clangxx -DTYPE=int -DVEC_WIDTH=2 -DOPER=TF_SHIFT_UP -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix UP_V2_INT %s -// RUN: %clangxx -DTYPE=short -DVEC_WIDTH=4 -DOPER=TF_SHIFT_DOWN -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix DOWN_V4_SHORT %s -// RUN: %clangxx -DTYPE=char -DVEC_WIDTH=4 -DOPER=TF_SHIFT_XOR -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix XOR_V4_CHAR %s -// RUN: %clangxx -DTYPE=float -DVEC_WIDTH=8 -DOPER=TF_SHIFT_UP -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix UP_V8_FLOAT %s -// RUN: %clangxx -DTYPE="unsigned int" -DVEC_WIDTH=8 -DOPER=TF_SELECT -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix SELECT_V8_SELECT_I32 %s - -// Tests that sub-group shuffles work even when abi is different to what is -// expected - -#include - -static constexpr size_t NumElems = VEC_WIDTH; -static constexpr size_t NumWorkItems = 64; - -// UP_V2_INT: double @_Z30__spirv_SubgroupShuffleUpINTELIDv2_iET_S1_S1_j(double noundef %[[ARG0:[0-9]+]], double noundef %[[ARG1:[0-9]+]] -// UP_V2_INT: %[[UPV2I32_BITCAST_OP0:[0-9]+]] = bitcast double %[[ARG0]] to <2 x i32> -// UP_V2_INT: %[[UPV2I32_BITCAST_OP1:[0-9]+]] = bitcast double %[[ARG1]] to <2 x i32> -// UP_V2_INT: %[[UPV2I32_CALL_SHUFFLE:[0-9]+]] = call <2 x i32> @__mux_sub_group_shuffle_up_v2i32(<2 x i32> %[[UPV2I32_BITCAST_OP0]], <2 x i32> %[[UPV2I32_BITCAST_OP1]] -// UP_V2_INT: %[[UPV2I32_BITCAST_RESULT:[0-9]+]] = bitcast <2 x i32> %[[UPV2I32_CALL_SHUFFLE]] to double -// UP_V2_INT: ret double %[[UPV2I32_BITCAST_RESULT]] - -// DOWN_V4_SHORT: double @_Z32__spirv_SubgroupShuffleDownINTELIDv4_sET_S1_S1_j(double noundef %[[ARG0:[0-9]+]], double noundef %[[ARG1:[0-9]+]] -// DOWN_V4_SHORT: %[[DOWNV4I16_BITCAST_OP0:[0-9]+]] = bitcast double %[[ARG0]] to <4 x i16> -// DOWN_V4_SHORT: %[[DOWNV4I16_BITCAST_OP1:[0-9]+]] = bitcast double %[[ARG1]] to <4 x i16> -// DOWN_V4_SHORT: %[[DOWNV4I16_CALL_SHUFFLE:[0-9]+]] = call <4 x i16> @__mux_sub_group_shuffle_down_v4i16(<4 x i16> %[[DOWNV4I16_BITCAST_OP0]], <4 x i16> %[[DOWNV4I16_BITCAST_OP1]] -// DOWN_V4_SHORT: %[[DOWNV4I16_BITCAST_RESULT:[0-9]+]] = bitcast <4 x i16> %[[DOWNV4I16_CALL_SHUFFLE]] to double -// DOWN_V4_SHORT: ret double %[[DOWNV4I16_BITCAST_RESULT]] - -// XOR_V4_CHAR: i32 @_Z31__spirv_SubgroupShuffleXorINTELIDv4_aET_S1_j(i32 noundef %[[ARG0:[0-9]+]], i32 -// XOR_V4_CHAR: %[[XORV4I8_BITCAST_OP0:[0-9]+]] = bitcast i32 %[[ARG0]] to <4 x i8> -// XOR_V4_CHAR: %[[XORV4I8_CALL_SHUFFLE:[0-9]+]] = call <4 x i8> @__mux_sub_group_shuffle_xor_v4i8(<4 x i8> %[[XORV4I8_BITCAST_OP0]], i32 -// XOR_V4_CHAR: %[[XORV4I8_BITCAST_RESULT:[0-9]+]] = bitcast <4 x i8> %[[XORV4I8_CALL_SHUFFLE]] to i32 -// XOR_V4_CHAR: ret i32 %[[XORV4I8_BITCAST_RESULT]] - -// UP_V8_FLOAT: <8 x float> @_Z30__spirv_SubgroupShuffleUpINTELIDv8_fET_S1_S1_j(ptr noundef byval(<8 x float>) align 32 %[[ARG0:[0-9]+]], ptr noundef byval(<8 x float>) align 32 %[[ARG1:[0-9]+]] -// UP_V8_FLOAT: %[[UPV8F32_BYVAL_LOAD_OP0:[0-9]+]] = load <8 x float>, ptr %[[ARG0]], align 32 -// UP_V8_FLOAT: %[[UPV8F32_BYVAL_LOAD_OP1:[0-9]+]] = load <8 x float>, ptr %[[ARG1]], align 32 -// UP_V8_FLOAT: %[[UPV8F32_CALL_SHUFFLE:[0-9]+]] = call <8 x float> @__mux_sub_group_shuffle_up_v8f32(<8 x float> %[[UPV8F32_BYVAL_LOAD_OP0]], <8 x float> %[[UPV8F32_BYVAL_LOAD_OP1]], i32 -// UP_V8_FLOAT: ret <8 x float> %[[UPV8F32_CALL_SHUFFLE:[0-9]+]] - -// SELECT_V8_SELECT_I32: <8 x i32> @_Z28__spirv_SubgroupShuffleINTELIDv8_jET_S1_j(ptr noundef byval(<8 x i32>) align 32 %[[ARG0:[0-9]+]], -// SELECT_V8_SELECT_I32: %[[SELV8I32_BYVAL_LOAD_OP0:[0-9]+]] = load <8 x i32>, ptr %[[ARG0]], align 32 -// SELECT_V8_SELECT_I32: %[[SELV8I32_CALL_SHUFFLE:[0-9]+]] = call <8 x i32> @__mux_sub_group_shuffle_v8i32(<8 x i32> %[[SELV8I32_BYVAL_LOAD_OP0]], i32 -// SELECT_V8_SELECT_I32: ret <8 x i32> %[[SELV8I32_CALL_SHUFFLE:[0-9]+]] - -enum TEST_FUNC_CHOICE { TF_SHIFT_DOWN, TF_SHIFT_UP, TF_SHIFT_XOR, TF_SELECT }; - -template -void ShuffleOpTest() { - sycl::queue Q; - - ShiftType ShiftRes[NumWorkItems]; - - { - sycl::buffer ShuffleResBuf{ShiftRes, NumWorkItems}; - - Q.submit([&](sycl::handler &CGH) { - sycl::accessor ShuffleRes{ShuffleResBuf, CGH, sycl::write_only}; - - CGH.parallel_for( - sycl::nd_range<1>{sycl::range<1>{NumWorkItems}, - sycl::range<1>{NumWorkItems}}, - [=](sycl::nd_item<1> It) { - int GID = It.get_global_linear_id(); - ShiftType ItemVal{0}; - for (int I = 0; I < NumElems; ++I) - ItemVal[I] = I; - - sycl::sub_group SG = It.get_sub_group(); - if (Choice == TF_SHIFT_DOWN) { - ShuffleRes[GID] = sycl::shift_group_left(SG, ItemVal); - } else if (Choice == TF_SHIFT_UP) { - ShuffleRes[GID] = sycl::shift_group_right(SG, ItemVal); - } else if (Choice == TF_SHIFT_XOR) { - ShuffleRes[GID] = sycl::permute_group_by_xor(SG, ItemVal, 1); - } else if (Choice == TF_SELECT) { - ShuffleRes[GID] = sycl::select_from_group(SG, ItemVal, 1); - } - }); - }); - } -} - -int main() { - ShuffleOpTest, OPER>(); - return 0; -} diff --git a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp index c1bcd6ad0f822..afc019849eab7 100644 --- a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp +++ b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp @@ -2,4 +2,4 @@ // REQUIRES: native_cpu && linux // RUN: %clang -### -fsycl -fsycl-targets=native_cpu -target x86_64-unknown-linux-gnu %s 2> %t.ncpu.out // RUN: FileCheck %s --input-file %t.ncpu.out -// CHECK: {{(\\|/)}}remangled-l64-signed_char.libspirv-x86_64-unknown-linux-gnu.bc" +// CHECK: {{(\\|/)}}remangled-l64-signed_char.libspirv-native_cpu.bc" diff --git a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp index ef146e5110e09..915bf0af86b4e 100644 --- a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp +++ b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp @@ -4,4 +4,4 @@ // Check that l32 variant is selected for Windows // RUN: %clang -### -fsycl -fsycl-targets=native_cpu -target x86_64-windows %s 2> %t-win.ncpu.out // RUN: FileCheck %s --input-file %t-win.ncpu.out --check-prefix=CHECK-WIN -// CHECK-WIN: {{(\\|/)}}remangled-l32-signed_char.libspirv-x86_64-unknown-windows-msvc.bc" +// CHECK-WIN: {{(\\|/)}}remangled-l32-signed_char.libspirv-native_cpu.bc"