From 8249b0eebb1542c0199a9985b948b5bef8fad31a Mon Sep 17 00:00:00 2001 From: gejin Date: Tue, 3 Nov 2020 15:31:27 +0800 Subject: [PATCH 1/2] Enable Dead Function Elimination in sycl-post-link Signed-off-by: gejin --- clang/include/clang/Driver/Action.h | 5 ++ clang/lib/Driver/Driver.cpp | 53 ++++++++++++++++---- clang/lib/Driver/ToolChains/Clang.cpp | 6 ++- llvm/tools/sycl-post-link/sycl-post-link.cpp | 53 ++++++++++++++++++-- 4 files changed, 100 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 3965ad1df7814..99b09167157fd 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -687,8 +687,13 @@ class SYCLPostLinkJobAction : public JobAction { bool getRTSetsSpecConstants() const { return RTSetsSpecConsts; } + void setDeadFunctionElimination(bool Val) { DeadFunctionElimination = Val; } + + bool getDeadFunctionElimination() const { return DeadFunctionElimination; } + private: bool RTSetsSpecConsts = true; + bool DeadFunctionElimination = false; }; class PartialLinkJobAction : public JobAction { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8c018f6b9deef..c3c70be6f08a3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3760,15 +3760,17 @@ class OffloadingActionBuilder final { for (auto SDA : SYCLDeviceActions) SYCLLinkBinaryList.push_back(SDA); if (WrapDeviceOnlyBinary) { + bool SYCLDeviceLibLinked = false; // If used without -fintelfpga, -fsycl-link is used to wrap device // objects for future host link. Device libraries should be linked // by default to resolve any undefined reference. 
if (!Args.hasArg(options::OPT_fintelfpga)) { const auto *TC = ToolChains.front(); - addSYCLDeviceLibs(TC, SYCLLinkBinaryList, true, - C.getDefaultToolChain() - .getTriple() - .isWindowsMSVCEnvironment()); + SYCLDeviceLibLinked = + addSYCLDeviceLibs(TC, SYCLLinkBinaryList, true, + C.getDefaultToolChain() + .getTriple() + .isWindowsMSVCEnvironment()); } // -fsycl-link behavior does the following to the unbundled device // binaries: @@ -3777,17 +3779,28 @@ class OffloadingActionBuilder final { // 3) Translate final .bc file to .spv // 4) Wrap the binary with the offload wrapper which can be used // by any compilation link step. + SYCLPostLinkJobAction *PostLinkDFEAction = nullptr; + SYCLPostLinkJobAction *PostLinkAction = nullptr; auto *DeviceLinkAction = C.MakeAction( SYCLLinkBinaryList, types::TY_Image); - auto *PostLinkAction = C.MakeAction( - DeviceLinkAction, types::TY_LLVM_BC); + if (!SYCLDeviceLibLinked) + PostLinkAction = C.MakeAction( + DeviceLinkAction, types::TY_LLVM_BC); + else { + PostLinkDFEAction = C.MakeAction( + DeviceLinkAction, types::TY_LLVM_BC); + PostLinkDFEAction->setDeadFunctionElimination(true); + PostLinkDFEAction->setRTSetsSpecConstants(false); + PostLinkAction = C.MakeAction( + PostLinkDFEAction, types::TY_LLVM_BC); + } auto *TranslateAction = C.MakeAction( PostLinkAction, types::TY_Image); SYCLLinkBinary = C.MakeAction( TranslateAction, types::TY_Object); } else { auto *Link = C.MakeAction(SYCLLinkBinaryList, - types::TY_Image); + types::TY_Image); SYCLLinkBinary = C.MakeAction( Link, types::TY_Image); } @@ -3936,7 +3949,9 @@ class OffloadingActionBuilder final { SYCLDeviceActions.clear(); } - void addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects, + // Return a bool value to indicate whether some device libraries are + // linked with users' device image. 
+ bool addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects, bool isSpirvAOT, bool isMSVCEnv) { enum SYCLDeviceLibType { sycl_devicelib_wrapper, @@ -3947,6 +3962,7 @@ class OffloadingActionBuilder final { StringRef devicelib_option; }; + bool NumOfDeviceLibLinked = 0; bool NoDeviceLibs = false; // Currently, libc, libm-fp32 will be linked in by default. In order // to use libm-fp64, -fsycl-device-lib=libm-fp64/all should be used. @@ -4005,6 +4021,7 @@ class OffloadingActionBuilder final { llvm::sys::path::append(LibName, Lib.devicelib_name); llvm::sys::path::replace_extension(LibName, LibSuffix); if (llvm::sys::fs::exists(LibName)) { + ++NumOfDeviceLibLinked; Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibName)); auto *SYCLDeviceLibsInputAction = @@ -4020,6 +4037,7 @@ class OffloadingActionBuilder final { addInputs(sycl_devicelib_wrapper); if (isSpirvAOT) addInputs(sycl_devicelib_fallback); + return NumOfDeviceLibLinked != 0; } void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { @@ -4102,6 +4120,7 @@ class OffloadingActionBuilder final { ActionList DeviceLibObjects; ActionList LinkObjects; auto TT = SYCLTripleList[I]; + bool SYCLDeviceLibLinked = false; auto isNVPTX = (*TC)->getTriple().isNVPTX(); bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || @@ -4118,7 +4137,7 @@ class OffloadingActionBuilder final { // device libraries are only needed when current toolchain is using // AOT compilation. if (!isNVPTX) { - addSYCLDeviceLibs( + SYCLDeviceLibLinked = addSYCLDeviceLibs( *TC, LinkObjects, true, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment()); } @@ -4199,8 +4218,20 @@ class OffloadingActionBuilder final { types::ID PostLinkOutType = isNVPTX || !MultiFileActionDeps ? 
types::TY_LLVM_BC : types::TY_Tempfiletable; - auto *PostLinkAction = C.MakeAction( - DeviceLinkAction, PostLinkOutType); + + SYCLPostLinkJobAction *PostLinkDFEAction = nullptr; + SYCLPostLinkJobAction *PostLinkAction = nullptr; + if (!SYCLDeviceLibLinked) + PostLinkAction = C.MakeAction(DeviceLinkAction, + PostLinkOutType); + else { + PostLinkDFEAction = C.MakeAction( + DeviceLinkAction, types::TY_LLVM_BC); + PostLinkDFEAction->setDeadFunctionElimination(true); + PostLinkDFEAction->setRTSetsSpecConstants(false); + PostLinkAction = C.MakeAction( + PostLinkDFEAction, PostLinkOutType); + } PostLinkAction->setRTSetsSpecConstants(!isAOT); if (isNVPTX) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ee923a4220041..639b93512aefe 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8037,6 +8037,7 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // OPT_fsycl_device_code_split is not checked as it is an alias to // -fsycl-device-code-split=per_source + auto *SYCLPostLink = llvm::dyn_cast(&JA); // Turn on Dead Parameter Elimination Optimization with early optimizations if (!getToolChain().getTriple().isNVPTX() && TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization, @@ -8047,6 +8048,10 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // transformations (like specialization constant intrinsic lowering) and // output LLVMIR addArgs(CmdArgs, TCArgs, {"-ir-output-only"}); + // DeadFunctionElimination must work with IROutputOnly to clean the + // original LLVMIR + if (SYCLPostLink && SYCLPostLink->getDeadFunctionElimination()) + addArgs(CmdArgs, TCArgs, {"--dead-function-elimination"}); } else { assert(JA.getType() == types::TY_Tempfiletable); // Symbol file and specialization constant info generation is mandatory - @@ -8054,7 +8059,6 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, addArgs(CmdArgs, TCArgs, 
{"-symbols"}); } // specialization constants processing is mandatory - auto *SYCLPostLink = llvm::dyn_cast(&JA); if (SYCLPostLink && SYCLPostLink->getRTSetsSpecConstants()) addArgs(CmdArgs, TCArgs, {"-spec-const=rt"}); else diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e359ba494ddc0..a1addb0a40c68 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -124,6 +124,10 @@ static cl::opt EmitKernelParamInfo{ "emit-param-info", cl::desc("emit kernel parameter optimization info"), cl::cat(PostLinkCat)}; +static cl::opt DeadFunctionElimination{ + "dead-function-elimination", + cl::desc("Eliminate dead functions in device image"), cl::cat(PostLinkCat)}; + struct ImagePropSaveInfo { bool NeedDeviceLibReqMask; bool DoSpecConst; @@ -566,6 +570,32 @@ static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists) { return std::move(Res); } +// Eliminate 'dead' functions which are not called in device LLVM IR module, +// there is one exception: functions with 'referenced-indirectly' attribute +// can't be eliminated since they will be called indirectly via function ptr. 
+static void eliminateDeadFunctions(Module &M) { + std::vector DeadFunctions; + bool NoDeadFunction = false; + while (!NoDeadFunction) { + DeadFunctions.clear(); + for (Function &F : M) { + if (F.user_empty() && (F.getCallingConv() == CallingConv::SPIR_FUNC) && + !F.getAttributes().hasFnAttribute("referenced-indirectly")) { + F.deleteBody(); + DeadFunctions.push_back(&F); + } + } + + if (!DeadFunctions.empty()) { + for (Function *F : DeadFunctions) { + M.getFunctionList().remove(F); + } + NoDeadFunction = false; + } else + NoDeadFunction = true; + } +} + #define CHECK_AND_EXIT(E) \ { \ Error LocE = std::move(E); \ @@ -612,11 +642,18 @@ int main(int argc, char **argv) { "will produce single output file example_p.bc suitable for SPIRV\n" "translation.\n"); - bool DoSplit = SplitMode.getNumOccurrences() > 0; - bool DoSpecConst = SpecConstLower.getNumOccurrences() > 0; - bool DoParamInfo = EmitKernelParamInfo.getNumOccurrences() > 0; - - if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo) { + // DeadFunctionElimination is used for removing some unused function in + // ORIGINAL IR, it must work with IROutputOnly and can't work with other + // options such as DoSplit, DoSpecConst, DoParamInfo... 
+ bool DoSplit = + (SplitMode.getNumOccurrences() > 0 && !DeadFunctionElimination); + bool DoSpecConst = + (SpecConstLower.getNumOccurrences() > 0 && !DeadFunctionElimination); + bool DoParamInfo = + (EmitKernelParamInfo.getNumOccurrences() > 0 && !DeadFunctionElimination); + + if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo && + !DeadFunctionElimination) { errs() << "no actions specified; try --help for usage info\n"; return 1; } @@ -648,6 +685,12 @@ int main(int argc, char **argv) { if (OutputFilename.getNumOccurrences() == 0) OutputFilename = (Twine(sys::path::stem(InputFilename)) + ".files").str(); + if (DeadFunctionElimination && IROutputOnly) { + eliminateDeadFunctions(*MPtr); + saveModule(*MPtr, OutputFilename); + return 0; + } + std::map> GlobalsSet; if (DoSplit || DoSymGen) { From c161e5c9209feea395d821db6220e8dc9adebc11 Mon Sep 17 00:00:00 2001 From: gejin Date: Mon, 9 Nov 2020 10:12:07 +0800 Subject: [PATCH 2/2] Fix wrong type for NumOfDeviceLibLinked Signed-off-by: gejin --- clang/lib/Driver/Driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c3c70be6f08a3..4296201adba31 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3962,7 +3962,7 @@ class OffloadingActionBuilder final { StringRef devicelib_option; }; - bool NumOfDeviceLibLinked = 0; + int NumOfDeviceLibLinked = 0; bool NoDeviceLibs = false; // Currently, libc, libm-fp32 will be linked in by default. In order // to use libm-fp64, -fsycl-device-lib=libm-fp64/all should be used.