From ddebb0e69f734d02b0894ffa03f2923b8485fe82 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Mon, 28 Mar 2022 13:37:34 +0100 Subject: [PATCH 1/3] Added IPSCCP pass to O0 by default. new flag `use-ipsccp-nvptx-O0` can remove the IPSCCP pass from O0 when set false. --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 11 +++++++++++ sycl/doc/GetStartedGuide.md | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 99f37aa5c286b..a5c968a405163 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -37,6 +37,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Vectorize.h" +#include "llvm/Transforms/IPO.h" #include #include @@ -63,6 +64,12 @@ static cl::opt UseShortPointersOpt( "Use 32-bit pointers for accessing const/local/shared address spaces."), cl::init(false), cl::Hidden); +static cl::opt + UseIPSCCPO0("use-ipsccp-nvptx-O0", + cl::desc("Use IPSCCP pass at O0 as a temp solution for " + "nvvm-reflect dead-code errors."), + cl::init(true), cl::Hidden); + namespace llvm { void initializeLocalAccessorToSharedMemoryPass(PassRegistry &); @@ -327,6 +334,10 @@ void NVPTXPassConfig::addIRPasses() { const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); addPass(createNVVMReflectPass(ST.getSmVersion())); + if (getOptLevel() == CodeGenOpt::None && UseIPSCCPO0) { + addPass(createIPSCCPPass()); + } + // FIXME: should the target triple check be done by the pass itself? // See createNVPTXLowerArgsPass as an example if (getTM().getTargetTriple().getOS() == Triple::CUDA) { diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 1f53b9f4697c8..1c1ad8087e302 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -825,6 +825,18 @@ which contains all the symbols required. 
significantly slower but matches the default precision used by `nvcc`, and this `clang++` flag is equivalent to the `nvcc` `-prec-sqrt` flag, except that it defaults to `false`. +* No Opt (O0) uses the IPSCCP compiler pass by default, although the IPSCCP pass + can be switched off at O0 using the `-mllvm -use-ipsccp-nvptx-O0=false` flag at + the user's discretion. + The reason that the IPSCCP pass is used by default even at O0 is that there is + currently an unresolved issue with the nvvm-reflect compiler pass: this pass is + used to pick the correct branches depending on the SM version, which can be + optionally specified by the `--cuda-gpu-arch` flag. + If the arch flag is not specified by the user, the default value, SM 50, is used. + Without the execution of the IPSCCP pass at -O0 when using a low SM version, + dead instructions which require a higher SM version can remain. Since + corresponding issues occur in other backends, future work will aim for a + universal solution to these issues. 
### HIP back-end limitations From 02dd4ca74ff4d75be4c5a2c583607f446fdc9176 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Mon, 28 Mar 2022 13:58:59 +0100 Subject: [PATCH 2/3] format --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index a5c968a405163..99bf6171c3d15 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -33,11 +33,11 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Transforms/IPO.h" #include #include From 68aa738e309d8a09cfcfdabbf06713d5f6118f62 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Mon, 28 Mar 2022 14:32:26 +0100 Subject: [PATCH 3/3] remove IPSCCP from param-load-store test using O0. --- llvm/test/CodeGen/NVPTX/param-load-store.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 099a26afb940b..c04dd8a5eb54a 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -1,5 +1,5 @@ ; Verifies correctness of load/store of parameters and return values. -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs -use-ipsccp-nvptx-O0=false | FileCheck -allow-deprecated-dag-overlap %s %s_i1 = type { i1 } %s_i8 = type { i8 }