From c019149ea669a4aaf933442cfed252aa9d620929 Mon Sep 17 00:00:00 2001 From: Mikhail Lychkov Date: Mon, 5 Apr 2021 15:26:48 +0300 Subject: [PATCH 1/8] [SYCL] Fix group local memory sharing when opts disabled Group local memory functions depend on inlining pass. When all optimizations are disabled then those functions are not inlined. If a user adds several group local memory calls with the same template argument then only one group local memory function is specialized and then one memory buffer is allocated. Thus all variables that save a result of the group local memory call point to the same buffer. Enable AlwaysInliner pass even when all llvm passes are disabled as a workaround. A More accurate solution with call graph analysis will be added later. Signed-off-by: Mikhail Lychkov --- clang/lib/CodeGen/BackendUtil.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index eededed3da8d3..06e6a06b64890 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -990,8 +990,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, } // Allocate static local memory in SYCL kernel scope for each allocation call. - if (LangOpts.SYCLIsDevice) + if (LangOpts.SYCLIsDevice) { + // Group local memory pass depends on inlining. Turn it on even in case if + // all llvm passes or SYCL early optimizations are disabled. + // TODO: Remove this workaround when dependency on inlining is eliminated. + if (CodeGenOpts.DisableLLVMPasses) + PerModulePasses.add(createAlwaysInlinerLegacyPass(false)); PerModulePasses.add(createSYCLLowerWGLocalMemoryLegacyPass()); + } switch (Action) { case Backend_EmitNothing: From b3efac3dce06d8773023c9a5817ba9230f41bf7d Mon Sep 17 00:00:00 2001 From: Mikhail Lychkov <51128024+mlychkov@users.noreply.github.com> Date: Mon, 5 Apr 2021 17:31:44 +0300 Subject: [PATCH 2/8] Rephrase TODO -> FIXME Co-authored-by: Aaron Ballman --- clang/lib/CodeGen/BackendUtil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 06e6a06b64890..1e4bbea0ac17a 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -993,7 +993,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, if (LangOpts.SYCLIsDevice) { // Group local memory pass depends on inlining. Turn it on even in case if // all llvm passes or SYCL early optimizations are disabled. - // TODO: Remove this workaround when dependency on inlining is eliminated. + // FIXME: Remove this workaround when dependency on inlining is eliminated. if (CodeGenOpts.DisableLLVMPasses) PerModulePasses.add(createAlwaysInlinerLegacyPass(false)); PerModulePasses.add(createSYCLLowerWGLocalMemoryLegacyPass()); From 20739b6cc0268d6e5db210a9fe0324e961844edc Mon Sep 17 00:00:00 2001 From: Mikhail Lychkov Date: Tue, 6 Apr 2021 11:05:44 +0300 Subject: [PATCH 3/8] Add FE test Signed-off-by: Mikhail Lychkov --- .../group-local-memory-inlining.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 clang/test/CodeGenSYCL/group-local-memory-inlining.cpp diff --git a/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp b/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp new file mode 100644 index 0000000000000..3b3ed24bb45d2 --- /dev/null +++ b/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp @@ -0,0 +1,26 @@ +// Check that AlwaysInliner pass is always run for compilation of SYCL device +// target code, even if all optimizations are disabled. + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-EARLYOPT +// CHECK-EARLYOPT: Function Integration/Inlining +// CHECK-EARLYOPT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -disable-llvm-passes 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES +// CHECK-NOPASSES: Inliner for always_inline functions +// CHECK-NOPASSES: Replace __sycl_allocateLocalMemory with allocation of memory in local address space + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -fno-sycl-early-optimizations 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOEARLYOPT +// CHECK-NOEARLYOPT: Inliner for always_inline functions +// CHECK-NOEARLYOPT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -O0 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-O0opt +// CHECK-O0opt: Inliner for always_inline functions +// CHECK-O0opt: Replace __sycl_allocateLocalMemory with allocation of memory in local address space From 40b659d165ca2024e562dfaa6f2ac3d7932ced80 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 7 Apr 2021 18:24:54 +0300 Subject: [PATCH 4/8] Update BackendUtil.cpp Move passes to CreatePasses function. NOTE: `createSYCLLowerWGLocalMemoryLegacyPass` is not added to the the pipeline if CodeGenOpts.DisableLLVMPasses is ON. --- clang/lib/CodeGen/BackendUtil.cpp | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 1e4bbea0ac17a..11689b25cc452 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -860,6 +860,16 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.populateFunctionPassManager(FPM); PMBuilder.populateModulePassManager(MPM); + + if (LangOpts.SYCLIsDevice) { + // Eliminate dead arguments from SPIR kernels in SYCL environment. + // Run DAE when LLVM optimizations are applied as well. + if (LangOpts.EnableDAEInSpirKernels) + MPM.add(createDeadArgEliminationSYCLPass()); + + // Allocate static local memory in SYCL kernel scope for each allocation call. + MPM.add(createSYCLLowerWGLocalMemoryLegacyPass()); + } } static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { @@ -973,12 +983,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr ThinLinkOS, DwoOS; - // Eliminate dead arguments from SPIR kernels in SYCL environment. - // Run DAE when LLVM optimizations are applied as well. - if (LangOpts.SYCLIsDevice && !CodeGenOpts.DisableLLVMPasses && - LangOpts.EnableDAEInSpirKernels) - PerModulePasses.add(createDeadArgEliminationSYCLPass()); - // Add SPIRITTAnnotations pass to the pass manager if // -fsycl-instrument-device-code option was passed. This option can be // used only with spir triple. @@ -989,16 +993,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, PerModulePasses.add(createSPIRITTAnnotationsPass()); } - // Allocate static local memory in SYCL kernel scope for each allocation call. - if (LangOpts.SYCLIsDevice) { - // Group local memory pass depends on inlining. Turn it on even in case if - // all llvm passes or SYCL early optimizations are disabled. - // FIXME: Remove this workaround when dependency on inlining is eliminated. - if (CodeGenOpts.DisableLLVMPasses) - PerModulePasses.add(createAlwaysInlinerLegacyPass(false)); - PerModulePasses.add(createSYCLLowerWGLocalMemoryLegacyPass()); - } - switch (Action) { case Backend_EmitNothing: break; From 9a308769aa473bac33db6025b0dafc82822b01da Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 7 Apr 2021 18:29:25 +0300 Subject: [PATCH 5/8] Update group-local-memory-inlining.cpp --- .../group-local-memory-inlining.cpp | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp b/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp index 3b3ed24bb45d2..a2d24e7a7cac9 100644 --- a/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp +++ b/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp @@ -1,5 +1,4 @@ -// Check that AlwaysInliner pass is always run for compilation of SYCL device -// target code, even if all optimizations are disabled. +// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device compilation pipeline with the inliner pass. // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ // RUN: -mllvm -debug-pass=Structure %s -o - 2>&1 \ @@ -7,20 +6,16 @@ // CHECK-EARLYOPT: Function Integration/Inlining // CHECK-EARLYOPT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ -// RUN: -mllvm -debug-pass=Structure %s -o - -disable-llvm-passes 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES -// CHECK-NOPASSES: Inliner for always_inline functions -// CHECK-NOPASSES: Replace __sycl_allocateLocalMemory with allocation of memory in local address space - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ -// RUN: -mllvm -debug-pass=Structure %s -o - -fno-sycl-early-optimizations 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-NOEARLYOPT -// CHECK-NOEARLYOPT: Inliner for always_inline functions -// CHECK-NOEARLYOPT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space - // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ // RUN: -mllvm -debug-pass=Structure %s -o - -O0 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-O0opt // CHECK-O0opt: Inliner for always_inline functions // CHECK-O0opt: Replace __sycl_allocateLocalMemory with allocation of memory in local address space + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -disable-llvm-passes 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -fno-sycl-early-optimizations 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES +// CHECK-NOPASSES-NOT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space From 1d5865992433f6dc4fb346445ba17fcb4efb4601 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 7 Apr 2021 18:31:57 +0300 Subject: [PATCH 6/8] Fix formatting. --- clang/lib/CodeGen/BackendUtil.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 11689b25cc452..f2fb44f9cc9ce 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -867,7 +867,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (LangOpts.EnableDAEInSpirKernels) MPM.add(createDeadArgEliminationSYCLPass()); - // Allocate static local memory in SYCL kernel scope for each allocation call. + // Allocate static local memory in SYCL kernel scope for each allocation + // call. MPM.add(createSYCLLowerWGLocalMemoryLegacyPass()); } } From fe78c896fbdf129043808233408a5b6a5eab0fa4 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 7 Apr 2021 22:30:10 +0300 Subject: [PATCH 7/8] Update group-local-memory.cpp --- clang/test/CodeGenSYCL/group-local-memory.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/test/CodeGenSYCL/group-local-memory.cpp b/clang/test/CodeGenSYCL/group-local-memory.cpp index 600771b674349..a51473700a771 100644 --- a/clang/test/CodeGenSYCL/group-local-memory.cpp +++ b/clang/test/CodeGenSYCL/group-local-memory.cpp @@ -1,7 +1,4 @@ // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice \ -// RUN: -S -emit-llvm -mllvm -debug-pass=Structure -disable-llvm-passes \ -// RUN: -o - %s 2>&1 | FileCheck %s -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice \ // RUN: -S -emit-llvm -mllvm -debug-pass=Structure -o - %s 2>&1 \ // RUN: | FileCheck %s From 5e3e47ff71ed32566ab7a37b7910b3a3bc59cbc2 Mon Sep 17 00:00:00 2001 From: Mikhail Lychkov Date: Mon, 12 Apr 2021 15:11:52 +0300 Subject: [PATCH 8/8] Turn back the first version of the fix. This reverts commit 40b659d165ca2024e562dfaa6f2ac3d7932ced80. --- clang/lib/CodeGen/BackendUtil.cpp | 27 +++++++++++-------- .../group-local-memory-inlining.cpp | 21 --------------- clang/test/CodeGenSYCL/group-local-memory.cpp | 27 ++++++++++++++++--- 3 files changed, 40 insertions(+), 35 deletions(-) delete mode 100644 clang/test/CodeGenSYCL/group-local-memory-inlining.cpp diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f2fb44f9cc9ce..1e4bbea0ac17a 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -860,17 +860,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.populateFunctionPassManager(FPM); PMBuilder.populateModulePassManager(MPM); - - if (LangOpts.SYCLIsDevice) { - // Eliminate dead arguments from SPIR kernels in SYCL environment. - // Run DAE when LLVM optimizations are applied as well. - if (LangOpts.EnableDAEInSpirKernels) - MPM.add(createDeadArgEliminationSYCLPass()); - - // Allocate static local memory in SYCL kernel scope for each allocation - // call. - MPM.add(createSYCLLowerWGLocalMemoryLegacyPass()); - } } static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { @@ -984,6 +973,12 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr ThinLinkOS, DwoOS; + // Eliminate dead arguments from SPIR kernels in SYCL environment. + // Run DAE when LLVM optimizations are applied as well. + if (LangOpts.SYCLIsDevice && !CodeGenOpts.DisableLLVMPasses && + LangOpts.EnableDAEInSpirKernels) + PerModulePasses.add(createDeadArgEliminationSYCLPass()); + // Add SPIRITTAnnotations pass to the pass manager if // -fsycl-instrument-device-code option was passed. This option can be // used only with spir triple. @@ -994,6 +989,16 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, PerModulePasses.add(createSPIRITTAnnotationsPass()); } + // Allocate static local memory in SYCL kernel scope for each allocation call. + if (LangOpts.SYCLIsDevice) { + // Group local memory pass depends on inlining. Turn it on even in case if + // all llvm passes or SYCL early optimizations are disabled. + // FIXME: Remove this workaround when dependency on inlining is eliminated. + if (CodeGenOpts.DisableLLVMPasses) + PerModulePasses.add(createAlwaysInlinerLegacyPass(false)); + PerModulePasses.add(createSYCLLowerWGLocalMemoryLegacyPass()); + } + switch (Action) { case Backend_EmitNothing: break; diff --git a/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp b/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp deleted file mode 100644 index a2d24e7a7cac9..0000000000000 --- a/clang/test/CodeGenSYCL/group-local-memory-inlining.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device compilation pipeline with the inliner pass. - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ -// RUN: -mllvm -debug-pass=Structure %s -o - 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-EARLYOPT -// CHECK-EARLYOPT: Function Integration/Inlining -// CHECK-EARLYOPT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ -// RUN: -mllvm -debug-pass=Structure %s -o - -O0 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-O0opt -// CHECK-O0opt: Inliner for always_inline functions -// CHECK-O0opt: Replace __sycl_allocateLocalMemory with allocation of memory in local address space - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ -// RUN: -mllvm -debug-pass=Structure %s -o - -disable-llvm-passes 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ -// RUN: -mllvm -debug-pass=Structure %s -o - -fno-sycl-early-optimizations 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES -// CHECK-NOPASSES-NOT: Replace __sycl_allocateLocalMemory with allocation of memory in local address space diff --git a/clang/test/CodeGenSYCL/group-local-memory.cpp b/clang/test/CodeGenSYCL/group-local-memory.cpp index a51473700a771..e37c9ee468986 100644 --- a/clang/test/CodeGenSYCL/group-local-memory.cpp +++ b/clang/test/CodeGenSYCL/group-local-memory.cpp @@ -1,5 +1,26 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice \ -// RUN: -S -emit-llvm -mllvm -debug-pass=Structure -o - %s 2>&1 \ -// RUN: | FileCheck %s +// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device +// compilation pipeline with the inliner pass. +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - 2>&1 \ +// RUN: | FileCheck %s +// CHECK: Function Integration/Inlining // CHECK: Replace __sycl_allocateLocalMemory with allocation of memory in local address space + +// Check that AlwaysInliner pass is always run for compilation of SYCL device +// target code, even if all optimizations are disabled. + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -disable-llvm-passes 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -fno-sycl-early-optimizations 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPASSES +// CHECK-NOPASSES: Inliner for always_inline functions +// CHECK-NOPASSES: Replace __sycl_allocateLocalMemory with allocation of memory in local address space + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm \ +// RUN: -mllvm -debug-pass=Structure %s -o - -O0 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-O0opt +// CHECK-O0opt: Inliner for always_inline functions +// CHECK-O0opt: Replace __sycl_allocateLocalMemory with allocation of memory in local address space