From 58359965fbd0bf2bf1f1d6001e4643b062a7cba4 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Wed, 14 Feb 2024 13:08:32 +0100 Subject: [PATCH 1/3] [AMDGPU] Let LowerModuleLDS run twice on the same module See #81491 Split from #75333 --- .../AMDGPU/AMDGPULowerModuleLDSPass.cpp | 26 +++++++++++++------ ...=> lds-reject-mixed-absolute-addresses.ll} | 4 +-- llvm/test/CodeGen/AMDGPU/lds-run-twice.ll | 14 ++++++++++ 3 files changed, 34 insertions(+), 10 deletions(-) rename llvm/test/CodeGen/AMDGPU/{lds-reject-absolute-addresses.ll => lds-reject-mixed-absolute-addresses.ll} (81%) create mode 100644 llvm/test/CodeGen/AMDGPU/lds-run-twice.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 5762f1906a16d..38475be1ec472 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -327,6 +327,8 @@ class AMDGPULowerModuleLDS { return convertUsersOfConstantsToInstructions(LDSGlobals); } + std::optional HasAbsoluteGVs; + public: AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {} @@ -334,9 +336,9 @@ class AMDGPULowerModuleLDS { using VariableFunctionMap = DenseMap>; - static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M, - FunctionVariableMap &kernels, - FunctionVariableMap &functions) { + void getUsesOfLDSByFunction(CallGraph const &CG, Module &M, + FunctionVariableMap &kernels, + FunctionVariableMap &functions) { // Get uses from the current function, excluding uses by called functions // Two output variables to avoid walking the globals list twice @@ -345,10 +347,18 @@ class AMDGPULowerModuleLDS { continue; } - if (GV.isAbsoluteSymbolRef()) { - report_fatal_error( - "LDS variables with absolute addresses are unimplemented."); - } + // Check if the module is consistent: either all GVs are absolute (happens + // when we run the pass more than once), or none are. + if (HasAbsoluteGVs.has_value()) { + if (*HasAbsoluteGVs != GV.isAbsoluteSymbolRef()) { + report_fatal_error( + "Module cannot mix absolute and non-absolute LDS GVs"); + } + } else + HasAbsoluteGVs = GV.isAbsoluteSymbolRef(); + + if (GV.isAbsoluteSymbolRef()) + continue; for (User *V : GV.users()) { if (auto *I = dyn_cast(V)) { @@ -368,7 +378,7 @@ class AMDGPULowerModuleLDS { FunctionVariableMap indirect_access; }; - static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) { + LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) { FunctionVariableMap direct_map_kernel; FunctionVariableMap direct_map_function; diff --git a/llvm/test/CodeGen/AMDGPU/lds-reject-absolute-addresses.ll b/llvm/test/CodeGen/AMDGPU/lds-reject-mixed-absolute-addresses.ll similarity index 81% rename from llvm/test/CodeGen/AMDGPU/lds-reject-absolute-addresses.ll rename to llvm/test/CodeGen/AMDGPU/lds-reject-mixed-absolute-addresses.ll index 659cdb55ded2f..b512a43aa1022 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-reject-absolute-addresses.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-reject-mixed-absolute-addresses.ll @@ -2,8 +2,9 @@ ; RUN: not --crash opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s @var1 = addrspace(3) global i32 undef, !absolute_symbol !0 +@var2 = addrspace(3) global i32 undef -; CHECK: LLVM ERROR: LDS variables with absolute addresses are unimplemented. +; CHECK: Module cannot mix absolute and non-absolute LDS GVs define amdgpu_kernel void @kern() { %val0 = load i32, ptr addrspace(3) @var1 %val1 = add i32 %val0, 4 @@ -12,4 +13,3 @@ define amdgpu_kernel void @kern() { } !0 = !{i32 0, i32 1} - diff --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll new file mode 100644 index 0000000000000..63aa426c9b9e6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll @@ -0,0 +1,14 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %s -o %t.ll +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %t.ll -o - +; RUN: diff -ub %t.ll %t.second.ll -I ".*ModuleID.*" + +; Check AMDGPULowerModuleLDS can run more than once on the same module, and that +; the second run is a no-op. + +@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4 + +define amdgpu_kernel void @test() { +entry: + store i32 1, ptr addrspace(3) @lds + ret void +} From da9ef0ba0fc0c79b82e0fe25b53f4e1b7a96c58a Mon Sep 17 00:00:00 2001 From: pvanhout Date: Wed, 14 Feb 2024 14:23:54 +0100 Subject: [PATCH 2/3] fix test --- llvm/test/CodeGen/AMDGPU/lds-run-twice.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll index 63aa426c9b9e6..b830ccb944a28 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %s -o %t.ll -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %t.ll -o - +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %t.ll -o %t.second.ll ; RUN: diff -ub %t.ll %t.second.ll -I ".*ModuleID.*" ; Check AMDGPULowerModuleLDS can run more than once on the same module, and that From a8f2124b5eceaa7cea1fd48b5d18e60ac9f4f027 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Thu, 22 Feb 2024 09:44:10 +0100 Subject: [PATCH 3/3] fix impl + add test --- .../Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 18 +++++++++--------- .../AMDGPU/lds-run-twice-absolute-md.ll | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 38475be1ec472..b85cb26fdc956 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -327,8 +327,6 @@ class AMDGPULowerModuleLDS { return convertUsersOfConstantsToInstructions(LDSGlobals); } - std::optional HasAbsoluteGVs; - public: AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {} @@ -336,12 +334,13 @@ class AMDGPULowerModuleLDS { using VariableFunctionMap = DenseMap>; - void getUsesOfLDSByFunction(CallGraph const &CG, Module &M, - FunctionVariableMap &kernels, - FunctionVariableMap &functions) { + static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M, + FunctionVariableMap &kernels, + FunctionVariableMap &functions) { // Get uses from the current function, excluding uses by called functions // Two output variables to avoid walking the globals list twice + std::optional HasAbsoluteGVs; for (auto &GV : M.globals()) { if (!AMDGPU::isLDSVariableToLower(GV)) { continue; @@ -349,15 +348,16 @@ class AMDGPULowerModuleLDS { // Check if the module is consistent: either all GVs are absolute (happens // when we run the pass more than once), or none are. + const bool IsAbsolute = GV.isAbsoluteSymbolRef(); if (HasAbsoluteGVs.has_value()) { - if (*HasAbsoluteGVs != GV.isAbsoluteSymbolRef()) { + if (*HasAbsoluteGVs != IsAbsolute) { report_fatal_error( "Module cannot mix absolute and non-absolute LDS GVs"); } } else - HasAbsoluteGVs = GV.isAbsoluteSymbolRef(); + HasAbsoluteGVs = IsAbsolute; - if (GV.isAbsoluteSymbolRef()) + if (IsAbsolute) continue; for (User *V : GV.users()) { @@ -378,7 +378,7 @@ class AMDGPULowerModuleLDS { FunctionVariableMap indirect_access; }; - LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) { + static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) { FunctionVariableMap direct_map_kernel; FunctionVariableMap direct_map_function; diff --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll new file mode 100644 index 0000000000000..52b44eea35c82 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll @@ -0,0 +1,16 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %s -o %t.ll +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %t.ll -o %t.second.ll +; RUN: diff -ub %t.ll %t.second.ll -I ".*ModuleID.*" + +; Check AMDGPULowerModuleLDS can run more than once on the same module, and that +; the second run is a no-op. + +@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4, !absolute_symbol !0 + +define amdgpu_kernel void @test() { +entry: + store i32 1, ptr addrspace(3) @lds + ret void +} + +!0 = !{i32 0, i32 1}