diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 369238436083c..bf473610a05aa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -24,9 +24,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
 #include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -55,8 +55,8 @@ class NVPTXLowerAlloca : public FunctionPass {
 
 char NVPTXLowerAlloca::ID = 1;
 
-INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
-                "Lower Alloca", false, false)
+INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
+                false)
 
 // =============================================================================
 // Main function for this pass.
@@ -70,14 +70,38 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
     for (auto &I : BB) {
       if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
         Changed = true;
+
+        PointerType *AllocInstPtrTy =
+            cast<PointerType>(allocaInst->getType()->getScalarType());
+        unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
+        assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
+                AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
+               "AllocaInst can only be in Generic or Local address space for "
+               "NVPTX.");
+
+        Instruction *AllocaInLocalAS = allocaInst;
         auto ETy = allocaInst->getAllocatedType();
-        auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
-        auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
-        auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
-        auto NewASCToGeneric =
-            new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
-        NewASCToLocal->insertAfter(allocaInst);
-        NewASCToGeneric->insertAfter(NewASCToLocal);
+
+        // We need to make sure that LLVM has info that alloca needs to go to
+        // ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
+        //
+        // For allocas in ADDRESS_SPACE_GENERIC, we add addrspacecast to
+        // ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
+        // the alloca's users still use a generic pointer to operate on.
+        //
+        // For allocas already in ADDRESS_SPACE_LOCAL, we just need
+        // addrspacecast to ADDRESS_SPACE_GENERIC.
+        if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
+          auto ASCastToLocalAS = new AddrSpaceCastInst(
+              allocaInst, PointerType::get(ETy, ADDRESS_SPACE_LOCAL), "");
+          ASCastToLocalAS->insertAfter(allocaInst);
+          AllocaInLocalAS = ASCastToLocalAS;
+        }
+
+        auto AllocaInGenericAS = new AddrSpaceCastInst(
+            AllocaInLocalAS, PointerType::get(ETy, ADDRESS_SPACE_GENERIC), "");
+        AllocaInGenericAS->insertAfter(AllocaInLocalAS);
+
         for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
           // Check Load, Store, GEP, and BitCast Uses on alloca and make them
           // use the converted generic address, in order to expose non-generic
@@ -87,23 +111,23 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
           auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
           if (LI && LI->getPointerOperand() == allocaInst &&
               !LI->isVolatile()) {
-            LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+            LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
             continue;
           }
           auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
           if (SI && SI->getPointerOperand() == allocaInst &&
               !SI->isVolatile()) {
-            SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+            SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
             continue;
           }
           auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
           if (GI && GI->getPointerOperand() == allocaInst) {
-            GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+            GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
             continue;
           }
           auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
           if (BI && BI->getOperand(0) == allocaInst) {
-            BI->setOperand(0, NewASCToGeneric);
+            BI->setOperand(0, AllocaInGenericAS);
             continue;
           }
         }
diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index b1c34c8b5ecd7..400184aaefb21 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
+; RUN: opt < %s -S -nvptx-lower-alloca | FileCheck %s --check-prefix LOWERALLOCAONLY
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
 
@@ -11,13 +12,32 @@ define void @kernel() {
   %A = alloca i32
 ; CHECK: addrspacecast ptr %A to ptr addrspace(5)
 ; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
+; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
 ; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
   store i32 0, ptr %A
   call void @callee(ptr %A)
   ret void
 }
 
+define void @alloca_in_explicit_local_as() {
+; LABEL: @lower_alloca_addrspace5
+; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
+  %A = alloca i32, addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
+  store i32 0, ptr addrspace(5) %A
+  call void @callee_addrspace5(ptr addrspace(5) %A)
+  ret void
+}
+
 declare void @callee(ptr)
+declare void @callee_addrspace5(ptr addrspace(5))
 
 !nvvm.annotations = !{!0}
+!nvvm.annotations = !{!1}
 !0 = !{ptr @kernel, !"kernel", i32 1}
+!1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1}