Skip to content

Commit a88d580

Browse files
authored
[NVPTX] Pull invariant load identification into IR pass (#138015)
Pull invariant load identification, which was previously part of DAGToDAG ISel, into a new IR pass NVPTXTagInvariantLoads. This makes it possible to disable this optimization at O0 and reduces the complexity of the SelectionDAG pass. Moving this logic to an IR pass also allows for implementing a more powerful traversal in the future. Fixes llvm/llvm-project#138138
1 parent 5718460 commit a88d580

8 files changed

+301
-52
lines changed

llvm/lib/Target/NVPTX/CMakeLists.txt

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,34 +13,35 @@ add_public_tablegen_target(NVPTXCommonTableGen)
1313
set(NVPTXCodeGen_sources
1414
NVPTXAliasAnalysis.cpp
1515
NVPTXAllocaHoisting.cpp
16-
NVPTXAtomicLower.cpp
1716
NVPTXAsmPrinter.cpp
1817
NVPTXAssignValidGlobalNames.cpp
18+
NVPTXAtomicLower.cpp
19+
NVPTXCtorDtorLowering.cpp
1920
NVPTXForwardParams.cpp
2021
NVPTXFrameLowering.cpp
2122
NVPTXGenericToNVVM.cpp
22-
NVPTXISelDAGToDAG.cpp
23-
NVPTXISelLowering.cpp
2423
NVPTXImageOptimizer.cpp
2524
NVPTXInstrInfo.cpp
25+
NVPTXISelDAGToDAG.cpp
26+
NVPTXISelLowering.cpp
2627
NVPTXLowerAggrCopies.cpp
27-
NVPTXLowerArgs.cpp
2828
NVPTXLowerAlloca.cpp
29+
NVPTXLowerArgs.cpp
2930
NVPTXLowerUnreachable.cpp
30-
NVPTXPeephole.cpp
3131
NVPTXMCExpr.cpp
32+
NVPTXPeephole.cpp
3233
NVPTXPrologEpilogPass.cpp
34+
NVPTXProxyRegErasure.cpp
3335
NVPTXRegisterInfo.cpp
3436
NVPTXReplaceImageHandles.cpp
3537
NVPTXSelectionDAGInfo.cpp
3638
NVPTXSubtarget.cpp
39+
NVPTXTagInvariantLoads.cpp
3740
NVPTXTargetMachine.cpp
3841
NVPTXTargetTransformInfo.cpp
3942
NVPTXUtilities.cpp
4043
NVVMIntrRange.cpp
4144
NVVMReflect.cpp
42-
NVPTXProxyRegErasure.cpp
43-
NVPTXCtorDtorLowering.cpp
4445
)
4546

4647
add_llvm_target(NVPTXCodeGen

llvm/lib/Target/NVPTX/NVPTX.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ FunctionPass *createNVPTXLowerArgsPass();
5151
FunctionPass *createNVPTXLowerAllocaPass();
5252
FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
5353
bool NoTrapAfterNoreturn);
54+
FunctionPass *createNVPTXTagInvariantLoadsPass();
5455
MachineFunctionPass *createNVPTXPeephole();
5556
MachineFunctionPass *createNVPTXProxyRegErasurePass();
5657
MachineFunctionPass *createNVPTXForwardParamsPass();
@@ -73,6 +74,7 @@ void initializeNVVMReflectPass(PassRegistry &);
7374
void initializeNVPTXAAWrapperPassPass(PassRegistry &);
7475
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
7576
void initializeNVPTXPeepholePass(PassRegistry &);
77+
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
7678

7779
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
7880
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -104,6 +106,10 @@ struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
104106
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
105107
};
106108

109+
struct NVPTXTagInvariantLoadsPass : PassInfoMixin<NVPTXTagInvariantLoadsPass> {
110+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
111+
};
112+
107113
namespace NVPTX {
108114
enum DrvInterface {
109115
NVCL,

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 8 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -767,46 +767,12 @@ NVPTX::Scope NVPTXDAGToDAGISel::getOperationScope(MemSDNode *N,
767767
llvm_unreachable("unhandled ordering");
768768
}
769769

770-
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
771-
unsigned CodeAddrSpace, MachineFunction *F) {
770+
static bool canLowerToLDG(const MemSDNode &N, const NVPTXSubtarget &Subtarget,
771+
unsigned CodeAddrSpace) {
772772
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
773773
// space.
774-
//
775-
// We have two ways of identifying invariant loads: Loads may be explicitly
776-
// marked as invariant, or we may infer them to be invariant.
777-
//
778-
// We currently infer invariance for loads from
779-
// - constant global variables, and
780-
// - kernel function pointer params that are noalias (i.e. __restrict) and
781-
// never written to.
782-
//
783-
// TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
784-
// not during the SelectionDAG phase).
785-
//
786-
// TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
787-
// explicitly invariant loads because these are how clang tells us to use ldg
788-
// when the user uses a builtin.
789-
if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::AddressSpace::Global)
790-
return false;
791-
792-
if (N->isInvariant())
793-
return true;
794-
795-
bool IsKernelFn = isKernelFunction(F->getFunction());
796-
797-
// We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
798-
// because the former looks through phi nodes while the latter does not. We
799-
// need to look through phi nodes to handle pointer induction variables.
800-
SmallVector<const Value *, 8> Objs;
801-
getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
802-
803-
return all_of(Objs, [&](const Value *V) {
804-
if (auto *A = dyn_cast<const Argument>(V))
805-
return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
806-
if (auto *GV = dyn_cast<const GlobalVariable>(V))
807-
return GV->isConstant();
808-
return false;
809-
});
774+
return Subtarget.hasLDG() && CodeAddrSpace == NVPTX::AddressSpace::Global &&
775+
N.isInvariant();
810776
}
811777

812778
static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
@@ -1107,10 +1073,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
11071073
return false;
11081074

11091075
// Address Space Setting
1110-
unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
1111-
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
1076+
const unsigned CodeAddrSpace = getCodeAddrSpace(LD);
1077+
if (canLowerToLDG(*LD, *Subtarget, CodeAddrSpace))
11121078
return tryLDGLDU(N);
1113-
}
11141079

11151080
SDLoc DL(N);
11161081
SDValue Chain = N->getOperand(0);
@@ -1196,10 +1161,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
11961161
const MVT MemVT = MemEVT.getSimpleVT();
11971162

11981163
// Address Space Setting
1199-
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1200-
if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1164+
const unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1165+
if (canLowerToLDG(*MemSD, *Subtarget, CodeAddrSpace))
12011166
return tryLDGLDU(N);
1202-
}
12031167

12041168
EVT EltVT = N->getValueType(0);
12051169
SDLoc DL(N);

llvm/lib/Target/NVPTX/NVPTXPassRegistry.def

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,6 @@ FUNCTION_ALIAS_ANALYSIS("nvptx-aa", NVPTXAA())
3838
#endif
3939
FUNCTION_PASS("nvvm-intr-range", NVVMIntrRangePass())
4040
FUNCTION_PASS("nvptx-copy-byval-args", NVPTXCopyByValArgsPass())
41-
FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this));
41+
FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this))
42+
FUNCTION_PASS("nvptx-tag-invariant-loads", NVPTXTagInvariantLoadsPass())
4243
#undef FUNCTION_PASS
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
//===------ NVPTXTagInvariantLoads.cpp - Tag invariant loads --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements invariant load tagging. It traverses load instructions
10+
// in a function, and determines if each load can be tagged as invariant.
11+
//
12+
// We currently infer invariance for loads from
13+
// - constant global variables, and
14+
// - kernel function pointer params that are noalias (i.e. __restrict) and
15+
// never written to.
16+
//
17+
// TODO: Perform a more powerful invariance analysis (ideally IPO).
18+
//
19+
//===----------------------------------------------------------------------===//
20+
21+
#include "NVPTXUtilities.h"
22+
#include "llvm/Analysis/ValueTracking.h"
23+
#include "llvm/IR/InstIterator.h"
24+
#include "llvm/IR/Instructions.h"
25+
#include "llvm/IR/Metadata.h"
26+
#include "llvm/Support/NVPTXAddrSpace.h"
27+
28+
using namespace llvm;
29+
30+
static bool isInvariantLoad(const LoadInst *LI, const bool IsKernelFn) {
31+
// Don't bother with non-global loads
32+
if (LI->getPointerAddressSpace() != NVPTXAS::ADDRESS_SPACE_GLOBAL)
33+
return false;
34+
35+
// If the load is already marked as invariant, we don't need to do anything
36+
if (LI->getMetadata(LLVMContext::MD_invariant_load))
37+
return false;
38+
39+
// We use getUnderlyingObjects() here instead of getUnderlyingObject()
40+
// mainly because the former looks through phi nodes while the latter does
41+
// not. We need to look through phi nodes to handle pointer induction
42+
// variables.
43+
SmallVector<const Value *, 8> Objs;
44+
getUnderlyingObjects(LI->getPointerOperand(), Objs);
45+
46+
return all_of(Objs, [&](const Value *V) {
47+
if (const auto *A = dyn_cast<const Argument>(V))
48+
return IsKernelFn && ((A->onlyReadsMemory() && A->hasNoAliasAttr()) ||
49+
isParamGridConstant(*A));
50+
if (const auto *GV = dyn_cast<const GlobalVariable>(V))
51+
return GV->isConstant();
52+
return false;
53+
});
54+
}
55+
56+
static void markLoadsAsInvariant(LoadInst *LI) {
57+
LI->setMetadata(LLVMContext::MD_invariant_load,
58+
MDNode::get(LI->getContext(), {}));
59+
}
60+
61+
static bool tagInvariantLoads(Function &F) {
62+
const bool IsKernelFn = isKernelFunction(F);
63+
64+
bool Changed = false;
65+
for (auto &I : instructions(F)) {
66+
if (auto *LI = dyn_cast<LoadInst>(&I)) {
67+
if (isInvariantLoad(LI, IsKernelFn)) {
68+
markLoadsAsInvariant(LI);
69+
Changed = true;
70+
}
71+
}
72+
}
73+
return Changed;
74+
}
75+
76+
namespace {
77+
78+
struct NVPTXTagInvariantLoadLegacyPass : public FunctionPass {
79+
static char ID;
80+
81+
NVPTXTagInvariantLoadLegacyPass() : FunctionPass(ID) {}
82+
bool runOnFunction(Function &F) override;
83+
};
84+
85+
} // namespace
86+
87+
INITIALIZE_PASS(NVPTXTagInvariantLoadLegacyPass, "nvptx-tag-invariant-loads",
88+
"NVPTX Tag Invariant Loads", false, false)
89+
90+
bool NVPTXTagInvariantLoadLegacyPass::runOnFunction(Function &F) {
91+
return tagInvariantLoads(F);
92+
}
93+
94+
char NVPTXTagInvariantLoadLegacyPass::ID = 0;
95+
96+
FunctionPass *llvm::createNVPTXTagInvariantLoadsPass() {
97+
return new NVPTXTagInvariantLoadLegacyPass();
98+
}
99+
100+
PreservedAnalyses NVPTXTagInvariantLoadsPass::run(Function &F,
101+
FunctionAnalysisManager &) {
102+
return tagInvariantLoads(F) ? PreservedAnalyses::none()
103+
: PreservedAnalyses::all();
104+
}

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
112112
initializeNVPTXAAWrapperPassPass(PR);
113113
initializeNVPTXExternalAAWrapperPass(PR);
114114
initializeNVPTXPeepholePass(PR);
115+
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
115116
}
116117

117118
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -395,6 +396,7 @@ void NVPTXPassConfig::addIRPasses() {
395396
if (!DisableLoadStoreVectorizer)
396397
addPass(createLoadStoreVectorizerPass());
397398
addPass(createSROAPass());
399+
addPass(createNVPTXTagInvariantLoadsPass());
398400
}
399401

400402
if (ST.hasPTXASUnreachableBug()) {
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
%struct = type { [2 x i64] }
8+
@G = external constant %struct
9+
10+
define void @foo() {
11+
; CHECK-LABEL: foo(
12+
; CHECK: {
13+
; CHECK-NEXT: .reg .b64 %rd<3>;
14+
; CHECK-EMPTY:
15+
; CHECK-NEXT: // %bb.0:
16+
; CHECK-NEXT: ld.global.u64 %rd1, [G];
17+
; CHECK-NEXT: ld.global.u64 %rd2, [G+8];
18+
; CHECK-NEXT: { // callseq 0, 0
19+
; CHECK-NEXT: .param .align 8 .b8 param0[16];
20+
; CHECK-NEXT: st.param.b64 [param0], %rd1;
21+
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
22+
; CHECK-NEXT: call.uni
23+
; CHECK-NEXT: bar,
24+
; CHECK-NEXT: (
25+
; CHECK-NEXT: param0
26+
; CHECK-NEXT: );
27+
; CHECK-NEXT: } // callseq 0
28+
; CHECK-NEXT: ret;
29+
call void @bar(ptr byval(%struct) @G)
30+
ret void
31+
}
32+
33+
declare void @bar(ptr)

0 commit comments

Comments
 (0)