Skip to content

Commit 921a353

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent b592917 commit 921a353

File tree

6 files changed

+364
-6
lines changed

6 files changed

+364
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
1212

1313
#include "llvm/CodeGen/MachinePassManager.h"
14+
#include "llvm/IR/CallingConv.h"
1415
#include "llvm/IR/PassManager.h"
1516
#include "llvm/Pass.h"
1617
#include "llvm/Support/AMDGPUAddrSpace.h"

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/TargetTransformInfo.h"
18+
#include "llvm/Analysis/UniformityAnalysis.h"
1719
#include "llvm/CodeGen/TargetPassConfig.h"
1820
#include "llvm/IR/IntrinsicsAMDGPU.h"
1921
#include "llvm/IR/IntrinsicsR600.h"
@@ -1014,6 +1016,109 @@ struct AAAMDGPUNoAGPR
10141016

10151017
const char AAAMDGPUNoAGPR::ID = 0;
10161018

1019+
struct AAAMDGPUUniform
1020+
: public IRAttribute<Attribute::InReg,
1021+
StateWrapper<BooleanState, AbstractAttribute>,
1022+
AAAMDGPUUniform> {
1023+
AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
1024+
1025+
/// Create an abstract attribute view for the position \p IRP.
1026+
static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
1027+
Attributor &A);
1028+
1029+
/// See AbstractAttribute::getName()
1030+
const std::string getName() const override { return "AAAMDGPUUniform"; }
1031+
1032+
const std::string getAsStr(Attributor *A) const override {
1033+
return getAssumed() ? "inreg" : "non-inreg";
1034+
}
1035+
1036+
void trackStatistics() const override {}
1037+
1038+
/// See AbstractAttribute::getIdAddr()
1039+
const char *getIdAddr() const override { return &ID; }
1040+
1041+
/// This function should return true if the type of the \p AA is
1042+
/// AAAMDGPUUniform
1043+
static bool classof(const AbstractAttribute *AA) {
1044+
return (AA->getIdAddr() == &ID);
1045+
}
1046+
1047+
/// Unique ID (due to the unique address)
1048+
static const char ID;
1049+
};
1050+
1051+
const char AAAMDGPUUniform::ID = 0;
1052+
1053+
namespace {
1054+
1055+
struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
1056+
AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
1057+
: AAAMDGPUUniform(IRP, A) {}
1058+
1059+
void initialize(Attributor &A) override {
1060+
assert(
1061+
!AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv()));
1062+
if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
1063+
indicateOptimisticFixpoint();
1064+
}
1065+
1066+
ChangeStatus updateImpl(Attributor &A) override {
1067+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1068+
1069+
auto isUniform = [&](AbstractCallSite ACS) -> bool {
1070+
CallBase *CB = ACS.getInstruction();
1071+
Value *V = CB->getArgOperandUse(ArgNo);
1072+
if (isa<Constant>(V))
1073+
return true;
1074+
if (auto *I = dyn_cast<Instruction>(V)) {
1075+
auto *UA = A.getInfoCache()
1076+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(
1077+
*I->getFunction());
1078+
return UA && UA->isUniform(I);
1079+
} else if (auto *Arg = dyn_cast<Argument>(V)) {
1080+
auto *TTI =
1081+
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
1082+
*Arg->getParent());
1083+
if (TTI && !TTI->isSourceOfDivergence(Arg))
1084+
return true;
1085+
// At this point, if it is an entry point argument, it means it is for
1086+
// sure divergent.
1087+
if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
1088+
return false;
1089+
auto *AA =
1090+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
1091+
return AA && AA->isValidState();
1092+
}
1093+
return false;
1094+
};
1095+
1096+
bool UsedAssumedInformation = true;
1097+
if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
1098+
UsedAssumedInformation))
1099+
return indicatePessimisticFixpoint();
1100+
1101+
if (!UsedAssumedInformation)
1102+
return indicateOptimisticFixpoint();
1103+
1104+
return ChangeStatus::UNCHANGED;
1105+
}
1106+
};
1107+
1108+
} // namespace
1109+
1110+
/// Factory for AAAMDGPUUniform. Only argument positions are supported; the
/// object is placed in the Attributor's allocator. Any other position kind is
/// treated as unreachable.
AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  const bool IsArgumentPosition =
      IRP.getPositionKind() == IRPosition::IRP_ARGUMENT;
  if (IsArgumentPosition)
    return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);

  // TODO: Since inreg is also allowed for return value, maybe we need to add
  // AAAMDGPUUniformCallSiteReturned?
  llvm_unreachable("not a valid position for AAAMDGPUUniform");
}
1121+
10171122
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
10181123
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
10191124
for (unsigned I = 0;
@@ -1046,7 +1151,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10461151
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10471152
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
10481153
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1049-
&AAInstanceInfo::ID});
1154+
&AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
10501155

10511156
AttributorConfig AC(CGUpdater);
10521157
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1092,6 +1197,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10921197
IRPosition::value(*CmpX->getPointerOperand()));
10931198
}
10941199
}
1200+
1201+
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1202+
for (auto &Arg : F->args())
1203+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
1204+
}
10951205
}
10961206

10971207
ChangeStatus Change = A.run();
@@ -1120,6 +1230,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
11201230

11211231
/// Registers the analyses this legacy pass requires: CycleInfoWrapperPass and
/// UniformityInfoWrapperPass.
void getAnalysisUsage(AnalysisUsage &AU) const override {
11221232
AU.addRequired<CycleInfoWrapperPass>();
1233+
AU.addRequired<UniformityInfoWrapperPass>();
11231234
}
11241235

11251236
StringRef getPassName() const override { return "AMDGPU Attributor"; }

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ define void @call_volatile_load_store_as_4(ptr addrspace(4) %p1, ptr addrspace(4
9090

9191
define internal void @can_infer_cmpxchg(ptr %word) {
9292
; CHECK-LABEL: define internal void @can_infer_cmpxchg(
93-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
93+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
9494
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
9595
; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr addrspace(1) [[TMP1]], i32 0, i32 4 monotonic monotonic, align 4
9696
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
@@ -144,7 +144,7 @@ define internal void @can_not_infer_cmpxchg(ptr %word) {
144144

145145
define internal void @can_infer_atomicrmw(ptr %word) {
146146
; CHECK-LABEL: define internal void @can_infer_atomicrmw(
147-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
147+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
148148
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
149149
; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[TMP1]], i32 12 monotonic, align 4
150150
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
define internal fastcc void @foo(ptr %kg) {
1010
; CHECK-LABEL: define internal fastcc void @foo(
11-
; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
1414
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276

0 commit comments

Comments
 (0)