Skip to content

Commit 5cbcd58

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent d31e314 commit 5cbcd58

6 files changed

+365
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
1212

1313
#include "llvm/CodeGen/MachinePassManager.h"
14+
#include "llvm/IR/CallingConv.h"
1415
#include "llvm/IR/PassManager.h"
1516
#include "llvm/Pass.h"
1617
#include "llvm/Support/AMDGPUAddrSpace.h"

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/TargetTransformInfo.h"
18+
#include "llvm/Analysis/UniformityAnalysis.h"
1719
#include "llvm/CodeGen/TargetPassConfig.h"
1820
#include "llvm/IR/IntrinsicsAMDGPU.h"
1921
#include "llvm/IR/IntrinsicsR600.h"
@@ -1014,6 +1016,110 @@ struct AAAMDGPUNoAGPR
10141016

10151017
const char AAAMDGPUNoAGPR::ID = 0;
10161018

1019+
struct AAAMDGPUUniform
1020+
: public IRAttribute<Attribute::InReg,
1021+
StateWrapper<BooleanState, AbstractAttribute>,
1022+
AAAMDGPUUniform> {
1023+
AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
1024+
1025+
/// Create an abstract attribute view for the position \p IRP.
1026+
static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
1027+
Attributor &A);
1028+
1029+
/// See AbstractAttribute::getName()
1030+
const std::string getName() const override { return "AAAMDGPUUniform"; }
1031+
1032+
const std::string getAsStr(Attributor *A) const override {
1033+
return getAssumed() ? "inreg" : "non-inreg";
1034+
}
1035+
1036+
void trackStatistics() const override {}
1037+
1038+
/// See AbstractAttribute::getIdAddr()
1039+
const char *getIdAddr() const override { return &ID; }
1040+
1041+
/// This function should return true if the type of the \p AA is
1042+
/// AAAMDGPUUniform
1043+
static bool classof(const AbstractAttribute *AA) {
1044+
return (AA->getIdAddr() == &ID);
1045+
}
1046+
1047+
/// Unique ID (due to the unique address)
1048+
static const char ID;
1049+
};
1050+
1051+
const char AAAMDGPUUniform::ID = 0;
1052+
1053+
namespace {
1054+
1055+
struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
1056+
AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
1057+
: AAAMDGPUUniform(IRP, A) {}
1058+
1059+
void initialize(Attributor &A) override {
1060+
assert(
1061+
!AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv()));
1062+
if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
1063+
indicateOptimisticFixpoint();
1064+
}
1065+
1066+
ChangeStatus updateImpl(Attributor &A) override {
1067+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1068+
1069+
auto isUniform = [&](AbstractCallSite ACS) -> bool {
1070+
CallBase *CB = ACS.getInstruction();
1071+
Value *V = CB->getArgOperandUse(ArgNo);
1072+
if (isa<Constant>(V))
1073+
return true;
1074+
if (auto *I = dyn_cast<Instruction>(V)) {
1075+
auto *UA = A.getInfoCache()
1076+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(
1077+
*I->getFunction());
1078+
return UA && UA->isUniform(I);
1079+
}
1080+
if (auto *Arg = dyn_cast<Argument>(V)) {
1081+
auto *UA = A.getInfoCache()
1082+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(
1083+
*Arg->getParent());
1084+
if (UA && UA->isUniform(Arg))
1085+
return true;
1086+
// We only rely on isArgPassedInSGPR when the function is terminal,
1087+
// assuming there is no call edge from a function to an entry function.
1088+
if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
1089+
return AMDGPU::isArgPassedInSGPR(Arg);
1090+
auto *AA =
1091+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
1092+
return AA && AA->isValidState();
1093+
}
1094+
return false;
1095+
};
1096+
1097+
bool UsedAssumedInformation = true;
1098+
if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
1099+
UsedAssumedInformation))
1100+
return indicatePessimisticFixpoint();
1101+
1102+
if (!UsedAssumedInformation)
1103+
return indicateOptimisticFixpoint();
1104+
1105+
return ChangeStatus::UNCHANGED;
1106+
}
1107+
};
1108+
1109+
} // namespace
1110+
1111+
/// Factory for AAAMDGPUUniform. Only argument positions are supported; any
/// other position kind hits llvm_unreachable. The object is placement-new'd
/// into the Attributor's allocator.
AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  // TODO: Since inreg is also allowed for return value, maybe we need to add
  // AAAMDGPUUniformCallSiteReturned?
  if (IRP.getPositionKind() != IRPosition::IRP_ARGUMENT)
    llvm_unreachable("not a valid position for AAAMDGPUUniform");

  return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);
}
1122+
10171123
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
10181124
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
10191125
for (unsigned I = 0;
@@ -1046,7 +1152,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10461152
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10471153
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
10481154
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1049-
&AAInstanceInfo::ID});
1155+
&AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
10501156

10511157
AttributorConfig AC(CGUpdater);
10521158
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1092,6 +1198,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10921198
IRPosition::value(*CmpX->getPointerOperand()));
10931199
}
10941200
}
1201+
1202+
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1203+
for (auto &Arg : F->args())
1204+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
1205+
}
10951206
}
10961207

10971208
ChangeStatus Change = A.run();
@@ -1120,6 +1231,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
11201231

11211232
/// Declare the analyses this pass requires: CycleInfoWrapperPass and
/// UniformityInfoWrapperPass.
void getAnalysisUsage(AnalysisUsage &AU) const override {
11221233
AU.addRequired<CycleInfoWrapperPass>();
1234+
// NOTE(review): presumably required because AAAMDGPUUniform queries
// UniformityInfoAnalysis while updating — confirm this wrapper pass is what
// actually backs that query.
AU.addRequired<UniformityInfoWrapperPass>();
11231235
}
11241236

11251237
/// Return the display name of this pass.
StringRef getPassName() const override { return "AMDGPU Attributor"; }

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ define void @call_volatile_load_store_as_4(ptr addrspace(4) %p1, ptr addrspace(4
9090

9191
define internal void @can_infer_cmpxchg(ptr %word) {
9292
; CHECK-LABEL: define internal void @can_infer_cmpxchg(
93-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
93+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
9494
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
9595
; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr addrspace(1) [[TMP1]], i32 0, i32 4 monotonic monotonic, align 4
9696
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
@@ -144,7 +144,7 @@ define internal void @can_not_infer_cmpxchg(ptr %word) {
144144

145145
define internal void @can_infer_atomicrmw(ptr %word) {
146146
; CHECK-LABEL: define internal void @can_infer_atomicrmw(
147-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
147+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
148148
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
149149
; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[TMP1]], i32 12 monotonic, align 4
150150
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
define internal fastcc void @foo(ptr %kg) {
1010
; CHECK-LABEL: define internal fastcc void @foo(
11-
; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
1414
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276

0 commit comments

Comments
 (0)