Skip to content

Commit 4eb932c

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent bdf03fc commit 4eb932c

File tree

5 files changed

+365
-6
lines changed

5 files changed

+365
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/TargetTransformInfo.h"
18+
#include "llvm/Analysis/UniformityAnalysis.h"
1719
#include "llvm/CodeGen/TargetPassConfig.h"
1820
#include "llvm/IR/IntrinsicsAMDGPU.h"
1921
#include "llvm/IR/IntrinsicsR600.h"
@@ -1299,6 +1301,111 @@ struct AAAMDGPUNoAGPR
12991301

13001302
const char AAAMDGPUNoAGPR::ID = 0;
13011303

1304+
struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
1305+
using Base = StateWrapper<BooleanState, AbstractAttribute>;
1306+
AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1307+
1308+
/// Create an abstract attribute view for the position \p IRP.
1309+
static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
1310+
Attributor &A);
1311+
1312+
/// See AbstractAttribute::getName()
1313+
const std::string getName() const override { return "AAAMDGPUUniform"; }
1314+
1315+
const std::string getAsStr(Attributor *A) const override {
1316+
return getAssumed() ? "inreg" : "non-inreg";
1317+
}
1318+
1319+
void trackStatistics() const override {}
1320+
1321+
/// See AbstractAttribute::getIdAddr()
1322+
const char *getIdAddr() const override { return &ID; }
1323+
1324+
/// This function should return true if the type of the \p AA is
1325+
/// AAAMDGPUUniform
1326+
static bool classof(const AbstractAttribute *AA) {
1327+
return (AA->getIdAddr() == &ID);
1328+
}
1329+
1330+
/// Unique ID (due to the unique address)
1331+
static const char ID;
1332+
};
1333+
1334+
const char AAAMDGPUUniform::ID = 0;
1335+
1336+
struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
1337+
AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
1338+
: AAAMDGPUUniform(IRP, A) {}
1339+
1340+
void initialize(Attributor &A) override {
1341+
Argument *Arg = getAssociatedArgument();
1342+
if (Arg->hasAttribute(Attribute::InReg) ||
1343+
AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
1344+
indicateOptimisticFixpoint();
1345+
}
1346+
1347+
ChangeStatus updateImpl(Attributor &A) override {
1348+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1349+
1350+
auto isUniform = [&](AbstractCallSite ACS) -> bool {
1351+
CallBase *CB = ACS.getInstruction();
1352+
Value *V = CB->getArgOperandUse(ArgNo);
1353+
if (isa<Constant>(V))
1354+
return true;
1355+
Function *F = nullptr;
1356+
if (auto *Arg = dyn_cast<Argument>(V)) {
1357+
auto *AA =
1358+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
1359+
if (AA)
1360+
return AA->isValidState();
1361+
F = Arg->getParent();
1362+
} else if (auto *I = dyn_cast<Instruction>(V)) {
1363+
F = I->getFunction();
1364+
}
1365+
1366+
if (F) {
1367+
auto *UA =
1368+
A.getInfoCache()
1369+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(*F);
1370+
return UA && UA->isUniform(V);
1371+
}
1372+
1373+
// What else can it be here?
1374+
return false;
1375+
};
1376+
1377+
bool UsedAssumedInformation = true;
1378+
if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
1379+
UsedAssumedInformation))
1380+
return indicatePessimisticFixpoint();
1381+
1382+
if (!UsedAssumedInformation)
1383+
return indicateOptimisticFixpoint();
1384+
1385+
return ChangeStatus::UNCHANGED;
1386+
}
1387+
1388+
ChangeStatus manifest(Attributor &A) override {
1389+
Argument *Arg = getAssociatedArgument();
1390+
if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
1391+
return ChangeStatus::UNCHANGED;
1392+
return A.manifestAttrs(
1393+
getIRPosition(), {Attribute::get(Arg->getContext(), Attribute::InReg)});
1394+
}
1395+
};
1396+
1397+
/// Allocate the AAAMDGPUUniform implementation for \p IRP from the
/// Attributor's allocator. Only argument positions are supported; any other
/// position kind is treated as unreachable.
AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  // TODO: Since inreg is also allowed for return value, maybe we need to add
  // AAAMDGPUUniformCallSiteReturned?
  if (IRP.getPositionKind() != IRPosition::IRP_ARGUMENT)
    llvm_unreachable("not a valid position for AAAMDGPUUniform");

  return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);
}
1408+
13021409
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
13031410
/// based on the finalized 'amdgpu-flat-work-group-size' attribute.
13041411
/// Both attributes start with narrow ranges that expand during iteration.
@@ -1385,7 +1492,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13851492
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
13861493
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
13871494
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1388-
&AAInstanceInfo::ID});
1495+
&AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
13891496

13901497
AttributorConfig AC(CGUpdater);
13911498
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1438,6 +1545,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14381545
IRPosition::value(*CmpX->getPointerOperand()));
14391546
}
14401547
}
1548+
1549+
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1550+
for (auto &Arg : F->args())
1551+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
1552+
}
14411553
}
14421554

14431555
bool Changed = A.run() == ChangeStatus::CHANGED;
@@ -1470,6 +1582,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
14701582

14711583
/// Declare the analyses this pass requires: CycleInfoWrapperPass and
/// UniformityInfoWrapperPass.
void getAnalysisUsage(AnalysisUsage &AU) const override {
14721584
AU.addRequired<CycleInfoWrapperPass>();
1585+
AU.addRequired<UniformityInfoWrapperPass>();
14731586
}
14741587

14751588
StringRef getPassName() const override { return "AMDGPU Attributor"; }

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ define void @call_volatile_load_store_as_4(ptr addrspace(4) %p1, ptr addrspace(4
9090

9191
define internal void @can_infer_cmpxchg(ptr %word) {
9292
; CHECK-LABEL: define internal void @can_infer_cmpxchg(
93-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
93+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
9494
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
9595
; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr addrspace(1) [[TMP1]], i32 0, i32 4 monotonic monotonic, align 4
9696
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
@@ -144,7 +144,7 @@ define internal void @can_not_infer_cmpxchg(ptr %word) {
144144

145145
define internal void @can_infer_atomicrmw(ptr %word) {
146146
; CHECK-LABEL: define internal void @can_infer_atomicrmw(
147-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
147+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
148148
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
149149
; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[TMP1]], i32 12 monotonic, align 4
150150
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
define internal fastcc void @foo(ptr %kg) {
1010
; CHECK-LABEL: define internal fastcc void @foo(
11-
; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
1414
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276

0 commit comments

Comments
 (0)