Skip to content

Commit 5c095ae

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent a04cff1 commit 5c095ae

File tree

5 files changed

+357
-5
lines changed

5 files changed

+357
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/TargetTransformInfo.h"
18+
#include "llvm/Analysis/UniformityAnalysis.h"
1719
#include "llvm/CodeGen/TargetPassConfig.h"
1820
#include "llvm/IR/IntrinsicsAMDGPU.h"
1921
#include "llvm/IR/IntrinsicsR600.h"
@@ -1299,6 +1301,104 @@ struct AAAMDGPUNoAGPR
12991301

13001302
const char AAAMDGPUNoAGPR::ID = 0;
13011303

1304+
struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
1305+
using Base = StateWrapper<BooleanState, AbstractAttribute>;
1306+
AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1307+
1308+
/// Create an abstract attribute view for the position \p IRP.
1309+
static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
1310+
Attributor &A);
1311+
1312+
/// See AbstractAttribute::getName()
1313+
const std::string getName() const override { return "AAAMDGPUUniform"; }
1314+
1315+
const std::string getAsStr(Attributor *A) const override {
1316+
return getAssumed() ? "inreg" : "non-inreg";
1317+
}
1318+
1319+
void trackStatistics() const override {}
1320+
1321+
/// See AbstractAttribute::getIdAddr()
1322+
const char *getIdAddr() const override { return &ID; }
1323+
1324+
/// This function should return true if the type of the \p AA is
1325+
/// AAAMDGPUUniform
1326+
static bool classof(const AbstractAttribute *AA) {
1327+
return (AA->getIdAddr() == &ID);
1328+
}
1329+
1330+
/// Unique ID (due to the unique address)
1331+
static const char ID;
1332+
};
1333+
1334+
const char AAAMDGPUUniform::ID = 0;
1335+
1336+
struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
1337+
AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
1338+
: AAAMDGPUUniform(IRP, A) {}
1339+
1340+
void initialize(Attributor &A) override {
1341+
assert(
1342+
!AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv()));
1343+
if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
1344+
indicateOptimisticFixpoint();
1345+
}
1346+
1347+
ChangeStatus updateImpl(Attributor &A) override {
1348+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1349+
1350+
auto isUniform = [&](AbstractCallSite ACS) -> bool {
1351+
CallBase *CB = ACS.getInstruction();
1352+
Value *V = CB->getArgOperandUse(ArgNo);
1353+
if (isa<Constant>(V))
1354+
return true;
1355+
Function *F = nullptr;
1356+
if (auto *Arg = dyn_cast<Argument>(V))
1357+
F = Arg->getParent();
1358+
else if (auto *I = dyn_cast<Instruction>(V))
1359+
F = I->getFunction();
1360+
1361+
if (F) {
1362+
auto *UA =
1363+
A.getInfoCache()
1364+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(*F);
1365+
return UA && UA->isUniform(V);
1366+
}
1367+
1368+
// What else can it be here?
1369+
return false;
1370+
};
1371+
1372+
bool UsedAssumedInformation = true;
1373+
if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
1374+
UsedAssumedInformation))
1375+
return indicatePessimisticFixpoint();
1376+
1377+
if (!UsedAssumedInformation)
1378+
return indicateOptimisticFixpoint();
1379+
1380+
return ChangeStatus::UNCHANGED;
1381+
}
1382+
1383+
ChangeStatus manifest(Attributor &A) override {
1384+
return A.manifestAttrs(
1385+
getIRPosition(),
1386+
{Attribute::get(getAnchorValue().getContext(), Attribute::InReg)});
1387+
}
1388+
};
1389+
1390+
/// Allocate the AAAMDGPUUniform implementation matching the kind of \p IRP in
/// the Attributor's allocator. Only argument positions are supported; any
/// other position kind is treated as unreachable.
AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_ARGUMENT)
    return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);

  // TODO: Since inreg is also allowed for return value, maybe we need to add
  // AAAMDGPUUniformCallSiteReturned?
  llvm_unreachable("not a valid position for AAAMDGPUUniform");
}
1401+
13021402
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
13031403
/// based on the finalized 'amdgpu-flat-work-group-size' attribute.
13041404
/// Both attributes start with narrow ranges that expand during iteration.
@@ -1385,7 +1485,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13851485
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
13861486
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
13871487
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1388-
&AAInstanceInfo::ID});
1488+
&AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
13891489

13901490
AttributorConfig AC(CGUpdater);
13911491
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1438,6 +1538,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14381538
IRPosition::value(*CmpX->getPointerOperand()));
14391539
}
14401540
}
1541+
1542+
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1543+
for (auto &Arg : F->args())
1544+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
1545+
}
14411546
}
14421547

14431548
bool Changed = A.run() == ChangeStatus::CHANGED;
@@ -1470,6 +1575,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
14701575

14711576
// Registers CycleInfoWrapperPass and UniformityInfoWrapperPass as required
// analyses of this pass.
void getAnalysisUsage(AnalysisUsage &AU) const override {
14721577
AU.addRequired<CycleInfoWrapperPass>();
1578+
AU.addRequired<UniformityInfoWrapperPass>();
14731579
}
14741580

14751581
StringRef getPassName() const override { return "AMDGPU Attributor"; }

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ define void @call_volatile_load_store_as_4(ptr addrspace(4) %p1, ptr addrspace(4
9090

9191
define internal void @can_infer_cmpxchg(ptr %word) {
9292
; CHECK-LABEL: define internal void @can_infer_cmpxchg(
93-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
93+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
9494
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
9595
; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr addrspace(1) [[TMP1]], i32 0, i32 4 monotonic monotonic, align 4
9696
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
@@ -144,7 +144,7 @@ define internal void @can_not_infer_cmpxchg(ptr %word) {
144144

145145
define internal void @can_infer_atomicrmw(ptr %word) {
146146
; CHECK-LABEL: define internal void @can_infer_atomicrmw(
147-
; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
147+
; CHECK-SAME: ptr inreg [[WORD:%.*]]) #[[ATTR0]] {
148148
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
149149
; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[TMP1]], i32 12 monotonic, align 4
150150
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
define internal fastcc void @foo(ptr %kg) {
1010
; CHECK-LABEL: define internal fastcc void @foo(
11-
; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
1414
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276

0 commit comments

Comments
 (0)