Skip to content

[FunctionAttrs] Add the "initializes" attribute inference #97373

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Nov 19, 2024
329 changes: 326 additions & 3 deletions llvm/lib/Transforms/IPO/FunctionAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
Expand All @@ -36,6 +37,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
Expand Down Expand Up @@ -581,6 +583,200 @@ struct ArgumentUsesTracker : public CaptureTracker {
const SCCNodeSet &SCCNodes;
};

/// A single use reached from the analyzed argument, together with the constant
/// byte offset (relative to the argument pointer) at which that use accesses
/// memory. Uses are followed through GEPs; once a GEP whose offset cannot be
/// folded to a constant has been crossed, the Offset field is nullopt.
struct ArgumentUse {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing documentation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for reminding! Done.

/// The use being described; its user is the GEP or instruction that consumes
/// the (possibly offset) argument pointer.
Use *U;
/// Accumulated constant offset from the argument, or nullopt if unknown.
std::optional<int64_t> Offset;
};

/// Describes how one instruction accesses the argument.
///  - Write: a simple store, a non-volatile memset, the destination of a
///    non-volatile memcpy/memmove, or a call whose parameter only writes
///    memory and is nocapture.
///  - WriteWithSideEffect: a call that initializes the argument but whose
///    parameter is not both write-only and nocapture.
///  - Read: a simple load or the source of a non-volatile memcpy/memmove.
///  - Unknown: anything else, e.g. unrecognized instructions, instructions
///    that have more than one use of the argument, or volatile memory
///    accesses.
/// AccessRanges holds the accessed ranges relative to the argument pointer.
/// It may be empty for a write whose offset or size could not be determined.
struct ArgumentAccessInfo {
enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
AccessType ArgAccessType;
ConstantRangeList AccessRanges;
};

/// The argument accesses recorded for a single basic block.
struct UsesPerBlockInfo {
/// Access info for each instruction in the block that uses the argument.
SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
/// True once an instruction in the block was recorded as a Write or
/// WriteWithSideEffect with a non-empty access range.
bool HasWrites = false;
/// True once an instruction in the block was recorded as Unknown, including
/// instructions reached through more than one use of the argument.
bool HasUnknownAccess = false;
};

/// Function-wide summary of how an argument is used.
struct ArgumentUsesSummary {
/// True if any recorded access is a write with a non-empty access range.
bool HasAnyWrite = false;
/// True if such a write was found in a block other than the entry block.
bool HasWriteOutsideEntryBB = false;
/// Per-block access info, keyed by the block containing the using
/// instruction. Blocks without uses of the argument have no entry.
SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
};

/// Classify how instruction "I" accesses the argument through "ArgUse" and,
/// where it can be determined, which ranges (relative to the argument
/// pointer) it touches.
///
/// Recognized accesses:
///  - simple store whose pointer operand is the use         -> Write
///  - simple load                                           -> Read
///  - non-volatile memset                                   -> Write
///  - non-volatile memcpy/memmove, destination operand      -> Write
///  - non-volatile memcpy/memmove, source operand           -> Read
///  - call argument with "initializes" and a known offset   -> Write or
///    WriteWithSideEffect
/// Everything else is reported as Unknown.
///
/// A Write may carry an empty range list when its offset or size is not known
/// (or not representable); it then simply contributes nothing. A Read whose
/// range cannot be computed is reported as Unknown so that it is treated as a
/// clobber.
ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I,
                                        const ArgumentUse &ArgUse,
                                        const DataLayout &DL) {
  // Build the half-open range [Offset, Offset + Size). Returns nullopt if the
  // offset is unknown, if Size is zero or negative (ConstantRange cannot
  // represent an empty range with arbitrary equal bounds, and a negative size
  // would produce a wrapped range), or if the upper bound does not fit in a
  // signed 64-bit integer.
  auto GetAccessRange =
      [](std::optional<int64_t> Offset,
         int64_t Size) -> std::optional<ConstantRange> {
    if (!Offset || Size <= 0)
      return std::nullopt;
    APInt Low(64, *Offset, true);
    bool Overflow = false;
    APInt High = Low.sadd_ov(APInt(64, Size, true), Overflow);
    if (Overflow)
      return std::nullopt;
    return ConstantRange(Low, High);
  };
  // Range covered by a load/store of type "Ty" at "Offset"; nullopt for
  // scalable types.
  auto GetTypeAccessRange =
      [&DL, &GetAccessRange](
          Type *Ty,
          std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (TypeSize.isScalable())
      return std::nullopt;
    int64_t Size = TypeSize.getFixedValue();
    return GetAccessRange(Offset, Size);
  };
  // Range covered by a memory intrinsic of length "Length" at "Offset";
  // nullopt if the length is not a constant.
  auto GetConstantIntRange =
      [&GetAccessRange](
          Value *Length,
          std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (!ConstantLength)
      return std::nullopt;
    return GetAccessRange(Offset, ConstantLength->getSExtValue());
  };
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a fixed type size, we just set
      // the access range to empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Different from Write, if "LI"
      // doesn't have a fixed type size, we conservatively fall through and
      // report the access as Unknown (a clobber).
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        // The argument is the destination of the transfer.
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write,
                std::move(AccessRanges)};
      }
      if (&MTI->getOperandUse(1) == ArgUse.U) {
        // The argument is the source of the transfer. Without a computable
        // range this falls through to Unknown.
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    if (CB->isArgOperand(ArgUse.U)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should add a test where the argument is passed as an operand bundle to a call, rather than an argument

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      // Argument is a Write when parameter is writeonly/readnone
      // and nocapture. Otherwise, it's a WriteWithSideEffect.
      auto Access = CB->onlyWritesMemory(ArgNo) &&
                            CB->paramHasAttr(ArgNo, Attribute::NoCapture)
                        ? ArgumentAccessInfo::AccessType::Write
                        : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
      ConstantRangeList AccessRanges;
      if (IsInitialize && ArgUse.Offset) {
        // Shift the callee's "initializes" ranges by the offset at which the
        // argument pointer is passed.
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        for (ConstantRange &CR : CBCRL)
          AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset,
                                            CR.getUpper() + *ArgUse.Offset));
        return {Access, std::move(AccessRanges)};
      }
    }
  }
  // Other unrecognized instructions are considered as unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}

// Walk every use of argument "A" in "F" (looking through GEPs) and record, per
// basic block, how each using instruction accesses the argument.
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
  using AccessType = ArgumentAccessInfo::AccessType;

  auto &DL = F.getParent()->getDataLayout();
  unsigned PointerSize =
      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
  BasicBlock &EntryBB = F.getEntryBlock();
  ArgumentUsesSummary Summary;

  // Seed the worklist with the direct uses of the argument, all at offset 0.
  SmallVector<ArgumentUse, 4> Pending;
  for (Use &DirectUse : A.uses())
    Pending.push_back({&DirectUse, 0});

  // Store "Access" for "Inst" in the per-block table. Returns true only if
  // this particular access is a write with a non-empty range.
  auto RecordAccess = [&Summary](Instruction *Inst,
                                 ArgumentAccessInfo Access) -> bool {
    UsesPerBlockInfo &PerBlock = Summary.UsesPerBlock[Inst->getParent()];
    auto [Slot, IsFirstVisit] = PerBlock.Insts.try_emplace(Inst);
    ArgumentAccessInfo &Stored = Slot->second;

    // An instruction reached through a second use of the argument is
    // downgraded to a clobber.
    if (!IsFirstVisit) {
      Stored = {AccessType::Unknown, {}};
      PerBlock.HasUnknownAccess = true;
      return false;
    }

    Stored = std::move(Access);
    const AccessType Kind = Stored.ArgAccessType;
    if (Kind == AccessType::Unknown)
      PerBlock.HasUnknownAccess = true;

    const bool IsWriteKind =
        Kind == AccessType::Write || Kind == AccessType::WriteWithSideEffect;
    const bool WritesKnownRange = IsWriteKind && !Stored.AccessRanges.empty();
    if (WritesKnownRange)
      PerBlock.HasWrites = true;
    return WritesKnownRange;
  };

  // Phis are not looked through, so the use graph has no cycles and no
  // visited set is required.
  while (!Pending.empty()) {
    const ArgumentUse Current = Pending.pop_back_val();
    User *Consumer = Current.U->getUser();

    auto *GEP = dyn_cast<GEPOperator>(Consumer);
    if (!GEP) {
      auto *Inst = cast<Instruction>(Consumer);
      if (RecordAccess(Inst, getArgmentAccessInfo(Inst, Current, DL))) {
        Summary.HasAnyWrite = true;
        if (Inst->getParent() != &EntryBB)
          Summary.HasWriteOutsideEntryBB = true;
      }
      continue;
    }

    // Queue the users of the GEP. The offset stays known only if it was known
    // so far and this GEP folds to a constant offset.
    std::optional<int64_t> DerivedOffset;
    if (Current.Offset) {
      APInt Delta(PointerSize, 0);
      if (GEP->accumulateConstantOffset(DL, Delta))
        DerivedOffset = *Current.Offset + Delta.getSExtValue();
    }
    for (Use &GEPUse : GEP->uses())
      Pending.push_back({&GEPUse, DerivedOffset});
  }
  return Summary;
}

} // end anonymous namespace

namespace llvm {
Expand Down Expand Up @@ -867,9 +1063,129 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
return true;
}

/// Try to infer an "initializes" attribute for argument "A" of function "F".
///
/// The argument's uses are collected per block, then blocks are processed
/// bottom-up: the ranges initialized from a block are the intersection of the
/// ranges of its successors, updated by walking the block's own accesses from
/// last to first. The ranges computed for the entry block become the
/// attribute value (unioned with any "initializes" ranges already present).
///
/// Returns true if an attribute was added to "A", false if nothing could be
/// inferred or the inferred ranges equal the existing attribute.
static bool inferInitializes(Argument &A, Function &F) {
auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
// No write anywhere in the function, bail.
if (!ArgumentUses.HasAnyWrite)
return false;

auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
BasicBlock &EntryBB = F.getEntryBlock();
// A map to store the argument ranges initialized by a BasicBlock (including
// its successors).
DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
// Visit the successors of "BB" block and the instructions in BB (post-order)
// to get the argument ranges initialized by "BB" (including its successors).
// The result will be cached in "Initialized".
auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment on what VisitBlock does

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done and found a corner case :-)

"If this block has uses and none are writes, the argument is not initialized in this block."
Removed this early return. See the test.

auto UPB = UsesPerBlock.find(BB);
ConstantRangeList CRL;

// Start with intersection of successors.
// If this block has any clobbering use, we're going to clear out the
// ranges at some point in this block anyway, so don't bother looking at
// successors.
if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
bool HasAddedSuccessor = false;
for (auto *Succ : successors(BB)) {
if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
if (HasAddedSuccessor) {
CRL = CRL.intersectWith(SuccI->second);
} else {
CRL = SuccI->second;
HasAddedSuccessor = true;
}
} else {
// Only non-empty results are stored in "Initialized", so a successor
// without an entry contributes nothing and the intersection is empty.
CRL = ConstantRangeList();
break;
}
}
}

if (UPB != UsesPerBlock.end()) {
// Sort uses in this block by instruction order.
SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
append_range(Insts, UPB->second.Insts);
sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
return LHS.first->comesBefore(RHS.first);
});

// From the end of the block to the beginning of the block, set
// initializes ranges.
// At each step "CRL" holds the ranges gathered from later instructions and
// from the successors: an Unknown access or a WriteWithSideEffect discards
// them, a write then adds its own ranges, and a read removes its ranges.
for (auto &[_, Info] : reverse(Insts)) {
if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
Info.ArgAccessType ==
ArgumentAccessInfo::AccessType::WriteWithSideEffect)
CRL = ConstantRangeList();
if (!Info.AccessRanges.empty()) {
if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
Info.ArgAccessType ==
ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
CRL = CRL.unionWith(Info.AccessRanges);
} else {
assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
for (const auto &ReadRange : Info.AccessRanges)
CRL.subtract(ReadRange);
}
}
}
}
return CRL;
};

ConstantRangeList EntryCRL;
// If all write instructions are in the EntryBB, or if the EntryBB has
// a clobbering use, we only need to look at EntryBB.
bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
if (!OnlyScanEntryBlock)
if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
EntryUPB != UsesPerBlock.end())
OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
if (OnlyScanEntryBlock) {
EntryCRL = VisitBlock(&EntryBB);
if (EntryCRL.empty())
return false;
} else {
// Now we have to go through CFG to get the initialized argument ranges
// across blocks. With dominance and post-dominance, the initialized ranges
// by a block include both accesses inside this block and accesses in its
// (transitive) successors. So visit successors before predecessors with a
// post-order walk of the blocks and memorize the results in "Initialized".
for (const BasicBlock *BB : post_order(&F)) {
ConstantRangeList CRL = VisitBlock(BB);
if (!CRL.empty())
Initialized[BB] = CRL;
}

auto EntryCRLI = Initialized.find(&EntryBB);
if (EntryCRLI == Initialized.end())
return false;

EntryCRL = EntryCRLI->second;
}

assert(!EntryCRL.empty() &&
"should have bailed already if EntryCRL is empty");

// If the argument already carries "initializes", report no change when the
// inferred ranges are identical; otherwise merge the two range lists.
if (A.hasAttribute(Attribute::Initializes)) {
ConstantRangeList PreviousCRL =
A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
if (PreviousCRL == EntryCRL)
return false;
EntryCRL = EntryCRL.unionWith(PreviousCRL);
}

A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
EntryCRL.rangesRef()));

return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
SmallSet<Function *, 8> &Changed,
bool SkipInitializes) {
ArgumentGraph AG;

// Check each function in turn, determining which pointer arguments are not
Expand Down Expand Up @@ -937,6 +1253,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
if (addAccessAttr(&A, R))
Changed.insert(F);
}
if (!SkipInitializes && !A.onlyReadsMemory()) {
if (inferInitializes(A, *F))
Changed.insert(F);
}
}
}

Expand Down Expand Up @@ -1910,13 +2230,16 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,

SmallSet<Function *, 8> Changed;
if (ArgAttrsOnly) {
addArgumentAttrs(Nodes.SCCNodes, Changed);
// ArgAttrsOnly means to only infer attributes that may aid optimizations
// on the *current* function. "initializes" attribute is to aid
// optimizations (like DSE) on the callers, so skip "initializes" here.
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
return Changed;
}

addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addColdAttrs(Nodes.SCCNodes, Changed);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind {
ret void
}

; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 {
; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 {
define void @test0_no(ptr %p) nounwind {
store i32 0, ptr %p, !tbaa !2
ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; Should have call to sincos declarations, not calls to the asm pseudo-libcalls
define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
Expand Down
Loading