
Reland "[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop #96752" #123616

Merged · 2 commits · Jan 27, 2025
19 changes: 19 additions & 0 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -853,6 +853,25 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType = true);

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
/// B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds);

class LoopAccessInfoManager {
/// The cache.
DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
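To make the Start/End formula in the doc comment above concrete, here is a minimal standalone sketch. It is illustrative only: the addresses, step, and backedge-taken count are made up, and it uses plain integers rather than SCEV expressions.

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical reverse i32 access: A is the address of the first access,
  // Step is -4 bytes per iteration, N is the backedge-taken count, and
  // SizeOfElt is 4 bytes.
  uint64_t A = 0x1000;
  int64_t Step = -4;
  uint64_t N = 7;
  uint64_t SizeOfElt = 4;

  // B = A + Step*N is the address of the last access (0xFE4 here).
  uint64_t B = A + static_cast<uint64_t>(Step * static_cast<int64_t>(N));
  uint64_t Start = std::min(A, B);           // UMIN(A, B)            -> 0xFE4
  uint64_t End = std::max(A, B) + SizeOfElt; // UMAX(A, B) + SizeOfElt -> 0x1004

  // [Start, End) covers every byte the loop touches, for either sign of Step.
  std::printf("Start=%#llx End=%#llx\n", (unsigned long long)Start,
              (unsigned long long)End);
  return 0;
}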
113 changes: 60 additions & 53 deletions llvm/lib/Analysis/Loads.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -277,84 +278,90 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
bool llvm::isDereferenceableAndAlignedInLoop(
LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
const Align Alignment = LI->getAlign();
auto &DL = LI->getDataLayout();
Value *Ptr = LI->getPointerOperand();

APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
DL.getTypeStoreSize(LI->getType()).getFixedValue());
const Align Alignment = LI->getAlign();

Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt();

// If given a uniform (i.e. non-varying) address, see if we can prove the
// access is safe within the loop w/o needing predication.
if (L->isLoopInvariant(Ptr))
return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
HeaderFirstNonPHI, AC, &DT);
return isDereferenceableAndAlignedPointer(
Ptr, Alignment, EltSize, DL, &*L->getHeader()->getFirstNonPHIIt(), AC,
&DT);

const SCEV *PtrScev = SE.getSCEV(Ptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(PtrScev);

// Otherwise, check to see if we have a repeating access pattern where we can
// prove that all accesses are well aligned and dereferenceable.
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
// Check to see if we have a repeating access pattern and it's possible
// to prove all accesses are well aligned.
if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
return false;
auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));

auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
if (!Step)
return false;

auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
if (!TC)
// For the moment, restrict ourselves to the case where the access size is a
// multiple of the requested alignment and the base is aligned.
// TODO: generalize if a case found which warrants
if (EltSize.urem(Alignment.value()) != 0)
return false;

// TODO: Handle overlapping accesses.
// We should be computing AccessSize as (TC - 1) * Step + EltSize.
if (EltSize.sgt(Step->getAPInt()))
if (EltSize.ugt(Step->getAPInt().abs()))
return false;

const SCEV *MaxBECount =
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
: SE.getConstantMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;

const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;

// Compute the total access size for access patterns with unit stride and
// patterns with gaps. For patterns with unit stride, Step and EltSize are the
// same.
// For patterns with gaps (i.e. non unit stride), we are
// accessing EltSize bytes at every Step.
APInt AccessSize = TC * Step->getAPInt();
// Try to get the access size.
const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);

assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
"implied by addrec definition");
Value *Base = nullptr;
if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
Base = StartS->getValue();
} else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
// Handle (NewBase + offset) as start value.
const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
if (StartS->getNumOperands() == 2 && Offset && NewBase) {
// The following code below assumes the offset is unsigned, but GEP
// offsets are treated as signed so we can end up with a signed value
// here too. For example, suppose the initial PHI value is (i8 255),
// the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
if (Offset->getAPInt().isNegative())
return false;
APInt AccessSize;
if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
Base = NewBase->getValue();
AccessSize = MaxPtrDiff;
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
if (MinAdd->getNumOperands() != 2)
return false;

// For the moment, restrict ourselves to the case where the offset is a
// multiple of the requested alignment and the base is aligned.
// TODO: generalize if a case found which warrants
if (Offset->getAPInt().urem(Alignment.value()) != 0)
return false;
Base = NewBase->getValue();
bool Overflow = false;
AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
if (Overflow)
return false;
}
}
const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
if (!Offset || !NewBase)
return false;

if (!Base)
return false;
// The following code below assumes the offset is unsigned, but GEP
// offsets are treated as signed so we can end up with a signed value
// here too. For example, suppose the initial PHI value is (i8 255),
// the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
if (Offset->getAPInt().isNegative())
return false;

// For the moment, restrict ourselves to the case where the access size is a
// multiple of the requested alignment and the base is aligned.
// TODO: generalize if a case found which warrants
if (EltSize.urem(Alignment.value()) != 0)
// For the moment, restrict ourselves to the case where the offset is a
// multiple of the requested alignment and the base is aligned.
// TODO: generalize if a case found which warrants
if (Offset->getAPInt().urem(Alignment.value()) != 0)
return false;

AccessSize = MaxPtrDiff + Offset->getAPInt();
Base = NewBase->getValue();
} else
return false;

Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
HeaderFirstNonPHI, AC, &DT);
}
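The Loads.cpp change above switches isDereferenceableAndAlignedInLoop over to getStartAndEndForAccess, which handles negative steps, so the analysis no longer gives up on reverse loops. A minimal sketch of the kind of source loop this targets (the function and names are hypothetical, and whether it ultimately vectorizes depends on the rest of the pipeline):

// A reverse (negative-stride) loop with a load guarded by a condition.
// To vectorize without predicating the load of a[i], the vectorizer needs
// to prove that the whole range the loop may touch is dereferenceable and
// aligned; previously that proof failed as soon as the step was negative.
int count_matches_reverse(const int *a, const unsigned char *cond, int n,
                          int key) {
  int count = 0;
  for (int i = n - 1; i >= 0; --i)
    if (cond[i] && a[i] == key)
      ++count;
  return count;
}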
61 changes: 26 additions & 35 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -190,42 +190,29 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
/// B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
PredicatedScalarEvolution &PSE,
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> &PointerBounds) {
ScalarEvolution *SE = PSE.getSE();

auto [Iter, Ins] = PointerBounds.insert(
{{PtrExpr, AccessTy},
{SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
if (!Ins)
return Iter->second;
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
if (PointerBounds) {
auto [Iter, Ins] = PointerBounds->insert(
{{PtrExpr, AccessTy},
{SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
if (!Ins)
return Iter->second;
PtrBoundsPair = &Iter->second;
}

const SCEV *ScStart;
const SCEV *ScEnd;

if (SE->isLoopInvariant(PtrExpr, Lp)) {
ScStart = ScEnd = PtrExpr;
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();

ScStart = AR->getStart();
ScEnd = AR->evaluateAtIteration(Ex, *SE);
ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
const SCEV *Step = AR->getStepRecurrence(*SE);

// For expressions with negative step, the upper bound is ScStart and the
@@ -244,16 +231,18 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};

assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");

// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getDataLayout();
Type *IdxTy = DL.getIndexType(PtrExpr->getType());
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

Iter->second = {ScStart, ScEnd};
return Iter->second;
std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
if (PointerBounds)
*PtrBoundsPair = Res;
return Res;
}

/// Calculate Start and End points of memory access using
@@ -263,8 +252,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
unsigned DepSetId, unsigned ASId,
PredicatedScalarEvolution &PSE,
bool NeedsFreeze) {
const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds());
Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1938,10 +1928,11 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// required for correctness.
if (SE.isLoopInvariant(Src, InnermostLoop) ||
SE.isLoopInvariant(Sink, InnermostLoop)) {
const auto &[SrcStart_, SrcEnd_] =
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
const auto &[SinkStart_, SinkEnd_] =
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
!isa<SCEVCouldNotCompute>(SinkStart_) &&
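For reference, the disjointness test quoted in the relocated doc comment, NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End), amounts to the following check, sketched here on plain integers rather than SCEV expressions:

#include <cstdint>

struct Interval {
  uint64_t Start;
  uint64_t End; // one past the last byte the access may touch
};

// Two accesses cannot conflict inside the loop if their [Start, End)
// byte ranges are disjoint.
bool noConflict(const Interval &P1, const Interval &P2) {
  return P2.Start >= P1.End || P1.Start >= P2.End;
}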