Skip to content

Commit 37f8515

Browse files
committed
Address review comments + rebase test updates
1 parent 813bbb5 commit 37f8515

File tree

8 files changed

+87
-122
lines changed

8 files changed

+87
-122
lines changed

llvm/include/llvm/Analysis/Loads.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,6 @@ bool isDereferenceableAndAlignedInLoop(
8888
AssumptionCache *AC = nullptr,
8989
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
9090

91-
/// Return true if the loop \p L cannot fault on any iteration and only
92-
/// contains read-only memory accesses.
93-
bool isDereferenceableReadOnlyLoop(
94-
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
95-
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr,
96-
SmallVectorImpl<LoadInst *> *NonDerefLoads = nullptr);
97-
9891
/// Return true if we know that executing a load from this value cannot trap.
9992
///
10093
/// If DT and ScanFrom are specified this method performs context-sensitive

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -382,11 +382,18 @@ class LoopVectorizationLegality {
382382
const LoopAccessInfo *getLAI() const { return LAI; }
383383

384384
bool isSafeForAnyVectorWidth() const {
385-
return LAI->getDepChecker().isSafeForAnyVectorWidth();
385+
return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
386+
(!hasUncountableEarlyExit() || !getNumPotentiallyFaultingPointers());
386387
}
387388

388389
uint64_t getMaxSafeVectorWidthInBits() const {
389-
return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
390+
uint64_t MaxSafeVectorWidth =
391+
LAI->getDepChecker().getMaxSafeVectorWidthInBits();
392+
// The legalizer bails out if getMinPageSize does not return a value.
393+
if (hasUncountableEarlyExit() && getNumPotentiallyFaultingPointers())
394+
MaxSafeVectorWidth =
395+
std::min(MaxSafeVectorWidth, uint64_t(*TTI->getMinPageSize()) * 8);
396+
return MaxSafeVectorWidth;
390397
}
391398

392399
/// Returns true if the loop has exactly one uncountable early exit, i.e. an
@@ -423,17 +430,17 @@ class LoopVectorizationLegality {
423430
unsigned getNumStores() const { return LAI->getNumStores(); }
424431
unsigned getNumLoads() const { return LAI->getNumLoads(); }
425432

426-
/// Return the number of loads in the loop that could potentially fault in a
427-
/// loop with uncountable early exits.
428-
unsigned getNumPotentiallyFaultingLoads() const {
429-
return PotentiallyFaultingLoads.size();
433+
/// Return the number of pointers in the loop that could potentially fault in
434+
/// a loop with uncountable early exits.
435+
unsigned getNumPotentiallyFaultingPointers() const {
436+
return PotentiallyFaultingPtrs.size();
430437
}
431438

432-
/// Return a vector of all potentially faulting loads in a loop with
439+
/// Return a vector of all potentially faulting pointers in a loop with
433440
/// uncountable early exits.
434-
const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *
435-
getPotentiallyFaultingLoads() const {
436-
return &PotentiallyFaultingLoads;
441+
const SmallVectorImpl<std::pair<const SCEV *, Type *>> *
442+
getPotentiallyFaultingPointers() const {
443+
return &PotentiallyFaultingPtrs;
437444
}
438445

439446
/// Returns a HistogramInfo* for the given instruction if it was determined
@@ -543,7 +550,7 @@ class LoopVectorizationLegality {
543550

544551
/// Returns true if all loads in the loop contained in \p Loads can be
545552
/// analyzed as potentially faulting. For each load that may fault, its pointer SCEV and loaded type are added to
546-
/// the member variable PotentiallyFaultingLoads.
553+
/// the member variable PotentiallyFaultingPtrs.
547554
bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> *Loads);
548555

549556
/// Return true if all of the instructions in the block can be speculatively
@@ -671,9 +678,9 @@ class LoopVectorizationLegality {
671678
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
672679
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
673680

674-
/// Keep a record of all potentially faulting loads in loops with
681+
/// Keep a record of all potentially faulting pointers in loops with
675682
/// uncountable early exits.
676-
SmallVector<std::pair<LoadInst *, const SCEV *>, 4> PotentiallyFaultingLoads;
683+
SmallVector<std::pair<const SCEV *, Type *>, 4> PotentiallyFaultingPtrs;
677684
};
678685

679686
} // namespace llvm

llvm/lib/Analysis/Loads.cpp

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -813,29 +813,3 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
813813

814814
return isPointerAlwaysReplaceable(From, To, DL);
815815
}
816-
817-
bool llvm::isDereferenceableReadOnlyLoop(
818-
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
819-
SmallVectorImpl<const SCEVPredicate *> *Predicates,
820-
SmallVectorImpl<LoadInst *> *NonDerefLoads) {
821-
bool Result = true;
822-
for (BasicBlock *BB : L->blocks()) {
823-
for (Instruction &I : *BB) {
824-
if (auto *LI = dyn_cast<LoadInst>(&I)) {
825-
if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC,
826-
Predicates)) {
827-
if (!NonDerefLoads)
828-
return false;
829-
NonDerefLoads->push_back(LI);
830-
Result = false;
831-
}
832-
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
833-
I.mayThrow()) {
834-
if (!NonDerefLoads)
835-
return false;
836-
Result = false;
837-
}
838-
}
839-
}
840-
return Result;
841-
}

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,7 +1641,7 @@ bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads(
16411641
return false;
16421642

16431643
LLVM_DEBUG(dbgs() << "LV: SCEV for Load Ptr: " << *Start << '\n');
1644-
PotentiallyFaultingLoads.push_back({LI, Start});
1644+
PotentiallyFaultingPtrs.push_back({Start, LI->getType()});
16451645
}
16461646
return true;
16471647
}
@@ -1750,6 +1750,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17501750
}
17511751
};
17521752

1753+
Predicates.clear();
1754+
SmallVector<LoadInst *, 4> NonDerefLoads;
17531755
for (auto *BB : TheLoop->blocks())
17541756
for (auto &I : *BB) {
17551757
if (I.mayWriteToMemory()) {
@@ -1759,24 +1761,30 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17591761
"Cannot vectorize early exit loop with writes to memory",
17601762
"WritesInEarlyExitLoop", ORE, TheLoop);
17611763
return false;
1762-
} else if (!IsSafeOperation(&I)) {
1764+
} else if (I.mayThrow() || !IsSafeOperation(&I)) {
17631765
reportVectorizationFailure("Early exit loop contains operations that "
17641766
"cannot be speculatively executed",
17651767
"UnsafeOperationsEarlyExitLoop", ORE,
17661768
TheLoop);
17671769
return false;
1770+
} else if (I.mayReadFromMemory()) {
1771+
auto *LI = dyn_cast<LoadInst>(&I);
1772+
if (!LI) {
1773+
reportVectorizationFailure(
1774+
"Loop may fault",
1775+
"Cannot vectorize potentially faulting early exit loop",
1776+
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1777+
return false;
1778+
} else if (!isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(),
1779+
*DT, AC, &Predicates))
1780+
NonDerefLoads.push_back(LI);
17681781
}
17691782
}
17701783

1771-
// The vectoriser cannot handle loads that occur after the early exit block.
1772-
assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first &&
1773-
"Expected latch predecessor to be the early exiting block");
1774-
1775-
Predicates.clear();
1776-
SmallVector<LoadInst *, 4> Loads;
1777-
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, &Predicates,
1778-
&Loads)) {
1779-
if (!TTI->getMinPageSize() || !analyzePotentiallyFaultingLoads(&Loads)) {
1784+
if (!NonDerefLoads.empty()) {
1785+
if (!TTI->getMinPageSize() ||
1786+
!analyzePotentiallyFaultingLoads(&NonDerefLoads)) {
1787+
PotentiallyFaultingPtrs.clear();
17801788
reportVectorizationFailure(
17811789
"Loop may fault",
17821790
"Cannot vectorize potentially faulting early exit loop",
@@ -1786,6 +1794,10 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17861794
LLVM_DEBUG(dbgs() << "We can vectorize the loop with runtime checks.\n");
17871795
}
17881796

1797+
// The vectoriser cannot handle loads that occur after the early exit block.
1798+
assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first &&
1799+
"Expected latch predecessor to be the early exiting block");
1800+
17891801
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
17901802
PSE.getSymbolicMaxBackedgeTakenCount();
17911803
// Since we have an exact exit count for the latch and the early exit

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ static cl::opt<bool> EnableEarlyExitVectorization(
402402
"Enable vectorization of early exit loops with uncountable exits."));
403403

404404
static cl::opt<unsigned> MaxNumPotentiallyFaultingPointers(
405-
"max-num-faulting-pointers", cl::init(1), cl::Hidden,
405+
"max-num-faulting-pointers", cl::init(0), cl::Hidden,
406406
cl::desc(
407407
"The maximum number of potentially faulting pointers we permit when "
408408
"vectorizing loops with uncountable exits."));
@@ -1621,22 +1621,6 @@ class LoopVectorizationCostModel {
16211621
ElementCount MaxSafeVF,
16221622
bool FoldTailByMasking);
16231623

1624-
bool isSafeForAnyVectorWidth() const {
1625-
return Legal->isSafeForAnyVectorWidth() &&
1626-
(!Legal->hasUncountableEarlyExit() ||
1627-
!Legal->getNumPotentiallyFaultingLoads());
1628-
}
1629-
1630-
uint64_t getMaxSafeVectorWidthInBits() const {
1631-
uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits();
1632-
// The legalizer bails out if getMinPageSize does not return a value.
1633-
if (Legal->hasUncountableEarlyExit() &&
1634-
Legal->getNumPotentiallyFaultingLoads())
1635-
MaxSafeVectorWidth =
1636-
std::min(MaxSafeVectorWidth, uint64_t(*TTI.getMinPageSize()) * 8);
1637-
return MaxSafeVectorWidth;
1638-
}
1639-
16401624
/// Checks if scalable vectorization is supported and enabled. Caches the
16411625
/// result to avoid repeated debug dumps for repeated queries.
16421626
bool isScalableVectorizationAllowed();
@@ -2185,38 +2169,24 @@ class GeneratedRTChecks {
21852169
};
21862170
} // namespace
21872171

2188-
std::optional<unsigned> getMaxVScale(const Function &F,
2189-
const TargetTransformInfo &TTI) {
2190-
if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
2191-
return MaxVScale;
2192-
2193-
if (F.hasFnAttribute(Attribute::VScaleRange))
2194-
return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
2195-
2196-
return std::nullopt;
2197-
}
2198-
21992172
static void addPointerAlignmentChecks(
2200-
const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads,
2201-
Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2202-
ElementCount VF) {
2173+
const SmallVectorImpl<std::pair<const SCEV *, Type *>> *Ptrs, Function *F,
2174+
PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF,
2175+
unsigned IC) {
22032176
ScalarEvolution *SE = PSE.getSE();
22042177
const DataLayout &DL = SE->getDataLayout();
2205-
Type *PtrIntType = DL.getIntPtrType(SE->getContext());
22062178

2207-
const SCEV *Zero = SE->getZero(PtrIntType);
2208-
const SCEV *ScevEC = SE->getElementCount(PtrIntType, VF);
2209-
2210-
for (auto Load : *Loads) {
2211-
APInt EltSize(
2212-
DL.getIndexTypeSizeInBits(Load.first->getPointerOperandType()),
2213-
DL.getTypeStoreSize(Load.first->getType()).getFixedValue());
2214-
const SCEV *Start = SE->getPtrToIntExpr(Load.second, PtrIntType);
2179+
for (auto Ptr : *Ptrs) {
2180+
Type *PtrIntType = DL.getIntPtrType(Ptr.first->getType());
2181+
APInt EltSize(PtrIntType->getScalarSizeInBits(),
2182+
DL.getTypeStoreSize(Ptr.second).getFixedValue());
2183+
const SCEV *Start = SE->getPtrToIntExpr(Ptr.first, PtrIntType);
2184+
const SCEV *ScevEC = SE->getElementCount(PtrIntType, VF * IC);
22152185
const SCEV *Align =
22162186
SE->getMulExpr(ScevEC, SE->getConstant(EltSize),
22172187
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
22182188
const SCEV *Rem = SE->getURemExpr(Start, Align);
2219-
PSE.addPredicate(*(SE->getEqualPredicate(Rem, Zero)));
2189+
PSE.addPredicate(*(SE->getEqualPredicate(Rem, SE->getZero(PtrIntType))));
22202190
}
22212191
}
22222192

@@ -2389,6 +2359,17 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
23892359
llvm_unreachable("invalid enum");
23902360
}
23912361

2362+
std::optional<unsigned> getMaxVScale(const Function &F,
2363+
const TargetTransformInfo &TTI) {
2364+
if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
2365+
return MaxVScale;
2366+
2367+
if (F.hasFnAttribute(Attribute::VScaleRange))
2368+
return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
2369+
2370+
return std::nullopt;
2371+
}
2372+
23922373
/// For the given VF and UF and maximum trip count computed for the loop, return
23932374
/// whether the induction variable might overflow in the vectorized loop. If not,
23942375
/// then we know a runtime overflow check always evaluates to false and can be
@@ -3881,15 +3862,15 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
38813862
return false;
38823863
}
38833864

3884-
if (!isSafeForAnyVectorWidth() && !getMaxVScale(*TheFunction, TTI)) {
3865+
if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(*TheFunction, TTI)) {
38853866
reportVectorizationInfo("The target does not provide maximum vscale value "
38863867
"for safe distance analysis.",
38873868
"ScalableVFUnfeasible", ORE, TheLoop);
38883869
return false;
38893870
}
38903871

38913872
if (Legal->hasUncountableEarlyExit() &&
3892-
Legal->getNumPotentiallyFaultingLoads() &&
3873+
Legal->getNumPotentiallyFaultingPointers() &&
38933874
!TTI.isVScaleKnownToBeAPowerOfTwo()) {
38943875
reportVectorizationInfo("Cannot vectorize potentially faulting early exit "
38953876
"loop with scalable vectors.",
@@ -3908,7 +3889,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
39083889

39093890
auto MaxScalableVF = ElementCount::getScalable(
39103891
std::numeric_limits<ElementCount::ScalarTy>::max());
3911-
if (isSafeForAnyVectorWidth())
3892+
if (Legal->isSafeForAnyVectorWidth())
39123893
return MaxScalableVF;
39133894

39143895
std::optional<unsigned> MaxVScale = getMaxVScale(*TheFunction, TTI);
@@ -3935,11 +3916,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
39353916
// the memory accesses that is most restrictive (involved in the smallest
39363917
// dependence distance).
39373918
unsigned MaxSafeElements =
3938-
llvm::bit_floor(getMaxSafeVectorWidthInBits() / WidestType);
3919+
llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
39393920

39403921
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
39413922
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
3942-
if (!isSafeForAnyVectorWidth())
3923+
if (!Legal->isSafeForAnyVectorWidth())
39433924
this->MaxSafeElements = MaxSafeElements;
39443925

39453926
LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10492,7 +10473,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1049210473
}
1049310474

1049410475
unsigned NumPotentiallyFaultingPointers =
10495-
LVL.getNumPotentiallyFaultingLoads();
10476+
LVL.getNumPotentiallyFaultingPointers();
1049610477
if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
1049710478
reportVectorizationFailure("Not worth vectorizing loop with uncountable "
1049810479
"early exit, due to number of potentially "
@@ -10660,15 +10641,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1066010641
// Optimistically generate runtime checks if they are needed. Drop them if
1066110642
// they turn out to not be profitable.
1066210643
if (VF.Width.isVector() || SelectedIC > 1) {
10663-
if (LVL.getNumPotentiallyFaultingLoads()) {
10664-
assert(SelectedIC == 1 &&
10665-
"Interleaving not supported for early exit loops and "
10666-
"potentially faulting loads");
10644+
if (LVL.getNumPotentiallyFaultingPointers()) {
1066710645
assert(!CM.foldTailWithEVL() &&
1066810646
"Explicit vector length unsupported for early exit loops and "
1066910647
"potentially faulting loads");
10670-
addPointerAlignmentChecks(LVL.getPotentiallyFaultingLoads(), F, PSE,
10671-
TTI, VF.Width);
10648+
addPointerAlignmentChecks(LVL.getPotentiallyFaultingPointers(), F, PSE,
10649+
TTI, VF.Width, SelectedIC);
1067210650
}
1067310651
Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
1067410652
}

0 commit comments

Comments
 (0)