Skip to content

Commit ec58810

Browse files
committed
[LoopVectorize] Teach LoopVectorizationLegality about more early exits
This patch is split off from PR #88385 and concerns only the code related to the legality of vectorising early exit loops. It is the first step in adding support for vectorisation of a simple class of loops that typically involves searching for something, e.g.

  for (int i = 0; i < n; i++) {
    if (p[i] == val)
      return i;
  }
  return n;

or

  for (int i = 0; i < n; i++) {
    if (p1[i] != p2[i])
      return i;
  }
  return n;

In this initial commit LoopVectorizationLegality will only consider early exit loops legal for vectorising if they follow these criteria:

1. There are no stores in the loop.
2. The loop must have only one early exit like those shown in the above example. I have referred to such exits as speculative early exits, to distinguish from existing support for early exits where the exit-not-taken count is known exactly at compile time.
3. The early exiting block dominates the latch block.
4. The latch block must have an exact exit count.
5. There are no loads after the early exiting block.
6. The loop must not contain reductions or recurrences. I don't see anything fundamental blocking vectorisation of such loops, but I just haven't done the work to support them yet.
7. We must be able to prove at compile-time that loops will not contain faulting loads.

Tests have been added here:

  Transforms/LoopVectorize/AArch64/simple_early_exit.ll
1 parent b153cc5 commit ec58810

File tree

9 files changed

+1800
-23
lines changed

9 files changed

+1800
-23
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,8 @@ class LoopAccessInfo {
661661
bool isInvariant(Value *V) const;
662662

663663
unsigned getNumStores() const { return NumStores; }
664-
unsigned getNumLoads() const { return NumLoads;}
664+
unsigned getNumLoads() const { return NumLoads; }
665+
unsigned getNumCalls() const { return NumCalls; }
665666

666667
/// The diagnostics report generated for the analysis. E.g. why we
667668
/// couldn't analyze the loop.
@@ -754,6 +755,7 @@ class LoopAccessInfo {
754755

755756
unsigned NumLoads = 0;
756757
unsigned NumStores = 0;
758+
unsigned NumCalls = 0;
757759

758760
/// Cache the result of analyzeLoop.
759761
bool CanVecMem = false;

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,24 @@ class LoopVectorizationLegality {
377377
return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
378378
}
379379

380+
/// Returns true if the loop has a speculative early exit, i.e. an
381+
/// uncountable exit that isn't the latch block.
382+
bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; }
383+
384+
/// Returns the speculative early exiting block.
385+
BasicBlock *getSpeculativeEarlyExitingBlock() const {
386+
assert(getUncountableExitingBlocks().size() == 1 &&
387+
"Expected only a single uncountable exiting block");
388+
return getUncountableExitingBlocks()[0];
389+
}
390+
391+
/// Returns the destination of a speculative early exiting block.
392+
BasicBlock *getSpeculativeEarlyExitBlock() const {
393+
assert(getUncountableExitBlocks().size() == 1 &&
394+
"Expected only a single uncountable exit block");
395+
return getUncountableExitBlocks()[0];
396+
}
397+
380398
/// Returns true if vector representation of the instruction \p I
381399
/// requires mask.
382400
bool isMaskRequired(const Instruction *I) const {
@@ -404,6 +422,22 @@ class LoopVectorizationLegality {
404422

405423
DominatorTree *getDominatorTree() const { return DT; }
406424

425+
/// Returns all exiting blocks with a countable exit, i.e. the
426+
/// exit-not-taken count is known exactly at compile time.
427+
const SmallVector<BasicBlock *, 4> &getCountableExitingBlocks() const {
428+
return CountableExitingBlocks;
429+
}
430+
431+
/// Returns all the exiting blocks with an uncountable exit.
432+
const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const {
433+
return UncountableExitingBlocks;
434+
}
435+
436+
/// Returns all the exit blocks from uncountable exiting blocks.
437+
SmallVector<BasicBlock *, 4> getUncountableExitBlocks() const {
438+
return UncountableExitBlocks;
439+
}
440+
407441
private:
408442
/// Return true if the pre-header, exiting and latch blocks of \p Lp and all
409443
/// its nested loops are considered legal for vectorization. These legal
@@ -436,7 +470,7 @@ class LoopVectorizationLegality {
436470
/// we read and write from memory. This method checks if it is
437471
/// legal to vectorize the code, considering only memory constraints.
438472
/// Returns true if the loop is vectorizable
439-
bool canVectorizeMemory();
473+
bool canVectorizeMemory(bool IsEarlyExitLoop);
440474

441475
/// Return true if we can vectorize this loop using the IF-conversion
442476
/// transformation.
@@ -446,6 +480,10 @@ class LoopVectorizationLegality {
446480
/// specific checks for outer loop vectorization.
447481
bool canVectorizeOuterLoop();
448482

483+
/// Returns true if this is a supported early exit loop that we can
484+
/// vectorize.
485+
bool isVectorizableEarlyExitLoop();
486+
449487
/// Return true if all of the instructions in the block can be speculatively
450488
/// executed, and record the loads/stores that require masking.
451489
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -551,6 +589,16 @@ class LoopVectorizationLegality {
551589
/// (potentially) make a better decision on the maximum VF and enable
552590
/// the use of those function variants.
553591
bool VecCallVariantsFound = false;
592+
593+
/// Indicates whether this loop has a speculative early exit, i.e. an
594+
/// uncountable exiting block that is not the latch.
595+
bool HasSpeculativeEarlyExit = false;
596+
597+
/// Keeps track of all the exits with known or countable exit-not-taken
598+
/// counts.
599+
SmallVector<BasicBlock *, 4> CountableExitingBlocks;
600+
SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
601+
SmallVector<BasicBlock *, 4> UncountableExitBlocks;
554602
};
555603

556604
} // namespace llvm

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2445,8 +2445,11 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
24452445
// vectorize a loop if it contains known function calls that don't set
24462446
// the flag. Therefore, it is safe to ignore this read from memory.
24472447
auto *Call = dyn_cast<CallInst>(&I);
2448-
if (Call && getVectorIntrinsicIDForCall(Call, TLI))
2449-
continue;
2448+
if (Call) {
2449+
NumCalls++;
2450+
if (getVectorIntrinsicIDForCall(Call, TLI))
2451+
continue;
2452+
}
24502453

24512454
// If this is a load, save it. If this instruction can read from memory
24522455
// but is not a load, then we quit. Notice that we don't handle function

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 146 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10511051
return true;
10521052
}
10531053

1054-
bool LoopVectorizationLegality::canVectorizeMemory() {
1054+
bool LoopVectorizationLegality::canVectorizeMemory(bool IsEarlyExitLoop) {
10551055
LAI = &LAIs.getInfo(*TheLoop);
10561056
const OptimizationRemarkAnalysis *LAR = LAI->getReport();
10571057
if (LAR) {
@@ -1073,6 +1073,50 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
10731073
return false;
10741074
}
10751075

1076+
// For loops with uncountable early exiting blocks that are not the latch
1077+
// it's necessary to perform extra checks, since the vectoriser is currently
1078+
// only capable of handling simple search loops.
1079+
if (IsEarlyExitLoop) {
1080+
// We don't support calls or any memory accesses that write to memory.
1081+
if (LAI->getNumStores()) {
1082+
reportVectorizationFailure(
1083+
"Writes to memory unsupported in early exit loops",
1084+
"Cannot vectorize early exit loop with writes to memory",
1085+
"WritesInEarlyExitLoop", ORE, TheLoop);
1086+
return false;
1087+
}
1088+
1089+
if (LAI->getNumCalls()) {
1090+
reportVectorizationFailure(
1091+
"Calls unsupported in early exit loops",
1092+
"Cannot vectorize early exit loop with function calls",
1093+
"CallsInEarlyExitLoop", ORE, TheLoop);
1094+
return false;
1095+
}
1096+
1097+
// The vectoriser cannot handle loads that occur after the early exiting block.
1098+
BasicBlock *LatchBB = TheLoop->getLoopLatch();
1099+
for (Instruction &I : *LatchBB) {
1100+
if (I.mayReadFromMemory()) {
1101+
reportVectorizationFailure(
1102+
"Loads not permitted after early exit",
1103+
"Cannot vectorize early exit loop with loads after early exit",
1104+
"LoadsAfterEarlyExit", ORE, TheLoop);
1105+
return false;
1106+
}
1107+
}
1108+
1109+
// The vectoriser does not yet handle loops that may fault, but this will
1110+
// be improved in a follow-on patch.
1111+
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
1112+
reportVectorizationFailure(
1113+
"Loop may fault",
1114+
"Cannot vectorize potentially faulting early exit loop",
1115+
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1116+
return false;
1117+
}
1118+
}
1119+
10761120
// We can vectorize stores to invariant address when final reduction value is
10771121
// guaranteed to be stored at the end of the loop. Also, if decision to
10781122
// vectorize loop is made, runtime checks are added so as to make sure that
@@ -1445,6 +1489,95 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
14451489
return Result;
14461490
}
14471491

1492+
bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1493+
// At least one of the exiting blocks must be the latch.
1494+
BasicBlock *LatchBB = TheLoop->getLoopLatch();
1495+
if (!LatchBB) {
1496+
reportVectorizationFailure("Loop does not have a latch",
1497+
"Cannot vectorize early exit loop",
1498+
"NoLatchEarlyExit", ORE, TheLoop);
1499+
return false;
1500+
}
1501+
1502+
SmallVector<BasicBlock *, 8> ExitingBlocks;
1503+
TheLoop->getExitingBlocks(ExitingBlocks);
1504+
1505+
// Keep a record of all the exiting blocks with exact exit counts, as well as
1506+
// those with inexact counts.
1507+
SmallVector<const SCEVPredicate *, 4> Predicates;
1508+
for (BasicBlock *BB1 : ExitingBlocks) {
1509+
const SCEV *EC =
1510+
PSE.getSE()->getPredicatedExitCount(TheLoop, BB1, &Predicates);
1511+
if (isa<SCEVCouldNotCompute>(EC)) {
1512+
UncountableExitingBlocks.push_back(BB1);
1513+
1514+
unsigned NumExitBlocks = 0;
1515+
for (BasicBlock *BB2 : successors(BB1)) {
1516+
if (!TheLoop->contains(BB2)) {
1517+
UncountableExitBlocks.push_back(BB2);
1518+
NumExitBlocks++;
1519+
}
1520+
}
1521+
if (NumExitBlocks > 1) {
1522+
reportVectorizationFailure(
1523+
"Early exiting block has more than one successor outside of loop",
1524+
"Too many successors from early exiting block",
1525+
"EarlyExitTooManySuccessors", ORE, TheLoop);
1526+
return false;
1527+
}
1528+
} else
1529+
CountableExitingBlocks.push_back(BB1);
1530+
}
1531+
Predicates.clear();
1532+
1533+
// We only support one uncountable early exit.
1534+
if (getUncountableExitingBlocks().size() != 1) {
1535+
reportVectorizationFailure(
1536+
"Loop has too many uncountable exits",
1537+
"Cannot vectorize early exit loop with more than one early exit",
1538+
"TooManyUncountableEarlyExits", ORE, TheLoop);
1539+
return false;
1540+
}
1541+
1542+
// The only supported early exit loops so far are ones where the early
1543+
// exiting block is a unique predecessor of the latch block.
1544+
BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1545+
if (!LatchPredBB || LatchPredBB != getUncountableExitingBlocks()[0]) {
1546+
reportVectorizationFailure("Early exit is not the latch predecessor",
1547+
"Cannot vectorize early exit loop",
1548+
"EarlyExitNotLatchPredecessor", ORE, TheLoop);
1549+
return false;
1550+
}
1551+
1552+
if (Reductions.size() || FixedOrderRecurrences.size()) {
1553+
reportVectorizationFailure(
1554+
"Found reductions or recurrences in early-exit loop",
1555+
"Cannot vectorize early exit loop with reductions or recurrences",
1556+
"RecurrencesInEarlyExitLoop", ORE, TheLoop);
1557+
return false;
1558+
}
1559+
1560+
LLVM_DEBUG(
1561+
dbgs()
1562+
<< "LV: Found an early exit. Retrying with speculative exit count.\n");
1563+
if (isa<SCEVCouldNotCompute>(
1564+
PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1565+
reportVectorizationFailure(
1566+
"Cannot determine exact exit count for latch block",
1567+
"Cannot vectorize early exit loop",
1568+
"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1569+
return false;
1570+
}
1571+
1572+
const SCEV *SpecExitCount = PSE.getSymbolicMaxBackedgeTakenCount();
1573+
assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
1574+
"Failed to get symbolic expression for backedge taken count");
1575+
1576+
LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: "
1577+
<< *SpecExitCount << '\n');
1578+
return true;
1579+
}
1580+
14481581
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
14491582
// Store the result and return it at the end instead of exiting early, in case
14501583
// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
@@ -1505,19 +1638,20 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15051638
return false;
15061639
}
15071640

1508-
// Go over each instruction and look at memory deps.
1509-
if (!canVectorizeMemory()) {
1510-
LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1511-
if (DoExtraAnalysis)
1512-
Result = false;
1513-
else
1514-
return false;
1641+
HasSpeculativeEarlyExit = false;
1642+
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1643+
if (!isVectorizableEarlyExitLoop()) {
1644+
if (DoExtraAnalysis)
1645+
Result = false;
1646+
else
1647+
return false;
1648+
} else
1649+
HasSpeculativeEarlyExit = true;
15151650
}
15161651

1517-
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1518-
reportVectorizationFailure("could not determine number of loop iterations",
1519-
"could not determine number of loop iterations",
1520-
"CantComputeNumberOfIterations", ORE, TheLoop);
1652+
// Go over each instruction and look at memory deps.
1653+
if (!canVectorizeMemory(HasSpeculativeEarlyExit)) {
1654+
LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
15211655
if (DoExtraAnalysis)
15221656
Result = false;
15231657
else

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9815,6 +9815,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98159815
return false;
98169816
}
98179817

9818+
if (LVL.hasSpeculativeEarlyExit()) {
9819+
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Auto-vectorization of early "
9820+
<< "exit loops is not yet supported.\n");
9821+
return false;
9822+
}
9823+
98189824
// Entrance to the VPlan-native vectorization path. Outer loops are processed
98199825
// here. They may require CFG and instruction level transformations before
98209826
// even evaluating whether vectorization is profitable. Since we cannot modify

0 commit comments

Comments
 (0)