Skip to content

Commit f081ffe

Browse files
committed
[LV] Simplify & clarify bypass handling for IV resume values (NFC)
Split off the NFC refactoring part of #110577. This simplifies and clarifies how induction resume values are created for bypass blocks.
1 parent 9a24f21 commit f081ffe

File tree

1 file changed

+46
-41
lines changed

1 file changed

+46
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 46 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -517,13 +517,15 @@ class InnerLoopVectorizer {
517517
/// iteration count in the scalar epilogue, from where the vectorized loop
518518
/// left off. \p Step is the SCEV-expanded induction step to use. In cases
519519
/// where the loop skeleton is more complicated (i.e., epilogue vectorization)
520-
/// and the resume values can come from an additional bypass block, the \p
521-
/// AdditionalBypass pair provides information about the bypass block and the
522-
/// end value on the edge from bypass to this loop.
523-
PHINode *createInductionResumeValue(
524-
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525-
ArrayRef<BasicBlock *> BypassBlocks,
526-
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
520+
/// and the resume values can come from an additional bypass block,
521+
/// \p MainVectorTripCount provides the trip count of the main vector loop,
522+
/// used to compute the resume value reaching the scalar loop preheader
523+
/// directly from this additional bypass block.
524+
PHINode *createInductionResumeValue(PHINode *OrigPhi,
525+
const InductionDescriptor &ID,
526+
Value *Step,
527+
ArrayRef<BasicBlock *> BypassBlocks,
528+
Value *MainVectorTripCount = nullptr);
527529

528530
/// Returns the original loop trip count.
529531
Value *getTripCount() const { return TripCount; }
@@ -533,6 +535,14 @@ class InnerLoopVectorizer {
533535
/// count of the original loop for both main loop and epilogue vectorization.
534536
void setTripCount(Value *TC) { TripCount = TC; }
535537

538+
/// Return the additional bypass block which targets the scalar loop by
539+
/// skipping the epilogue loop after completing the main loop.
540+
BasicBlock *getAdditionalBypassBlock() const {
541+
assert(AdditionalBypassBlock &&
542+
"Trying to access AdditionalBypassBlock but it has not been set");
543+
return AdditionalBypassBlock;
544+
}
545+
536546
protected:
537547
friend class LoopVectorizationPlanner;
538548

@@ -568,13 +578,11 @@ class InnerLoopVectorizer {
568578

569579
/// Create new phi nodes for the induction variables to resume iteration count
570580
/// in the scalar epilogue, from where the vectorized loop left off.
571-
/// In cases where the loop skeleton is more complicated (eg. epilogue
572-
/// vectorization) and the resume values can come from an additional bypass
573-
/// block, the \p AdditionalBypass pair provides information about the bypass
574-
/// block and the end value on the edge from bypass to this loop.
575-
void createInductionResumeValues(
576-
const SCEV2ValueTy &ExpandedSCEVs,
577-
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
581+
/// In cases where the loop skeleton is more complicated (i.e. epilogue
582+
/// vectorization), \p MainVectorTripCount provides the trip count of the main
583+
/// loop, used to compute these resume values.
584+
void createInductionResumeValues(const SCEV2ValueTy &ExpandedSCEVs,
585+
Value *MainVectorTripCount = nullptr);
578586

579587
/// Allow subclasses to override and print debug traces before/after vplan
580588
/// execution, when trace information is requested.
@@ -664,6 +672,11 @@ class InnerLoopVectorizer {
664672
/// for cleaning the checks, if vectorization turns out unprofitable.
665673
GeneratedRTChecks &RTChecks;
666674

675+
/// The additional bypass block which conditionally skips over the epilogue
676+
/// loop after executing the main loop. Needed to resume inductions and
677+
/// reductions during epilogue vectorization.
678+
BasicBlock *AdditionalBypassBlock = nullptr;
679+
667680
VPlan &Plan;
668681
};
669682

@@ -2582,18 +2595,16 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25822595

25832596
PHINode *InnerLoopVectorizer::createInductionResumeValue(
25842597
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2585-
ArrayRef<BasicBlock *> BypassBlocks,
2586-
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598+
ArrayRef<BasicBlock *> BypassBlocks, Value *MainVectorTripCount) {
25872599
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
25882600
assert(VectorTripCount && "Expected valid arguments");
25892601

25902602
Instruction *OldInduction = Legal->getPrimaryInduction();
2591-
Value *EndValue = nullptr;
2592-
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
2593-
if (OrigPhi == OldInduction) {
2594-
// We know what the end value is.
2595-
EndValue = VectorTripCount;
2596-
} else {
2603+
// For the primary induction the end values are known.
2604+
Value *EndValue = VectorTripCount;
2605+
Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2606+
// Otherwise compute them accordingly.
2607+
if (OrigPhi != OldInduction) {
25972608
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
25982609

25992610
// Fast-math-flags propagate from the original induction instruction.
@@ -2605,12 +2616,12 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26052616
EndValue->setName("ind.end");
26062617

26072618
// Compute the end value for the additional bypass (if applicable).
2608-
if (AdditionalBypass.first) {
2609-
B.SetInsertPoint(AdditionalBypass.first,
2610-
AdditionalBypass.first->getFirstInsertionPt());
2619+
if (MainVectorTripCount) {
2620+
B.SetInsertPoint(getAdditionalBypassBlock(),
2621+
getAdditionalBypassBlock()->getFirstInsertionPt());
26112622
EndValueFromAdditionalBypass =
2612-
emitTransformedIndex(B, AdditionalBypass.second, II.getStartValue(),
2613-
Step, II.getKind(), II.getInductionBinOp());
2623+
emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step,
2624+
II.getKind(), II.getInductionBinOp());
26142625
EndValueFromAdditionalBypass->setName("ind.end");
26152626
}
26162627
}
@@ -2632,8 +2643,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26322643
for (BasicBlock *BB : BypassBlocks)
26332644
BCResumeVal->addIncoming(II.getStartValue(), BB);
26342645

2635-
if (AdditionalBypass.first)
2636-
BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
2646+
if (MainVectorTripCount)
2647+
BCResumeVal->setIncomingValueForBlock(getAdditionalBypassBlock(),
26372648
EndValueFromAdditionalBypass);
26382649
return BCResumeVal;
26392650
}
@@ -2653,11 +2664,7 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26532664
}
26542665

26552666
void InnerLoopVectorizer::createInductionResumeValues(
2656-
const SCEV2ValueTy &ExpandedSCEVs,
2657-
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2658-
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
2659-
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
2660-
"Inconsistent information about additional bypass.");
2667+
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
26612668
// We are going to resume the execution of the scalar loop.
26622669
// Go over all of the induction variables that we found and fix the
26632670
// PHIs that are left in the scalar version of the loop.
@@ -2670,7 +2677,7 @@ void InnerLoopVectorizer::createInductionResumeValues(
26702677
const InductionDescriptor &II = InductionEntry.second;
26712678
PHINode *BCResumeVal = createInductionResumeValue(
26722679
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
2673-
AdditionalBypass);
2680+
MainVectorTripCount);
26742681
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
26752682
}
26762683
}
@@ -7918,6 +7925,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79187925
nullptr, "vec.epilog.iter.check", true);
79197926
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
79207927
VecEpilogueIterationCountCheck);
7928+
AdditionalBypassBlock = VecEpilogueIterationCountCheck;
79217929

79227930
// Adjust the control flow taking the state info from the main loop
79237931
// vectorization into account.
@@ -8002,11 +8010,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80028010
// iterations left once the vector loop has completed.
80038011
// Note that when the vectorized epilogue is skipped due to iteration count
80048012
// check, then the resume value for the induction variable comes from
8005-
// the trip count of the main vector loop, hence passing the AdditionalBypass
8006-
// argument.
8007-
createInductionResumeValues(ExpandedSCEVs,
8008-
{VecEpilogueIterationCountCheck,
8009-
EPI.VectorTripCount} /* AdditionalBypass */);
8013+
// the trip count of the main vector loop, passed as the second argument.
8014+
createInductionResumeValues(ExpandedSCEVs, EPI.VectorTripCount);
80108015

80118016
return {LoopVectorPreHeader, EPResumeVal};
80128017
}
@@ -10325,7 +10330,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032510330

1032610331
ResumeV = MainILV.createInductionResumeValue(
1032710332
IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10328-
{EPI.MainLoopIterationCountCheck});
10333+
EPI.MainLoopIterationCountCheck);
1032910334
}
1033010335
assert(ResumeV && "Must have a resume value");
1033110336
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);

0 commit comments

Comments
 (0)