@@ -519,14 +519,14 @@ class InnerLoopVectorizer {
519
519
/// the induction resume value, and the value for the bypass block, if needed.
520
520
/// \p Step is the SCEV-expanded induction step to use. In cases where the
521
521
/// loop skeleton is more complicated (i.e., epilogue vectorization) and the
522
- /// resume values can come from an additional bypass block, the \p
523
- /// AdditionalBypass pair provides this additional bypass block along with the
524
- /// resume value coming from it .
525
- void createInductionResumeVPValue(
526
- VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
527
- Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
528
- VPBuilder &ScalarPHBuilder,
529
- std::pair<BasicBlock *, Value *> AdditionalBypass = { nullptr, nullptr} );
522
+ /// resume values can come from an additional bypass block, \p
523
+ /// AdditionalBypassValue provides the end value on the edge from bypass to
524
+ /// this loop.
525
+ void createInductionResumeVPValue(VPIRInstruction *InductionPhiIRI,
526
+ const InductionDescriptor &ID, Value *Step ,
527
+ ArrayRef<BasicBlock *> BypassBlocks,
528
+ VPBuilder &ScalarPHBuilder,
529
+ Value *AdditionalBypassValue = nullptr);
530
530
531
531
/// Returns the original loop trip count.
532
532
Value *getTripCount() const { return TripCount; }
@@ -539,12 +539,14 @@ class InnerLoopVectorizer {
539
539
/// Retrieve the bypass value associated with an original induction header
540
540
/// phi.
541
541
Value *getInductionAdditionalBypassValue(PHINode *OrigPhi) const {
542
- return Induction2AdditionalBypass .at(OrigPhi).second ;
542
+ return Induction2AdditionalBypassValue .at(OrigPhi);
543
543
}
544
544
545
545
/// Return the additional bypass block.
546
- BasicBlock *getInductionAdditionalBypassBlock() const {
547
- return Induction2AdditionalBypass.begin()->second.first;
546
+ BasicBlock *getAdditionalBypassBlock() const {
547
+ assert(AdditionalBypassBlock &&
548
+ "Trying to access AdditionalBypassBlock but it has not been set");
549
+ return AdditionalBypassBlock;
548
550
}
549
551
550
552
protected:
@@ -584,11 +586,10 @@ class InnerLoopVectorizer {
584
586
/// in the scalar epilogue, from where the vectorized loop left off.
585
587
/// In cases where the loop skeleton is more complicated (e.g., epilogue
586
588
/// vectorization) and the resume values can come from an additional bypass
587
- /// block, the \p AdditionalBypass pair provides information about the bypass
588
- /// block and the end value on the edge from bypass to this loop.
589
- void createInductionResumeVPValues(
590
- const SCEV2ValueTy &ExpandedSCEVs,
591
- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
589
+ /// block, the \p AdditionalBypassValue provides the end value on the edge
590
+ /// from bypass to this loop.
591
+ void createInductionResumeVPValues(const SCEV2ValueTy &ExpandedSCEVs,
592
+ Value *AdditionalBypassValue = nullptr);
592
593
593
594
/// Allow subclasses to override and print debug traces before/after vplan
594
595
/// execution, when trace information is requested.
@@ -678,11 +679,15 @@ class InnerLoopVectorizer {
678
679
/// for cleaning the checks, if vectorization turns out unprofitable.
679
680
GeneratedRTChecks &RTChecks;
680
681
681
- /// Mapping of induction phis to their bypass values and bypass blocks. They
682
+ /// The additional bypass block which conditionally skips over the epilogue
683
+ /// loop after executing the main loop. Needed to resume inductions and
684
+ /// reductions during epilogue vectorization.
685
+ BasicBlock *AdditionalBypassBlock = nullptr;
686
+
687
+ /// Mapping of induction phis to their additional bypass values. They
682
688
/// need to be added as operands to phi nodes in the scalar loop preheader
683
689
/// after the epilogue skeleton has been created.
684
- DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
685
- Induction2AdditionalBypass;
690
+ DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
686
691
687
692
VPlan &Plan;
688
693
};
@@ -2603,14 +2608,14 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2603
2608
void InnerLoopVectorizer::createInductionResumeVPValue(
2604
2609
VPIRInstruction *InductionPhiRI, const InductionDescriptor &II, Value *Step,
2605
2610
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2606
- std::pair<BasicBlock *, Value *> AdditionalBypass ) {
2611
+ Value *AdditionalBypassValue ) {
2607
2612
auto *OrigPhi = cast<PHINode>(&InductionPhiRI->getInstruction());
2608
2613
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
2609
2614
assert(VectorTripCount && "Expected valid arguments");
2610
2615
2611
2616
Instruction *OldInduction = Legal->getPrimaryInduction();
2612
2617
Value *EndValue = nullptr;
2613
- Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
2618
+ Value *EndValueFromAdditionalBypass = AdditionalBypassValue ;
2614
2619
if (OrigPhi == OldInduction) {
2615
2620
// We know what the end value is.
2616
2621
EndValue = VectorTripCount;
@@ -2626,11 +2631,11 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
2626
2631
EndValue->setName("ind.end");
2627
2632
2628
2633
// Compute the end value for the additional bypass (if applicable).
2629
- if (AdditionalBypass.first ) {
2630
- B.SetInsertPoint(AdditionalBypass.first ,
2631
- AdditionalBypass.first ->getFirstInsertionPt());
2634
+ if (AdditionalBypassValue ) {
2635
+ B.SetInsertPoint(getAdditionalBypassBlock() ,
2636
+ getAdditionalBypassBlock() ->getFirstInsertionPt());
2632
2637
EndValueFromAdditionalBypass =
2633
- emitTransformedIndex(B, AdditionalBypass.second , II.getStartValue(),
2638
+ emitTransformedIndex(B, AdditionalBypassValue , II.getStartValue(),
2634
2639
Step, II.getKind(), II.getInductionBinOp());
2635
2640
EndValueFromAdditionalBypass->setName("ind.end");
2636
2641
}
@@ -2644,14 +2649,13 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
2644
2649
"InductionPhiRI should not have any operands");
2645
2650
InductionPhiRI->addOperand(ResumePhiRecipe);
2646
2651
2647
- if (AdditionalBypass.first ) {
2652
+ if (AdditionalBypassValue ) {
2648
2653
// Store the bypass value here, as it needs to be added as operand to its
2649
2654
// scalar preheader phi node after the epilogue skeleton has been created.
2650
2655
// TODO: Directly add as extra operand to the VPResumePHI recipe.
2651
- assert(!Induction2AdditionalBypass .contains(OrigPhi) &&
2656
+ assert(!Induction2AdditionalBypassValue .contains(OrigPhi) &&
2652
2657
"entry for OrigPhi already exits");
2653
- Induction2AdditionalBypass[OrigPhi] = {AdditionalBypass.first,
2654
- EndValueFromAdditionalBypass};
2658
+ Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
2655
2659
}
2656
2660
}
2657
2661
@@ -2670,19 +2674,13 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2670
2674
}
2671
2675
2672
2676
void InnerLoopVectorizer::createInductionResumeVPValues(
2673
- const SCEV2ValueTy &ExpandedSCEVs,
2674
- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2675
- assert(((AdditionalBypass.first && AdditionalBypass.second) ||
2676
- (!AdditionalBypass.first && !AdditionalBypass.second)) &&
2677
- "Inconsistent information about additional bypass.");
2677
+ const SCEV2ValueTy &ExpandedSCEVs, Value *AdditionalBypassValue) {
2678
2678
// We are going to resume the execution of the scalar loop.
2679
2679
// Go over all of the induction variable PHIs of the scalar loop header and
2680
2680
// fix their starting values, which depend on the counter of the last
2681
- // iteration of the vectorized loop. The starting values of PHI nodes depend
2682
- // on the counter of the last iteration in the vectorized loop. If we come
2683
- // from one of the LoopBypassBlocks then we need to start from the original
2684
- // start value. If we come from the AdditionalBypass then we need to start
2685
- // from its value.
2681
+ // iteration of the vectorized loop. If we come from one of the
2682
+ // LoopBypassBlocks then we need to start from the original start value. If we
2683
+ // come from the AdditionalBypass then we need to start from its value.
2686
2684
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
2687
2685
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
2688
2686
for (VPRecipeBase &R : *Plan.getScalarHeader()) {
@@ -2695,7 +2693,7 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
2695
2693
const InductionDescriptor &II = Legal->getInductionVars().find(Phi)->second;
2696
2694
createInductionResumeVPValue(PhiR, II, getExpandedStep(II, ExpandedSCEVs),
2697
2695
LoopBypassBlocks, ScalarPHBuilder,
2698
- AdditionalBypass );
2696
+ AdditionalBypassValue );
2699
2697
}
2700
2698
}
2701
2699
@@ -7744,7 +7742,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7744
7742
// 2.5 When vectorizing the epilogue, fix reduction and induction resume
7745
7743
// values from the additional bypass block.
7746
7744
if (VectorizingEpilogue) {
7747
- BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock ();
7745
+ BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock ();
7748
7746
for (VPRecipeBase &R : *ExitVPBB) {
7749
7747
fixReductionScalarResumeWhenVectorizingEpilog(
7750
7748
&R, State, State.CFG.VPBB2IRBB[ExitVPBB], BypassBlock);
@@ -7941,6 +7939,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7941
7939
nullptr, "vec.epilog.iter.check", true);
7942
7940
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
7943
7941
VecEpilogueIterationCountCheck);
7942
+ AdditionalBypassBlock = VecEpilogueIterationCountCheck;
7944
7943
7945
7944
// Adjust the control flow taking the state info from the main loop
7946
7945
// vectorization into account.
@@ -8017,12 +8016,13 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8017
8016
// preheader.
8018
8017
PHINode *EPResumeVal = nullptr;
8019
8018
Type *IdxTy = Legal->getWidestInductionType();
8019
+ Value *TC = EPI.VectorTripCount;
8020
+ Constant *Init = ConstantInt::get(IdxTy, 0);
8021
+
8020
8022
for (PHINode &P : LoopVectorPreHeader->phis()) {
8021
8023
if (P.getType() == IdxTy &&
8022
- P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) ==
8023
- EPI.VectorTripCount &&
8024
- P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) ==
8025
- ConstantInt::get(IdxTy, 0)) {
8024
+ P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) == TC &&
8025
+ P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) == Init) {
8026
8026
EPResumeVal = &P;
8027
8027
EPResumeVal->setName("vec.epilog.resume.val");
8028
8028
break;
@@ -8031,22 +8031,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8031
8031
if (!EPResumeVal) {
8032
8032
EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
8033
8033
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
8034
- EPResumeVal->addIncoming(EPI.VectorTripCount,
8035
- VecEpilogueIterationCountCheck);
8036
- EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
8037
- EPI.MainLoopIterationCountCheck);
8034
+ EPResumeVal->addIncoming(TC, VecEpilogueIterationCountCheck);
8035
+ EPResumeVal->addIncoming(Init, EPI.MainLoopIterationCountCheck);
8038
8036
}
8039
8037
8040
8038
// Generate induction resume values. These variables save the new starting
8041
8039
// indexes for the scalar loop. They are used to test if there are any tail
8042
8040
// iterations left once the vector loop has completed.
8043
8041
// Note that when the vectorized epilogue is skipped due to iteration count
8044
8042
// check, then the resume value for the induction variable comes from
8045
- // the trip count of the main vector loop, hence passing the AdditionalBypass
8046
- // argument.
8047
- createInductionResumeVPValues(ExpandedSCEVs,
8048
- {VecEpilogueIterationCountCheck,
8049
- EPI.VectorTripCount} /* AdditionalBypass */);
8043
+ // the trip count of the main vector loop, hence passing the
8044
+ // AdditionalBypassValue argument.
8045
+ createInductionResumeVPValues(
8046
+ ExpandedSCEVs, EPI.VectorTripCount /* AdditionalBypassValue */);
8050
8047
8051
8048
return {LoopVectorPreHeader, EPResumeVal};
8052
8049
}
@@ -10358,6 +10355,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10358
10355
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
10359
10356
IndPhi = WidenInd->getPHINode();
10360
10357
}
10358
+ // Hook up to the PHINode generated by a ResumePhi recipe of the main
10359
+ // loop VPlan, which feeds the scalar loop.
10361
10360
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
10362
10361
}
10363
10362
assert(ResumeV && "Must have a resume value");
0 commit comments