@@ -517,13 +517,15 @@ class InnerLoopVectorizer {
517
517
/// iteration count in the scalar epilogue, from where the vectorized loop
518
518
/// left off. \p Step is the SCEV-expanded induction step to use. In cases
519
519
/// where the loop skeleton is more complicated (i.e., epilogue vectorization)
520
- // / and the resume values can come from an additional bypass block, the \p
521
- // / AdditionalBypass pair provides information about the bypass block and the
522
- // / end value on the edge from bypass to this loop.
523
- PHINode *createInductionResumeValue (
524
- PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525
- ArrayRef<BasicBlock *> BypassBlocks,
526
- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
520
+ /// and the resume values can come from an additional bypass block,
521
+ /// \p MainVectorTripCount provides the trip count of the main vector loop,
522
+ /// which is used to compute the resume value reaching the scalar loop
523
+ /// preheader directly from this additional bypass block.
524
+ PHINode *createInductionResumeValue (PHINode *OrigPhi,
525
+ const InductionDescriptor &ID,
526
+ Value *Step,
527
+ ArrayRef<BasicBlock *> BypassBlocks,
528
+ Value *MainVectorTripCount = nullptr );
527
529
528
530
/// Returns the original loop trip count, i.e. the value held in TripCount.
529
531
Value *getTripCount () const { return TripCount; }
@@ -533,6 +535,14 @@ class InnerLoopVectorizer {
533
535
// / count of the original loop for both main loop and epilogue vectorization.
534
536
void setTripCount (Value *TC) { TripCount = TC; }
535
537
538
+ /// Return the additional bypass block which targets the scalar loop by
539
+ /// skipping the epilogue loop after completing the main loop. Asserts that the block has been set.
540
+ BasicBlock *getAdditionalBypassBlock () const {
541
+ assert (AdditionalBypassBlock &&
542
+ " Trying to access AdditionalBypassBlock but it has not been set" );
543
+ return AdditionalBypassBlock;
544
+ }
545
+
536
546
protected:
537
547
friend class LoopVectorizationPlanner ;
538
548
@@ -568,13 +578,11 @@ class InnerLoopVectorizer {
568
578
569
579
/// Create new phi nodes for the induction variables to resume iteration count
570
580
/// in the scalar epilogue, from where the vectorized loop left off.
571
- // / In cases where the loop skeleton is more complicated (eg. epilogue
572
- // / vectorization) and the resume values can come from an additional bypass
573
- // / block, the \p AdditionalBypass pair provides information about the bypass
574
- // / block and the end value on the edge from bypass to this loop.
575
- void createInductionResumeValues (
576
- const SCEV2ValueTy &ExpandedSCEVs,
577
- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
581
+ /// In cases where the loop skeleton is more complicated (i.e., epilogue
582
+ /// vectorization), \p MainVectorTripCount provides the trip count of the main
583
+ /// vector loop, which is used to compute these resume values.
584
+ void createInductionResumeValues (const SCEV2ValueTy &ExpandedSCEVs,
585
+ Value *MainVectorTripCount = nullptr );
578
586
579
587
// / Allow subclasses to override and print debug traces before/after vplan
580
588
// / execution, when trace information is requested.
@@ -664,6 +672,11 @@ class InnerLoopVectorizer {
664
672
// / for cleaning the checks, if vectorization turns out unprofitable.
665
673
GeneratedRTChecks &RTChecks;
666
674
675
+ /// The additional bypass block which conditionally skips over the epilogue
676
+ /// loop after executing the main loop. Needed to resume inductions and
677
+ /// reductions during epilogue vectorization.
678
+ BasicBlock *AdditionalBypassBlock = nullptr ;
679
+
667
680
VPlan &Plan;
668
681
};
669
682
@@ -2582,18 +2595,16 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2582
2595
2583
2596
PHINode *InnerLoopVectorizer::createInductionResumeValue (
2584
2597
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2585
- ArrayRef<BasicBlock *> BypassBlocks,
2586
- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598
+ ArrayRef<BasicBlock *> BypassBlocks, Value *MainVectorTripCount) {
2587
2599
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2588
2600
assert (VectorTripCount && " Expected valid arguments" );
2589
2601
2590
2602
Instruction *OldInduction = Legal->getPrimaryInduction ();
2591
- Value *EndValue = nullptr ;
2592
- Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
2593
- if (OrigPhi == OldInduction) {
2594
- // We know what the end value is.
2595
- EndValue = VectorTripCount;
2596
- } else {
2603
+ // For the primary induction the end values are known.
2604
+ Value *EndValue = VectorTripCount;
2605
+ Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2606
+ // Otherwise compute them accordingly.
2607
+ if (OrigPhi != OldInduction) {
2597
2608
IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
2598
2609
2599
2610
// Fast-math-flags propagate from the original induction instruction.
@@ -2605,12 +2616,12 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
2605
2616
EndValue->setName (" ind.end" );
2606
2617
2607
2618
// Compute the end value for the additional bypass (if applicable).
2608
- if (AdditionalBypass. first ) {
2609
- B.SetInsertPoint (AdditionalBypass. first ,
2610
- AdditionalBypass. first ->getFirstInsertionPt ());
2619
+ if (MainVectorTripCount ) {
2620
+ B.SetInsertPoint (getAdditionalBypassBlock () ,
2621
+ getAdditionalBypassBlock () ->getFirstInsertionPt ());
2611
2622
EndValueFromAdditionalBypass =
2612
- emitTransformedIndex (B, AdditionalBypass. second , II.getStartValue (),
2613
- Step, II.getKind (), II.getInductionBinOp ());
2623
+ emitTransformedIndex (B, MainVectorTripCount , II.getStartValue (), Step ,
2624
+ II.getKind (), II.getInductionBinOp ());
2614
2625
EndValueFromAdditionalBypass->setName (" ind.end" );
2615
2626
}
2616
2627
}
@@ -2632,8 +2643,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
2632
2643
for (BasicBlock *BB : BypassBlocks)
2633
2644
BCResumeVal->addIncoming (II.getStartValue (), BB);
2634
2645
2635
- if (AdditionalBypass. first )
2636
- BCResumeVal->setIncomingValueForBlock (AdditionalBypass. first ,
2646
+ if (MainVectorTripCount )
2647
+ BCResumeVal->setIncomingValueForBlock (getAdditionalBypassBlock () ,
2637
2648
EndValueFromAdditionalBypass);
2638
2649
return BCResumeVal;
2639
2650
}
@@ -2653,11 +2664,7 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2653
2664
}
2654
2665
2655
2666
void InnerLoopVectorizer::createInductionResumeValues (
2656
- const SCEV2ValueTy &ExpandedSCEVs,
2657
- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2658
- assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
2659
- (!AdditionalBypass.first && !AdditionalBypass.second )) &&
2660
- " Inconsistent information about additional bypass." );
2667
+ const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
2661
2668
// We are going to resume the execution of the scalar loop.
2662
2669
// Go over all of the induction variables that we found and fix the
2663
2670
// PHIs that are left in the scalar version of the loop.
@@ -2670,7 +2677,7 @@ void InnerLoopVectorizer::createInductionResumeValues(
2670
2677
const InductionDescriptor &II = InductionEntry.second ;
2671
2678
PHINode *BCResumeVal = createInductionResumeValue (
2672
2679
OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2673
- AdditionalBypass );
2680
+ MainVectorTripCount );
2674
2681
OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
2675
2682
}
2676
2683
}
@@ -7918,6 +7925,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7918
7925
nullptr , " vec.epilog.iter.check" , true );
7919
7926
emitMinimumVectorEpilogueIterCountCheck (LoopScalarPreHeader,
7920
7927
VecEpilogueIterationCountCheck);
7928
+ AdditionalBypassBlock = VecEpilogueIterationCountCheck;
7921
7929
7922
7930
// Adjust the control flow taking the state info from the main loop
7923
7931
// vectorization into account.
@@ -8002,11 +8010,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8002
8010
// iterations left once the vector loop has completed.
8003
8011
// Note that when the vectorized epilogue is skipped due to iteration count
8004
8012
// check, then the resume value for the induction variable comes from
8005
- // the trip count of the main vector loop, hence passing the AdditionalBypass
8006
- // argument.
8007
- createInductionResumeValues (ExpandedSCEVs,
8008
- {VecEpilogueIterationCountCheck,
8009
- EPI.VectorTripCount } /* AdditionalBypass */ );
8013
+ // the trip count of the main vector loop, passed as the second argument.
8014
+ createInductionResumeValues (ExpandedSCEVs, EPI.VectorTripCount );
8010
8015
8011
8016
return {LoopVectorPreHeader, EPResumeVal};
8012
8017
}
@@ -10325,7 +10330,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10325
10330
10326
10331
ResumeV = MainILV.createInductionResumeValue (
10327
10332
IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10328
- { EPI.MainLoopIterationCountCheck } );
10333
+ EPI.MainLoopIterationCountCheck );
10329
10334
}
10330
10335
assert (ResumeV && " Must have a resume value" );
10331
10336
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
0 commit comments