@@ -2575,22 +2575,15 @@ void InnerLoopVectorizer::createInductionResumeValue(
2575
2575
assert (VectorTripCount && " Expected valid arguments" );
2576
2576
2577
2577
Instruction *OldInduction = Legal->getPrimaryInduction ();
2578
- Value *EndValue = nullptr ;
2579
2578
Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
2580
2579
if (OrigPhi == OldInduction) {
2581
- // We know what the end value is.
2582
- EndValue = VectorTripCount;
2583
2580
} else {
2584
2581
IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
2585
2582
2586
2583
// Fast-math-flags propagate from the original induction instruction.
2587
2584
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2588
2585
B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2589
2586
2590
- EndValue = emitTransformedIndex (B, VectorTripCount, II.getStartValue (),
2591
- Step, II.getKind (), II.getInductionBinOp ());
2592
- EndValue->setName (" ind.end" );
2593
-
2594
2587
// Compute the end value for the additional bypass (if applicable).
2595
2588
if (AdditionalBypass.first ) {
2596
2589
B.SetInsertPoint (AdditionalBypass.first ,
@@ -2602,26 +2595,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
2602
2595
}
2603
2596
}
2604
2597
2605
- VPBasicBlock *MiddleVPBB =
2606
- cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
2607
-
2608
- VPBasicBlock *ScalarPHVPBB = nullptr ;
2609
- if (MiddleVPBB->getNumSuccessors () == 2 ) {
2610
- // Order is strict: first is the exit block, second is the scalar preheader.
2611
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
2612
- } else {
2613
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
2614
- }
2615
-
2616
- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
2617
- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2618
- VPInstruction::ResumePhi,
2619
- {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2620
- OrigPhi->getDebugLoc (), " bc.resume.val" );
2621
-
2622
- auto *ScalarLoopHeader =
2623
- cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
2624
- addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2625
2598
InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2626
2599
EndValueFromAdditionalBypass};
2627
2600
}
@@ -7660,10 +7633,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7660
7633
ILV.getOrCreateVectorTripCount (nullptr ),
7661
7634
CanonicalIVStartValue, State);
7662
7635
7636
+ VPBasicBlock *MiddleVPBB =
7637
+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7638
+
7639
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
7640
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
7641
+ // Order is strict: first is the exit block, second is the scalar
7642
+ // preheader.
7643
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
7644
+ } else {
7645
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
7646
+ }
7647
+
7663
7648
BestVPlan.execute (&State);
7664
7649
7665
7650
// 2.5 Collect reduction resume values.
7666
- auto *ExitVPBB =
7651
+ VPBasicBlock *ExitVPBB =
7667
7652
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7668
7653
for (VPRecipeBase &R : *ExitVPBB) {
7669
7654
createAndCollectMergePhiForReduction (
@@ -7948,6 +7933,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7948
7933
// Generate a resume induction for the vector epilogue and put it in the
7949
7934
// vector epilogue preheader
7950
7935
Type *IdxTy = Legal->getWidestInductionType ();
7936
+
7951
7937
PHINode *EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
7952
7938
EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
7953
7939
EPResumeVal->addIncoming (EPI.VectorTripCount , VecEpilogueIterationCountCheck);
@@ -8835,6 +8821,74 @@ addUsersInExitBlock(VPlan &Plan,
8835
8821
}
8836
8822
}
8837
8823
8824
/// For every wide induction phi in the header of \p Plan's vector loop region,
/// create a ResumePhi VPInstruction ("bc.resume.val") in the scalar preheader
/// and add it as an operand to the matching original phi in the scalar loop
/// header. The resume phi takes the induction's end value and its start value
/// as operands. Integer/FP inductions that carry a truncate instruction are
/// skipped, as are all header phis that are not wide int/FP or pointer
/// inductions.
+ static void addResumeValuesForInductions (VPlan &Plan) {
// Type analysis seeded with the canonical IV's scalar type; used below to
// decide whether the end value must be cast to the induction's type.
8825
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8826
+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8827
+
// End-value recipes are built with a builder constructed on the single
// predecessor block of the vector loop region.
8828
+ VPBuilder Builder (
8829
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ()));
8830
+ for (VPRecipeBase &R : Header->phis ()) {
// Per-induction data. Both accepted recipe kinds assign all of these before
// use; every other recipe kind hits a `continue` first.
8831
+ PHINode *OrigPhi;
8832
+ const InductionDescriptor *ID;
8833
+ VPValue *Start;
8834
+ VPValue *Step;
8835
+ Type *ScalarTy;
// Only set from the int/FP induction recipe; stays false for pointer
// inductions.
8836
+ bool IsCanonical = false ;
8837
+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
// Inductions with an associated truncate instruction get no resume value
// here.
8838
+ if (WideIV->getTruncInst ())
8839
+ continue ;
8840
+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8841
+ ID = &WideIV->getInductionDescriptor ();
8842
+ Start = WideIV->getStartValue ();
8843
+ Step = WideIV->getStepValue ();
8844
+ ScalarTy = WideIV->getScalarType ();
8845
+ IsCanonical = WideIV->isCanonical ();
8846
+ } else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8847
+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8848
+ ID = &WideIV->getInductionDescriptor ();
8849
+ Start = WideIV->getStartValue ();
// NOTE(review): operand 1 is presumably the step of the pointer induction
// recipe -- confirm against the recipe's operand layout.
8850
+ Step = WideIV->getOperand (1 );
// The scalar type is read from the start value's underlying IR value, so
// this path requires Start to be a live-in.
8851
+ ScalarTy = Start->getLiveInIRValue ()->getType ();
8852
+ } else {
8853
+ continue ;
8854
+ }
8855
+
// A canonical induction ends at the vector trip count itself; any other
// induction gets a derived IV built from its start, the vector trip count
// and its step.
8856
+ VPValue *EndValue = &Plan.getVectorTripCount ();
8857
+ if (!IsCanonical) {
8858
+ EndValue = Builder.createDerivedIV (
8859
+ ID->getKind (),
8860
+ dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp ()), Start,
8861
+ &Plan.getVectorTripCount (), Step);
8862
+ }
8863
+
// NOTE(review): the cast opcode is always Trunc, so this presumably assumes
// a mismatching end value is a wider integer than ScalarTy -- confirm.
8864
+ if (ScalarTy != TypeInfo.inferScalarType (EndValue)) {
8865
+ EndValue =
8866
+ Builder.createScalarCast (Instruction::Trunc, EndValue, ScalarTy);
8867
+ }
8868
+
// NOTE(review): the middle-block and scalar-preheader lookups below do not
// depend on R and look hoistable out of the loop -- confirm nothing in the
// loop body changes the plan's CFG before moving them.
8869
+ VPBasicBlock *MiddleVPBB =
8870
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8871
+
8872
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
8873
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
8874
+ // Order is strict: first is the exit block, second is the scalar
8875
+ // preheader.
8876
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8877
+ } else {
8878
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
8879
+ }
8880
+
// Create the resume phi in the scalar preheader with operands
// {EndValue, Start}, reusing the original phi's debug location.
8881
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8882
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
8883
+ VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc (),
8884
+ " bc.resume.val" );
8885
+
// Hook the resume phi up to the original phi in the scalar loop header,
// which is the single successor of the scalar preheader.
8886
+ auto *ScalarLoopHeader =
8887
+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
8888
+ addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8889
+ }
8890
+ }
8891
+
8838
8892
// / Handle live-outs for first order reductions, both in the scalar preheader
8839
8893
// / and the original exit block:
8840
8894
// / 1. Feed a resume value for every FOR from the vector loop to the scalar
@@ -9145,6 +9199,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9145
9199
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9146
9200
addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9147
9201
addUsersInExitBlock (*Plan, ExitUsersToFix);
9202
+ addResumeValuesForInductions (*Plan);
9148
9203
9149
9204
// ---------------------------------------------------------------------------
9150
9205
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9250,6 +9305,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9250
9305
bool HasNUW = true ;
9251
9306
addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
9252
9307
DebugLoc ());
9308
+ addResumeValuesForInductions (*Plan);
9253
9309
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9254
9310
return Plan;
9255
9311
}
@@ -9533,7 +9589,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
9533
9589
State.Builder , CanonicalIV, getStartValue ()->getLiveInIRValue (), Step,
9534
9590
Kind, cast_if_present<BinaryOperator>(FPBinOp));
9535
9591
DerivedIV->setName (" offset.idx" );
9536
- assert (DerivedIV != CanonicalIV && " IV didn't need transforming?" );
9592
+ assert ((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9593
+ " IV didn't need transforming?" );
9537
9594
9538
9595
State.set (this , DerivedIV, VPLane (0 ));
9539
9596
}
@@ -10202,6 +10259,50 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10202
10259
EPI, &LVL, &CM, BFI, PSI, Checks,
10203
10260
*BestMainPlan);
10204
10261
10262
+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10263
+ // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those will need their resume-values computed from the main vector loop. Others can be removed in the main VPlan.
10264
+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10265
+ for (VPRecipeBase &R :
10266
+ BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10267
+ if (!isa<VPWidenIntOrFpInductionRecipe,
10268
+ VPWidenPointerInductionRecipe>(&R))
10269
+ continue ;
10270
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10271
+ WidenedPhis.insert (
10272
+ cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10273
+ else
10274
+ WidenedPhis.insert (
10275
+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10276
+ }
10277
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10278
+ BestMainPlan->getVectorLoopRegion ()->getSingleSuccessor ());
10279
+
10280
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
10281
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
10282
+ // Order is strict: first is the exit block, second is the scalar
10283
+ // preheader.
10284
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
10285
+ } else {
10286
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
10287
+ }
10288
+
10289
+ for (VPRecipeBase &R :
10290
+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ())) {
10291
+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10292
+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10293
+ if (!IRI)
10294
+ break ;
10295
+ if (WidenedPhis.contains (IRI) ||
10296
+ !LVL.getInductionVars ().contains (IRI))
10297
+ continue ;
10298
+ VPRecipeBase *ResumePhi =
10299
+ VPIRInst->getOperand (0 )->getDefiningRecipe ();
10300
+ VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10301
+ Constant::getNullValue (IRI->getType ())));
10302
+ ResumePhi->eraseFromParent ();
10303
+ }
10304
+ VPlanTransforms::removeDeadRecipes (*BestMainPlan);
10305
+
10205
10306
auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10206
10307
*BestMainPlan, MainILV, DT, true );
10207
10308
++LoopsVectorized;
@@ -10210,7 +10311,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10210
10311
// edges from the first pass.
10211
10312
EPI.MainLoopVF = EPI.EpilogueVF ;
10212
10313
EPI.MainLoopUF = EPI.EpilogueUF ;
10213
- VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10214
10314
EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
10215
10315
ORE, EPI, &LVL, &CM, BFI, PSI,
10216
10316
Checks, BestEpiPlan);
0 commit comments