@@ -599,10 +599,6 @@ class InnerLoopVectorizer {
599
599
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
600
600
VPlan &Plan, VPTransformState &State);
601
601
602
- /// Create the phi node for the resume value of first order recurrences in the
603
- /// scalar preheader and update the users in the scalar loop.
604
- void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
605
-
606
602
/// Iteratively sink the scalarized operands of a predicated instruction into
607
603
/// the block that was created for it.
608
604
void sinkScalarOperands(Instruction *PredInst);
@@ -3286,19 +3282,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
3286
3282
if (EnableVPlanNativePath)
3287
3283
fixNonInductionPHIs(Plan, State);
3288
3284
3289
- // At this point every instruction in the original loop is widened to a
3290
- // vector form. Note that fixing reduction phis, as well as extracting the
3291
- // exit and resume values for fixed-order recurrences are already modeled in
3292
- // VPlan. All that remains to do here is to create a phi in the scalar
3293
- // pre-header for each fixed-order recurrence resume value.
3294
- // TODO: Also model creating phis in the scalar pre-header in VPlan.
3295
- for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
3296
- if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
3297
- continue;
3298
- fixFixedOrderRecurrence(LO, State);
3299
- Plan.removeLiveOut(LO->getPhi());
3300
- }
3301
-
3302
3285
// Forget the original basic block.
3303
3286
PSE.getSE()->forgetLoop(OrigLoop);
3304
3287
PSE.getSE()->forgetBlockAndLoopDispositions();
@@ -3335,10 +3318,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
3335
3318
VectorLoop->getHeader(), Plan, State);
3336
3319
}
3337
3320
3338
- // Fix LCSSA phis not already fixed earlier. Extracts may need to be generated
3339
- // in the exit block, so update the builder.
3340
- State.Builder.SetInsertPoint(State.CFG.ExitBB,
3341
- State.CFG.ExitBB->getFirstNonPHIIt());
3321
+ // Fix live-out phis not already fixed earlier.
3342
3322
for (const auto &KV : Plan.getLiveOuts())
3343
3323
KV.second->fixPhi(Plan, State);
3344
3324
@@ -3366,32 +3346,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
3366
3346
VF.getKnownMinValue() * UF);
3367
3347
}
3368
3348
3369
- void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
3370
- VPTransformState &State) {
3371
- // Extract the last vector element in the middle block. This will be the
3372
- // initial value for the recurrence when jumping to the scalar loop.
3373
- VPValue *VPExtract = LO->getOperand(0);
3374
- using namespace llvm::VPlanPatternMatch;
3375
- assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
3376
- m_VPValue(), m_VPValue())) &&
3377
- "FOR LiveOut expects to use an extract from end.");
3378
- Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);
3379
-
3380
- // Fix the initial value of the original recurrence in the scalar loop.
3381
- PHINode *ScalarHeaderPhi = LO->getPhi();
3382
- auto *InitScalarFOR =
3383
- ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
3384
- Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
3385
- auto *ScalarPreheaderPhi =
3386
- Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
3387
- for (auto *BB : predecessors(LoopScalarPreHeader)) {
3388
- auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
3389
- ScalarPreheaderPhi->addIncoming(Incoming, BB);
3390
- }
3391
- ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
3392
- ScalarPreheaderPhi);
3393
- }
3394
-
3395
3349
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
3396
3350
// The basic block and loop containing the predicated instruction.
3397
3351
auto *PredBB = PredInst->getParent();
@@ -8798,6 +8752,59 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
8798
8752
}
8799
8753
}
8800
8754
8755
+ /// Feed a resume value for every first-order recurrence (FOR) header phi from
8756
+ /// the vector loop to the scalar loop, if the middle block branches to the
8757
+ /// scalar preheader: an ExtractFromEnd recipe is added to the middle block, a
8758
+ /// ResumePhi recipe to the scalar preheader, and a VPLiveOut using the latter.
8759
+ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
8760
+ VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8761
+
8762
+ // Start by finding out whether the middle block branches to the scalar
8763
+ // preheader. Any successor that is not a VPIRBasicBlock is taken to be the
8764
+ // scalar preheader; VPIRBasicBlock successors (the exit block) are skipped.
8765
+ // TODO: Should be replaced by
8766
+ // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8767
+ // scalar region is modeled as well.
8768
+ VPBasicBlock *ScalarPHVPBB = nullptr;
8769
+ auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8770
+ for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
8771
+ if (isa<VPIRBasicBlock>(Succ))
8772
+ continue;
8773
+ assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?"); // At most one non-VPIRBasicBlock successor is expected.
8774
+ ScalarPHVPBB = cast<VPBasicBlock>(Succ);
8775
+ }
8776
+ if (!ScalarPHVPBB)
8777
+ return; // No scalar preheader successor found: nothing has been added to Plan.
8778
+
8779
+ VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8780
+ VPBuilder MiddleBuilder(MiddleVPBB);
8781
+ // If the middle block has a terminator, move the insert point so new recipes
8782
+ // go before it, or before its condition when operand 0 is a VPInstruction.
8783
+ if (auto *Terminator = MiddleVPBB->getTerminator()) {
8784
+ auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
8785
+ assert((!Condition || Condition->getParent() == MiddleVPBB) &&
8786
+ "Condition expected in MiddleVPBB");
8787
+ MiddleBuilder.setInsertPoint(Condition ? Condition : Terminator);
8788
+ }
8789
+ VPValue *OneVPV = Plan.getOrAddLiveIn(
8790
+ ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); // Constant 1, shared as the second operand of every ExtractFromEnd below.
8791
+
8792
+ for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
8793
+ auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
8794
+ if (!FOR)
8795
+ continue; // Only first-order recurrence header phis are handled here.
8796
+
8797
+ // Extract the resume value from the backedge value, then add a ResumePhi and a VPLiveOut for it.
8798
+ auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
8799
+ {FOR->getBackedgeValue(), OneVPV},
8800
+ {}, "vector.recur.extract");
8801
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
8802
+ VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
8803
+ "scalar.recur.init"); // Operands: the extracted resume value and the recurrence's start value.
8804
+ Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe); // Live-out is keyed on the FOR's underlying IR phi.
8805
+ }
8806
+ }
8807
+
8801
8808
VPlanPtr
8802
8809
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8803
8810
@@ -8967,6 +8974,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8967
8974
"VPBasicBlock");
8968
8975
RecipeBuilder.fixHeaderPhis();
8969
8976
8977
+ addLiveOutsForFirstOrderRecurrences(*Plan);
8978
+
8970
8979
// ---------------------------------------------------------------------------
8971
8980
// Transform initial VPlan: Apply previously taken decisions, in order, to
8972
8981
// bring the VPlan to its final state.
0 commit comments