@@ -7422,8 +7422,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
7422
7422
}
7423
7423
7424
7424
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is,
7425
- // create a merge phi node for it and add it to \p ReductionResumeValues.
7426
- static void createAndCollectMergePhiForReduction (
7425
+ // add it to \p ReductionResumeValues and add incoming values from the main
7426
+ // vector loop.
7427
+ static void updateAndCollectMergePhiForReductionForEpilogueVectorization (
7427
7428
VPInstruction *RedResult,
7428
7429
DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
7429
7430
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
@@ -7432,15 +7433,24 @@ static void createAndCollectMergePhiForReduction(
7432
7433
RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
7433
7434
return ;
7434
7435
7436
+ using namespace VPlanPatternMatch ;
7437
+ VPValue *ResumePhiVPV =
7438
+ cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7439
+ return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7440
+ m_VPValue ()));
7441
+ }));
7442
+ auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
7435
7443
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
7436
7444
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7445
+ if (!VectorizingEpilogue) {
7446
+ ReductionResumeValues[&RdxDesc] = BCBlockPhi;
7447
+ return ;
7448
+ }
7437
7449
7438
- Value *FinalValue =
7439
- State.get (RedResult, VPIteration (0 , VPLane::getFirstLane ()));
7440
7450
auto *ResumePhi =
7441
7451
dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7442
- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7443
- RdxDesc.getRecurrenceKind ())) {
7452
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7453
+ RdxDesc.getRecurrenceKind ())) {
7444
7454
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
7445
7455
assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
7446
7456
assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
@@ -7450,42 +7460,15 @@ static void createAndCollectMergePhiForReduction(
7450
7460
" when vectorizing the epilogue loop, we need a resume phi from main "
7451
7461
" vector loop" );
7452
7462
7453
- // TODO: bc.merge.rdx should not be created here, instead it should be
7454
- // modeled in VPlan.
7455
7463
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7456
- // Create a phi node that merges control-flow from the backedge-taken check
7457
- // block and the middle block.
7458
- auto *BCBlockPhi =
7459
- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7460
- LoopScalarPreHeader->getTerminator ()->getIterator ());
7461
-
7462
7464
// If we are fixing reductions in the epilogue loop then we should already
7463
7465
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
7464
7466
// we carry over the incoming values correctly.
7465
7467
for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7466
- if (Incoming == LoopMiddleBlock)
7467
- BCBlockPhi->addIncoming (FinalValue, Incoming);
7468
- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7469
- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7470
- Incoming);
7471
- else
7472
- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7468
+ if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7469
+ BCBlockPhi->setIncomingValueForBlock (
7470
+ Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
7473
7471
}
7474
-
7475
- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7476
- // TODO: This fixup should instead be modeled in VPlan.
7477
- // Fix the scalar loop reduction variable with the incoming reduction sum
7478
- // from the vector body and from the backedge value.
7479
- int IncomingEdgeBlockIdx =
7480
- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7481
- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7482
- // Pick the other block.
7483
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7484
- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7485
- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7486
- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
7487
-
7488
- ReductionResumeValues[&RdxDesc] = BCBlockPhi;
7489
7472
}
7490
7473
7491
7474
std::pair<DenseMap<const SCEV *, Value *>,
@@ -7579,11 +7562,12 @@ LoopVectorizationPlanner::executePlan(
7579
7562
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
7580
7563
auto *ExitVPBB =
7581
7564
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7582
- for (VPRecipeBase &R : *ExitVPBB) {
7583
- createAndCollectMergePhiForReduction (
7584
- dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7586
- }
7565
+ if (IsEpilogueVectorization)
7566
+ for (VPRecipeBase &R : *ExitVPBB) {
7567
+ updateAndCollectMergePhiForReductionForEpilogueVectorization (
7568
+ dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7569
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7570
+ }
7587
7571
7588
7572
// 2.6. Maintain Loop Hints
7589
7573
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9369,6 +9353,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9369
9353
m_VPValue ()));
9370
9354
});
9371
9355
9356
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
9357
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
9358
+ // Order is strict: first is the exit block, second is the scalar
9359
+ // preheader.
9360
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
9361
+ } else {
9362
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
9363
+ }
9364
+
9365
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9366
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9367
+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9368
+ {}, " bc.merge.rdx" );
9369
+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9370
+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9371
+
9372
9372
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9373
9373
// with a boolean reduction phi node to check if the condition is true in
9374
9374
// any iteration. The final value is selected by the final
0 commit comments