@@ -7435,23 +7435,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
7435
7435
}
7436
7436
7437
7437
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7438
- // create a merge phi node for it.
7439
- static void createAndCollectMergePhiForReduction (
7440
- VPInstruction *RedResult,
7441
- VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock ,
7442
- bool VectorizingEpilogue) {
7438
+ // create a merge phi node for it and add incoming values from the main vector
7439
+ // loop.
7440
+ static void updateAndCollectMergePhiForReductionForEpilogueVectorization (
7441
+ VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
7442
+ BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
7443
7443
if (!RedResult ||
7444
7444
RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
7445
7445
return ;
7446
7446
7447
+ using namespace VPlanPatternMatch ;
7448
+ VPValue *ResumePhiVPV =
7449
+ cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7450
+ return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7451
+ m_VPValue ()));
7452
+ }));
7453
+ auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
7447
7454
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
7448
7455
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7456
+ if (!VectorizingEpilogue)
7457
+ return ;
7449
7458
7450
- Value *FinalValue = State.get (RedResult, VPLane (VPLane::getFirstLane ()));
7451
7459
auto *ResumePhi =
7452
7460
dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7453
- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7454
- RdxDesc.getRecurrenceKind ())) {
7461
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7462
+ RdxDesc.getRecurrenceKind ())) {
7455
7463
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
7456
7464
assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
7457
7465
assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
@@ -7461,40 +7469,15 @@ static void createAndCollectMergePhiForReduction(
7461
7469
" when vectorizing the epilogue loop, we need a resume phi from main "
7462
7470
" vector loop" );
7463
7471
7464
- // TODO: bc.merge.rdx should not be created here, instead it should be
7465
- // modeled in VPlan.
7466
7472
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7467
- // Create a phi node that merges control-flow from the backedge-taken check
7468
- // block and the middle block.
7469
- auto *BCBlockPhi =
7470
- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7471
- LoopScalarPreHeader->getTerminator ()->getIterator ());
7472
-
7473
7473
// If we are fixing reductions in the epilogue loop then we should already
7474
7474
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
7475
7475
// we carry over the incoming values correctly.
7476
7476
for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7477
- if (Incoming == LoopMiddleBlock)
7478
- BCBlockPhi->addIncoming (FinalValue, Incoming);
7479
- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7480
- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7481
- Incoming);
7482
- else
7483
- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7477
+ if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7478
+ BCBlockPhi->setIncomingValueForBlock (
7479
+ Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
7484
7480
}
7485
-
7486
- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7487
- // TODO: This fixup should instead be modeled in VPlan.
7488
- // Fix the scalar loop reduction variable with the incoming reduction sum
7489
- // from the vector body and from the backedge value.
7490
- int IncomingEdgeBlockIdx =
7491
- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7492
- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7493
- // Pick the other block.
7494
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7495
- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7496
- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7497
- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
7498
7481
}
7499
7482
7500
7483
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7585,11 +7568,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7585
7568
// 2.5 Collect reduction resume values.
7586
7569
auto *ExitVPBB =
7587
7570
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7588
- for (VPRecipeBase &R : *ExitVPBB) {
7589
- createAndCollectMergePhiForReduction (
7590
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7591
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7592
- }
7571
+ if (IsEpilogueVectorization)
7572
+ for (VPRecipeBase &R : *ExitVPBB) {
7573
+ updateAndCollectMergePhiForReductionForEpilogueVectorization (
7574
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7575
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7576
+ }
7593
7577
7594
7578
// 2.6. Maintain Loop Hints
7595
7579
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9383,6 +9367,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9383
9367
});
9384
9368
FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9385
9369
9370
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
9371
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
9372
+ // Order is strict: first is the exit block, second is the scalar
9373
+ // preheader.
9374
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
9375
+ } else {
9376
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
9377
+ }
9378
+
9379
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9380
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9381
+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9382
+ {}, " bc.merge.rdx" );
9383
+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9384
+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9385
+
9386
9386
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9387
9387
// with a boolean reduction phi node to check if the condition is true in
9388
9388
// any iteration. The final value is selected by the final
0 commit comments