@@ -7422,8 +7422,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74227422}
74237423
74247424// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7425- // create a merge phi node for it and add it to \p ReductionResumeValues.
7426- static void createAndCollectMergePhiForReduction (
7425+ // record its resume phi in \p ReductionResumeValues or, when vectorizing the
7426+ // epilogue, update that phi's incoming values from the main vector loop.
7427+ static void updateAndCollectMergePhiForReductionForEpilogueVectorization (
74277428 VPInstruction *RedResult,
74287429 DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
74297430 VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
@@ -7432,15 +7433,24 @@ static void createAndCollectMergePhiForReduction(
74327433 RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
74337434 return ;
74347435
7436+ using namespace VPlanPatternMatch ;
7437+ VPValue *ResumePhiVPV =
7438+ cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7439+ return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7440+ m_VPValue ()));
7441+ }));
7442+ auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
74357443 auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
74367444 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7445+ if (!VectorizingEpilogue) {
7446+ ReductionResumeValues[&RdxDesc] = BCBlockPhi;
7447+ return ;
7448+ }
74377449
7438- Value *FinalValue =
7439- State.get (RedResult, VPIteration (0 , VPLane::getFirstLane ()));
74407450 auto *ResumePhi =
74417451 dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7442- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7443- RdxDesc.getRecurrenceKind ())) {
7452+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7453+ RdxDesc.getRecurrenceKind ())) {
74447454 auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
74457455 assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
74467456 assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
@@ -7450,42 +7460,15 @@ static void createAndCollectMergePhiForReduction(
74507460 " when vectorizing the epilogue loop, we need a resume phi from main "
74517461 " vector loop" );
74527462
7453- // TODO: bc.merge.rdx should not be created here, instead it should be
7454- // modeled in VPlan.
74557463 BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7456- // Create a phi node that merges control-flow from the backedge-taken check
7457- // block and the middle block.
7458- auto *BCBlockPhi =
7459- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7460- LoopScalarPreHeader->getTerminator ()->getIterator ());
7461-
74627464 // If we are fixing reductions in the epilogue loop then we should already
74637465 // have created a bc.merge.rdx Phi after the main vector body. Ensure that
74647466 // we carry over the incoming values correctly.
74657467 for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7466- if (Incoming == LoopMiddleBlock)
7467- BCBlockPhi->addIncoming (FinalValue, Incoming);
7468- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7469- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7470- Incoming);
7471- else
7472- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7468+ if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7469+ BCBlockPhi->setIncomingValueForBlock (
7470+ Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
74737471 }
7474-
7475- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7476- // TODO: This fixup should instead be modeled in VPlan.
7477- // Fix the scalar loop reduction variable with the incoming reduction sum
7478- // from the vector body and from the backedge value.
7479- int IncomingEdgeBlockIdx =
7480- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7481- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7482- // Pick the other block.
7483- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7484- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7485- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7486- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
7487-
7488- ReductionResumeValues[&RdxDesc] = BCBlockPhi;
74897472}
74907473
74917474std::pair<DenseMap<const SCEV *, Value *>,
@@ -7579,11 +7562,12 @@ LoopVectorizationPlanner::executePlan(
75797562 DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
75807563 auto *ExitVPBB =
75817564 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7582- for (VPRecipeBase &R : *ExitVPBB) {
7583- createAndCollectMergePhiForReduction (
7584- dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7586- }
7565+ if (IsEpilogueVectorization)
7566+ for (VPRecipeBase &R : *ExitVPBB) {
7567+ updateAndCollectMergePhiForReductionForEpilogueVectorization (
7568+ dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7569+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7570+ }
75877571
75887572 // 2.6. Maintain Loop Hints
75897573 // Keep all loop hints from the original loop on the vector loop (we'll
@@ -9369,6 +9353,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93699353 m_VPValue ()));
93709354 });
93719355
9356+ VPBasicBlock *ScalarPHVPBB = nullptr ;
9357+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
9358+ // Order is strict: first is the exit block, second is the scalar
9359+ // preheader.
9360+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
9361+ } else {
9362+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
9363+ }
9364+
9365+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9366+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9367+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9368+ {}, " bc.merge.rdx" );
9369+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9370+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9371+
93729372 // Adjust AnyOf reductions; replace the reduction phi for the selected value
93739373 // with a boolean reduction phi node to check if the condition is true in
93749374 // any iteration. The final value is selected by the final
0 commit comments