@@ -7435,23 +7435,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74357435}
74367436
74377437// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7438- // create a merge phi node for it.
7439- static void createAndCollectMergePhiForReduction (
7440- VPInstruction *RedResult,
7441- VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock ,
7442- bool VectorizingEpilogue) {
7438+ // update the incoming values of its merge phi node with the resume values from
7439+ // the main vector loop.
7440+ static void updateAndCollectMergePhiForReductionForEpilogueVectorization (
7441+ VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
7442+ BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
74437443 if (!RedResult ||
74447444 RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
74457445 return ;
74467446
7447+ using namespace VPlanPatternMatch ;
7448+ VPValue *ResumePhiVPV =
7449+ cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7450+ return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7451+ m_VPValue ()));
7452+ }));
7453+ auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
74477454 auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
74487455 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7456+ if (!VectorizingEpilogue)
7457+ return ;
74497458
7450- Value *FinalValue = State.get (RedResult, VPLane (VPLane::getFirstLane ()));
74517459 auto *ResumePhi =
74527460 dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7453- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7454- RdxDesc.getRecurrenceKind ())) {
7461+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7462+ RdxDesc.getRecurrenceKind ())) {
74557463 auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
74567464 assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
74577465 assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
@@ -7461,40 +7469,15 @@ static void createAndCollectMergePhiForReduction(
74617469 " when vectorizing the epilogue loop, we need a resume phi from main "
74627470 " vector loop" );
74637471
7464- // TODO: bc.merge.rdx should not be created here, instead it should be
7465- // modeled in VPlan.
74667472 BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7467- // Create a phi node that merges control-flow from the backedge-taken check
7468- // block and the middle block.
7469- auto *BCBlockPhi =
7470- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7471- LoopScalarPreHeader->getTerminator ()->getIterator ());
7472-
74737473 // If we are fixing reductions in the epilogue loop then we should already
74747474 // have created a bc.merge.rdx Phi after the main vector body. Ensure that
74757475 // we carry over the incoming values correctly.
74767476 for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7477- if (Incoming == LoopMiddleBlock)
7478- BCBlockPhi->addIncoming (FinalValue, Incoming);
7479- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7480- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7481- Incoming);
7482- else
7483- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7477+ if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7478+ BCBlockPhi->setIncomingValueForBlock (
7479+ Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
74847480 }
7485-
7486- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7487- // TODO: This fixup should instead be modeled in VPlan.
7488- // Fix the scalar loop reduction variable with the incoming reduction sum
7489- // from the vector body and from the backedge value.
7490- int IncomingEdgeBlockIdx =
7491- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7492- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7493- // Pick the other block.
7494- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7495- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7496- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7497- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
74987481}
74997482
75007483DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7585,11 +7568,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
75857568 // 2.5 Collect reduction resume values.
75867569 auto *ExitVPBB =
75877570 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7588- for (VPRecipeBase &R : *ExitVPBB) {
7589- createAndCollectMergePhiForReduction (
7590- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7591- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7592- }
7571+ if (IsEpilogueVectorization)
7572+ for (VPRecipeBase &R : *ExitVPBB) {
7573+ updateAndCollectMergePhiForReductionForEpilogueVectorization (
7574+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7575+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7576+ }
75937577
75947578 // 2.6. Maintain Loop Hints
75957579 // Keep all loop hints from the original loop on the vector loop (we'll
@@ -9383,6 +9367,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93839367 });
93849368 FinalReductionResult->insertBefore (*MiddleVPBB, IP);
93859369
9370+ VPBasicBlock *ScalarPHVPBB = nullptr ;
9371+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
9372+ // Order is strict: first is the exit block, second is the scalar
9373+ // preheader.
9374+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
9375+ } else {
9376+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
9377+ }
9378+
9379+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9380+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9381+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9382+ {}, " bc.merge.rdx" );
9383+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9384+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9385+
93869386 // Adjust AnyOf reductions; replace the reduction phi for the selected value
93879387 // with a boolean reduction phi node to check if the condition is true in
93889388 // any iteration. The final value is selected by the final
0 commit comments