@@ -7467,23 +7467,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74677467}
74687468
74697469// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7470- // create a merge phi node for it.
7471- static void createAndCollectMergePhiForReduction (
7472- VPInstruction *RedResult,
7473- VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock ,
7474- bool VectorizingEpilogue) {
7470+ // create a merge phi node for it and add incoming values from the main vector
7471+ // loop.
7472+ static void updateAndCollectMergePhiForReductionForEpilogueVectorization (
7473+ VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
7474+ BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
74757475 if (!RedResult ||
74767476 RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
74777477 return ;
74787478
7479+ using namespace VPlanPatternMatch ;
7480+ VPValue *ResumePhiVPV =
7481+ cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7482+ return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7483+ m_VPValue ()));
7484+ }));
7485+ auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
74797486 auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
74807487 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7488+ if (!VectorizingEpilogue)
7489+ return ;
74817490
7482- Value *FinalValue = State.get (RedResult, VPLane (VPLane::getFirstLane ()));
74837491 auto *ResumePhi =
74847492 dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7485- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7486- RdxDesc.getRecurrenceKind ())) {
7493+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7494+ RdxDesc.getRecurrenceKind ())) {
74877495 auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
74887496 assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
74897497 assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
@@ -7493,40 +7501,15 @@ static void createAndCollectMergePhiForReduction(
74937501 " when vectorizing the epilogue loop, we need a resume phi from main "
74947502 " vector loop" );
74957503
7496- // TODO: bc.merge.rdx should not be created here, instead it should be
7497- // modeled in VPlan.
74987504 BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7499- // Create a phi node that merges control-flow from the backedge-taken check
7500- // block and the middle block.
7501- auto *BCBlockPhi =
7502- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7503- LoopScalarPreHeader->getTerminator ()->getIterator ());
7504-
75057505 // If we are fixing reductions in the epilogue loop then we should already
75067506 // have created a bc.merge.rdx Phi after the main vector body. Ensure that
75077507 // we carry over the incoming values correctly.
75087508 for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7509- if (Incoming == LoopMiddleBlock)
7510- BCBlockPhi->addIncoming (FinalValue, Incoming);
7511- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7512- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7513- Incoming);
7514- else
7515- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7509+ if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7510+ BCBlockPhi->setIncomingValueForBlock (
7511+ Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
75167512 }
7517-
7518- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7519- // TODO: This fixup should instead be modeled in VPlan.
7520- // Fix the scalar loop reduction variable with the incoming reduction sum
7521- // from the vector body and from the backedge value.
7522- int IncomingEdgeBlockIdx =
7523- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7524- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7525- // Pick the other block.
7526- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7527- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7528- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7529- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
75307513}
75317514
75327515DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7617,11 +7600,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76177600 // 2.5 Collect reduction resume values.
76187601 auto *ExitVPBB =
76197602 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7620- for (VPRecipeBase &R : *ExitVPBB) {
7621- createAndCollectMergePhiForReduction (
7622- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7623- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7624- }
7603+ if (IsEpilogueVectorization)
7604+ for (VPRecipeBase &R : *ExitVPBB) {
7605+ updateAndCollectMergePhiForReductionForEpilogueVectorization (
7606+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7607+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7608+ }
76257609
76267610 // 2.6. Maintain Loop Hints
76277611 // Keep all loop hints from the original loop on the vector loop (we'll
@@ -9411,6 +9395,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94119395 });
94129396 FinalReductionResult->insertBefore (*MiddleVPBB, IP);
94139397
9398+ VPBasicBlock *ScalarPHVPBB = nullptr ;
9399+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
9400+ // Order is strict: first is the exit block, second is the scalar
9401+ // preheader.
9402+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
9403+ } else {
9404+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
9405+ }
9406+
9407+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9408+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9409+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9410+ {}, " bc.merge.rdx" );
9411+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9412+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9413+
94149414 // Adjust AnyOf reductions; replace the reduction phi for the selected value
94159415 // with a boolean reduction phi node to check if the condition is true in
94169416 // any iteration. The final value is selected by the final
0 commit comments