@@ -224,9 +224,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
224224
// Diff hunk for the VPTransformState constructor.
// The '+' lines append a `Type *CanonicalIVTy` parameter and hand it directly
// to TypeAnalysis. The '-' lines show the previous form, which derived that
// type from Plan->getCanonicalIV()->getScalarType().
// VF, CFG (built from DT), LI, Builder, ILV and Plan are initialized from the
// corresponding parameters, and LVer starts out as nullptr.
225225VPTransformState::VPTransformState (ElementCount VF, unsigned UF, LoopInfo *LI,
226226 DominatorTree *DT, IRBuilderBase &Builder,
227- InnerLoopVectorizer *ILV, VPlan *Plan)
227+ InnerLoopVectorizer *ILV, VPlan *Plan,
228+ Type *CanonicalIVTy)
228229 : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229- LVer(nullptr ), TypeAnalysis(Plan-> getCanonicalIV ()->getScalarType() ) {}
230+ LVer(nullptr ), TypeAnalysis(CanonicalIVTy ) {}
230231
231232Value *VPTransformState::get (VPValue *Def, const VPLane &Lane) {
232233 if (Def->isLiveIn ())
@@ -275,8 +276,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
275276 // Place the code for broadcasting invariant variables in the new preheader.
276277 IRBuilder<>::InsertPointGuard Guard (Builder);
277278 if (SafeToHoist) {
278- BasicBlock *LoopVectorPreHeader = CFG. VPBB2IRBB [cast<VPBasicBlock>(
279- Plan-> getVectorLoopRegion ()-> getSinglePredecessor ())];
279+ BasicBlock *LoopVectorPreHeader =
280+ CFG. VPBB2IRBB [cast<VPBasicBlock>(Plan-> getEntry ())];
280281 if (LoopVectorPreHeader)
281282 Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
282283 }
@@ -417,6 +418,12 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
417418 PrevBB->getParent (), CFG.ExitBB );
418419 LLVM_DEBUG (dbgs () << " LV: created " << NewBB->getName () << ' \n ' );
419420
421+ connectToPredecessors (NewBB, CFG);
422+ return NewBB;
423+ }
424+
425+ void VPBasicBlock::connectToPredecessors (BasicBlock *NewBB,
426+ VPTransformState::CFGState &CFG) {
420427 // Hook up the new basic block to its predecessors.
421428 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
422429 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
@@ -447,38 +454,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
447454 }
448455 CFG.DTU .applyUpdates ({{DominatorTree::Insert, PredBB, NewBB}});
449456 }
450- return NewBB;
451457}
452-
// Diff hunk for VPIRBasicBlock::execute.
// Unchanged part: asserts that the block has at most two hierarchical
// successors, points the builder at the terminator of the wrapped IR basic
// block, and executes the recipes into that block.
// Removed ('-') part: when there was a single successor, the existing
// terminator (asserted to be an UnreachableInst) was replaced by a branch whose
// operand 0 was reset to nullptr; then, for every hierarchical predecessor, the
// predecessor's branch successor slot (0 if this block is the predecessor's
// first successor, else 1) was set to IRBB and a DominatorTree::Insert update
// was queued.
// Added ('+') part: all of that is replaced by a single call to
// connectToPredecessors(getIRBasicBlock(), State->CFG).
// NOTE(review): the body of connectToPredecessors is only partially visible in
// this diff; presumably it also covers the terminator setup removed here -
// confirm against the full file.
453458void VPIRBasicBlock::execute (VPTransformState *State) {
454459 assert (getHierarchicalSuccessors ().size () <= 2 &&
455460 " VPIRBasicBlock can have at most two successors at the moment!" );
456461 State->Builder .SetInsertPoint (getIRBasicBlock ()->getTerminator ());
457462 executeRecipes (State, getIRBasicBlock ());
458- if (getSingleSuccessor ()) {
459- assert (isa<UnreachableInst>(getIRBasicBlock ()->getTerminator ()));
460- auto *Br = State->Builder .CreateBr (getIRBasicBlock ());
461- Br->setOperand (0 , nullptr );
462- getIRBasicBlock ()->getTerminator ()->eraseFromParent ();
463- }
464-
465- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
466- VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
467- BasicBlock *PredBB = State->CFG .VPBB2IRBB [PredVPBB];
468- assert (PredBB && " Predecessor basic-block not found building successor." );
469- LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
470463
471- auto *PredBBTerminator = PredBB->getTerminator ();
472- auto *TermBr = cast<BranchInst>(PredBBTerminator);
473- // Set each forward successor here when it is created, excluding
474- // backedges. A backward successor is set when the branch is created.
475- const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
476- unsigned idx = PredVPSuccessors.front () == this ? 0 : 1 ;
477- assert (!TermBr->getSuccessor (idx) &&
478- " Trying to reset an existing successor block." );
479- TermBr->setSuccessor (idx, IRBB);
480- State->CFG .DTU .applyUpdates ({{DominatorTree::Insert, PredBB, IRBB}});
481- }
464+ connectToPredecessors (getIRBasicBlock (), State->CFG );
482465}
483466
484467void VPBasicBlock::execute (VPTransformState *State) {
@@ -962,7 +945,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
962945
963946 IRBuilder<> Builder (State.CFG .PrevBB ->getTerminator ());
964947 // FIXME: Model VF * UF computation completely in VPlan.
965- assert (VFxUF.getNumUsers () && " VFxUF expected to always have users" );
966948 unsigned UF = getUF ();
967949 if (VF.getNumUsers ()) {
968950 Value *RuntimeVF = getRuntimeVF (Builder, TCTy, State.VF );
@@ -1034,8 +1016,13 @@ void VPlan::execute(VPTransformState *State) {
10341016 // skeleton creation, so we can only create the VPIRBasicBlocks now during
10351017 // VPlan execution rather than earlier during VPlan construction.
10361018 BasicBlock *MiddleBB = State->CFG .ExitBB ;
1037- VPBasicBlock *MiddleVPBB =
1038- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor ());
1019+ VPBlockBase *Leaf = nullptr ;
1020+ for (VPBlockBase *VPB : vp_depth_first_shallow (getEntry ()))
1021+ if (VPB->getNumSuccessors () == 0 ) {
1022+ Leaf = VPB;
1023+ break ;
1024+ }
1025+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor ());
10391026 // Find the VPBB for the scalar preheader, relying on the current structure
10401027 // when creating the middle block and its successors: if there's a single
10411028 // predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1063,53 +1050,59 @@ void VPlan::execute(VPTransformState *State) {
10631050 for (VPBlockBase *Block : vp_depth_first_shallow (Entry))
10641051 Block->execute (State);
10651052
1066- VPBasicBlock *LatchVPBB = getVectorLoopRegion ()->getExitingBasicBlock ();
1067- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1068-
1069- // Fix the latch value of canonical, reduction and first-order recurrences
1070- // phis in the vector loop.
1071- VPBasicBlock *Header = getVectorLoopRegion ()->getEntryBasicBlock ();
1072- for (VPRecipeBase &R : Header->phis ()) {
1073- // Skip phi-like recipes that generate their backedege values themselves.
1074- if (isa<VPWidenPHIRecipe>(&R))
1075- continue ;
1076-
1077- if (isa<VPWidenPointerInductionRecipe>(&R) ||
1078- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1079- PHINode *Phi = nullptr ;
1080- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1081- Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1082- } else {
1083- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1084- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1085- " recipe generating only scalars should have been replaced" );
1086- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1087- Phi = cast<PHINode>(GEP->getPointerOperand ());
1088- }
1089-
1090- Phi->setIncomingBlock (1 , VectorLatchBB);
1053+ if (auto *LoopRegion =
1054+ dyn_cast<VPRegionBlock>(getEntry ()->getSingleSuccessor ())) {
1055+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
1056+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1057+
1058+ // Fix the latch value of canonical, reduction and first-order recurrences
1059+ // phis in the vector loop.
1060+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
1061+ for (VPRecipeBase &R : Header->phis ()) {
1062+ // Skip phi-like recipes that generate their backedge values themselves.
1063+ if (isa<VPWidenPHIRecipe>(&R))
1064+ continue ;
10911065
1092- // Move the last step to the end of the latch block. This ensures
1093- // consistent placement of all induction updates.
1094- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1095- Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1066+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
1067+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1068+ PHINode *Phi = nullptr ;
1069+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1070+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1071+ } else {
1072+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1073+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1074+ " recipe generating only scalars should have been replaced" );
1075+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1076+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1077+ }
1078+
1079+ Phi->setIncomingBlock (1 , VectorLatchBB);
1080+
1081+ // Move the last step to the end of the latch block. This ensures
1082+ // consistent placement of all induction updates.
1083+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1084+ Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1085+
1086+ // Use the steps for the last part as backedge value for the induction.
1087+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1088+ Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1089+ continue ;
1090+ }
10961091
1097- // Use the steps for the last part as backedge value for the induction.
1098- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1099- Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1100- continue ;
1092+ // For canonical IV, first-order recurrences and in-order reduction phis,
1093+ // only a single part is generated, which provides the last part from the
1094+ // previous iteration. For non-ordered reductions all UF parts are
1095+ // generated.
1096+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1097+ bool NeedsScalar =
1098+ isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1099+ (isa<VPReductionPHIRecipe>(PhiR) &&
1100+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1101+ Value *Phi = State->get (PhiR, NeedsScalar);
1102+ Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1103+ cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
11011104 }
1102-
1103- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1104- bool NeedsScalar =
1105- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1106- (isa<VPReductionPHIRecipe>(PhiR) &&
1107- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1108- Value *Phi = State->get (PhiR, NeedsScalar);
1109- Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1110- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
11111105 }
1112-
11131106 State->CFG .DTU .flush ();
11141107 assert (State->CFG .DTU .getDomTree ().verify (
11151108 DominatorTree::VerificationLevel::Fast) &&
0 commit comments