@@ -224,9 +224,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
224224
225225VPTransformState::VPTransformState (ElementCount VF, unsigned UF, LoopInfo *LI,
226226 DominatorTree *DT, IRBuilderBase &Builder,
227- InnerLoopVectorizer *ILV, VPlan *Plan)
227+ InnerLoopVectorizer *ILV, VPlan *Plan,
228+ Type *CanonicalIVTy)
228229 : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229- LVer(nullptr ), TypeAnalysis(Plan-> getCanonicalIV ()->getScalarType() ) {}
230+ LVer(nullptr ), TypeAnalysis(CanonicalIVTy ) {}
230231
231232Value *VPTransformState::get (VPValue *Def, const VPLane &Lane) {
232233 if (Def->isLiveIn ())
@@ -275,8 +276,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
275276 // Place the code for broadcasting invariant variables in the new preheader.
276277 IRBuilder<>::InsertPointGuard Guard (Builder);
277278 if (SafeToHoist) {
278- BasicBlock *LoopVectorPreHeader = CFG. VPBB2IRBB [cast<VPBasicBlock>(
279- Plan-> getVectorLoopRegion ()-> getSinglePredecessor ())];
279+ BasicBlock *LoopVectorPreHeader =
280+ CFG. VPBB2IRBB [cast<VPBasicBlock>(Plan-> getEntry ())];
280281 if (LoopVectorPreHeader)
281282 Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
282283 }
@@ -417,6 +418,12 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
417418 PrevBB->getParent (), CFG.ExitBB );
418419 LLVM_DEBUG (dbgs () << " LV: created " << NewBB->getName () << ' \n ' );
419420
421+ connectToPredecessors (NewBB, CFG);
422+ return NewBB;
423+ }
424+
425+ void VPBasicBlock::connectToPredecessors (BasicBlock *NewBB,
426+ VPTransformState::CFGState &CFG) {
420427 // Hook up the new basic block to its predecessors.
421428 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
422429 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
@@ -447,38 +454,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
447454 }
448455 CFG.DTU .applyUpdates ({{DominatorTree::Insert, PredBB, NewBB}});
449456 }
450- return NewBB;
451457}
452-
453458void VPIRBasicBlock::execute (VPTransformState *State) {
454459 assert (getHierarchicalSuccessors ().size () <= 2 &&
455460 " VPIRBasicBlock can have at most two successors at the moment!" );
456461 State->Builder .SetInsertPoint (getIRBasicBlock ()->getTerminator ());
457462 executeRecipes (State, getIRBasicBlock ());
458- if (getSingleSuccessor ()) {
459- assert (isa<UnreachableInst>(getIRBasicBlock ()->getTerminator ()));
460- auto *Br = State->Builder .CreateBr (getIRBasicBlock ());
461- Br->setOperand (0 , nullptr );
462- getIRBasicBlock ()->getTerminator ()->eraseFromParent ();
463- }
464-
465- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
466- VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
467- BasicBlock *PredBB = State->CFG .VPBB2IRBB [PredVPBB];
468- assert (PredBB && " Predecessor basic-block not found building successor." );
469- LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
470463
471- auto *PredBBTerminator = PredBB->getTerminator ();
472- auto *TermBr = cast<BranchInst>(PredBBTerminator);
473- // Set each forward successor here when it is created, excluding
474- // backedges. A backward successor is set when the branch is created.
475- const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
476- unsigned idx = PredVPSuccessors.front () == this ? 0 : 1 ;
477- assert (!TermBr->getSuccessor (idx) &&
478- " Trying to reset an existing successor block." );
479- TermBr->setSuccessor (idx, IRBB);
480- State->CFG .DTU .applyUpdates ({{DominatorTree::Insert, PredBB, IRBB}});
481- }
464+ connectToPredecessors (getIRBasicBlock (), State->CFG );
482465}
483466
484467void VPBasicBlock::execute (VPTransformState *State) {
@@ -954,7 +937,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
954937
955938 IRBuilder<> Builder (State.CFG .PrevBB ->getTerminator ());
956939 // FIXME: Model VF * UF computation completely in VPlan.
957- assert (VFxUF.getNumUsers () && " VFxUF expected to always have users" );
958940 unsigned UF = getUF ();
959941 if (VF.getNumUsers ()) {
960942 Value *RuntimeVF = getRuntimeVF (Builder, TCTy, State.VF );
@@ -1026,8 +1008,13 @@ void VPlan::execute(VPTransformState *State) {
10261008 // skeleton creation, so we can only create the VPIRBasicBlocks now during
10271009 // VPlan execution rather than earlier during VPlan construction.
10281010 BasicBlock *MiddleBB = State->CFG .ExitBB ;
1029- VPBasicBlock *MiddleVPBB =
1030- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor ());
1011+ VPBlockBase *Leaf = nullptr ;
1012+ for (VPBlockBase *VPB : vp_depth_first_shallow (getEntry ()))
1013+ if (VPB->getNumSuccessors () == 0 ) {
1014+ Leaf = VPB;
1015+ break ;
1016+ }
1017+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor ());
10311018 // Find the VPBB for the scalar preheader, relying on the current structure
10321019 // when creating the middle block and its successors: if there's a single
10331020 // predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1055,53 +1042,59 @@ void VPlan::execute(VPTransformState *State) {
10551042 for (VPBlockBase *Block : vp_depth_first_shallow (Entry))
10561043 Block->execute (State);
10571044
1058- VPBasicBlock *LatchVPBB = getVectorLoopRegion ()->getExitingBasicBlock ();
1059- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1060-
1061- // Fix the latch value of canonical, reduction and first-order recurrences
1062- // phis in the vector loop.
1063- VPBasicBlock *Header = getVectorLoopRegion ()->getEntryBasicBlock ();
1064- for (VPRecipeBase &R : Header->phis ()) {
1065- // Skip phi-like recipes that generate their backedege values themselves.
1066- if (isa<VPWidenPHIRecipe>(&R))
1067- continue ;
1068-
1069- if (isa<VPWidenPointerInductionRecipe>(&R) ||
1070- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1071- PHINode *Phi = nullptr ;
1072- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1073- Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1074- } else {
1075- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1076- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1077- " recipe generating only scalars should have been replaced" );
1078- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1079- Phi = cast<PHINode>(GEP->getPointerOperand ());
1080- }
1081-
1082- Phi->setIncomingBlock (1 , VectorLatchBB);
1045+ if (auto *LoopRegion =
1046+ dyn_cast<VPRegionBlock>(getEntry ()->getSingleSuccessor ())) {
1047+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
1048+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1049+
1050+ // Fix the latch value of canonical, reduction and first-order recurrences
1051+ // phis in the vector loop.
1052+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
1053+ for (VPRecipeBase &R : Header->phis ()) {
1054+ // Skip phi-like recipes that generate their backedge values themselves.
1055+ if (isa<VPWidenPHIRecipe>(&R))
1056+ continue ;
10831057
1084- // Move the last step to the end of the latch block. This ensures
1085- // consistent placement of all induction updates.
1086- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1087- Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1058+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
1059+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1060+ PHINode *Phi = nullptr ;
1061+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1062+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1063+ } else {
1064+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1065+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1066+ " recipe generating only scalars should have been replaced" );
1067+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1068+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1069+ }
1070+
1071+ Phi->setIncomingBlock (1 , VectorLatchBB);
1072+
1073+ // Move the last step to the end of the latch block. This ensures
1074+ // consistent placement of all induction updates.
1075+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1076+ Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1077+
1078+ // Use the steps for the last part as backedge value for the induction.
1079+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1080+ Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1081+ continue ;
1082+ }
10881083
1089- // Use the steps for the last part as backedge value for the induction.
1090- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1091- Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1092- continue ;
1084+ // For canonical IV, first-order recurrences and in-order reduction phis,
1085+ // only a single part is generated, which provides the last part from the
1086+ // previous iteration. For non-ordered reductions all UF parts are
1087+ // generated.
1088+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1089+ bool NeedsScalar =
1090+ isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1091+ (isa<VPReductionPHIRecipe>(PhiR) &&
1092+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1093+ Value *Phi = State->get (PhiR, NeedsScalar);
1094+ Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1095+ cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
10931096 }
1094-
1095- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1096- bool NeedsScalar =
1097- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1098- (isa<VPReductionPHIRecipe>(PhiR) &&
1099- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1100- Value *Phi = State->get (PhiR, NeedsScalar);
1101- Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1102- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
11031097 }
1104-
11051098 State->CFG .DTU .flush ();
11061099 assert (State->CFG .DTU .getDomTree ().verify (
11071100 DominatorTree::VerificationLevel::Fast) &&
0 commit comments