@@ -224,9 +224,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
224
224
225
225
// Constructs the transform state. VF, LI, Builder, ILV and Plan are stored in
// the members of the same name, CFG is built from DT, and LVer starts out null.
// On the '+' side of this hunk the constructor gains a CanonicalIVTy parameter
// and TypeAnalysis is built directly from it; on the '-' side the type was
// read from Plan->getCanonicalIV()->getScalarType() instead.
// NOTE(review): UF is accepted but does not appear in the initializer list
// shown here -- confirm whether that is intentional.
VPTransformState::VPTransformState (ElementCount VF, unsigned UF, LoopInfo *LI,
226
226
DominatorTree *DT, IRBuilderBase &Builder,
227
- InnerLoopVectorizer *ILV, VPlan *Plan)
227
+ InnerLoopVectorizer *ILV, VPlan *Plan,
228
+ Type *CanonicalIVTy)
228
229
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229
- LVer(nullptr ), TypeAnalysis(Plan-> getCanonicalIV ()->getScalarType() ) {}
230
+ LVer(nullptr ), TypeAnalysis(CanonicalIVTy ) {}
230
231
231
232
Value *VPTransformState::get (VPValue *Def, const VPLane &Lane) {
232
233
if (Def->isLiveIn ())
@@ -275,8 +276,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
275
276
// Place the code for broadcasting invariant variables in the new preheader.
276
277
IRBuilder<>::InsertPointGuard Guard (Builder);
277
278
if (SafeToHoist) {
278
- BasicBlock *LoopVectorPreHeader = CFG. VPBB2IRBB [cast<VPBasicBlock>(
279
- Plan-> getVectorLoopRegion ()-> getSinglePredecessor ())];
279
+ BasicBlock *LoopVectorPreHeader =
280
+ CFG. VPBB2IRBB [cast<VPBasicBlock>(Plan-> getEntry ())];
280
281
if (LoopVectorPreHeader)
281
282
Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
282
283
}
@@ -417,6 +418,12 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
417
418
PrevBB->getParent (), CFG.ExitBB );
418
419
LLVM_DEBUG (dbgs () << " LV: created " << NewBB->getName () << ' \n ' );
419
420
421
+ connectToPredecessors (NewBB, CFG);
422
+ return NewBB;
423
+ }
424
+
425
+ void VPBasicBlock::connectToPredecessors (BasicBlock *NewBB,
426
+ VPTransformState::CFGState &CFG) {
420
427
// Hook up the new basic block to its predecessors.
421
428
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
422
429
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
@@ -447,38 +454,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
447
454
}
448
455
CFG.DTU .applyUpdates ({{DominatorTree::Insert, PredBB, NewBB}});
449
456
}
450
- return NewBB;
451
457
}
452
-
453
458
// Executes this VPIRBasicBlock against the IR basic block it wraps
// (getIRBasicBlock()): asserts that there are at most two hierarchical
// successors, points State->Builder at the IR block's terminator, and runs the
// block's recipes via executeRecipes.
// On the '+' side of this hunk the IR block is then hooked up to its
// predecessors through connectToPredecessors(getIRBasicBlock(), State->CFG).
// The '-' lines show the code being removed from this function: replacing an
// unreachable terminator with a branch whose successor is left null, and an
// inline loop that set the matching successor slot on each predecessor's
// branch and recorded the new edge with the dominator-tree updater.
void VPIRBasicBlock::execute (VPTransformState *State) {
454
459
assert (getHierarchicalSuccessors ().size () <= 2 &&
455
460
" VPIRBasicBlock can have at most two successors at the moment!" );
456
461
State->Builder .SetInsertPoint (getIRBasicBlock ()->getTerminator ());
457
462
executeRecipes (State, getIRBasicBlock ());
458
- if (getSingleSuccessor ()) {
459
- assert (isa<UnreachableInst>(getIRBasicBlock ()->getTerminator ()));
460
- auto *Br = State->Builder .CreateBr (getIRBasicBlock ());
461
- Br->setOperand (0 , nullptr );
462
- getIRBasicBlock ()->getTerminator ()->eraseFromParent ();
463
- }
464
-
465
- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
466
- VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
467
- BasicBlock *PredBB = State->CFG .VPBB2IRBB [PredVPBB];
468
- assert (PredBB && " Predecessor basic-block not found building successor." );
469
- LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
470
463
471
- auto *PredBBTerminator = PredBB->getTerminator ();
472
- auto *TermBr = cast<BranchInst>(PredBBTerminator);
473
- // Set each forward successor here when it is created, excluding
474
- // backedges. A backward successor is set when the branch is created.
475
- const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
476
- unsigned idx = PredVPSuccessors.front () == this ? 0 : 1 ;
477
- assert (!TermBr->getSuccessor (idx) &&
478
- " Trying to reset an existing successor block." );
479
- TermBr->setSuccessor (idx, IRBB);
480
- State->CFG .DTU .applyUpdates ({{DominatorTree::Insert, PredBB, IRBB}});
481
- }
464
// The predecessor wiring is delegated to the helper on the added line below.
+ connectToPredecessors (getIRBasicBlock (), State->CFG );
482
465
}
483
466
484
467
void VPBasicBlock::execute (VPTransformState *State) {
@@ -962,7 +945,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
962
945
963
946
IRBuilder<> Builder (State.CFG .PrevBB ->getTerminator ());
964
947
// FIXME: Model VF * UF computation completely in VPlan.
965
- assert (VFxUF.getNumUsers () && " VFxUF expected to always have users" );
966
948
unsigned UF = getUF ();
967
949
if (VF.getNumUsers ()) {
968
950
Value *RuntimeVF = getRuntimeVF (Builder, TCTy, State.VF );
@@ -1034,8 +1016,13 @@ void VPlan::execute(VPTransformState *State) {
1034
1016
// skeleton creation, so we can only create the VPIRBasicBlocks now during
1035
1017
// VPlan execution rather than earlier during VPlan construction.
1036
1018
BasicBlock *MiddleBB = State->CFG .ExitBB ;
1037
- VPBasicBlock *MiddleVPBB =
1038
- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor ());
1019
+ VPBlockBase *Leaf = nullptr ;
1020
+ for (VPBlockBase *VPB : vp_depth_first_shallow (getEntry ()))
1021
+ if (VPB->getNumSuccessors () == 0 ) {
1022
+ Leaf = VPB;
1023
+ break ;
1024
+ }
1025
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor ());
1039
1026
// Find the VPBB for the scalar preheader, relying on the current structure
1040
1027
// when creating the middle block and its successors: if there's a single
1041
1028
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1063,53 +1050,59 @@ void VPlan::execute(VPTransformState *State) {
1063
1050
for (VPBlockBase *Block : vp_depth_first_shallow (Entry))
1064
1051
Block->execute (State);
1065
1052
1066
- VPBasicBlock *LatchVPBB = getVectorLoopRegion ()->getExitingBasicBlock ();
1067
- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1068
-
1069
- // Fix the latch value of canonical, reduction and first-order recurrences
1070
- // phis in the vector loop.
1071
- VPBasicBlock *Header = getVectorLoopRegion ()->getEntryBasicBlock ();
1072
- for (VPRecipeBase &R : Header->phis ()) {
1073
- // Skip phi-like recipes that generate their backedge values themselves.
1074
- if (isa<VPWidenPHIRecipe>(&R))
1075
- continue ;
1076
-
1077
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
1078
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1079
- PHINode *Phi = nullptr ;
1080
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1081
- Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1082
- } else {
1083
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1084
- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1085
- " recipe generating only scalars should have been replaced" );
1086
- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1087
- Phi = cast<PHINode>(GEP->getPointerOperand ());
1088
- }
1089
-
1090
- Phi->setIncomingBlock (1 , VectorLatchBB);
1053
+ if (auto *LoopRegion =
1054
+ dyn_cast<VPRegionBlock>(getEntry ()->getSingleSuccessor ())) {
1055
+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
1056
+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1057
+
1058
+ // Fix the latch value of canonical, reduction and first-order recurrences
1059
+ // phis in the vector loop.
1060
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
1061
+ for (VPRecipeBase &R : Header->phis ()) {
1062
+ // Skip phi-like recipes that generate their backedge values themselves.
1063
+ if (isa<VPWidenPHIRecipe>(&R))
1064
+ continue ;
1091
1065
1092
- // Move the last step to the end of the latch block. This ensures
1093
- // consistent placement of all induction updates.
1094
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1095
- Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1066
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
1067
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1068
+ PHINode *Phi = nullptr ;
1069
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1070
+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1071
+ } else {
1072
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1073
+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1074
+ " recipe generating only scalars should have been replaced" );
1075
+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1076
+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1077
+ }
1078
+
1079
+ Phi->setIncomingBlock (1 , VectorLatchBB);
1080
+
1081
+ // Move the last step to the end of the latch block. This ensures
1082
+ // consistent placement of all induction updates.
1083
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1084
+ Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1085
+
1086
+ // Use the steps for the last part as backedge value for the induction.
1087
+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1088
+ Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1089
+ continue ;
1090
+ }
1096
1091
1097
- // Use the steps for the last part as backedge value for the induction.
1098
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1099
- Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1100
- continue ;
1092
+ // For canonical IV, first-order recurrences and in-order reduction phis,
1093
+ // only a single part is generated, which provides the last part from the
1094
+ // previous iteration. For non-ordered reductions all UF parts are
1095
+ // generated.
1096
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1097
+ bool NeedsScalar =
1098
+ isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1099
+ (isa<VPReductionPHIRecipe>(PhiR) &&
1100
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1101
+ Value *Phi = State->get (PhiR, NeedsScalar);
1102
+ Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1103
+ cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1101
1104
}
1102
-
1103
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1104
- bool NeedsScalar =
1105
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1106
- (isa<VPReductionPHIRecipe>(PhiR) &&
1107
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1108
- Value *Phi = State->get (PhiR, NeedsScalar);
1109
- Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1110
- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1111
1105
}
1112
-
1113
1106
State->CFG .DTU .flush ();
1114
1107
assert (State->CFG .DTU .getDomTree ().verify (
1115
1108
DominatorTree::VerificationLevel::Fast) &&
0 commit comments