@@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel {
1363
1363
// If we might exit from anywhere but the latch, must run the exiting
1364
1364
// iteration in scalar form.
1365
1365
if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1366
- LLVM_DEBUG (
1367
- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1368
- return true ;
1366
+ if (!Legal->canVectorizeEarlyExit ()) {
1367
+ LLVM_DEBUG (
1368
+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1369
+ return true ;
1370
+ }
1369
1371
}
1370
1372
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1371
1373
LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
@@ -2575,7 +2577,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2575
2577
LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
2576
2578
assert (LoopVectorPreHeader && " Invalid loop structure" );
2577
2579
LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
2578
- assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2580
+ assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ()) ||
2581
+ Legal->canVectorizeEarlyExit ()) &&
2579
2582
" multiple exit loop without required epilogue?" );
2580
2583
2581
2584
LoopMiddleBlock =
@@ -2758,8 +2761,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2758
2761
// value (the value that feeds into the phi from the loop latch).
2759
2762
// We allow both, but they, obviously, have different values.
2760
2763
2761
- assert (OrigLoop->getUniqueExitBlock () && " Expected a single exit block" );
2762
-
2763
2764
DenseMap<Value *, Value *> MissingVals;
2764
2765
2765
2766
// An external user of the last iteration's value should see the value that
@@ -2819,6 +2820,9 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2819
2820
if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2820
2821
PHI->addIncoming (I.second , MiddleBlock);
2821
2822
}
2823
+
2824
+ assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2825
+ " Expected a single exit block" );
2822
2826
}
2823
2827
2824
2828
namespace {
@@ -3599,7 +3603,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3599
3603
TheLoop->getExitingBlocks (Exiting);
3600
3604
for (BasicBlock *E : Exiting) {
3601
3605
auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3602
- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3606
+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3607
+ (TheLoop->getLoopLatch () == E || !Legal->canVectorizeEarlyExit ()))
3603
3608
AddToWorklistIfAllowed (Cmp);
3604
3609
}
3605
3610
@@ -7692,12 +7697,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7692
7697
BestVPlan.execute (&State);
7693
7698
7694
7699
// 2.5 Collect reduction resume values.
7695
- auto *ExitVPBB =
7696
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7697
- for (VPRecipeBase &R : *ExitVPBB) {
7698
- createAndCollectMergePhiForReduction (
7699
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7700
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7700
+ VPBasicBlock *ExitVPBB = nullptr ;
7701
+ if (BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ()) {
7702
+ ExitVPBB = cast<VPBasicBlock>(
7703
+ BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7704
+ for (VPRecipeBase &R : *ExitVPBB) {
7705
+ createAndCollectMergePhiForReduction (
7706
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7707
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7708
+ }
7701
7709
}
7702
7710
7703
7711
// 2.6. Maintain Loop Hints
@@ -7723,6 +7731,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7723
7731
LoopVectorizeHints Hints (L, true , *ORE);
7724
7732
Hints.setAlreadyVectorized ();
7725
7733
}
7734
+
7726
7735
TargetTransformInfo::UnrollingPreferences UP;
7727
7736
TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
7728
7737
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7735,15 +7744,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7735
7744
ILV.printDebugTracesAtEnd ();
7736
7745
7737
7746
// 4. Adjust branch weight of the branch in the middle block.
7738
- auto *MiddleTerm =
7739
- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7740
- if (MiddleTerm->isConditional () &&
7741
- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7742
- // Assume that `Count % VectorTripCount` is equally distributed.
7743
- unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7744
- assert (TripCount > 0 && " trip count should not be zero" );
7745
- const uint32_t Weights[] = {1 , TripCount - 1 };
7746
- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7747
+ if (ExitVPBB) {
7748
+ auto *MiddleTerm =
7749
+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7750
+ if (MiddleTerm->isConditional () &&
7751
+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7752
+ // Assume that `Count % VectorTripCount` is equally distributed.
7753
+ unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7754
+ assert (TripCount > 0 && " trip count should not be zero" );
7755
+ const uint32_t Weights[] = {1 , TripCount - 1 };
7756
+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7757
+ }
7747
7758
}
7748
7759
7749
7760
return State.ExpandedSCEVs ;
@@ -8128,7 +8139,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8128
8139
// If source is an exiting block, we know the exit edge is dynamically dead
8129
8140
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8130
8141
// adding uses of an otherwise potentially dead instruction.
8131
- if (OrigLoop->isLoopExiting (Src))
8142
+ if (!Legal-> canVectorizeEarlyExit () && OrigLoop->isLoopExiting (Src))
8132
8143
return EdgeMaskCache[Edge] = SrcMask;
8133
8144
8134
8145
VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8778,6 +8789,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8778
8789
static SetVector<VPIRInstruction *> collectUsersInExitBlock (
8779
8790
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8780
8791
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8792
+ if (!Plan.getVectorLoopRegion ()->getSingleSuccessor ())
8793
+ return {};
8781
8794
auto *MiddleVPBB =
8782
8795
cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8783
8796
// No edge from the middle block to the unique exit block has been inserted
@@ -8863,6 +8876,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8863
8876
// TODO: Should be replaced by
8864
8877
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8865
8878
// scalar region is modeled as well.
8879
+ if (!VectorRegion->getSingleSuccessor ())
8880
+ return ;
8866
8881
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor ());
8867
8882
VPBasicBlock *ScalarPHVPBB = nullptr ;
8868
8883
if (MiddleVPBB->getNumSuccessors () == 2 ) {
@@ -9146,10 +9161,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9146
9161
" VPBasicBlock" );
9147
9162
RecipeBuilder.fixHeaderPhis ();
9148
9163
9149
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9150
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9151
- addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9152
- addUsersInExitBlock (*Plan, ExitUsersToFix);
9164
+ if (Legal->canVectorizeEarlyExit ()) {
9165
+ VPlanTransforms::convertToMultiCond (*Plan, *PSE.getSE (), OrigLoop,
9166
+ RecipeBuilder);
9167
+ } else {
9168
+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9169
+ OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9170
+ addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9171
+ addUsersInExitBlock (*Plan, ExitUsersToFix);
9172
+ }
9153
9173
9154
9174
// ---------------------------------------------------------------------------
9155
9175
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9277,8 +9297,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9277
9297
using namespace VPlanPatternMatch ;
9278
9298
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
9279
9299
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9280
- VPBasicBlock *MiddleVPBB =
9281
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9282
9300
for (VPRecipeBase &R : Header->phis ()) {
9283
9301
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9284
9302
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
@@ -9297,8 +9315,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9297
9315
for (VPUser *U : Cur->users ()) {
9298
9316
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9299
9317
if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9300
- assert (UserRecipe->getParent () == MiddleVPBB &&
9301
- " U must be either in the loop region or the middle block." );
9302
9318
continue ;
9303
9319
}
9304
9320
Worklist.insert (UserRecipe);
@@ -9403,6 +9419,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9403
9419
}
9404
9420
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9405
9421
Builder.setInsertPoint (&*LatchVPBB->begin ());
9422
+ if (!VectorLoopRegion->getSingleSuccessor ())
9423
+ return ;
9424
+ VPBasicBlock *MiddleVPBB =
9425
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9406
9426
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
9407
9427
for (VPRecipeBase &R :
9408
9428
Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
0 commit comments