@@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel {
1363
1363
// If we might exit from anywhere but the latch, must run the exiting
1364
1364
// iteration in scalar form.
1365
1365
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1366
- LLVM_DEBUG(
1367
- dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1368
- return true;
1366
+ if (!Legal->canVectorizeEarlyExit()) {
1367
+ LLVM_DEBUG(
1368
+ dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1369
+ return true;
1370
+ }
1369
1371
}
1370
1372
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
1371
1373
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
@@ -2575,7 +2577,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2575
2577
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
2576
2578
assert(LoopVectorPreHeader && "Invalid loop structure");
2577
2579
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
2578
- assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
2580
+ assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector()) ||
2581
+ Legal->canVectorizeEarlyExit()) &&
2579
2582
"multiple exit loop without required epilogue?");
2580
2583
2581
2584
LoopMiddleBlock =
@@ -2758,8 +2761,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2758
2761
// value (the value that feeds into the phi from the loop latch).
2759
2762
// We allow both, but they, obviously, have different values.
2760
2763
2761
- assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block");
2762
-
2763
2764
DenseMap<Value *, Value *> MissingVals;
2764
2765
2765
2766
// An external user of the last iteration's value should see the value that
@@ -2819,6 +2820,9 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2819
2820
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2820
2821
PHI->addIncoming(I.second, MiddleBlock);
2821
2822
}
2823
+
2824
+ assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
2825
+ "Expected a single exit block");
2822
2826
}
2823
2827
2824
2828
namespace {
@@ -3599,7 +3603,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3599
3603
TheLoop->getExitingBlocks(Exiting);
3600
3604
for (BasicBlock *E : Exiting) {
3601
3605
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
3602
- if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
3606
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() &&
3607
+ (TheLoop->getLoopLatch() == E || !Legal->canVectorizeEarlyExit()))
3603
3608
AddToWorklistIfAllowed(Cmp);
3604
3609
}
3605
3610
@@ -7692,12 +7697,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7692
7697
BestVPlan.execute(&State);
7693
7698
7694
7699
// 2.5 Collect reduction resume values.
7695
- auto *ExitVPBB =
7696
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7697
- for (VPRecipeBase &R : *ExitVPBB) {
7698
- createAndCollectMergePhiForReduction(
7699
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7700
- State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7700
+ VPBasicBlock *ExitVPBB = nullptr;
7701
+ if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) {
7702
+ ExitVPBB = cast<VPBasicBlock>(
7703
+ BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7704
+ for (VPRecipeBase &R : *ExitVPBB) {
7705
+ createAndCollectMergePhiForReduction(
7706
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7707
+ State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7708
+ }
7701
7709
}
7702
7710
7703
7711
// 2.6. Maintain Loop Hints
@@ -7723,6 +7731,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7723
7731
LoopVectorizeHints Hints(L, true, *ORE);
7724
7732
Hints.setAlreadyVectorized();
7725
7733
}
7734
+
7726
7735
TargetTransformInfo::UnrollingPreferences UP;
7727
7736
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7728
7737
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7735,15 +7744,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7735
7744
ILV.printDebugTracesAtEnd();
7736
7745
7737
7746
// 4. Adjust branch weight of the branch in the middle block.
7738
- auto *MiddleTerm =
7739
- cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7740
- if (MiddleTerm->isConditional() &&
7741
- hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7742
- // Assume that `Count % VectorTripCount` is equally distributed.
7743
- unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7744
- assert(TripCount > 0 && "trip count should not be zero");
7745
- const uint32_t Weights[] = {1, TripCount - 1};
7746
- setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7747
+ if (ExitVPBB) {
7748
+ auto *MiddleTerm =
7749
+ cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7750
+ if (MiddleTerm->isConditional() &&
7751
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7752
+ // Assume that `Count % VectorTripCount` is equally distributed.
7753
+ unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7754
+ assert(TripCount > 0 && "trip count should not be zero");
7755
+ const uint32_t Weights[] = {1, TripCount - 1};
7756
+ setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7757
+ }
7747
7758
}
7748
7759
7749
7760
return State.ExpandedSCEVs;
@@ -8128,7 +8139,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8128
8139
// If source is an exiting block, we know the exit edge is dynamically dead
8129
8140
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8130
8141
// adding uses of an otherwise potentially dead instruction.
8131
- if (OrigLoop->isLoopExiting(Src))
8142
+ if (!Legal->canVectorizeEarlyExit() && OrigLoop->isLoopExiting(Src))
8132
8143
return EdgeMaskCache[Edge] = SrcMask;
8133
8144
8134
8145
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8778,6 +8789,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8778
8789
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
8779
8790
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8780
8791
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8792
+ if (!Plan.getVectorLoopRegion()->getSingleSuccessor())
8793
+ return {};
8781
8794
auto *MiddleVPBB =
8782
8795
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8783
8796
// No edge from the middle block to the unique exit block has been inserted
@@ -8863,6 +8876,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8863
8876
// TODO: Should be replaced by
8864
8877
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8865
8878
// scalar region is modeled as well.
8879
+ if (!VectorRegion->getSingleSuccessor())
8880
+ return;
8866
8881
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8867
8882
VPBasicBlock *ScalarPHVPBB = nullptr;
8868
8883
if (MiddleVPBB->getNumSuccessors() == 2) {
@@ -9146,10 +9161,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9146
9161
"VPBasicBlock");
9147
9162
RecipeBuilder.fixHeaderPhis();
9148
9163
9149
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
9150
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9151
- addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9152
- addUsersInExitBlock(*Plan, ExitUsersToFix);
9164
+ if (Legal->canVectorizeEarlyExit()) {
9165
+ VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop,
9166
+ RecipeBuilder);
9167
+ } else {
9168
+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
9169
+ OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9170
+ addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9171
+ addUsersInExitBlock(*Plan, ExitUsersToFix);
9172
+ }
9153
9173
9154
9174
// ---------------------------------------------------------------------------
9155
9175
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9277,8 +9297,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9277
9297
using namespace VPlanPatternMatch;
9278
9298
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
9279
9299
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
9280
- VPBasicBlock *MiddleVPBB =
9281
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9282
9300
for (VPRecipeBase &R : Header->phis()) {
9283
9301
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9284
9302
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
@@ -9297,8 +9315,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9297
9315
for (VPUser *U : Cur->users()) {
9298
9316
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9299
9317
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
9300
- assert(UserRecipe->getParent() == MiddleVPBB &&
9301
- "U must be either in the loop region or the middle block.");
9302
9318
continue;
9303
9319
}
9304
9320
Worklist.insert(UserRecipe);
@@ -9403,6 +9419,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9403
9419
}
9404
9420
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
9405
9421
Builder.setInsertPoint(&*LatchVPBB->begin());
9422
+ if (!VectorLoopRegion->getSingleSuccessor())
9423
+ return;
9424
+ VPBasicBlock *MiddleVPBB =
9425
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9406
9426
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
9407
9427
for (VPRecipeBase &R :
9408
9428
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
0 commit comments