@@ -1362,9 +1362,11 @@ class LoopVectorizationCostModel {
1362
1362
// If we might exit from anywhere but the latch, must run the exiting
1363
1363
// iteration in scalar form.
1364
1364
if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1365
- LLVM_DEBUG (
1366
- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1367
- return true ;
1365
+ if (!Legal->canVectorizeMultiCond ()) {
1366
+ LLVM_DEBUG (
1367
+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1368
+ return true ;
1369
+ }
1368
1370
}
1369
1371
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1370
1372
LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
@@ -2535,8 +2537,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2535
2537
LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
2536
2538
assert (LoopVectorPreHeader && " Invalid loop structure" );
2537
2539
LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
2538
- assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2539
- " multiple exit loop without required epilogue?" );
2540
+ if (Legal->canVectorizeMultiCond ()) {
2541
+ BasicBlock *Latch = OrigLoop->getLoopLatch ();
2542
+ BasicBlock *TrueSucc =
2543
+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (0 );
2544
+ BasicBlock *FalseSucc =
2545
+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (1 );
2546
+ LoopExitBlock = OrigLoop->contains (TrueSucc) ? FalseSucc : TrueSucc;
2547
+ } else {
2548
+ assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2549
+ " multiple exit loop without required epilogue?" );
2550
+ }
2540
2551
2541
2552
LoopMiddleBlock =
2542
2553
SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -2910,7 +2921,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2910
2921
for (PHINode &PN : Exit->phis ())
2911
2922
PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
2912
2923
2913
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2924
+ if (Legal->canVectorizeMultiCond () ||
2925
+ Cost->requiresScalarEpilogue (VF.isVector ())) {
2914
2926
// No edge from the middle block to the unique exit block has been inserted
2915
2927
// and there is nothing to fix from vector loop; phis should have incoming
2916
2928
// from scalar loop only.
@@ -3554,7 +3566,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3554
3566
TheLoop->getExitingBlocks (Exiting);
3555
3567
for (BasicBlock *E : Exiting) {
3556
3568
auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3557
- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3569
+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3570
+ (TheLoop->getLoopLatch () == E || !Legal->canVectorizeMultiCond ()))
3558
3571
AddToWorklistIfAllowed (Cmp);
3559
3572
}
3560
3573
@@ -7643,12 +7656,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7643
7656
BestVPlan.execute (&State);
7644
7657
7645
7658
// 2.5 Collect reduction resume values.
7646
- auto *ExitVPBB =
7647
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7648
- for (VPRecipeBase &R : *ExitVPBB) {
7649
- createAndCollectMergePhiForReduction (
7650
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7651
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7659
+ VPBasicBlock *ExitVPBB = nullptr ;
7660
+ if (BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ()) {
7661
+ ExitVPBB = cast<VPBasicBlock>(
7662
+ BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7663
+ for (VPRecipeBase &R : *ExitVPBB) {
7664
+ createAndCollectMergePhiForReduction (
7665
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7666
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7667
+ }
7652
7668
}
7653
7669
7654
7670
// 2.6. Maintain Loop Hints
@@ -7674,6 +7690,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7674
7690
LoopVectorizeHints Hints (L, true , *ORE);
7675
7691
Hints.setAlreadyVectorized ();
7676
7692
}
7693
+
7677
7694
TargetTransformInfo::UnrollingPreferences UP;
7678
7695
TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
7679
7696
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7686,15 +7703,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7686
7703
ILV.printDebugTracesAtEnd ();
7687
7704
7688
7705
// 4. Adjust branch weight of the branch in the middle block.
7689
- auto *MiddleTerm =
7690
- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7691
- if (MiddleTerm->isConditional () &&
7692
- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7693
- // Assume that `Count % VectorTripCount` is equally distributed.
7694
- unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7695
- assert (TripCount > 0 && " trip count should not be zero" );
7696
- const uint32_t Weights[] = {1 , TripCount - 1 };
7697
- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7706
+ if (ExitVPBB) {
7707
+ auto *MiddleTerm =
7708
+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7709
+ if (MiddleTerm->isConditional () &&
7710
+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7711
+ // Assume that `Count % VectorTripCount` is equally distributed.
7712
+ unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7713
+ assert (TripCount > 0 && " trip count should not be zero" );
7714
+ const uint32_t Weights[] = {1 , TripCount - 1 };
7715
+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7716
+ }
7698
7717
}
7699
7718
7700
7719
return State.ExpandedSCEVs ;
@@ -8079,7 +8098,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8079
8098
// If source is an exiting block, we know the exit edge is dynamically dead
8080
8099
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8081
8100
// adding uses of an otherwise potentially dead instruction.
8082
- if (OrigLoop->isLoopExiting (Src))
8101
+ if (!Legal-> canVectorizeMultiCond () && OrigLoop->isLoopExiting (Src))
8083
8102
return EdgeMaskCache[Edge] = SrcMask;
8084
8103
8085
8104
VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8729,6 +8748,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8729
8748
static SetVector<VPIRInstruction *> collectUsersInExitBlock (
8730
8749
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8731
8750
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8751
+ if (!Plan.getVectorLoopRegion ()->getSingleSuccessor ())
8752
+ return {};
8732
8753
auto *MiddleVPBB =
8733
8754
cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8734
8755
// No edge from the middle block to the unique exit block has been inserted
@@ -8814,6 +8835,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8814
8835
// TODO: Should be replaced by
8815
8836
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8816
8837
// scalar region is modeled as well.
8838
+ if (!VectorRegion->getSingleSuccessor ())
8839
+ return ;
8817
8840
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor ());
8818
8841
VPBasicBlock *ScalarPHVPBB = nullptr ;
8819
8842
if (MiddleVPBB->getNumSuccessors () == 2 ) {
@@ -9100,10 +9123,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9100
9123
" VPBasicBlock" );
9101
9124
RecipeBuilder.fixHeaderPhis ();
9102
9125
9103
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9104
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9105
- addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9106
- addUsersInExitBlock (*Plan, ExitUsersToFix);
9126
+ if (Legal->canVectorizeMultiCond ()) {
9127
+ VPlanTransforms::convertToMultiCond (*Plan, *PSE.getSE (), OrigLoop,
9128
+ RecipeBuilder);
9129
+ } else {
9130
+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9131
+ OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9132
+ addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9133
+ addUsersInExitBlock (*Plan, ExitUsersToFix);
9134
+ }
9107
9135
9108
9136
// ---------------------------------------------------------------------------
9109
9137
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9231,8 +9259,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9231
9259
using namespace VPlanPatternMatch ;
9232
9260
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
9233
9261
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9234
- VPBasicBlock *MiddleVPBB =
9235
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9236
9262
for (VPRecipeBase &R : Header->phis ()) {
9237
9263
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9238
9264
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
@@ -9251,8 +9277,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9251
9277
for (VPUser *U : Cur->users ()) {
9252
9278
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9253
9279
if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9254
- assert (UserRecipe->getParent () == MiddleVPBB &&
9255
- " U must be either in the loop region or the middle block." );
9256
9280
continue ;
9257
9281
}
9258
9282
Worklist.insert (UserRecipe);
@@ -9357,6 +9381,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9357
9381
}
9358
9382
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9359
9383
Builder.setInsertPoint (&*LatchVPBB->begin ());
9384
+ if (!VectorLoopRegion->getSingleSuccessor ())
9385
+ return ;
9386
+ VPBasicBlock *MiddleVPBB =
9387
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9360
9388
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
9361
9389
for (VPRecipeBase &R :
9362
9390
Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
0 commit comments