@@ -1387,9 +1387,11 @@ class LoopVectorizationCostModel {
1387
1387
// If we might exit from anywhere but the latch, must run the exiting
1388
1388
// iteration in scalar form.
1389
1389
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1390
- LLVM_DEBUG (
1391
- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1392
- return true ;
1390
+ if (!Legal->canVectorizeMultiCond()) {
1391
+ LLVM_DEBUG(
1392
+ dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1393
+ return true;
1394
+ }
1393
1395
}
1394
1396
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
1395
1397
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
@@ -2571,8 +2573,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2571
2573
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
2572
2574
assert(LoopVectorPreHeader && "Invalid loop structure");
2573
2575
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
2574
- assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2575
- " multiple exit loop without required epilogue?" );
2576
+ if (Legal->canVectorizeMultiCond()) {
2577
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
2578
+ BasicBlock *TrueSucc =
2579
+ cast<BranchInst>(Latch->getTerminator())->getSuccessor(0);
2580
+ BasicBlock *FalseSucc =
2581
+ cast<BranchInst>(Latch->getTerminator())->getSuccessor(1);
2582
+ LoopExitBlock = OrigLoop->contains(TrueSucc) ? FalseSucc : TrueSucc;
2583
+ } else {
2584
+ assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
2585
+ "multiple exit loop without required epilogue?");
2586
+ }
2576
2587
2577
2588
LoopMiddleBlock =
2578
2589
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
@@ -2943,24 +2954,26 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2943
2954
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2944
2955
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock();
2945
2956
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
2946
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2947
- // No edge from the middle block to the unique exit block has been inserted
2948
- // and there is nothing to fix from vector loop; phis should have incoming
2949
- // from scalar loop only.
2950
- } else {
2951
- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2952
- // the cost model.
2953
-
2954
- // If we inserted an edge from the middle block to the unique exit block,
2955
- // update uses outside the loop (phis) to account for the newly inserted
2956
- // edge.
2957
-
2958
- // Fix-up external users of the induction variables.
2959
- for (const auto &Entry : Legal->getInductionVars ())
2960
- fixupIVUsers (Entry.first , Entry.second ,
2961
- getOrCreateVectorTripCount (VectorLoop->getLoopPreheader ()),
2962
- IVEndValues[Entry.first ], LoopMiddleBlock,
2963
- VectorLoop->getHeader (), Plan, State);
2957
+ if (OrigLoop->getUniqueExitBlock()) {
2958
+ if (Cost->requiresScalarEpilogue(VF.isVector())) {
2959
+ // No edge from the middle block to the unique exit block has been
2960
+ // inserted and there is nothing to fix from vector loop; phis should have
2961
+ // incoming from scalar loop only.
2962
+ } else {
2963
+ // TODO: Check VPLiveOuts to see if IV users need fixing instead of
2964
+ // checking the cost model.
2965
+
2966
+ // If we inserted an edge from the middle block to the unique exit block,
2967
+ // update uses outside the loop (phis) to account for the newly inserted
2968
+ // edge.
2969
+
2970
+ // Fix-up external users of the induction variables.
2971
+ for (const auto &Entry : Legal->getInductionVars())
2972
+ fixupIVUsers(Entry.first, Entry.second,
2973
+ getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()),
2974
+ IVEndValues[Entry.first], LoopMiddleBlock,
2975
+ VectorLoop->getHeader(), Plan, State);
2976
+ }
2964
2977
}
2965
2978
2966
2979
// Fix live-out phis not already fixed earlier.
@@ -3584,7 +3597,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3584
3597
TheLoop->getExitingBlocks(Exiting);
3585
3598
for (BasicBlock *E : Exiting) {
3586
3599
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
3587
- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3600
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() &&
3601
+ (TheLoop->getLoopLatch() == E || !Legal->canVectorizeMultiCond()))
3588
3602
AddToWorklistIfAllowed(Cmp);
3589
3603
}
3590
3604
@@ -7515,7 +7529,8 @@ LoopVectorizationPlanner::executePlan(
7515
7529
LLVM_DEBUG(BestVPlan.dump());
7516
7530
7517
7531
// Perform the actual loop transformation.
7518
- VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan);
7532
+ VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
7533
+ OrigLoop);
7519
7534
7520
7535
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
7521
7536
// before making any changes to the CFG.
@@ -7577,12 +7592,15 @@ LoopVectorizationPlanner::executePlan(
7577
7592
7578
7593
// 2.5 Collect reduction resume values.
7579
7594
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
7580
- auto *ExitVPBB =
7581
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7582
- for (VPRecipeBase &R : *ExitVPBB) {
7583
- createAndCollectMergePhiForReduction (
7584
- dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7595
+ VPBasicBlock *ExitVPBB = nullptr;
7596
+ if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) {
7597
+ ExitVPBB = cast<VPBasicBlock>(
7598
+ BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7599
+ for (VPRecipeBase &R : *ExitVPBB) {
7600
+ createAndCollectMergePhiForReduction(
7601
+ dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7602
+ State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7603
+ }
7586
7604
}
7587
7605
7588
7606
// 2.6. Maintain Loop Hints
@@ -7608,6 +7626,7 @@ LoopVectorizationPlanner::executePlan(
7608
7626
LoopVectorizeHints Hints(L, true, *ORE);
7609
7627
Hints.setAlreadyVectorized();
7610
7628
}
7629
+
7611
7630
TargetTransformInfo::UnrollingPreferences UP;
7612
7631
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7613
7632
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7620,15 +7639,17 @@ LoopVectorizationPlanner::executePlan(
7620
7639
ILV.printDebugTracesAtEnd();
7621
7640
7622
7641
// 4. Adjust branch weight of the branch in the middle block.
7623
- auto *MiddleTerm =
7624
- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7625
- if (MiddleTerm->isConditional () &&
7626
- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7627
- // Assume that `Count % VectorTripCount` is equally distributed.
7628
- unsigned TripCount = State.UF * State.VF .getKnownMinValue ();
7629
- assert (TripCount > 0 && " trip count should not be zero" );
7630
- const uint32_t Weights[] = {1 , TripCount - 1 };
7631
- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7642
+ if (ExitVPBB) {
7643
+ auto *MiddleTerm =
7644
+ cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7645
+ if (MiddleTerm->isConditional() &&
7646
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7647
+ // Assume that `Count % VectorTripCount` is equally distributed.
7648
+ unsigned TripCount = State.UF * State.VF.getKnownMinValue();
7649
+ assert(TripCount > 0 && "trip count should not be zero");
7650
+ const uint32_t Weights[] = {1, TripCount - 1};
7651
+ setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7652
+ }
7632
7653
}
7633
7654
7634
7655
return {State.ExpandedSCEVs, ReductionResumeValues};
@@ -8013,7 +8034,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8013
8034
// If source is an exiting block, we know the exit edge is dynamically dead
8014
8035
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8015
8036
// adding uses of an otherwise potentially dead instruction.
8016
- if (OrigLoop->isLoopExiting (Src))
8037
+ if (!Legal->canVectorizeMultiCond() && OrigLoop->isLoopExiting(Src))
8017
8038
return EdgeMaskCache[Edge] = SrcMask;
8018
8039
8019
8040
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8630,6 +8651,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8630
8651
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
8631
8652
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8632
8653
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8654
+ if (!Plan.getVectorLoopRegion()->getSingleSuccessor())
8655
+ return {};
8633
8656
auto *MiddleVPBB =
8634
8657
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8635
8658
// No edge from the middle block to the unique exit block has been inserted
@@ -8717,6 +8740,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8717
8740
// TODO: Should be replaced by
8718
8741
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8719
8742
// scalar region is modeled as well.
8743
+ if (!VectorRegion->getSingleSuccessor())
8744
+ return;
8720
8745
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8721
8746
VPBasicBlock *ScalarPHVPBB = nullptr;
8722
8747
if (MiddleVPBB->getNumSuccessors() == 2) {
@@ -8991,6 +9016,67 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8991
9016
"VPBasicBlock");
8992
9017
RecipeBuilder.fixHeaderPhis();
8993
9018
9019
+ SmallVector<BasicBlock *> Exiting;
9020
+ OrigLoop->getExitingBlocks(Exiting);
9021
+
9022
+ if (Legal->canVectorizeMultiCond()) {
9023
+ auto *LatchVPBB =
9024
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getExiting());
9025
+ VPBuilder::InsertPointGuard Guard(Builder);
9026
+ Builder.setInsertPoint(LatchVPBB->getTerminator());
9027
+ auto *MiddleVPBB =
9028
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
9029
+
9030
+ VPValue *EarlyExitTaken = nullptr;
9031
+ SmallVector<VPValue *> ExitTaken;
9032
+ SmallVector<PHINode *> ExitPhis;
9033
+ SmallVector<Value *> ExitValues;
9034
+ BasicBlock *ExitBlock;
9035
+ for (BasicBlock *E : Exiting) {
9036
+ if (E == OrigLoop->getLoopLatch()) {
9037
+ BasicBlock *TrueSucc =
9038
+ cast<BranchInst>(E->getTerminator())->getSuccessor(0);
9039
+ BasicBlock *FalseSucc =
9040
+ cast<BranchInst>(E->getTerminator())->getSuccessor(1);
9041
+ auto EB = !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
9042
+
9043
+ auto *VPExitBlock = new VPIRBasicBlock(EB);
9044
+ VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
9045
+ VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock);
9046
+ VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
9047
+ continue;
9048
+ }
9049
+ BasicBlock *TrueSucc =
9050
+ cast<BranchInst>(E->getTerminator())->getSuccessor(0);
9051
+ BasicBlock *FalseSucc =
9052
+ cast<BranchInst>(E->getTerminator())->getSuccessor(1);
9053
+ VPValue *M = RecipeBuilder.getBlockInMask(
9054
+ OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
9055
+
9056
+ auto *N = Builder.createNot(M);
9057
+ auto *EC = Builder.createNaryOp(VPInstruction::AnyOf, {N});
9058
+ ExitTaken.push_back(EC);
9059
+ if (EarlyExitTaken)
9060
+ EarlyExitTaken = Builder.createOr(EarlyExitTaken, EC);
9061
+ else
9062
+ EarlyExitTaken = EC;
9063
+ ExitBlock = !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
9064
+ }
9065
+
9066
+ auto *Term = dyn_cast<VPInstruction>(LatchVPBB->getTerminator());
9067
+ auto *IsLatchExiting = Builder.createICmp(
9068
+ CmpInst::ICMP_EQ, Term->getOperand(0), Term->getOperand(1));
9069
+ Builder.createNaryOp(VPInstruction::BranchMultipleConds,
9070
+ {EarlyExitTaken, IsLatchExiting});
9071
+ Term->eraseFromParent();
9072
+
9073
+ auto *EA = new VPIRBasicBlock(ExitBlock);
9074
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
9075
+ VPBlockUtils::disconnectBlocks(LoopRegion, MiddleVPBB);
9076
+ VPBlockUtils::connectBlocks(LoopRegion, EA);
9077
+ VPBlockUtils::connectBlocks(LoopRegion, MiddleVPBB);
9078
+ }
9079
+
8994
9080
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
8995
9081
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
8996
9082
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9062,6 +9148,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9062
9148
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
9063
9149
WithoutRuntimeCheck);
9064
9150
}
9151
+
9065
9152
return Plan;
9066
9153
}
9067
9154
@@ -9286,6 +9373,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9286
9373
}
9287
9374
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
9288
9375
Builder.setInsertPoint(&*LatchVPBB->begin());
9376
+ if (!VectorLoopRegion->getSingleSuccessor())
9377
+ return;
9289
9378
VPBasicBlock *MiddleVPBB =
9290
9379
cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9291
9380
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
0 commit comments