@@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel {
1363
1363
// If we might exit from anywhere but the latch, must run the exiting
1364
1364
// iteration in scalar form.
1365
1365
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1366
- LLVM_DEBUG(
1367
- dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1368
- return true;
1366
+ if (!Legal->canVectorizeMultiCond()) {
1367
+ LLVM_DEBUG(
1368
+ dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1369
+ return true;
1370
+ }
1369
1371
}
1370
1372
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
1371
1373
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
@@ -2544,8 +2546,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2544
2546
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
2545
2547
assert(LoopVectorPreHeader && "Invalid loop structure");
2546
2548
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
2547
- assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
2548
- "multiple exit loop without required epilogue?");
2549
+ if (Legal->canVectorizeMultiCond()) {
2550
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
2551
+ BasicBlock *TrueSucc =
2552
+ cast<BranchInst>(Latch->getTerminator())->getSuccessor(0);
2553
+ BasicBlock *FalseSucc =
2554
+ cast<BranchInst>(Latch->getTerminator())->getSuccessor(1);
2555
+ LoopExitBlock = OrigLoop->contains(TrueSucc) ? FalseSucc : TrueSucc;
2556
+ } else {
2557
+ assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
2558
+ "multiple exit loop without required epilogue?");
2559
+ }
2549
2560
2550
2561
LoopMiddleBlock =
2551
2562
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
@@ -2912,7 +2923,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2912
2923
for (PHINode &PN : Exit->phis())
2913
2924
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
2914
2925
2915
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
2926
+ if (Legal->canVectorizeMultiCond() ||
2927
+ Cost->requiresScalarEpilogue(VF.isVector())) {
2916
2928
// No edge from the middle block to the unique exit block has been inserted
2917
2929
// and there is nothing to fix from vector loop; phis should have incoming
2918
2930
// from scalar loop only.
@@ -3557,7 +3569,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3557
3569
TheLoop->getExitingBlocks(Exiting);
3558
3570
for (BasicBlock *E : Exiting) {
3559
3571
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
3560
- if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
3572
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() &&
3573
+ (TheLoop->getLoopLatch() == E || !Legal->canVectorizeMultiCond()))
3561
3574
AddToWorklistIfAllowed(Cmp);
3562
3575
}
3563
3576
@@ -7522,7 +7535,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7522
7535
LLVM_DEBUG(BestVPlan.dump());
7523
7536
7524
7537
// Perform the actual loop transformation.
7525
- VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan);
7538
+ VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
7539
+ OrigLoop);
7526
7540
7527
7541
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
7528
7542
// before making any changes to the CFG.
@@ -7583,12 +7597,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7583
7597
BestVPlan.execute(&State);
7584
7598
7585
7599
// 2.5 Collect reduction resume values.
7586
- auto *ExitVPBB =
7587
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7588
- for (VPRecipeBase &R : *ExitVPBB) {
7589
- createAndCollectMergePhiForReduction(
7590
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7591
- State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7600
+ VPBasicBlock *ExitVPBB = nullptr;
7601
+ if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) {
7602
+ ExitVPBB = cast<VPBasicBlock>(
7603
+ BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7604
+ for (VPRecipeBase &R : *ExitVPBB) {
7605
+ createAndCollectMergePhiForReduction(
7606
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7607
+ State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7608
+ }
7592
7609
}
7593
7610
7594
7611
// 2.6. Maintain Loop Hints
@@ -7614,6 +7631,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7614
7631
LoopVectorizeHints Hints(L, true, *ORE);
7615
7632
Hints.setAlreadyVectorized();
7616
7633
}
7634
+
7617
7635
TargetTransformInfo::UnrollingPreferences UP;
7618
7636
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7619
7637
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7626,15 +7644,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7626
7644
ILV.printDebugTracesAtEnd();
7627
7645
7628
7646
// 4. Adjust branch weight of the branch in the middle block.
7629
- auto *MiddleTerm =
7630
- cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7631
- if (MiddleTerm->isConditional() &&
7632
- hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7633
- // Assume that `Count % VectorTripCount` is equally distributed.
7634
- unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7635
- assert(TripCount > 0 && "trip count should not be zero");
7636
- const uint32_t Weights[] = {1, TripCount - 1};
7637
- setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7647
+ if (ExitVPBB) {
7648
+ auto *MiddleTerm =
7649
+ cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7650
+ if (MiddleTerm->isConditional() &&
7651
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7652
+ // Assume that `Count % VectorTripCount` is equally distributed.
7653
+ unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7654
+ assert(TripCount > 0 && "trip count should not be zero");
7655
+ const uint32_t Weights[] = {1, TripCount - 1};
7656
+ setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7657
+ }
7638
7658
}
7639
7659
7640
7660
return State.ExpandedSCEVs;
@@ -8019,7 +8039,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8019
8039
// If source is an exiting block, we know the exit edge is dynamically dead
8020
8040
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8021
8041
// adding uses of an otherwise potentially dead instruction.
8022
- if (OrigLoop->isLoopExiting(Src))
8042
+ if (!Legal->canVectorizeMultiCond() && OrigLoop->isLoopExiting(Src))
8023
8043
return EdgeMaskCache[Edge] = SrcMask;
8024
8044
8025
8045
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8664,6 +8684,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8664
8684
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
8665
8685
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8666
8686
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8687
+ if (!Plan.getVectorLoopRegion()->getSingleSuccessor())
8688
+ return {};
8667
8689
auto *MiddleVPBB =
8668
8690
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8669
8691
// No edge from the middle block to the unique exit block has been inserted
@@ -8751,6 +8773,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8751
8773
// TODO: Should be replaced by
8752
8774
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8753
8775
// scalar region is modeled as well.
8776
+ if (!VectorRegion->getSingleSuccessor())
8777
+ return;
8754
8778
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8755
8779
VPBasicBlock *ScalarPHVPBB = nullptr;
8756
8780
if (MiddleVPBB->getNumSuccessors() == 2) {
@@ -9037,6 +9061,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9037
9061
"VPBasicBlock");
9038
9062
RecipeBuilder.fixHeaderPhis();
9039
9063
9064
+ if (Legal->canVectorizeMultiCond()) {
9065
+ VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop,
9066
+ RecipeBuilder);
9067
+ }
9068
+
9040
9069
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
9041
9070
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9042
9071
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9168,8 +9197,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9168
9197
using namespace VPlanPatternMatch;
9169
9198
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
9170
9199
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
9171
- VPBasicBlock *MiddleVPBB =
9172
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9173
9200
for (VPRecipeBase &R : Header->phis()) {
9174
9201
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9175
9202
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
@@ -9188,8 +9215,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9188
9215
for (VPUser *U : Cur->users()) {
9189
9216
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9190
9217
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
9191
- assert(UserRecipe->getParent() == MiddleVPBB &&
9192
- "U must be either in the loop region or the middle block.");
9193
9218
continue;
9194
9219
}
9195
9220
Worklist.insert(UserRecipe);
@@ -9294,6 +9319,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9294
9319
}
9295
9320
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
9296
9321
Builder.setInsertPoint(&*LatchVPBB->begin());
9322
+ if (!VectorLoopRegion->getSingleSuccessor())
9323
+ return;
9324
+ VPBasicBlock *MiddleVPBB =
9325
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9297
9326
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
9298
9327
for (VPRecipeBase &R :
9299
9328
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
0 commit comments