Skip to content

Commit 5fbd065

Browse files
authored
[VPlan] Add initial CFG simplification, removing BranchOnCond true. (#106748)
Add an initial CFG simplification transform, which removes the dead edges for blocks terminated with BranchOnCond true. At the moment, this removes the edge between middle block and scalar preheader when folding the tail. PR: #106748
1 parent d4002b4 commit 5fbd065

File tree

101 files changed

+862
-1072
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+862
-1072
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2486,12 +2486,13 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
24862486
PreVectorPH->swapSuccessors();
24872487

24882488
// We just connected a new block to the scalar preheader. Update all
2489-
// ResumePhis by adding an incoming value for it.
2489+
// ResumePhis by adding an incoming value for it, replicating the last value.
24902490
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
24912491
auto *ResumePhi = dyn_cast<VPInstruction>(&R);
24922492
if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
24932493
continue;
2494-
ResumePhi->addOperand(ResumePhi->getOperand(1));
2494+
ResumePhi->addOperand(
2495+
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
24952496
}
24962497
}
24972498

@@ -2660,7 +2661,10 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
26602661
LoopScalarPreHeader =
26612662
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
26622663
LI, nullptr, Twine(Prefix) + "scalar.ph");
2663-
replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
2664+
// NOTE: The Plan's scalar preheader VPBB isn't replaced with a VPIRBasicBlock
2665+
// wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar
2666+
// preheader may be unreachable at this point. Instead it is replaced in
2667+
// createVectorizedLoopSkeleton.
26642668
}
26652669

26662670
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2756,6 +2760,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
27562760
// faster.
27572761
emitMemRuntimeChecks(LoopScalarPreHeader);
27582762

2763+
replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
27592764
return LoopVectorPreHeader;
27602765
}
27612766

@@ -7909,6 +7914,7 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
79097914
// Generate the induction variable.
79107915
EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
79117916

7917+
replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
79127918
return LoopVectorPreHeader;
79137919
}
79147920

@@ -8057,6 +8063,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
80578063
Phi->removeIncomingValue(EPI.MemSafetyCheck);
80588064
}
80598065

8066+
replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
80608067
return LoopVectorPreHeader;
80618068
}
80628069

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3529,12 +3529,28 @@ class VPlan {
35293529

35303530
/// Returns the 'middle' block of the plan, that is the block that selects
35313531
/// whether to execute the scalar tail loop or the exit block from the loop
3532-
/// latch.
3533-
const VPBasicBlock *getMiddleBlock() const {
3534-
return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3535-
}
3532+
/// latch. If there is an early exit from the vector loop, the middle block
3533+
/// conceptually has the early exit block as third successor, split across 2
3534+
/// VPBBs. In that case, the second VPBB selects whether to execute the scalar
3535+
/// tail loop or the exit block. If the scalar tail loop or exit block are
3536+
/// known to always execute, the middle block may branch directly to that
3537+
/// block. This function cannot be called once the vector loop region has been
3538+
/// removed.
35363539
VPBasicBlock *getMiddleBlock() {
3537-
return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3540+
VPRegionBlock *LoopRegion = getVectorLoopRegion();
3541+
assert(
3542+
LoopRegion &&
3543+
"cannot call the function after vector loop region has been removed");
3544+
auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
3545+
if (RegionSucc->getSingleSuccessor() ||
3546+
is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
3547+
return RegionSucc;
3548+
// There is an early exit. The successor of RegionSucc is the middle block.
3549+
return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
3550+
}
3551+
3552+
const VPBasicBlock *getMiddleBlock() const {
3553+
return const_cast<VPlan *>(this)->getMiddleBlock();
35383554
}
35393555

35403556
/// Return the VPBasicBlock for the preheader of the scalar loop.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,6 +1682,52 @@ void VPlanTransforms::truncateToMinimalBitwidths(
16821682
"some entries in MinBWs haven't been processed");
16831683
}
16841684

1685+
/// Remove BranchOnCond recipes with true conditions together with removing
1686+
/// dead edges to their successors.
1687+
static void removeBranchOnCondTrue(VPlan &Plan) {
1688+
using namespace llvm::VPlanPatternMatch;
1689+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1690+
vp_depth_first_shallow(Plan.getEntry()))) {
1691+
if (VPBB->getNumSuccessors() != 2 ||
1692+
!match(&VPBB->back(), m_BranchOnCond(m_True())))
1693+
continue;
1694+
1695+
VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
1696+
const auto &Preds = RemovedSucc->getPredecessors();
1697+
assert(count(Preds, VPBB) == 1 &&
1698+
"There must be a single edge between VPBB and its successor");
1699+
unsigned DeadIdx = std::distance(Preds.begin(), find(Preds, VPBB));
1700+
1701+
// Values coming from VPBB into ResumePhi recipes of RemovedSucc are removed
1702+
// from these recipes.
1703+
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
1704+
assert((!isa<VPIRInstruction>(&R) ||
1705+
!isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
1706+
!isa<VPHeaderPHIRecipe>(&R) &&
1707+
"Cannot update VPIRInstructions wrapping phis or header phis yet");
1708+
auto *VPI = dyn_cast<VPInstruction>(&R);
1709+
if (!VPI || VPI->getOpcode() != VPInstruction::ResumePhi)
1710+
break;
1711+
VPBuilder B(VPI);
1712+
SmallVector<VPValue *> NewOperands;
1713+
// Create new operand list, with the dead incoming value filtered out.
1714+
for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
1715+
if (Idx == DeadIdx)
1716+
continue;
1717+
NewOperands.push_back(Op);
1718+
}
1719+
VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi,
1720+
NewOperands, VPI->getDebugLoc(),
1721+
VPI->getName()));
1722+
VPI->eraseFromParent();
1723+
}
1724+
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
1725+
// automatically on VPlan destruction if it becomes unreachable.
1726+
VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc);
1727+
VPBB->back().eraseFromParent();
1728+
}
1729+
}
1730+
16851731
void VPlanTransforms::optimize(VPlan &Plan) {
16861732
runPass(removeRedundantCanonicalIVs, Plan);
16871733
runPass(removeRedundantInductionCasts, Plan);
@@ -1691,6 +1737,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
16911737
runPass(legalizeAndOptimizeInductions, Plan);
16921738
runPass(removeRedundantExpandSCEVRecipes, Plan);
16931739
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1740+
runPass(removeBranchOnCondTrue, Plan);
16941741
runPass(removeDeadRecipes, Plan);
16951742

16961743
runPass(createAndOptimizeReplicateRegions, Plan);

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1616
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
18-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1918
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
2019
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
2120
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -41,10 +40,10 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
4140
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
4241
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4342
; CHECK: middle.block:
44-
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
43+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
4544
; CHECK: scalar.ph:
46-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
47-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ]
45+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
46+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[DST]], [[ENTRY]] ]
4847
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4948
; CHECK: for.body:
5049
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -100,7 +99,6 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
10099
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
101100
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
102101
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
103-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
104102
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
105103
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
106104
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -126,10 +124,10 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
126124
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
127125
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
128126
; CHECK: middle.block:
129-
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
127+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
130128
; CHECK: scalar.ph:
131-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
132-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[FOR_BODY_PREHEADER]] ]
129+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ]
130+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[DST]], [[FOR_BODY_PREHEADER]] ]
133131
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
134132
; CHECK: for.body:
135133
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -457,17 +457,17 @@ define void @latch_branch_cost(ptr %dst) {
457457
; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
458458
; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
459459
; PRED: [[MIDDLE_BLOCK]]:
460-
; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
460+
; PRED-NEXT: br label %[[EXIT:.*]]
461461
; PRED: [[SCALAR_PH]]:
462-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 104, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
462+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
463463
; PRED-NEXT: br label %[[LOOP:.*]]
464464
; PRED: [[LOOP]]:
465-
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
465+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
466466
; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
467467
; PRED-NEXT: store i8 0, ptr [[GEP]], align 1
468-
; PRED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[IV]], 1
469-
; PRED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
470-
; PRED-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
468+
; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
469+
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
470+
; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
471471
; PRED: [[EXIT]]:
472472
; PRED-NEXT: ret void
473473
;
@@ -713,9 +713,6 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
713713
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
714714
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
715715
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
716-
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8
717-
; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
718-
; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2
719716
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
720717
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
721718
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
@@ -741,10 +738,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
741738
; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0
742739
; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
743740
; PRED: [[MIDDLE_BLOCK]]:
744-
; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
741+
; PRED-NEXT: br label %[[EXIT:.*]]
745742
; PRED: [[SCALAR_PH]]:
746-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[DST]], %[[ENTRY]] ]
747-
; PRED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
743+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[DST]], %[[ENTRY]] ]
744+
; PRED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
748745
; PRED-NEXT: br label %[[LOOP:.*]]
749746
; PRED: [[LOOP]]:
750747
; PRED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
@@ -869,9 +866,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
869866
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
870867
; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
871868
; DEFAULT: [[MIDDLE_BLOCK]]:
872-
; DEFAULT-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
869+
; DEFAULT-NEXT: br label %[[EXIT:.*]]
873870
; DEFAULT: [[SCALAR_PH]]:
874-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
871+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
875872
; DEFAULT-NEXT: br label %[[LOOP:.*]]
876873
; DEFAULT: [[LOOP]]:
877874
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -971,9 +968,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
971968
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
972969
; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
973970
; PRED: [[MIDDLE_BLOCK]]:
974-
; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
971+
; PRED-NEXT: br label %[[EXIT:.*]]
975972
; PRED: [[SCALAR_PH]]:
976-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
973+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
977974
; PRED-NEXT: br label %[[LOOP:.*]]
978975
; PRED: [[LOOP]]:
979976
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -1408,9 +1405,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
14081405
; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0
14091406
; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
14101407
; PRED: [[MIDDLE_BLOCK]]:
1411-
; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
1408+
; PRED-NEXT: br label %[[EXIT:.*]]
14121409
; PRED: [[SCALAR_PH]]:
1413-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
1410+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
14141411
; PRED-NEXT: br label %[[LOOP_HEADER:.*]]
14151412
; PRED: [[LOOP_HEADER]]:
14161413
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -1521,9 +1518,9 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
15211518
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
15221519
; DEFAULT-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
15231520
; DEFAULT: [[MIDDLE_BLOCK]]:
1524-
; DEFAULT-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
1521+
; DEFAULT-NEXT: br label %[[EXIT:.*]]
15251522
; DEFAULT: [[SCALAR_PH]]:
1526-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1523+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
15271524
; DEFAULT-NEXT: br label %[[LOOP_HEADER:.*]]
15281525
; DEFAULT: [[LOOP_HEADER]]:
15291526
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -1584,9 +1581,9 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
15841581
; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
15851582
; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
15861583
; PRED: [[MIDDLE_BLOCK]]:
1587-
; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
1584+
; PRED-NEXT: br label %[[EXIT:.*]]
15881585
; PRED: [[SCALAR_PH]]:
1589-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1586+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
15901587
; PRED-NEXT: br label %[[LOOP_HEADER:.*]]
15911588
; PRED: [[LOOP_HEADER]]:
15921589
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
158158
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <vscale x 2 x i1> [[TMP36]], i32 0
159159
; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
160160
; CHECK: [[MIDDLE_BLOCK]]:
161-
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
161+
; CHECK-NEXT: br label %[[EXIT:.*]]
162162
; CHECK: [[SCALAR_PH]]:
163-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
163+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
164164
; CHECK-NEXT: br label %[[LOOP:.*]]
165165
; CHECK: [[LOOP]]:
166166
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -284,9 +284,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
284284
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
285285
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
286286
; CHECK: [[MIDDLE_BLOCK]]:
287-
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
287+
; CHECK-NEXT: br label %[[EXIT:.*]]
288288
; CHECK: [[SCALAR_PH]]:
289-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
289+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
290290
; CHECK-NEXT: br label %[[LOOP:.*]]
291291
; CHECK: [[LOOP]]:
292292
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

0 commit comments

Comments
 (0)