Skip to content

Commit 087668b

Browse files
committed
[VPlan] Compute induction end values in VPlan.
Use createDerivedIV to compute IV end values directly in VPlan, instead of creating them up-front. This allows updating IV users outside the loop as follow-up. Depends on #110004 and #109975.
1 parent 7c81573 commit 087668b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+365
-338
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ class VPBuilder {
233233

234234
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
235235
FPMathOperator *FPBinOp, VPValue *Start,
236-
VPCanonicalIVPHIRecipe *CanonicalIV,
237-
VPValue *Step, const Twine &Name = "") {
236+
VPValue *CanonicalIV, VPValue *Step,
237+
const Twine &Name = "") {
238238
return tryInsertInstruction(
239239
new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name));
240240
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 159 additions & 89 deletions
Large diffs are not rendered by default.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6464
case VPInstruction::FirstOrderRecurrenceSplice:
6565
case VPInstruction::LogicalAnd:
6666
case VPInstruction::PtrAdd:
67+
case VPInstruction::ResumePhi:
6768
return false;
6869
default:
6970
return true;

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
16-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1716
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
18+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
2020
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
@@ -102,9 +102,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
102102
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
103103
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
104104
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
105-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
106105
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
107106
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
107+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
108108
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
109109
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
110110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -785,11 +785,11 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
785785
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]]
786786
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
787787
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
788+
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
789+
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
788790
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8
789791
; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
790792
; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2
791-
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
792-
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
793793
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
794794
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
795795
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
4848
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
4949
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5050
; CHECK: scalar.ph:
51-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
5251
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
52+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
5353
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5454
; CHECK: for.cond.cleanup.loopexit:
5555
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
154154
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
155155
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
156156
; CHECK: scalar.ph:
157-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ]
158157
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ]
159158
; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ]
160159
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
160+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ]
161161
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
162162
; CHECK: for.cond.cleanup.loopexit:
163163
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
863863
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
864864
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
865865
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
866-
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
867866
; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
867+
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
868868
; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
869869
; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
870870
; PRED-NEXT: br label [[LOOP:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
101101
; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4
102102
; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[TMP3]], [[TMP35]]
103103
; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF3]]
104-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC4]]
105104
; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
106105
; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 4
106+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC4]]
107107
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[CONV]], i64 0
108108
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT8]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
109109
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
@@ -316,9 +316,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
316316
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], [[TMP4]]
317317
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
318318
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
319-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]]
320319
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
321320
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16
321+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]]
322322
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]])
323323
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
324324
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ define i64 @mul_select_operand_known_1_via_scev() {
1919
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[VEC_PHI]])
2020
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
2121
; CHECK: [[SCALAR_PH]]:
22-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
2322
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
23+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
2424
; CHECK-NEXT: br label %[[LOOP:.*]]
2525
; CHECK: [[LOOP]]:
2626
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
9797
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
9898
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
9999
; DEFAULT: scalar.ph:
100-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
101-
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
100+
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
102101
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
102+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
103103
; DEFAULT-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP60]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
104104
; DEFAULT-NEXT: br label [[LOOP:%.*]]
105105
; DEFAULT: loop:
@@ -220,9 +220,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
220220
; PRED-NEXT: [[VECTOR_RECUR_EXTRACT8:%.*]] = extractelement <vscale x 4 x i32> [[TMP22]], i32 [[TMP51]]
221221
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
222222
; PRED: scalar.ph:
223-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
224-
; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
223+
; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
225224
; PRED-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
225+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
226226
; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP45]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
227227
; PRED-NEXT: br label [[LOOP:%.*]]
228228
; PRED: loop:

0 commit comments

Comments
 (0)