Skip to content

Commit ca478bc

Browse files
authored
[SCEV] Support ule/sle exit counts via widening (#92206)
If we have an exit condition of the form IV <= Limit, we first try to convert it into IV < Limit + 1 or IV - 1 < Limit based on range info (in icmp simplification). If that fails, we try to convert it to IV < Limit + 1 based on controlling exits in non-infinite loops. If all else fails, we can still determine the exit count by rewriting the condition to ext(IV) < ext(Limit) + 1, where the extension (sign extension for signed predicates, zero extension for unsigned ones) ensures that the addition does not overflow. Proof: https://alive2.llvm.org/ce/z/iR-iYd
1 parent 5ec91b3 commit ca478bc

File tree

4 files changed

+68
-44
lines changed

4 files changed

+68
-44
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9198,8 +9198,25 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
91989198
// Since the loop is finite, an invariant RHS cannot include the boundary
91999199
// value, otherwise it would loop forever.
92009200
if (!EnableFiniteLoopControl || !ControllingFiniteLoop ||
9201-
!isLoopInvariant(RHS, L))
9202-
break;
9201+
!isLoopInvariant(RHS, L)) {
9202+
// Otherwise, perform the addition in a wider type, to avoid overflow.
9203+
// If the LHS is an addrec with the appropriate nowrap flag, the
9204+
// extension will be sunk into it and the exit count can be analyzed.
9205+
auto *OldType = dyn_cast<IntegerType>(LHS->getType());
9206+
if (!OldType)
9207+
break;
9208+
// Prefer doubling the bitwidth over adding a single bit to make it more
9209+
// likely that we use a legal type.
9210+
auto *NewType =
9211+
Type::getIntNTy(OldType->getContext(), OldType->getBitWidth() * 2);
9212+
if (ICmpInst::isSigned(Pred)) {
9213+
LHS = getSignExtendExpr(LHS, NewType);
9214+
RHS = getSignExtendExpr(RHS, NewType);
9215+
} else {
9216+
LHS = getZeroExtendExpr(LHS, NewType);
9217+
RHS = getZeroExtendExpr(RHS, NewType);
9218+
}
9219+
}
92039220
RHS = getAddExpr(getOne(RHS->getType()), RHS);
92049221
[[fallthrough]];
92059222
case ICmpInst::ICMP_SLT:

llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
define void @ule_from_zero(i32 %M, i32 %N) {
55
; CHECK-LABEL: 'ule_from_zero'
66
; CHECK-NEXT: Determining loop execution counts for: @ule_from_zero
7-
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
8-
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
7+
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
8+
; CHECK-NEXT: exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
99
; CHECK-NEXT: exit count for latch: %N
10-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
11-
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %N
12-
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
10+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
11+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
12+
; CHECK-NEXT: symbolic max exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
1313
; CHECK-NEXT: symbolic max exit count for latch: %N
14+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
1415
;
1516
entry:
1617
br label %loop
@@ -61,13 +62,14 @@ exit:
6162
define void @ule_from_unknown(i32 %M, i32 %N, i32 %S) {
6263
; CHECK-LABEL: 'ule_from_unknown'
6364
; CHECK-NEXT: Determining loop execution counts for: @ule_from_unknown
64-
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
65-
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
65+
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is (((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
66+
; CHECK-NEXT: exit count for loop: ((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>))
6667
; CHECK-NEXT: exit count for latch: ((-1 * %S) + %N)
67-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
68-
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-1 * %S) + %N)
69-
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
68+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
69+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
70+
; CHECK-NEXT: symbolic max exit count for loop: ((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>))
7071
; CHECK-NEXT: symbolic max exit count for latch: ((-1 * %S) + %N)
72+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
7173
;
7274
entry:
7375
br label %loop
@@ -96,6 +98,9 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
9698
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %N
9799
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
98100
; CHECK-NEXT: symbolic max exit count for latch: %N
101+
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
102+
; CHECK-NEXT: Predicates:
103+
; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
99104
;
100105
entry:
101106
br label %loop
@@ -117,13 +122,14 @@ exit:
117122
define void @sle_from_int_min(i32 %M, i32 %N) {
118123
; CHECK-LABEL: 'sle_from_int_min'
119124
; CHECK-NEXT: Determining loop execution counts for: @sle_from_int_min
120-
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
121-
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
125+
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
126+
; CHECK-NEXT: exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
122127
; CHECK-NEXT: exit count for latch: (-2147483648 + %N)
123-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
124-
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-2147483648 + %N)
125-
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
128+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
129+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
130+
; CHECK-NEXT: symbolic max exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
126131
; CHECK-NEXT: symbolic max exit count for latch: (-2147483648 + %N)
132+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
127133
;
128134
entry:
129135
br label %loop
@@ -174,13 +180,14 @@ exit:
174180
define void @sle_from_unknown(i32 %M, i32 %N, i32 %S) {
175181
; CHECK-LABEL: 'sle_from_unknown'
176182
; CHECK-NEXT: Determining loop execution counts for: @sle_from_unknown
177-
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
178-
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
183+
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is (((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
184+
; CHECK-NEXT: exit count for loop: ((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>))
179185
; CHECK-NEXT: exit count for latch: ((-1 * %S) + %N)
180-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
181-
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-1 * %S) + %N)
182-
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
186+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
187+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
188+
; CHECK-NEXT: symbolic max exit count for loop: ((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>))
183189
; CHECK-NEXT: symbolic max exit count for latch: ((-1 * %S) + %N)
190+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
184191
;
185192
entry:
186193
br label %loop
@@ -209,6 +216,9 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
209216
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-2147483648 + %N)
210217
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
211218
; CHECK-NEXT: symbolic max exit count for latch: (-2147483648 + %N)
219+
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
220+
; CHECK-NEXT: Predicates:
221+
; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
212222
;
213223
entry:
214224
br label %loop

llvm/test/CodeGen/PowerPC/ctrloop-le.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,7 @@ for.end: ; preds = %for.body, %entry
293293

294294

295295
; CHECK: test_pos1_rr_sle
296-
; FIXME: Support this loop!
297-
; CHECK-NOT: bdnz
296+
; CHECK: bdnz
298297
; a < b
299298
define void @test_pos1_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
300299
entry:
@@ -323,8 +322,7 @@ for.end: ; preds = %for.body, %entry
323322

324323

325324
; CHECK: test_pos2_rr_sle
326-
; FIXME: Support this loop!
327-
; CHECK-NOT: bdnz
325+
; CHECK: bdnz
328326
; a < b
329327
define void @test_pos2_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
330328
entry:
@@ -353,8 +351,7 @@ for.end: ; preds = %for.body, %entry
353351

354352

355353
; CHECK: test_pos4_rr_sle
356-
; FIXME: Support this loop!
357-
; CHECK-NOT: bdnz
354+
; CHECK: bdnz
358355
; a < b
359356
define void @test_pos4_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
360357
entry:
@@ -383,8 +380,7 @@ for.end: ; preds = %for.body, %entry
383380

384381

385382
; CHECK: test_pos8_rr_sle
386-
; FIXME: Support this loop!
387-
; CHECK-NOT: bdnz
383+
; CHECK: bdnz
388384
; a < b
389385
define void @test_pos8_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
390386
entry:
@@ -413,8 +409,7 @@ for.end: ; preds = %for.body, %entry
413409

414410

415411
; CHECK: test_pos16_rr_sle
416-
; FIXME: Support this loop!
417-
; CHECK-NOT: bdnz
412+
; CHECK: bdnz
418413
; a < b
419414
define void @test_pos16_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
420415
entry:

llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -265,16 +265,17 @@ define i32 @test5(ptr %a, i32 %b) {
265265
; CHECK-LABEL: @test5(
266266
; CHECK-NEXT: entry:
267267
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64
268+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
268269
; CHECK-NEXT: br label [[FOR_COND:%.*]]
269270
; CHECK: for.cond:
270271
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
271272
; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
272-
; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[INDVARS_IV]], [[TMP0]]
273-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
273+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP1]]
274+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
274275
; CHECK: for.body:
275276
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
276-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
277-
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]]
277+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
278+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]]
278279
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
279280
; CHECK-NEXT: br label [[FOR_COND]]
280281
; CHECK: for.end:
@@ -349,22 +350,23 @@ define i32 @test7(ptr %a, i32 %b) {
349350
; CHECK-LABEL: @test7(
350351
; CHECK-NEXT: entry:
351352
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64
353+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
352354
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 -1)
353-
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 2
354-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64
355+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2
356+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64
355357
; CHECK-NEXT: br label [[FOR_COND:%.*]]
356358
; CHECK: for.cond:
357359
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
358360
; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
359-
; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[INDVARS_IV]], [[TMP0]]
360-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
361+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP1]]
362+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
361363
; CHECK: for.body:
362364
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
363-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
364-
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]]
365+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
366+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP3]]
365367
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
366-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
367-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END]]
368+
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
369+
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[FOR_COND]], label [[FOR_END]]
368370
; CHECK: for.end:
369371
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0]], [[FOR_BODY]] ], [ [[SUM_0]], [[FOR_COND]] ]
370372
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]

0 commit comments

Comments
 (0)