Skip to content

Commit 512aa0d

Browse files
committed
[LV] Use SCEV to check if the minimum iteration check is known to be false.
Use SCEV to check if the minimum iteration check (TC < Step) is known to be false. This is a first step towards addressing llvm#111098. To catch the exact case from the issue, we need to do extra work to make sure the wrap flags on the shl are preserved and used by SCEV.
1 parent 4986510 commit 512aa0d

8 files changed

+30
-55
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2438,12 +2438,21 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24382438
};
24392439

24402440
TailFoldingStyle Style = Cost->getTailFoldingStyle();
2441-
if (Style == TailFoldingStyle::None)
2442-
CheckMinIters =
2443-
Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
2444-
else if (VF.isScalable() &&
2445-
!isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
2446-
Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
2441+
if (Style == TailFoldingStyle::None) {
2442+
Value *Step = CreateStep();
2443+
ScalarEvolution &SE = *PSE.getSE();
2444+
// Check if we can prove that the trip count is >= the step.
2445+
const SCEV *TripCountSCEV = SE.getTripCountFromExitCount(
2446+
PSE.getBackedgeTakenCount(), CountTy, OrigLoop);
2447+
if (SE.isKnownPredicate(CmpInst::getInversePredicate(P),
2448+
SE.applyLoopGuards(TripCountSCEV, OrigLoop),
2449+
SE.getSCEV(Step)))
2450+
CheckMinIters = Builder.getFalse();
2451+
else
2452+
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
2453+
} else if (VF.isScalable() &&
2454+
!isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
2455+
Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
24472456
// vscale is not necessarily a power-of-2, which means we cannot guarantee
24482457
// an overflow to zero when updating induction variables and so an
24492458
// additional overflow check is required before entering the vector loop.

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ define void @f1(ptr %A) #0 {
1111
; CHECK-NEXT: entry:
1212
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1313
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
14-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
15-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1615
; CHECK: vector.ph:
1716
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
1111
define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
1212
; TFNONE-LABEL: @test_widen(
1313
; TFNONE-NEXT: entry:
14-
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
15-
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
16-
; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
17-
; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14+
; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1815
; TFNONE: vector.ph:
1916
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
2017
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -146,10 +143,7 @@ for.cond.cleanup:
146143
define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
147144
; TFNONE-LABEL: @test_if_then(
148145
; TFNONE-NEXT: entry:
149-
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
150-
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
151-
; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
152-
; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
146+
; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
153147
; TFNONE: vector.ph:
154148
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
155149
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -310,10 +304,7 @@ for.cond.cleanup:
310304
define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
311305
; TFNONE-LABEL: @test_widen_if_then_else(
312306
; TFNONE-NEXT: entry:
313-
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
314-
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
315-
; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
316-
; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
307+
; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
317308
; TFNONE: vector.ph:
318309
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
319310
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -490,10 +481,7 @@ for.cond.cleanup:
490481
define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
491482
; TFNONE-LABEL: @test_widen_nomask(
492483
; TFNONE-NEXT: entry:
493-
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
494-
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
495-
; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
496-
; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
484+
; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
497485
; TFNONE: vector.ph:
498486
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
499487
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -548,11 +536,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
548536
;
549537
; TFFALLBACK-LABEL: @test_widen_nomask(
550538
; TFFALLBACK-NEXT: entry:
551-
; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
552-
; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
553-
; TFFALLBACK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
554-
; TFFALLBACK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
555-
; TFFALLBACK: vector.ph:
556539
; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
557540
; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
558541
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
@@ -561,20 +544,17 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
561544
; TFFALLBACK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
562545
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
563546
; TFFALLBACK: vector.body:
564-
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
547+
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
565548
; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
566549
; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
567550
; TFFALLBACK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
568551
; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
569552
; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
570553
; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
571554
; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
572-
; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
573-
; TFFALLBACK: scalar.ph:
574-
; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
575-
; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]]
555+
; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
576556
; TFFALLBACK: for.body:
577-
; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
557+
; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
578558
; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
579559
; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
580560
; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
@@ -626,10 +606,7 @@ for.cond.cleanup:
626606
define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
627607
; TFNONE-LABEL: @test_widen_optmask(
628608
; TFNONE-NEXT: entry:
629-
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
630-
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
631-
; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
632-
; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
609+
; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
633610
; TFNONE: vector.ph:
634611
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
635612
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -791,10 +768,7 @@ for.cond.cleanup:
791768
define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, double %m) #4 {
792769
; TFNONE-LABEL: @test_widen_fmuladd_and_call(
793770
; TFNONE-NEXT: entry:
794-
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
795-
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
796-
; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
797-
; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
771+
; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
798772
; TFNONE: vector.ph:
799773
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
800774
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2

llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ define void @test_invar_gep(ptr %dst) #0 {
1010
; CHECK-NEXT: entry:
1111
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1212
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
13-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP1]]
14-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
13+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1514
; CHECK: vector.ph:
1615
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1716
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,8 +757,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 {
757757
; CHECK-NEXT: entry:
758758
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
759759
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
760-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
761-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
760+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
762761
; CHECK: vector.ph:
763762
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
764763
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4

llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
77
define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
88
; WIDE-LABEL: @test_widen(
99
; WIDE-NEXT: entry:
10-
; WIDE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
11-
; WIDE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
12-
; WIDE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
13-
; WIDE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
10+
; WIDE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1411
; WIDE: vector.ph:
1512
; WIDE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1613
; WIDE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4

llvm/test/Transforms/LoopVectorize/if-reduction.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,8 +1668,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
16681668
; CHECK: [[FOR_HEADER]]:
16691669
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
16701670
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[ZEXT]]
1671-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
1672-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1671+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16731672
; CHECK: [[VECTOR_PH]]:
16741673
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
16751674
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -423,8 +423,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
423423
; CHECK-NEXT: [[G_64:%.*]] = zext i1 [[G]] to i64
424424
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 15, [[G_64]]
425425
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
426-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
427-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
426+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
428427
; CHECK: vector.scevcheck:
429428
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
430429
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]

0 commit comments

Comments
 (0)