[LV] Use SCEV to check if minimum iteration check is known. #111310
@@ -11,8 +11,7 @@ define void @f1(ptr %A) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
Review thread:
- Step of 4 * vscale is known to be smaller than count of 1024, based on vscale_range(1,16) attribute?
- Yep
(The bound is spelled out right after this hunk.)
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
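A quick check of the bound discussed in the thread above (illustrative only; the constants come from the test, and the static_assert is not part of the patch):

// With vscale_range(1,16), the scalable step 4 * vscale is at most 4 * 16 = 64,
// so the minimum-iteration check "1024 u< 4 * vscale" can never hold and folds
// to the constant-false branch seen in the diff.
static_assert(4 * 16 < 1024, "maximum step (64) is below the trip count (1024)");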
@@ -11,10 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen(
 ; TFNONE-NEXT: entry:
-; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE: vector.ph:
 ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -146,10 +143,7 @@ for.cond.cleanup:
 define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_if_then(
 ; TFNONE-NEXT: entry:
-; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE: vector.ph:
 ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -310,10 +304,7 @@ for.cond.cleanup:
 define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_if_then_else(
 ; TFNONE-NEXT: entry:
-; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE: vector.ph:
 ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -490,10 +481,7 @@ for.cond.cleanup:
 define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_nomask(
 ; TFNONE-NEXT: entry:
-; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE: vector.ph:
 ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -548,11 +536,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ;
 ; TFFALLBACK-LABEL: @test_widen_nomask(
 ; TFFALLBACK-NEXT: entry:
-; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFFALLBACK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFFALLBACK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; TFFALLBACK: vector.ph:
 ; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
 ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
@@ -561,20 +544,17 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFFALLBACK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
 ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; TFFALLBACK: vector.body:
-; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
 ; TFFALLBACK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
 ; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
-; TFFALLBACK: scalar.ph:
-; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]]
+; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; TFFALLBACK: for.body:
-; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
 ; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
 ; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
 ; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
@@ -626,10 +606,7 @@ for.cond.cleanup:
 define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_optmask(
 ; TFNONE-NEXT: entry:
-; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE: vector.ph:
 ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -791,10 +768,7 @@ for.cond.cleanup:
 define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, double %m) #4 {
 ; TFNONE-LABEL: @test_widen_fmuladd_and_call(
 ; TFNONE-NEXT: entry:
-; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
Review thread:
- ditto with vscale_range(2,16)
- Yep
 ; TFNONE: vector.ph:
 ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -1659,6 +1659,7 @@ for.end: ; preds = %for.body, %entry
 ret i64 %1
 }

+; FIXME: %indvars.iv.next is poison on first iteration due to sub nuw 0, 1.
 define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK-LABEL: define i32 @fcmp_0_sub_select1(
 ; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -1668,8 +1669,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK: [[FOR_HEADER]]:
 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[ZEXT]]
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
Review thread:
- Somewhat confusing (min) iter check here, bumping
- Agreed, might be worth fixing independently. The simplification is fine for the input I think, BTC is
- Sure, worth fixing test independently, before or after. Subtracting 1 from 0 on first iteration, and implicitly casting the above negative BTC and trip count to unsigned, defy the claimed nuw.
- Worth leaving behind a FIXME note.
(The unsigned arithmetic behind the folded check is sketched after this hunk.)
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
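A small illustration of the unsigned arithmetic behind the folded check above (an assumption-laden sketch, not part of the patch; how SCEV rules out N == 0 for this input is not visible in the hunk):

#include <cstdint>

// For a nonzero 32-bit N, the 64-bit value 0 - zext(N) wraps around to
// 2^64 - N, which is far larger than the VF of 4, so
// "icmp ult (0 - zext N), 4" is false; only N == 0 would make it true.
bool min_iters_check(uint32_t n) {
  uint64_t count = uint64_t{0} - uint64_t{n}; // wraps to 2^64 - n when n != 0
  return count < 4;                           // false for every nonzero n
}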
@@ -415,6 +415,7 @@ exit:

 ; Test case to make sure that uses of versioned strides of type i1 are properly
 ; extended. From https://github.com/llvm/llvm-project/issues/91369.
+; TODO: Better check (udiv i64 15, %g.64) after checking if %g == 1.
 define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-LABEL: define void @zext_of_i1_stride(
 ; CHECK-SAME: i1 [[G:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -423,8 +424,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT: [[G_64:%.*]] = zext i1 [[G]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 15, [[G_64]]
Review thread:
- Better divide 15 by G_64 after scevcheck'ing below that G is 1 (not 0), than before?
- Yep would probably be better for this particular check. There are other SCEV checks that are much more expensive (like wrapping checks), so we would probably need to distinguish between them.
- Sure, guards should be ordered according to cost and frequency, but in this case a potential division by zero is introduced, unguarded.
- Worth leaving behind a FIXME note.
 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
Review thread:
- Count of 16 (assuming G = 1) is known to be greater than step of 4.
- Yes, the step of 4 is used here, based on the versioned G
(The count is spelled out after this hunk.)
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
 ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
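Spelling out the count mentioned in the thread above (illustrative only; the G == 1 assumption is exactly what the vector.scevcheck block versions on, and this assert is not part of the patch):

// Under the versioned assumption G == 1, the trip count is
// udiv(15, 1) + 1 == 16, which is never u< the step of 4, so the
// minimum-iteration check folds to false and only the SCEV check remains.
static_assert(15 / 1 + 1 == 16 && 16 >= 4, "count 16 is not below the step of 4");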
@@ -489,8 +489,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], [[G_64]]
 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
 ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
Review thread:
- The optimization you're doing here applies to the check in this if-block as well. Maybe factor out a getOptimizedCompare lambda or something?
- The runtime comparison introduced below checks for overflow, in case the overflow check is not known (to be false) at compile time. Perhaps worth asserting that this predicate is indeed unknown to SCEV.
- Added an assert, thanks!
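For context, a minimal sketch of the kind of helper discussed in this thread: fold a runtime compare to a constant whenever SCEV can already decide the predicate, and only emit the icmp when it is genuinely unknown. The helper name follows the suggestion above; the structure is illustrative, not the patch's actual code.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: return a constant i1 if SCEV already knows the
// predicate (so the guarded branch folds away), otherwise emit the compare.
static Value *getOptimizedCompare(IRBuilderBase &Builder, ScalarEvolution &SE,
                                  CmpInst::Predicate Pred, Value *LHS,
                                  Value *RHS, const Twine &Name) {
  const SCEV *L = SE.getSCEV(LHS);
  const SCEV *R = SE.getSCEV(RHS);
  if (SE.isKnownPredicate(Pred, L, R))
    return Builder.getTrue();  // the check always passes
  if (SE.isKnownPredicate(CmpInst::getInversePredicate(Pred), L, R))
    return Builder.getFalse(); // the check can never pass
  return Builder.CreateICmp(Pred, LHS, RHS, Name); // genuinely unknown to SCEV
}

Both directions show up in the test diffs above: most minimum-iteration checks fold to br i1 false, while the sext_of_i1_stride case folds to br i1 true.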