
Commit 050e2d3

[LV] Remove assertions in IV overflow check (#115705)
In #111310 an assert was added that, for the IV overflow check used with tail folding, the overflow check is never known. However, when applying the loop guards it looks like it's possible that we might actually know the IV won't overflow: this occurs in 500.perlbench_r from SPEC CPU 2017 and triggers the assertion:

Assertion failed: (!isIndvarOverflowCheckKnownFalse(Cost, VF * UF) && !SE.isKnownPredicate(CmpInst::getInversePredicate(ICmpInst::ICMP_ULT), TC2OverflowSCEV, SE.getSCEV(Step)) && "unexpectedly proved overflow check to be known"), function emitIterationCountCheck, file LoopVectorize.cpp, line 2501.

There is a discrepancy between `isIndvarOverflowCheckKnownFalse` and the ICMP_ULT check, because the former uses `getSmallConstantMaxTripCount`, which only takes into account trip counts that fit into 32 bits. There doesn't seem to be an easy way to make the assertion aware of this, so this PR just removes it for now.

There are two potential follow-ups from this PR:

1. We miss calculating the max trip count in `@trip_count_max_1024`; it looks like we might need to apply loop guards somewhere in `ScalarEvolution::computeExitLimitFromICmp`.
2. In `@overflow_at_0`, if `%tc == 0` then the overflow check will always return false, even though the IV will overflow.

Fixes #115755
1 parent 9e77f59 commit 050e2d3
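
As a worked illustration of follow-up item 2 (a minimal sketch, not part of the patch): modelling the emitted `(UMax - n) < (VF * UF)` comparison in plain C++, with a hypothetical Step of 4 standing in for VF * UF, shows why `%tc == 0` slips through the check.

#include <cassert>
#include <cstdint>

int main() {
  // Sketch of "Don't execute the vector loop if (UMax - n) < (VF * UF)"
  // for a 64-bit IV. Step = 4 is a hypothetical stand-in for VF * UF.
  const uint64_t UMax = UINT64_MAX;
  const uint64_t Step = 4;

  uint64_t TC = 0;          // the problematic trip count from @overflow_at_0
  uint64_t LHS = UMax - TC; // == UINT64_MAX, so the ult comparison is false
  assert(!(LHS < Step));    // the vector loop is NOT bypassed, even though
                            // with %tc == 0 the scalar exit condition
                            // `icmp eq %i.next, %tc` only fires after the
                            // IV wraps all the way around
  return 0;
}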

2 files changed, +242 −11 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 11 deletions
@@ -2485,18 +2485,8 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
       ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
   Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
 
-  Value *Step = CreateStep();
-#ifndef NDEBUG
-  ScalarEvolution &SE = *PSE.getSE();
-  const SCEV *TC2OverflowSCEV = SE.applyLoopGuards(SE.getSCEV(LHS), OrigLoop);
-  assert(
-      !isIndvarOverflowCheckKnownFalse(Cost, VF * UF) &&
-      !SE.isKnownPredicate(CmpInst::getInversePredicate(ICmpInst::ICMP_ULT),
-                           TC2OverflowSCEV, SE.getSCEV(Step)) &&
-      "unexpectedly proved overflow check to be known");
-#endif
   // Don't execute the vector loop if (UMax - n) < (VF * UF).
-  CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
+  CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
 }
 
 // Create new preheader for vector loop.
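
Follow-up item 1 points at `ScalarEvolution::computeExitLimitFromICmp`. As a minimal sketch of that direction (a hypothetical helper, not code from this commit; `SE` and `L` are assumed to come from the enclosing pass):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: rewrite the backedge-taken count using conditions
// that dominate the loop, so a guard such as the `%tc ugt 1024` branch in
// @trip_count_max_1024 below could bound the max trip count.
static const SCEV *guardedBackedgeTakenCount(ScalarEvolution &SE,
                                             const Loop *L) {
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  return SE.applyLoopGuards(BTC, L);
}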
Lines changed: 241 additions & 0 deletions
@@ -0,0 +1,241 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize \
; RUN:   -force-tail-folding-style=data-with-evl \
; RUN:   -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN:   -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s

; TODO: We know the IV will never overflow here so we can skip the overflow
; check

define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-LABEL: define void @trip_count_max_1024(
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TC]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[UMAX]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]]
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT: [[DONE:%.*]] = icmp uge i64 [[I_NEXT]], [[TC]]
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  %guard = icmp ugt i64 %tc, 1024
  br i1 %guard, label %exit, label %loop
loop:
  %i = phi i64 [%i.next, %loop], [0, %entry]
  %gep = getelementptr i64, ptr %p, i64 %i
  %x = load i64, ptr %gep
  %y = add i64 %x, 1
  store i64 %y, ptr %gep
  %i.next = add i64 %i, 1
  %done = icmp uge i64 %i.next, %tc
  br i1 %done, label %exit, label %loop
exit:
  ret void
}

; If %tc = 0 the IV will overflow, so we need to emit an overflow check
; FIXME: The check still allows %tc = 0

define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-LABEL: define void @overflow_at_0(
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[TC]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]]
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC]]
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  %guard = icmp ugt i64 %tc, 1024
  br i1 %guard, label %exit, label %loop
loop:
  %i = phi i64 [%i.next, %loop], [0, %entry]
  %gep = getelementptr i64, ptr %p, i64 %i
  %x = load i64, ptr %gep
  %y = add i64 %x, 1
  store i64 %y, ptr %gep
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %tc
  br i1 %done, label %exit, label %loop
exit:
  ret void
}

; %tc won't = 0 so the IV won't overflow

define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-LABEL: define void @no_overflow_at_0(
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TC_ADD:%.*]] = add i64 [[TC]], 1
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC_ADD]], [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC_ADD]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]])
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]]
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC_ADD]]
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  %tc.add = add nuw i64 %tc, 1
  %guard = icmp ugt i64 %tc, 1024
  br i1 %guard, label %exit, label %loop
loop:
  %i = phi i64 [%i.next, %loop], [0, %entry]
  %gep = getelementptr i64, ptr %p, i64 %i
  %x = load i64, ptr %gep
  %y = add i64 %x, 1
  store i64 %y, ptr %gep
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %tc.add
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.
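
A rough sketch of why @no_overflow_at_0 folds its minimum-iterations check to `br i1 false` (assuming Step = VF * UF = vscale x 2 with vscale <= 1024 from the vscale_range attribute): the `%tc ugt 1024` guard gives %tc <= 1024 on the loop path, so %tc.add <= 1025 and

  LHS = (2^64 - 1) - tc.add >= 2^64 - 1026 > 2048 >= Step

making `icmp ult LHS, Step` known false, so no runtime overflow check is emitted.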
