Skip to content

Commit f34dedb

Browse files
authored
[LoopPeel] Support min/max intrinsics in loop peeling (#93162)
This patch adds handling of min/max intrinsics to LoopPeel, in a similar way to how conditional statements are already handled: for min/max(IterVal, BoundVal) we peel iterations where IterVal < BoundVal for monotonically increasing IterVal; for monotonically decreasing IterVal we peel iterations where IterVal > BoundVal (strict comparison predicates are used to minimize the number of peeled iterations).
1 parent 46b3145 commit f34dedb

File tree

2 files changed

+192
-53
lines changed

2 files changed

+192
-53
lines changed

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,21 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
351351
MaxPeelCount =
352352
std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
353353

354+
// Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
355+
// return true if the inverse condition becomes known before reaching the
356+
// MaxPeelCount limit.
357+
auto PeelWhilePredicateIsKnown =
358+
[&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
359+
const SCEV *Step, ICmpInst::Predicate Pred) {
360+
while (PeelCount < MaxPeelCount &&
361+
SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
362+
IterVal = SE.getAddExpr(IterVal, Step);
363+
++PeelCount;
364+
}
365+
return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
366+
BoundSCEV);
367+
};
368+
354369
const unsigned MaxDepth = 4;
355370
std::function<void(Value *, unsigned)> ComputePeelCount =
356371
[&](Value *Condition, unsigned Depth) -> void {
@@ -411,48 +426,73 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
411426
Pred = ICmpInst::getInversePredicate(Pred);
412427

413428
const SCEV *Step = LeftAR->getStepRecurrence(SE);
414-
const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
415-
auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
416-
&NewPeelCount]() {
417-
IterVal = NextIterVal;
418-
NextIterVal = SE.getAddExpr(IterVal, Step);
419-
NewPeelCount++;
420-
};
421-
422-
auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
423-
return NewPeelCount < MaxPeelCount;
424-
};
425-
426-
while (CanPeelOneMoreIteration() &&
427-
SE.isKnownPredicate(Pred, IterVal, RightSCEV))
428-
PeelOneMoreIteration();
429-
430-
// With *that* peel count, does the predicate !Pred become known in the
431-
// first iteration of the loop body after peeling?
432-
if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
433-
RightSCEV))
434-
return; // If not, give up.
429+
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
430+
Pred))
431+
return;
435432

436433
// However, for equality comparisons, that isn't always sufficient to
437434
// eliminate the comparison in the loop body; we may need to peel one more
438435
// iteration. See if that makes !Pred become unknown again.
436+
const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
439437
if (ICmpInst::isEquality(Pred) &&
440438
!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
441439
RightSCEV) &&
442440
!SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
443441
SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
444-
if (!CanPeelOneMoreIteration())
442+
if (NewPeelCount >= MaxPeelCount)
445443
return; // Need to peel one more iteration, but can't. Give up.
446-
PeelOneMoreIteration(); // Great!
444+
++NewPeelCount; // Great!
447445
}
448446

449447
DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
450448
};
451449

450+
auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
451+
if (!MinMax->getType()->isIntegerTy())
452+
return;
453+
Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
454+
const SCEV *BoundSCEV, *IterSCEV;
455+
if (L.isLoopInvariant(LHS)) {
456+
BoundSCEV = SE.getSCEV(LHS);
457+
IterSCEV = SE.getSCEV(RHS);
458+
} else if (L.isLoopInvariant(RHS)) {
459+
BoundSCEV = SE.getSCEV(RHS);
460+
IterSCEV = SE.getSCEV(LHS);
461+
} else
462+
return;
463+
const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
464+
// For simplicity, we support only affine recurrences.
465+
if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
466+
return;
467+
const SCEV *Step = AddRec->getStepRecurrence(SE);
468+
bool IsSigned = MinMax->isSigned();
469+
// To minimize the number of peeled iterations, we use strict relational
470+
// predicates here.
471+
ICmpInst::Predicate Pred;
472+
if (SE.isKnownPositive(Step))
473+
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
474+
else if (SE.isKnownNegative(Step))
475+
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
476+
else
477+
return;
478+
// Check that AddRec is not wrapping.
479+
if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
480+
return;
481+
unsigned NewPeelCount = DesiredPeelCount;
482+
const SCEV *IterVal = AddRec->evaluateAtIteration(
483+
SE.getConstant(AddRec->getType(), NewPeelCount), SE);
484+
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
485+
Pred))
486+
return;
487+
DesiredPeelCount = NewPeelCount;
488+
};
489+
452490
for (BasicBlock *BB : L.blocks()) {
453491
for (Instruction &I : *BB) {
454492
if (SelectInst *SI = dyn_cast<SelectInst>(&I))
455493
ComputePeelCount(SI->getCondition(), 0);
494+
if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(&I))
495+
ComputePeelCountMinMax(MinMax);
456496
}
457497

458498
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());

llvm/test/Transforms/LoopUnroll/peel-loop-min-max-intrinsics.ll

Lines changed: 129 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,37 @@ define void @test_umin(i32 %N) {
1111
; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[N]], 0
1212
; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
1313
; CHECK: for.body.preheader:
14+
; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
15+
; CHECK: for.body.peel.begin:
16+
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
17+
; CHECK: for.body.peel:
18+
; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.umin.i32(i32 0, i32 2)
19+
; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL]])
20+
; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw i32 0, 1
21+
; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[N]]
22+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]]
23+
; CHECK: for.body.peel.next:
24+
; CHECK-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
25+
; CHECK: for.body.peel2:
26+
; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.umin.i32(i32 [[INC_PEEL]], i32 2)
27+
; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL3]])
28+
; CHECK-NEXT: [[INC_PEEL4:%.*]] = add nuw i32 [[INC_PEEL]], 1
29+
; CHECK-NEXT: [[EXITCOND_NOT_PEEL5:%.*]] = icmp eq i32 [[INC_PEEL4]], [[N]]
30+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL5]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY_PEEL_NEXT1:%.*]]
31+
; CHECK: for.body.peel.next1:
32+
; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
33+
; CHECK: for.body.peel.next6:
34+
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
35+
; CHECK: for.body.preheader.peel.newph:
1436
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1537
; CHECK: for.body:
16-
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
17-
; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.umin.i32(i32 [[I_06]], i32 2)
18-
; CHECK-NEXT: tail call void @foo(i32 [[COND]])
38+
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[INC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
39+
; CHECK-NEXT: tail call void @foo(i32 2)
1940
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_06]], 1
2041
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
21-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
42+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
43+
; CHECK: for.cond.cleanup.loopexit.loopexit:
44+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
2245
; CHECK: for.cond.cleanup.loopexit:
2346
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
2447
; CHECK: for.cond.cleanup:
@@ -47,14 +70,37 @@ define void @test_umax(i32 %N) {
4770
; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[N]], 0
4871
; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
4972
; CHECK: for.body.preheader:
73+
; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
74+
; CHECK: for.body.peel.begin:
75+
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
76+
; CHECK: for.body.peel:
77+
; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.umax.i32(i32 0, i32 2)
78+
; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL]])
79+
; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw i32 0, 1
80+
; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[N]]
81+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]]
82+
; CHECK: for.body.peel.next:
83+
; CHECK-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
84+
; CHECK: for.body.peel2:
85+
; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.umax.i32(i32 [[INC_PEEL]], i32 2)
86+
; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL3]])
87+
; CHECK-NEXT: [[INC_PEEL4:%.*]] = add nuw i32 [[INC_PEEL]], 1
88+
; CHECK-NEXT: [[EXITCOND_NOT_PEEL5:%.*]] = icmp eq i32 [[INC_PEEL4]], [[N]]
89+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL5]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY_PEEL_NEXT1:%.*]]
90+
; CHECK: for.body.peel.next1:
91+
; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
92+
; CHECK: for.body.peel.next6:
93+
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
94+
; CHECK: for.body.preheader.peel.newph:
5095
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5196
; CHECK: for.body:
52-
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
53-
; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.umax.i32(i32 [[I_06]], i32 2)
54-
; CHECK-NEXT: tail call void @foo(i32 [[COND]])
97+
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[INC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
98+
; CHECK-NEXT: tail call void @foo(i32 [[I_06]])
5599
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_06]], 1
56100
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
57-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
101+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
102+
; CHECK: for.cond.cleanup.loopexit.loopexit:
103+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
58104
; CHECK: for.cond.cleanup.loopexit:
59105
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
60106
; CHECK: for.cond.cleanup:
@@ -83,14 +129,37 @@ define void @test_smax(i32 %N) {
83129
; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[N]], 0
84130
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
85131
; CHECK: for.body.preheader:
132+
; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
133+
; CHECK: for.body.peel.begin:
134+
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
135+
; CHECK: for.body.peel:
136+
; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.smax.i32(i32 0, i32 -2)
137+
; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL]])
138+
; CHECK-NEXT: [[DEC_PEEL:%.*]] = add nsw i32 0, -1
139+
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp sgt i32 [[DEC_PEEL]], [[N]]
140+
; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
141+
; CHECK: for.body.peel.next:
142+
; CHECK-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
143+
; CHECK: for.body.peel2:
144+
; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DEC_PEEL]], i32 -2)
145+
; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL3]])
146+
; CHECK-NEXT: [[DEC_PEEL4:%.*]] = add nsw i32 [[DEC_PEEL]], -1
147+
; CHECK-NEXT: [[CMP_PEEL5:%.*]] = icmp sgt i32 [[DEC_PEEL4]], [[N]]
148+
; CHECK-NEXT: br i1 [[CMP_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
149+
; CHECK: for.body.peel.next1:
150+
; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
151+
; CHECK: for.body.peel.next6:
152+
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
153+
; CHECK: for.body.preheader.peel.newph:
86154
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
87155
; CHECK: for.body:
88-
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
89-
; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.smax.i32(i32 [[I_06]], i32 -2)
90-
; CHECK-NEXT: tail call void @foo(i32 [[COND]])
156+
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ [[DEC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
157+
; CHECK-NEXT: tail call void @foo(i32 -2)
91158
; CHECK-NEXT: [[DEC]] = add nsw i32 [[I_06]], -1
92159
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[N]]
93-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
160+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
161+
; CHECK: for.cond.cleanup.loopexit.loopexit:
162+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
94163
; CHECK: for.cond.cleanup.loopexit:
95164
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
96165
; CHECK: for.cond.cleanup:
@@ -119,14 +188,37 @@ define void @test_smin(i32 %N) {
119188
; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[N]], 0
120189
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
121190
; CHECK: for.body.preheader:
191+
; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
192+
; CHECK: for.body.peel.begin:
193+
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
194+
; CHECK: for.body.peel:
195+
; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.smin.i32(i32 0, i32 -2)
196+
; CHECK-NEXT: tail call void @foo(i32 noundef signext [[COND_PEEL]])
197+
; CHECK-NEXT: [[DEC_PEEL:%.*]] = add nsw i32 0, -1
198+
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp sgt i32 [[DEC_PEEL]], [[N]]
199+
; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
200+
; CHECK: for.body.peel.next:
201+
; CHECK-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
202+
; CHECK: for.body.peel2:
203+
; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.smin.i32(i32 [[DEC_PEEL]], i32 -2)
204+
; CHECK-NEXT: tail call void @foo(i32 noundef signext [[COND_PEEL3]])
205+
; CHECK-NEXT: [[DEC_PEEL4:%.*]] = add nsw i32 [[DEC_PEEL]], -1
206+
; CHECK-NEXT: [[CMP_PEEL5:%.*]] = icmp sgt i32 [[DEC_PEEL4]], [[N]]
207+
; CHECK-NEXT: br i1 [[CMP_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
208+
; CHECK: for.body.peel.next1:
209+
; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
210+
; CHECK: for.body.peel.next6:
211+
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
212+
; CHECK: for.body.preheader.peel.newph:
122213
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
123214
; CHECK: for.body:
124-
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
125-
; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.smin.i32(i32 [[I_06]], i32 -2)
126-
; CHECK-NEXT: tail call void @foo(i32 noundef signext [[COND]])
215+
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ [[DEC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
216+
; CHECK-NEXT: tail call void @foo(i32 noundef signext [[I_06]])
127217
; CHECK-NEXT: [[DEC]] = add nsw i32 [[I_06]], -1
128218
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[N]]
129-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
219+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
220+
; CHECK: for.cond.cleanup.loopexit.loopexit:
221+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
130222
; CHECK: for.cond.cleanup.loopexit:
131223
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
132224
; CHECK: for.cond.cleanup:
@@ -191,14 +283,14 @@ define void @test_max_count_threshold(i32 %N) {
191283
; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[N]], 0
192284
; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
193285
; CHECK: for.body.preheader:
194-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
286+
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
195287
; CHECK: for.body:
196-
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
197-
; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.umin.i32(i32 [[I_06]], i32 5)
198-
; CHECK-NEXT: tail call void @foo(i32 [[COND]])
199-
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_06]], 1
200-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
201-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
288+
; CHECK-NEXT: [[I_6:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_BODY1]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
289+
; CHECK-NEXT: [[COND1:%.*]] = tail call i32 @llvm.umin.i32(i32 [[I_6]], i32 5)
290+
; CHECK-NEXT: tail call void @foo(i32 [[COND1]])
291+
; CHECK-NEXT: [[INC1]] = add nuw i32 [[I_6]], 1
292+
; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[INC1]], [[N]]
293+
; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY1]]
202294
; CHECK: for.cond.cleanup.loopexit:
203295
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
204296
; CHECK: for.cond.cleanup:
@@ -227,14 +319,14 @@ define void @test_wrap(i8 %N) {
227319
; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i8 [[N]], 0
228320
; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
229321
; CHECK: for.body.preheader:
230-
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
322+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
231323
; CHECK: for.body:
232-
; CHECK-NEXT: [[I_06:%.*]] = phi i8 [ [[INC1:%.*]], [[FOR_BODY1]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
233-
; CHECK-NEXT: [[COND1:%.*]] = tail call i8 @llvm.umin.i8(i8 [[I_06]], i8 -2)
234-
; CHECK-NEXT: tail call void @bar(i8 [[COND1]])
235-
; CHECK-NEXT: [[INC1]] = add i8 [[I_06]], 127
236-
; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i8 [[INC1]], [[N]]
237-
; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY1]]
324+
; CHECK-NEXT: [[I_06:%.*]] = phi i8 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
325+
; CHECK-NEXT: [[COND:%.*]] = tail call i8 @llvm.umin.i8(i8 [[I_06]], i8 -2)
326+
; CHECK-NEXT: tail call void @bar(i8 [[COND]])
327+
; CHECK-NEXT: [[INC]] = add i8 [[I_06]], 127
328+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i8 [[INC]], [[N]]
329+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
238330
; CHECK: for.cond.cleanup.loopexit:
239331
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
240332
; CHECK: for.cond.cleanup:
@@ -255,3 +347,10 @@ for.body:
255347
for.cond.cleanup:
256348
ret void
257349
}
350+
;.
351+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
352+
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 2}
353+
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
354+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
355+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
356+
;.

0 commit comments

Comments
 (0)