Skip to content

Commit c995d68

Browse files
committed
[SCEV] Apply loop guards to End in computeMaxBECountForLT
This is a follow-on from llvm#115705. Applying the loop guards to End allows us to calculate the maximum trip count in more places, which in turn allows isIndvarOverflowCheckKnownFalse to skip the overflow check.
1 parent ae119a4 commit c995d68

File tree

3 files changed

+14
-18
lines changed

3 files changed

+14
-18
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,8 +2239,8 @@ class ScalarEvolution {
22392239
/// actually doesn't, or we'd have to immediately execute UB)
22402240
/// We *don't* assert these preconditions so please be careful.
22412241
const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
2242-
const SCEV *End, unsigned BitWidth,
2243-
bool IsSigned);
2242+
const SCEV *End, const Loop *L,
2243+
unsigned BitWidth, bool IsSigned);
22442244

22452245
/// Verify if a linear IV with positive stride can overflow when in a
22462246
/// less-than comparison, knowing the invariant term of the comparison,

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12862,11 +12862,10 @@ const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) {
1286212862
return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D));
1286312863
}
1286412864

12865-
const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
12866-
const SCEV *Stride,
12867-
const SCEV *End,
12868-
unsigned BitWidth,
12869-
bool IsSigned) {
12865+
const SCEV *
12866+
ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
12867+
const SCEV *End, const Loop *L,
12868+
unsigned BitWidth, bool IsSigned) {
1287012869
// The logic in this function assumes we can represent a positive stride.
1287112870
// If we can't, the backedge-taken count must be zero.
1287212871
if (IsSigned && BitWidth == 1)
@@ -12900,8 +12899,10 @@ const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
1290012899
// the case End = RHS of the loop termination condition. This is safe because
1290112900
// in the other case (End - Start) is zero, leading to a zero maximum backedge
1290212901
// taken count.
12903-
APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
12904-
: APIntOps::umin(getUnsignedRangeMax(End), Limit);
12902+
const SCEV *GuardedEnd = applyLoopGuards(End, L);
12903+
APInt MaxEnd = IsSigned
12904+
? APIntOps::smin(getSignedRangeMax(GuardedEnd), Limit)
12905+
: APIntOps::umin(getUnsignedRangeMax(GuardedEnd), Limit);
1290512906

1290612907
// MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride)
1290712908
MaxEnd = IsSigned ? APIntOps::smax(MaxEnd, MinStart)
@@ -13155,7 +13156,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1315513156
// loop (RHS), and the fact that IV does not overflow (which is
1315613157
// checked above).
1315713158
const SCEV *MaxBECount = computeMaxBECountForLT(
13158-
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
13159+
Start, Stride, RHS, L, getTypeSizeInBits(LHS->getType()), IsSigned);
1315913160
return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
1316013161
MaxBECount, false /*MaxOrZero*/, Predicates);
1316113162
}
@@ -13339,7 +13340,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1333913340
MaxOrZero = true;
1334013341
} else {
1334113342
ConstantMaxBECount = computeMaxBECountForLT(
13342-
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
13343+
Start, Stride, RHS, L, getTypeSizeInBits(LHS->getType()), IsSigned);
1334313344
}
1334413345

1334513346
if (isa<SCEVCouldNotCompute>(ConstantMaxBECount) &&

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
55
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s
66

7-
; TODO: We know the IV will never overflow here so we can skip the overflow
8-
; check
7+
; We know the IV will never overflow here so we can skip the overflow check
98

109
define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
1110
; CHECK-LABEL: define void @trip_count_max_1024(
@@ -15,11 +14,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
1514
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
1615
; CHECK: [[LOOP_PREHEADER]]:
1716
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TC]], i64 1)
18-
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
19-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
20-
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
21-
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
22-
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
17+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
2318
; CHECK: [[VECTOR_PH]]:
2419
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2520
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2

0 commit comments

Comments
 (0)