Skip to content

Commit 19fe791

Browse files
committed
[LAA] Be more careful when evaluating AddRecs at symbolic max BTC.
Evaluating AR at the symbolic max BTC may wrap and create an expression that is less than the start of the AddRec due to wrapping (for example consider MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd will get incremented by EltSize before returning, so this effectively sets ScEnd to unsigned max. Note that LAA separately checks that accesses cannot not wrap, so unsigned max represents an upper bound.
1 parent b10ddfa commit 19fe791

File tree

5 files changed

+133
-18
lines changed

5 files changed

+133
-18
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -871,8 +871,8 @@ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
871871
/// There is no conflict when the intervals are disjoint:
872872
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
873873
std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
874-
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
875-
ScalarEvolution *SE,
874+
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
875+
const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
876876
DenseMap<std::pair<const SCEV *, Type *>,
877877
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
878878

llvm/lib/Analysis/Loads.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,11 +319,14 @@ bool llvm::isDereferenceableAndAlignedInLoop(
319319
const SCEV *MaxBECount =
320320
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
321321
: SE.getConstantMaxBackedgeTakenCount(L);
322+
const SCEV *SymbolicMaxBECount =
323+
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
324+
: SE.getConstantMaxBackedgeTakenCount(L);
322325
if (isa<SCEVCouldNotCompute>(MaxBECount))
323326
return false;
324327

325328
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
326-
L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
329+
L, PtrScev, LI->getType(), MaxBECount, SymbolicMaxBECount, &SE, nullptr);
327330
if (isa<SCEVCouldNotCompute>(AccessStart) ||
328331
isa<SCEVCouldNotCompute>(AccessEnd))
329332
return false;

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
189189
}
190190

191191
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
192-
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
193-
ScalarEvolution *SE,
192+
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
193+
const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
194194
DenseMap<std::pair<const SCEV *, Type *>,
195195
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
196196
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +206,31 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
206206
const SCEV *ScStart;
207207
const SCEV *ScEnd;
208208

209+
auto &DL = Lp->getHeader()->getDataLayout();
210+
Type *IdxTy = DL.getIndexType(PtrExpr->getType());
211+
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
209212
if (SE->isLoopInvariant(PtrExpr, Lp)) {
210213
ScStart = ScEnd = PtrExpr;
211214
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
212215
ScStart = AR->getStart();
213-
ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
216+
if (!isa<SCEVCouldNotCompute>(BTC))
217+
// Evaluating AR at an exact BTC is safe: LAA separately checks that
218+
// accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
219+
// the loop either triggers UB when executing a memory access with a
220+
// poison pointer or the wrapping/poisoned pointer is not used.
221+
ScEnd = AR->evaluateAtIteration(BTC, *SE);
222+
else {
223+
// Evaluating AR at MaxBTC may wrap and create an expression that is less
224+
// than the start of the AddRec due to wrapping (for example consider
225+
// MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
226+
// will get incremented by EltSize before returning, so this effectively
227+
// sets ScEnd to unsigned max. Note that LAA separately checks that
228+
// accesses cannot not wrap, so unsigned max represents an upper bound.
229+
ScEnd = AR->evaluateAtIteration(SymbolicMaxBTC, *SE);
230+
if (!SE->isKnownNonNegative(SE->getMinusSCEV(ScEnd, ScStart)))
231+
ScEnd = SE->getNegativeSCEV(
232+
SE->getAddExpr(EltSizeSCEV, SE->getOne(EltSizeSCEV->getType())));
233+
}
214234
const SCEV *Step = AR->getStepRecurrence(*SE);
215235

216236
// For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +252,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
232252
assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
233253

234254
// Add the size of the pointed element to ScEnd.
235-
auto &DL = Lp->getHeader()->getDataLayout();
236-
Type *IdxTy = DL.getIndexType(PtrExpr->getType());
237-
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
238255
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
239256

240257
std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +267,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
250267
unsigned DepSetId, unsigned ASId,
251268
PredicatedScalarEvolution &PSE,
252269
bool NeedsFreeze) {
253-
const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
270+
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
271+
const SCEV *BTC = PSE.getBackedgeTakenCount();
254272
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
255-
Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
273+
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
274+
&DC.getPointerBounds());
256275
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
257276
!isa<SCEVCouldNotCompute>(ScEnd) &&
258277
"must be able to compute both start and end expressions");
@@ -1933,11 +1952,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19331952
// required for correctness.
19341953
if (SE.isLoopInvariant(Src, InnermostLoop) ||
19351954
SE.isLoopInvariant(Sink, InnermostLoop)) {
1936-
const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
1955+
const SCEV *BTC = PSE.getBackedgeTakenCount();
1956+
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
19371957
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
1938-
InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
1958+
InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, PSE.getSE(),
1959+
&PointerBounds);
19391960
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
1940-
InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
1961+
InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, PSE.getSE(),
1962+
&PointerBounds);
19411963
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
19421964
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
19431965
!isa<SCEVCouldNotCompute>(SinkStart_) &&
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
5+
6+
; Note: The datalayout for the test specifies a 32 bit index type.
7+
8+
; No UB: accessing last valid byte, pointer after the object
9+
; doesnt wrap (%p + 2147483647).
10+
define void @pointer_after_object_does_not_wrap(i32 %y, ptr %s, ptr %p) {
11+
; CHECK-LABEL: 'pointer_after_object_does_not_wrap'
12+
; CHECK-NEXT: loop:
13+
; CHECK-NEXT: Memory dependences are safe with run-time checks
14+
; CHECK-NEXT: Dependences:
15+
; CHECK-NEXT: Run-time memory checks:
16+
; CHECK-NEXT: Check 0:
17+
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
18+
; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
19+
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
20+
; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
21+
; CHECK-NEXT: Grouped accesses:
22+
; CHECK-NEXT: Group [[GRP1]]:
23+
; CHECK-NEXT: (Low: (%y + %p) High: (2147483647 + %p))
24+
; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
25+
; CHECK-NEXT: Group [[GRP2]]:
26+
; CHECK-NEXT: (Low: (%y + %s) High: (2147483647 + %s))
27+
; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
28+
; CHECK-EMPTY:
29+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
30+
; CHECK-NEXT: SCEV assumptions:
31+
; CHECK-EMPTY:
32+
; CHECK-NEXT: Expressions re-written:
33+
;
34+
entry:
35+
br label %loop
36+
37+
loop:
38+
%iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
39+
%gep1.iv = getelementptr inbounds i8 , ptr %s, i32 %iv
40+
%load = load i8, ptr %gep1.iv, align 4
41+
%gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
42+
store i8 %load, ptr %gep2.iv, align 4
43+
%iv.next = add nsw i32 %iv, 1
44+
%c.2 = icmp slt i32 %iv.next, 2147483647
45+
br i1 %c.2, label %loop, label %exit
46+
47+
exit:
48+
ret void
49+
}
50+
51+
; UB: accessing %p + 2147483646 and p + 2147483647.
52+
; Pointer the past the object would wrap in signed.
53+
define void @pointer_after_object_would_wrap(i32 %y, ptr %s, ptr %p) {
54+
; CHECK-LABEL: 'pointer_after_object_would_wrap'
55+
; CHECK-NEXT: loop:
56+
; CHECK-NEXT: Memory dependences are safe with run-time checks
57+
; CHECK-NEXT: Dependences:
58+
; CHECK-NEXT: Run-time memory checks:
59+
; CHECK-NEXT: Check 0:
60+
; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
61+
; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
62+
; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
63+
; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
64+
; CHECK-NEXT: Grouped accesses:
65+
; CHECK-NEXT: Group [[GRP3]]:
66+
; CHECK-NEXT: (Low: (%y + %p) High: (-2147483648 + %p))
67+
; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
68+
; CHECK-NEXT: Group [[GRP4]]:
69+
; CHECK-NEXT: (Low: (%y + %s) High: (-2147483648 + %s))
70+
; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
71+
; CHECK-EMPTY:
72+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
73+
; CHECK-NEXT: SCEV assumptions:
74+
; CHECK-EMPTY:
75+
; CHECK-NEXT: Expressions re-written:
76+
;
77+
entry:
78+
br label %loop
79+
80+
loop:
81+
%iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
82+
%gep1.iv = getelementptr inbounds i8 , ptr %s, i32 %iv
83+
%load = load i16, ptr %gep1.iv, align 4
84+
%gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
85+
store i16 %load, ptr %gep2.iv, align 4
86+
%iv.next = add nsw i32 %iv, 1
87+
%c.2 = icmp slt i32 %iv.next, 2147483647
88+
br i1 %c.2, label %loop, label %exit
89+
90+
exit:
91+
ret void
92+
}

llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
55

6-
; FIXME: Start == End for access group with AddRec.
76
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) {
87
; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1'
98
; CHECK-NEXT: loop:
@@ -17,7 +16,7 @@ define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x,
1716
; CHECK-NEXT: ptr %S
1817
; CHECK-NEXT: Grouped accesses:
1918
; CHECK-NEXT: Group [[GRP1]]:
20-
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: ((4 * %y) + %P))
19+
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
2120
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
2221
; CHECK-NEXT: Group [[GRP2]]:
2322
; CHECK-NEXT: (Low: %S High: (4 + %S))
@@ -44,7 +43,6 @@ exit:
4443
ret void
4544
}
4645

47-
; FIXME: Start > End for access group with AddRec.
4846
define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) {
4947
; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2'
5048
; CHECK-NEXT: loop:
@@ -58,7 +56,7 @@ define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i
5856
; CHECK-NEXT: ptr %S
5957
; CHECK-NEXT: Grouped accesses:
6058
; CHECK-NEXT: Group [[GRP3]]:
61-
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: (-4 + (4 * %y) + %P))
59+
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
6260
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
6361
; CHECK-NEXT: Group [[GRP4]]:
6462
; CHECK-NEXT: (Low: %S High: (4 + %S))

0 commit comments

Comments
 (0)