Skip to content

Commit 6683b55

Browse files
committed
[DA] disambiguate evolution of base addresses
This patch fixes two bugs: #41488 and #53942. The dependence analysis assumes that the base address of array accesses is invariant across loop iterations. In both bugs the base address changes from one loop iteration to the next: it flip-flops between two different memory objects. Based on the scalar evolution of the base addresses, the patch adds code to distinguish the three alias cases {must, no, may}-alias, in which the base addresses are, respectively, identical at every iteration, never the same, or unknown.
1 parent d492001 commit 6683b55

File tree

2 files changed

+194
-21
lines changed

2 files changed

+194
-21
lines changed

llvm/lib/Analysis/DependenceAnalysis.cpp

Lines changed: 58 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -712,10 +712,60 @@ void Dependence::dump(raw_ostream &OS) const {
712712
// tbaa, non-overlapping regions etc), then it is known there is no dependency.
713713
// Otherwise the underlying objects are checked to see if they point to
714714
// different identifiable objects.
715-
static AliasResult underlyingObjectsAlias(AAResults *AA,
716-
const DataLayout &DL,
717-
const MemoryLocation &LocA,
718-
const MemoryLocation &LocB) {
715+
static AliasResult underlyingObjectsAlias(AAResults *AA, LoopInfo *LI,
716+
ScalarEvolution *SE, Instruction *A,
717+
Instruction *B) {
718+
const MemoryLocation &LocA = MemoryLocation::get(A);
719+
const MemoryLocation &LocB = MemoryLocation::get(B);
720+
721+
// Check the underlying objects are the same
722+
const Value *AObj = getUnderlyingObject(LocA.Ptr);
723+
const Value *BObj = getUnderlyingObject(LocB.Ptr);
724+
725+
// If the underlying objects are the same, they must alias.
726+
if (AObj == BObj)
727+
return AliasResult::MustAlias;
728+
729+
if (auto *APhi = dyn_cast<PHINode>(AObj)) {
730+
if (auto *BPhi = dyn_cast<PHINode>(BObj)) {
731+
Loop *ALoop = LI->getLoopFor(APhi->getParent());
732+
Loop *BLoop = LI->getLoopFor(BPhi->getParent());
733+
if (ALoop == BLoop) {
734+
auto *SCEVa = SE->getSCEV(const_cast<Value *>(AObj));
735+
auto *SCEVb = SE->getSCEV(const_cast<Value *>(BObj));
736+
737+
// If the SCEVs are the same, they must alias.
738+
if (SCEVa == SCEVb)
739+
return AliasResult::MustAlias;
740+
741+
// If SCEV cannot analyze one of the values, then they may alias.
742+
if (isa<SCEVUnknown>(SCEVa) || isa<SCEVUnknown>(SCEVb))
743+
return AliasResult::MayAlias;
744+
745+
// Check whether the start values alias.
746+
const SCEV *StartA = SCEVa;
747+
while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(StartA))
748+
StartA = AR->getStart();
749+
750+
const SCEV *StartB = SCEVb;
751+
while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(StartB))
752+
StartB = AR->getStart();
753+
754+
if (const SCEVUnknown *UA = dyn_cast<SCEVUnknown>(StartA)) {
755+
if (const SCEVUnknown *UB = dyn_cast<SCEVUnknown>(StartB)) {
756+
MemoryLocation LocAS =
757+
MemoryLocation::getBeforeOrAfter(UA->getValue());
758+
MemoryLocation LocBS =
759+
MemoryLocation::getBeforeOrAfter(UB->getValue());
760+
if (AA->isNoAlias(LocAS, LocBS))
761+
return AliasResult::NoAlias;
762+
}
763+
}
764+
return AliasResult::MayAlias;
765+
}
766+
}
767+
}
768+
719769
// Check the original locations (minus size) for noalias, which can happen for
720770
// tbaa, incompatible underlying object locations, etc.
721771
MemoryLocation LocAS =
@@ -725,14 +775,6 @@ static AliasResult underlyingObjectsAlias(AAResults *AA,
725775
if (AA->isNoAlias(LocAS, LocBS))
726776
return AliasResult::NoAlias;
727777

728-
// Check the underlying objects are the same
729-
const Value *AObj = getUnderlyingObject(LocA.Ptr);
730-
const Value *BObj = getUnderlyingObject(LocB.Ptr);
731-
732-
// If the underlying objects are the same, they must alias
733-
if (AObj == BObj)
734-
return AliasResult::MustAlias;
735-
736778
// We may have hit the recursion limit for underlying objects, or have
737779
// underlying objects where we don't know they will alias.
738780
if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
@@ -743,7 +785,6 @@ static AliasResult underlyingObjectsAlias(AAResults *AA,
743785
return AliasResult::NoAlias;
744786
}
745787

746-
747788
// Returns true if the load or store can be analyzed. Atomic and volatile
748789
// operations have properties which this analysis does not understand.
749790
static
@@ -3606,9 +3647,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
36063647
Value *SrcPtr = getLoadStorePointerOperand(Src);
36073648
Value *DstPtr = getLoadStorePointerOperand(Dst);
36083649

3609-
switch (underlyingObjectsAlias(AA, F->getDataLayout(),
3610-
MemoryLocation::get(Dst),
3611-
MemoryLocation::get(Src))) {
3650+
switch (underlyingObjectsAlias(AA, LI, SE, Dst, Src)) {
36123651
case AliasResult::MayAlias:
36133652
case AliasResult::PartialAlias:
36143653
// cannot analyse objects if we don't understand their aliasing.
@@ -4030,11 +4069,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
40304069
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
40314070
assert(isLoadOrStore(Src));
40324071
assert(isLoadOrStore(Dst));
4033-
Value *SrcPtr = getLoadStorePointerOperand(Src);
4034-
Value *DstPtr = getLoadStorePointerOperand(Dst);
4035-
assert(underlyingObjectsAlias(
4036-
AA, F->getDataLayout(), MemoryLocation::get(Dst),
4037-
MemoryLocation::get(Src)) == AliasResult::MustAlias);
4072+
assert(underlyingObjectsAlias(AA, LI, SE, Dst, Src) == AliasResult::MustAlias);
40384073

40394074
// establish loop nesting levels
40404075
establishNestingLevels(Src, Dst);
@@ -4043,6 +4078,8 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
40434078

40444079
unsigned Pairs = 1;
40454080
SmallVector<Subscript, 2> Pair(Pairs);
4081+
Value *SrcPtr = getLoadStorePointerOperand(Src);
4082+
Value *DstPtr = getLoadStorePointerOperand(Dst);
40464083
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
40474084
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
40484085
Pair[0].Src = SrcSCEV;
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
2+
; RUN: | FileCheck %s
3+
4+
; Check that dependence analysis correctly handles flip-flop of base addresses.
5+
; Bug 41488 - https://github.com/llvm/llvm-project/issues/41488
6+
7+
; CHECK-LABEL: bug41488_test1
8+
; CHECK-NOT: da analyze - none!
9+
10+
define float @bug41488_test1() {
11+
entry:
12+
%g = alloca float, align 4
13+
%h = alloca float, align 4
14+
br label %for.body
15+
16+
for.body:
17+
%p = phi float* [ %g, %entry ], [ %q, %for.body ]
18+
%q = phi float* [ %h, %entry ], [ %p, %for.body ]
19+
%0 = load float, float* %p, align 4
20+
store float undef, float* %q, align 4
21+
%branch_cond = fcmp ugt float %0, 0.0
22+
br i1 %branch_cond, label %for.cond.cleanup, label %for.body
23+
24+
for.cond.cleanup:
25+
ret float undef
26+
}
27+
28+
; CHECK-LABEL: bug41488_test2
29+
; CHECK-NOT: da analyze - none!
30+
31+
define void @bug41488_test2(i32 %n) {
32+
entry:
33+
%g = alloca float, align 4
34+
%h = alloca float, align 4
35+
br label %for.body
36+
37+
for.body:
38+
%i = phi i32 [0, %entry ], [ %inc, %for.body ]
39+
%p = phi float* [ %g, %entry ], [ %q, %for.body ]
40+
%q = phi float* [ %h, %entry ], [ %p, %for.body ]
41+
%0 = load float, float* %p, align 4
42+
store float 0.0, float* %q, align 4
43+
%inc = add nuw i32 %i, 1
44+
%branch_cond = icmp ult i32 %i, %n
45+
br i1 %branch_cond, label %for.body, label %for.cond.cleanup
46+
47+
for.cond.cleanup:
48+
ret void
49+
}
50+
51+
; Bug 53942 - https://github.com/llvm/llvm-project/issues/53942
52+
; CHECK-LABEL: bug53942_foo
53+
; CHECK-NOT: da analyze - none!
54+
55+
define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonly %A, ptr noalias nocapture noundef %B) {
56+
entry:
57+
%cmp8 = icmp sgt i32 %n, 1
58+
br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
59+
60+
for.body.preheader: ; preds = %entry
61+
%wide.trip.count = zext nneg i32 %n to i64
62+
br label %for.body
63+
64+
for.cond.cleanup: ; preds = %for.body, %entry
65+
ret void
66+
67+
for.body: ; preds = %for.body.preheader, %for.body
68+
%indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
69+
%ptr1.011 = phi ptr [ %A, %for.body.preheader ], [ %ptr2.09, %for.body ]
70+
%ptr2.09 = phi ptr [ %B, %for.body.preheader ], [ %ptr1.011, %for.body ]
71+
%.pre = load double, ptr %B, align 8
72+
%arrayidx2 = getelementptr inbounds double, ptr %ptr1.011, i64 %indvars.iv
73+
store double %.pre, ptr %arrayidx2, align 8
74+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
75+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
76+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
77+
}
78+
79+
80+
; Bug 53942 - https://github.com/llvm/llvm-project/issues/53942
81+
; CHECK-LABEL: bug53942_bar
82+
; CHECK-NOT: da analyze - none!
83+
84+
define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) {
85+
entry:
86+
br label %for.cond
87+
88+
for.cond: ; preds = %for.inc, %entry
89+
%i.0 = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
90+
%cmp = icmp slt i32 %i.0, %n
91+
br i1 %cmp, label %for.body, label %for.cond.cleanup
92+
93+
for.cond.cleanup: ; preds = %for.cond
94+
br label %for.end
95+
96+
for.body: ; preds = %for.cond
97+
%and = and i32 %i.0, 2
98+
%tobool.not = icmp eq i32 %and, 0
99+
br i1 %tobool.not, label %cond.false, label %cond.true
100+
101+
cond.true: ; preds = %for.body
102+
br label %cond.end
103+
104+
cond.false: ; preds = %for.body
105+
br label %cond.end
106+
107+
cond.end: ; preds = %cond.false, %cond.true
108+
%cond = phi ptr [ %A, %cond.true ], [ %B, %cond.false ]
109+
%and1 = and i32 %i.0, 2
110+
%tobool2.not = icmp eq i32 %and1, 0
111+
br i1 %tobool2.not, label %cond.false4, label %cond.true3
112+
113+
cond.true3: ; preds = %cond.end
114+
br label %cond.end5
115+
116+
cond.false4: ; preds = %cond.end
117+
br label %cond.end5
118+
119+
cond.end5: ; preds = %cond.false4, %cond.true3
120+
%cond6 = phi ptr [ %B, %cond.true3 ], [ %A, %cond.false4 ]
121+
%sub = add nsw i32 %i.0, -1
122+
%idxprom = sext i32 %sub to i64
123+
%arrayidx = getelementptr inbounds double, ptr %cond6, i64 %idxprom
124+
%0 = load double, ptr %arrayidx, align 8
125+
%idxprom7 = zext nneg i32 %i.0 to i64
126+
%arrayidx8 = getelementptr inbounds double, ptr %cond, i64 %idxprom7
127+
store double %0, ptr %arrayidx8, align 8
128+
br label %for.inc
129+
130+
for.inc: ; preds = %cond.end5
131+
%inc = add nuw nsw i32 %i.0, 1
132+
br label %for.cond
133+
134+
for.end: ; preds = %for.cond.cleanup
135+
ret void
136+
}

0 commit comments

Comments
 (0)