Skip to content

Commit e355a59

Browse files
nikic authored and cuviper committed
[LAA] Drop alias scope metadata that is not valid across iterations (llvm#79161)
LAA currently adds memory locations with their original AATags to AST. However, scoped alias AATags may be valid only within one loop iteration, while LAA reasons across iterations. Fix this by determining which alias scopes are defined inside the loop, and drop AATags that reference these scopes. Fixes llvm#79137. (cherry picked from commit cd7ea4e)
1 parent 816d53b commit e355a59

File tree

3 files changed

+63
-82
lines changed

3 files changed

+63
-82
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

+46-5
Original file line number | Diff line number | Diff line change
@@ -625,14 +625,17 @@ class AccessAnalysis {
625625

626626
AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
627627
MemoryDepChecker::DepCandidates &DA,
628-
PredicatedScalarEvolution &PSE)
629-
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
628+
PredicatedScalarEvolution &PSE,
629+
SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
630+
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
631+
LoopAliasScopes(LoopAliasScopes) {
630632
// We're analyzing dependences across loop iterations.
631633
BAA.enableCrossIterationMode();
632634
}
633635

634636
/// Register a load and whether it is only read from.
635-
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
637+
void addLoad(MemoryLocation Loc, Type *AccessTy, bool IsReadOnly) {
638+
Loc = adjustLoc(Loc);
636639
Value *Ptr = const_cast<Value*>(Loc.Ptr);
637640
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
638641
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
@@ -641,7 +644,8 @@ class AccessAnalysis {
641644
}
642645

643646
/// Register a store.
644-
void addStore(MemoryLocation &Loc, Type *AccessTy) {
647+
void addStore(MemoryLocation Loc, Type *AccessTy) {
648+
Loc = adjustLoc(Loc);
645649
Value *Ptr = const_cast<Value*>(Loc.Ptr);
646650
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
647651
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
@@ -694,6 +698,32 @@ class AccessAnalysis {
694698
private:
695699
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
696700

701+
/// Adjust the MemoryLocation so that it represents accesses to this
702+
/// location across all iterations, rather than a single one.
703+
MemoryLocation adjustLoc(MemoryLocation Loc) const {
704+
// The accessed location varies within the loop, but remains within the
705+
// underlying object.
706+
Loc.Size = LocationSize::beforeOrAfterPointer();
707+
Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
708+
Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
709+
return Loc;
710+
}
711+
712+
/// Drop alias scopes that are only valid within a single loop iteration.
713+
MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
714+
if (!ScopeList)
715+
return nullptr;
716+
717+
// For the sake of simplicity, drop the whole scope list if any scope is
718+
// iteration-local.
719+
if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
720+
return LoopAliasScopes.contains(cast<MDNode>(Scope));
721+
}))
722+
return nullptr;
723+
724+
return ScopeList;
725+
}
726+
697727
/// Go over all memory access and check whether runtime pointer checks
698728
/// are needed and build sets of dependency check candidates.
699729
void processMemAccesses();
@@ -736,6 +766,10 @@ class AccessAnalysis {
736766

737767
/// The SCEV predicate containing all the SCEV-related assumptions.
738768
PredicatedScalarEvolution &PSE;
769+
770+
/// Alias scopes that are declared inside the loop, and as such not valid
771+
/// across iterations.
772+
SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
739773
};
740774

741775
} // end anonymous namespace
@@ -2149,6 +2183,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
21492183
// Holds the Load and Store instructions.
21502184
SmallVector<LoadInst *, 16> Loads;
21512185
SmallVector<StoreInst *, 16> Stores;
2186+
SmallPtrSet<MDNode *, 8> LoopAliasScopes;
21522187

21532188
// Holds all the different accesses in the loop.
21542189
unsigned NumReads = 0;
@@ -2192,6 +2227,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
21922227
if (HasComplexMemInst)
21932228
continue;
21942229

2230+
// Record alias scopes defined inside the loop.
2231+
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2232+
for (Metadata *Op : Decl->getScopeList()->operands())
2233+
LoopAliasScopes.insert(cast<MDNode>(Op));
2234+
21952235
// If this is a load, save it. If this instruction can read from memory
21962236
// but is not a load, then we quit. Notice that we don't handle function
21972237
// calls that read or write.
@@ -2273,7 +2313,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
22732313
}
22742314

22752315
MemoryDepChecker::DepCandidates DependentAccesses;
2276-
AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
2316+
AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
2317+
LoopAliasScopes);
22772318

22782319
// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
22792320
// multiple times on the same object. If the ptr is accessed twice, once

llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll

+10-1
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,17 @@
77
define void @test_scope_in_loop(ptr %arg, i64 %num) {
88
; CHECK-LABEL: 'test_scope_in_loop'
99
; CHECK-NEXT: loop:
10-
; CHECK-NEXT: Memory dependences are safe
10+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
11+
; CHECK-NEXT: Backward loop carried data dependence.
1112
; CHECK-NEXT: Dependences:
13+
; CHECK-NEXT: Backward:
14+
; CHECK-NEXT: %load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3 ->
15+
; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
16+
; CHECK-EMPTY:
17+
; CHECK-NEXT: Forward:
18+
; CHECK-NEXT: %load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3 ->
19+
; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
20+
; CHECK-EMPTY:
1221
; CHECK-NEXT: Run-time memory checks:
1322
; CHECK-NEXT: Grouped accesses:
1423
; CHECK-EMPTY:

llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll

+7-76
Original file line number | Diff line number | Diff line change
@@ -14,89 +14,25 @@ entry:
1414
}
1515

1616
; This loop should not get vectorized.
17-
; FIXME: This is a miscompile.
1817
define void @accsum(ptr noundef %vals, i64 noundef %num) #0 {
1918
; CHECK-LABEL: define void @accsum(
2019
; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
2120
; CHECK-NEXT: entry:
2221
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1
23-
; CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
24-
; CHECK: iter.check:
25-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[NUM]], -1
26-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM]], 9
27-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
28-
; CHECK: vector.main.loop.iter.check:
29-
; CHECK-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[NUM]], 33
30-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
31-
; CHECK: vector.ph:
32-
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], -32
33-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
34-
; CHECK: vector.body:
35-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
36-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[INDEX]], 1
37-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[OFFSET_IDX]]
38-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -1
39-
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
40-
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
41-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 15
42-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1, !alias.scope [[META3]], !noalias [[META0]]
43-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1, !alias.scope [[META3]], !noalias [[META0]]
44-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
45-
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1, !alias.scope [[META0]], !noalias [[META3]]
46-
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1, !alias.scope [[META0]], !noalias [[META3]]
47-
; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD5]], [[WIDE_LOAD]]
48-
; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD6]], [[WIDE_LOAD4]]
49-
; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP1]], align 1, !alias.scope [[META0]], !noalias [[META3]]
50-
; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP4]], align 1, !alias.scope [[META0]], !noalias [[META3]]
51-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
52-
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
53-
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
54-
; CHECK: middle.block:
55-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
56-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
57-
; CHECK: vec.epilog.iter.check:
58-
; CHECK-NEXT: [[IND_END9:%.*]] = or disjoint i64 [[N_VEC]], 1
59-
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 24
60-
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
61-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY_PREHEADER]], label [[VEC_EPILOG_PH]]
62-
; CHECK: vec.epilog.ph:
63-
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
64-
; CHECK-NEXT: [[N_VEC8:%.*]] = and i64 [[TMP0]], -8
65-
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[N_VEC8]], 1
66-
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
67-
; CHECK: vec.epilog.vector.body:
68-
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
69-
; CHECK-NEXT: [[OFFSET_IDX12:%.*]] = or disjoint i64 [[INDEX11]], 1
70-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[OFFSET_IDX12]]
71-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1
72-
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
73-
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META3]])
74-
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i8>, ptr [[TMP9]], align 1, !alias.scope [[META3]], !noalias [[META0]]
75-
; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i8>, ptr [[TMP8]], align 1, !alias.scope [[META0]], !noalias [[META3]]
76-
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i8> [[WIDE_LOAD14]], [[WIDE_LOAD13]]
77-
; CHECK-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP8]], align 1, !alias.scope [[META0]], !noalias [[META3]]
78-
; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX11]], 8
79-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC8]]
80-
; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
81-
; CHECK: vec.epilog.middle.block:
82-
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC8]]
83-
; CHECK-NEXT: br i1 [[CMP_N10]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
22+
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
8423
; CHECK: for.body.preheader:
85-
; CHECK-NEXT: [[I_02_PH:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
24+
; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load i8, ptr [[VALS]], align 1
8625
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
8726
; CHECK: for.body:
88-
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[I_02_PH]], [[FOR_BODY_PREHEADER]] ]
27+
; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ]
28+
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
8929
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]]
90-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[ARRAYIDX]], i64 -1
91-
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
92-
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META3]])
93-
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !alias.scope [[META3]], !noalias [[META0]]
94-
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]]
95-
; CHECK-NEXT: [[ADD_I:%.*]] = add i8 [[TMP13]], [[TMP12]]
30+
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
31+
; CHECK-NEXT: [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]]
9632
; CHECK-NEXT: store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]]
9733
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_02]], 1
9834
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]]
99-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
35+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]]
10036
; CHECK: for.end:
10137
; CHECK-NEXT: ret void
10238
;
@@ -139,9 +75,4 @@ attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"}
13975
; CHECK: [[META2]] = distinct !{[[META2]], !"acc"}
14076
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
14177
; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"}
142-
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
143-
; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
144-
; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
145-
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]}
146-
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]}
14778
;.

0 commit comments

Comments
 (0)