diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 428342f51ad2e..5188830b831a1 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts, DstSubscripts)) return false; + assert(isLoopInvariant(SrcBase, SrcLoop) && + isLoopInvariant(DstBase, DstLoop) && + "Expected SrcBase and DstBase to be loop invariant"); + int Size = SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SCEVUnionPredicate(Assume, *SE)); } + // Even if the base pointers are the same, they may not be loop-invariant. It + // could lead to incorrect results, as we're analyzing loop-carried + // dependencies. Src and Dst can be in different loops, so we need to check + // the base pointer is invariant in both loops. + Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + if (!isLoopInvariant(SrcBase, SrcLoop) || + !isLoopInvariant(DstBase, DstLoop)) { + LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n"); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + } + uint64_t EltSize = SrcLoc.Size.toRaw(); const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll index d3301520fd107..e0def901d1759 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -113,7 +113,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; ; NORMALIZE-LABEL: 'banerjee1' ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 @@ -127,7 +127,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; NORMALIZE-NEXT: da analyze - output [* *]! +; NORMALIZE-NEXT: da analyze - confused! ; ; DELIN-LABEL: 'banerjee1' ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 @@ -141,7 +141,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; DELIN-NEXT: da analyze - output [* *]! +; DELIN-NEXT: da analyze - confused! ; entry: %cmp4 = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll index 3e3426afab0f7..bf2268b746a6f 100644 --- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll +++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll @@ -8,11 +8,11 @@ define float @bug41488_test1(float %f) { ; CHECK-LABEL: 'bug41488_test1' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: %g = alloca float, align 4 @@ -34,11 +34,11 @@ for.cond.cleanup: define void @bug41488_test2(i32 %n) { ; CHECK-LABEL: 'bug41488_test2' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.000000e+00, ptr %q, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store float 0.000000e+00, ptr %q, align 4 --> Dst: store float 0.000000e+00, ptr %q, align 4 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: %g = alloca float, align 4 @@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl ; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp8 = icmp sgt i32 %n, 1 @@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader, define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) { ; CHECK-LABEL: 'bug53942_bar' ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: br label %for.cond @@ -166,14 +166,14 @@ for.end: ; preds = %for.cond.cleanup ; (j % 2 == 0 ? A[i][j] : A[i][j+1]) = 1; ; } ; -; FIXME: There are loop-carried dependencies between the store instruction. For +; There are loop-carried dependencies between the store instruction. For ; example, the value of %ptr0 when (i, j) = (0, 1) is %A+8, which is the same ; as when (i, j) = (0, 2). define void @non_invariant_baseptr_with_identical_obj(ptr %A) { ; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj' ; CHECK-NEXT: Src: store i32 1, ptr %idx, align 4 --> Dst: store i32 1, ptr %idx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - confused! ; entry: br label %loop.i.header @@ -216,13 +216,13 @@ exit: ; Similar to the above case, but ptr0 is loop-invariant with respsect to the ; k-loop. ; -; FIXME: Same as the above case, there are loop-carried dependencies between -; the store. +; Same as the above case, there are loop-carried dependencies between the +; store. define void @non_invariant_baseptr_with_identical_obj2(ptr %A) { ; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj2' ; CHECK-NEXT: Src: store i32 1, ptr %idx, align 4 --> Dst: store i32 1, ptr %idx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - confused! ; entry: br label %loop.i.header @@ -259,3 +259,58 @@ loop.i.latch: exit: ret void } + +; Pseudo-code that is approximately semantically equivalent to the below IR: +; +; void f(int A[][32]) { +; for (int i = 0; i < 100; i++) +; for (int j = 0; j < 15; j++) { +; int offset = (j % 2 == 0) ? 1 : 0; +; A[i][2 * j + offset + 0] = 1; +; A[i][2 * j + offset + 1] = 1; +; } +; } +; +; There are loop-carried dependencies between the two stores. 
For example, +; A[0][2] is accessed from both the former one when (i, j) = (0, 1) and the +; latter one when (i, j) = (0, 0). +; +define void @non_invariant_baseptr_with_identical_obj3(ptr %A) { +; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj3' +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - confused! +; +entry: + br label %loop.i.header + +loop.i.header: + %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + %A1 = getelementptr i32, ptr %A, i32 1 + br label %loop.j + +loop.j: + %j = phi i32 [ 0, %loop.i.header ], [ %j.inc, %loop.j ] + %ptr0 = phi ptr [ %A1, %loop.i.header ], [ %ptr1, %loop.j ] + %ptr1 = phi ptr [ %A, %loop.i.header ], [ %ptr0, %loop.j ] + %j2_0 = shl i32 %j, 1 + %j2_1 = add i32 %j2_0, 1 + %idx0 = getelementptr [32 x i32], ptr %ptr0, i32 %i, i32 %j2_0 + %idx1 = getelementptr [32 x i32], ptr %ptr0, i32 %i, i32 %j2_1 + store i32 1, ptr %idx0 + store i32 1, ptr %idx1 + %j.inc = add i32 %j, 1 + %cmp.j = icmp slt i32 %j.inc, 15 + br i1 %cmp.j, label %loop.j, label %loop.i.latch + +loop.i.latch: + %i.inc = add i32 %i, 1 + %cmp.i = icmp slt i32 %i.inc, 100 + br i1 %cmp.i, label %loop.i.header, label %exit + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll index c0e1362c82b50..03343e7a98211 100644 --- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -398,7 +398,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store i32 %2, ptr %B.addr.12, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp4 = icmp sgt i64 %n, 0 @@ -475,7 +475,7 @@ define void @gcd7(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %0 = zext i32 %n to i64 @@ -566,7 +566,7 @@ define void @gcd8(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %5 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %5, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %5, ptr %B.addr.12, align 4 --> Dst: store i32 %5, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp4 = icmp sgt i32 %n, 0 @@ -650,7 +650,7 @@ define void @gcd9(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! 
; entry: %0 = zext i32 %n to i64 diff --git a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll index d983bd49d49d6..3e110acbefb20 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll @@ -12,7 +12,7 @@ define void @f(ptr %a, i32 %n, i1 %arg) align 2 { ; CHECK-NEXT: Src: %t.2 = load ptr, ptr %a, align 4 --> Dst: %t.4 = load i32, ptr %t.3, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %t.4 = load i32, ptr %t.3, align 4 --> Dst: %t.4 = load i32, ptr %t.3, align 4 -; CHECK-NEXT: da analyze - input [* *]! +; CHECK-NEXT: da analyze - confused! ; for.preheader: %t.0 = ashr exact i32 %n, 3 diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll index 4ab87771417e5..8cb0e2ac770dc 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -69,7 +69,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 -; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: da analyze - confused! 
; entry: %cmp10 = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll index 404018707c0a5..e67cae7d39a75 100644 --- a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll +++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll @@ -28,7 +28,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 -; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: da analyze - confused! ; ; LIN-LABEL: 'p2' ; LIN-NEXT: Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %i.011, ptr %arrayidx8, align 8 @@ -42,7 +42,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; LIN-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 ; LIN-NEXT: da analyze - confused! ; LIN-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 -; LIN-NEXT: da analyze - output [* * *]! +; LIN-NEXT: da analyze - confused! ; entry: %cmp10 = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll index f64a7483d7b66..8b9aa257a7c57 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll @@ -437,7 +437,7 @@ define void @symbolicrdiv6(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ss ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.12, align 4 --> Dst: store i32 %0, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp4 = icmp eq i64 %n1, 0