Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/lib/Analysis/DependenceAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3642,6 +3642,16 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,

Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);

if (GetElementPtrInst *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr)) {
if (GetElementPtrInst *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr)) {
if (SrcGEP->getSourceElementType() != DstGEP->getSourceElementType()) {
LLVM_DEBUG(dbgs() << "can't analyze GEPs with different types\n");
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
}
}
}
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
Expand Down
11 changes: 3 additions & 8 deletions llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,9 @@ define i32 @gep_i8_vs_i32(ptr nocapture %A, i64 %n, i64 %m) {
; CHECK-NEXT: Src: store i32 42, ptr %arrayidx0, align 1 --> Dst: store i32 42, ptr %arrayidx0, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 42, ptr %arrayidx0, align 1 --> Dst: store i32 42, ptr %arrayidx1, align 4
; CHECK-NEXT: da analyze - output [|<]!
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 42, ptr %arrayidx1, align 4 --> Dst: store i32 42, ptr %arrayidx1, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
;
entry:
%arrayidx0 = getelementptr inbounds i8, ptr %A, i64 %n
Expand All @@ -98,7 +94,7 @@ define void @linearized_accesses(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4
; CHECK-NEXT: da analyze - output [* * *]!
; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; CHECK-NEXT: da analyze - output [* * *|<]!
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; CHECK-NEXT: da analyze - none!
;
Expand Down Expand Up @@ -149,8 +145,7 @@ define void @multidim_accesses(ptr %A) {
; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; FIXME: the dependence distance is not constant. Distance vector should be [* * *|<]!
; CHECK-NEXT: da analyze - consistent output [0 0 0|<]!
; CHECK-NEXT: da analyze - confused!
Comment on lines -152 to +148
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just an example of DA missing some dependencies that should exist, due to the delinearization reasoning based on the GEP source element type. There are variants of this example that is not resolved by your patch, e.g., like the following:

define void @multidim_accesses(ptr %A) {
entry:
  br label %for.i

for.i:
  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
  br label %for.j

for.j:
  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
  br label %for.k

for.k:
  %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
  %k.inc = add nsw i64 %k, 1
  %idx0 = getelementptr inbounds [256 x [256 x [256 x i32]]], ptr %A, i64 %i, i64 %j, i64 %k
  store i64 1, ptr %idx0
  %idx1 = getelementptr inbounds [256 x [256 x [256 x i32]]], ptr %A, i64 %i, i64 %j, i64 %k.inc
  store i64 1, ptr %idx1
  br label %for.k.inc

for.k.inc:
  %k.exitcond = icmp eq i64 %k.inc, 255
  br i1 %k.exitcond, label %for.j.inc, label %for.k

for.j.inc:
  %j.inc = add nsw i64 %j, 1
  %j.exitcond = icmp eq i64 %j.inc, 256
  br i1 %j.exitcond, label %for.i.inc, label %for.j

for.i.inc:
  %i.inc = add nsw i64 %i, 1
  %i.exitcond = icmp eq i64 %i.inc, 256
  br i1 %i.exitcond, label %end, label %for.i

end:
  ret void
}

A portion of the output is as follows:

Src:  store i64 1, ptr %idx0, align 4 --> Dst:  store i64 1, ptr %idx1, align 4
  da analyze - output [= = >]!

However, the latter half of the first store overlaps with the first half of the second store (within the same iteration). Therefore, an = dependency exists for the k-loop, but DA currently does not recognize it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wanted to double check the following: but doesn't this mean that DA is more conservative for this example? So that is okay in the sense it won't trigger problems?

Copy link
Contributor

@kasuga-fj kasuga-fj Jul 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is as follows (some of the notations may not be entirely accurate):

The two addresses (idx0 and idx1) are calculated as below:

  • idx0 = %A + 262114 * i0 + 1024 * j0 + 4 * k0
  • idx1 = %A + 262114 * i1 + 1024 * j1 + 4 * (k1 + 1)

The memory address range affected by the first store is from idx0 to idx0 + 7, and the second store affects the range from idx1 to idx1 + 7. DA calculates the conditions between [i0, j0, k0] and [i1, j1, k1] under which these two ranges overlap. In this case, they overlap when either of the following conditions1 holds:

  • [i0, j0, k0] = [i1, j1, k1]
  • [i0, j0, k0] = [i1, j1, k1 + 1]
  • i0 = i1 and j0 = j1 + 1 and k0 = 0 and k1 = 254

Therefore, at least, k has * dependence, and j has = and > dependencies.

Footnotes

  1. This is a necessary condition, but not a sufficient condition.

Copy link
Contributor

@kasuga-fj kasuga-fj Jul 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned in #145050 (comment), both the test in DifferentOffsets.ll and the IR in the above comment are incorrect. Sorry for the confusion. For the former one, I created a PR #147479 to fix it. For the latter case, the correct IR should be as follows:

define void @multidim_accesses(ptr %A) {
entry:
  br label %for.i

for.i:
  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
  br label %for.j

for.j:
  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
  br label %for.k

for.k:
  %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
  %k.inc = add nsw i64 %k, 1
  %idx0 = getelementptr inbounds [256 x [256 x [256 x i32]]], ptr %A, i64 0, i64 %i, i64 %j, i64 %k
  store i64 1, ptr %idx0
  %idx1 = getelementptr inbounds [256 x [256 x [256 x i32]]], ptr %A, i64 0, i64 %i, i64 %j, i64 %k.inc
  store i64 1, ptr %idx1
  br label %for.k.inc

for.k.inc:
  %k.exitcond = icmp eq i64 %k.inc, 255
  br i1 %k.exitcond, label %for.j.inc, label %for.k

for.j.inc:
  %j.inc = add nsw i64 %j, 1
  %j.exitcond = icmp eq i64 %j.inc, 256
  br i1 %j.exitcond, label %for.i.inc, label %for.j

for.i.inc:
  %i.inc = add nsw i64 %i, 1
  %i.exitcond = icmp eq i64 %i.inc, 256
  br i1 %i.exitcond, label %end, label %for.i

end:
  ret void
}

I tried the revised version, and found that DA correctly handles the two stores for idx0 and idx1.

Src:  store i64 1, ptr %idx0, align 4 --> Dst:  store i64 1, ptr %idx1, align 4
  da analyze - confused!

However, DA still fails to handle loop-carried dependency on the same instruction, which is not resolved by the change in this PR.

Src:  store i64 1, ptr %idx0, align 4 --> Dst:  store i64 1, ptr %idx0, align 4
  da analyze - none!

; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; CHECK-NEXT: da analyze - none!
;
Expand Down
Loading