Skip to content

LAA: check nusw on GEP in place of inbounds #112223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1412,9 +1412,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
// Look through the potentially overflowing instruction to try to prove
// non-wrapping for the *specific* value of Ptr.

// The arithmetic implied by an inbounds GEP can't overflow.
// The arithmetic implied by an nusw GEP can't overflow.
const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP || !GEP->isInBounds())
if (!GEP || !GEP->hasNoUnsignedSignedWrap())
return false;

// Make sure there is only one non-const index and analyze that.
Expand Down Expand Up @@ -1516,12 +1516,12 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
return Stride;

// An inbounds getelementptr that is an AddRec with a unit stride
// An nusw getelementptr that is an AddRec with a unit stride
// cannot wrap by definition. If it did, the result would be poison
// and any memory access dependent on it would be immediate UB
// when executed.
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
GEP && GEP->hasNoUnsignedSignedWrap() && (Stride == 1 || Stride == -1))
return Stride;

// If the null pointer is undefined, then an access sequence which would
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,51 @@ exit: ; preds = %loop
ret void
}

; Forwarding in the presence of a symbolic stride, with nusw instead of
; inbounds on the GEPs. The loop loads A[iv * stride] and B[iv], and stores
; their sum to A[iv + 1]. The expected output below shows that, under the
; predicate %stride == 1, the address of the load from %A is rewritten to
; {%A,+,4}<%loop> and a backward loop-carried dependence between that load
; and the store is reported.
define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_nusw'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT: --> {%A,+,4}<%loop>
;
entry:
br label %loop

loop:
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Index into %A scaled by the loop-invariant, symbolic %stride.
%mul = mul i64 %iv, %stride
%gep.A = getelementptr nusw i32, ptr %A, i64 %mul
%load = load i32, ptr %gep.A, align 4
; %B is indexed directly by the induction variable.
%gep.B = getelementptr nusw i32, ptr %B, i64 %iv
%load_1 = load i32, ptr %gep.B, align 4
%add = add i32 %load_1, %load
%iv.next = add nuw nsw i64 %iv, 1
; The sum is stored to %A at index %iv + 1.
%gep.A.next = getelementptr nusw i32, ptr %A, i64 %iv.next
store i32 %add, ptr %gep.A.next, align 4
; Leave the loop once %iv.next equals %N.
%exitcond = icmp eq i64 %iv.next, %N
br i1 %exitcond, label %exit, label %loop

exit: ; preds = %loop
ret void
}

; Similar to @single_stride, but with struct types.
define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_struct'
Expand Down
Loading