Skip to content

[indvars] Prove non-negative for widening IVs in count down loops #71214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,7 @@ class WidenIV {
bool widenLoopCompare(NarrowIVDefUse DU);
bool widenWithVariantUse(NarrowIVDefUse DU);

bool isKnownNonNegative(const SCEV *S);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);

private:
Expand Down Expand Up @@ -1885,12 +1886,45 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
return WideUse;
}

// A special version of isKnownNonNegative which additionally tries
// to prove that an addrec with a negative step would be non-negative
// because the start is non-negative, and the increment would have
// "nuw" if using a sub-instruction.
// TODO: All of this should be sunk into SCEV once we figure out how to
// reasonable do so without exploding compile time.
bool WidenIV::isKnownNonNegative(const SCEV *S) {
const SCEV *Zero = SE->getZero(S->getType());
if (SE->isKnownPredicate(ICmpInst::ICMP_SGE, S, Zero))
return true;
auto *AR = dyn_cast<SCEVAddRecExpr>(S);
if (!AR || !AR->isAffine())
return false;

const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*SE);
const SCEV *PostInc = AR->getPostIncExpr(*SE);
// For a negative step, we can prove the result non-negative if the addrec
// only traverses values in the range zext([0,UINT_MAX]).
// TODO: Consider extending this for unknown steps? Would need to
// handle the positive step bound
if (!SE->isKnownNegative(Step))
return false;
if (!SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, Start, Zero))
return false;

// Check for the unsigned form of these comparisons. The signed form
// should have been handled recursively in the query above.
uint32_t BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
const SCEV *N = SE->getConstant(APInt::getMaxValue(BitWidth) -
SE->getSignedRangeMin(Step));
return SE->isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
SE->isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, PostInc, N);
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
bool NonNegativeDef =
SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
SE->getZero(NarrowSCEV->getType()));
assert(L->contains(NarrowDef));
const bool NonNegativeDef = isKnownNonNegative(SE->getSCEV(NarrowDef));
for (User *U : NarrowDef->users()) {
Instruction *NarrowUser = cast<Instruction>(U);

Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -507,15 +507,15 @@ declare void @consume.i1(i1)
define i32 @test10(i32 %v) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[V:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[V:%.*]] to i64
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], [[SEXT]]
; CHECK-NEXT: call void @consume.i1(i1 [[TMP1]])
; CHECK-NEXT: call void @consume.i64(i64 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT: call void @consume.i1(i1 [[CMP]])
; CHECK-NEXT: call void @consume.i64(i64 [[TMP1]])
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 11
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE:%.*]]
; CHECK: leave:
Expand Down
47 changes: 21 additions & 26 deletions llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,10 @@ define void @zext_postinc(ptr %A, i32 %start) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -124,8 +122,8 @@ define void @zext_preinc(ptr %A, i32 %start) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -223,8 +221,7 @@ define void @sext_postinc(ptr %A, i32 %start) {
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP1]], 6
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -262,8 +259,7 @@ define void @sext_preinc(ptr %A, i32 %start) {
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP1]], 6
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -365,15 +361,13 @@ define void @zext_postinc_offset_constant_one(ptr %A, i32 %start) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -414,8 +408,8 @@ define void @zext_preinc_offset_constant_one(ptr %A, i32 %start) {
; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -521,8 +515,7 @@ define void @sext_postinc_offset_constant_one(ptr %A, i32 %start) {
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -564,8 +557,7 @@ define void @sext_preinc_offset_constant_one(ptr %A, i32 %start) {
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -662,15 +654,18 @@ define void @zext_postinc_offset_constant_minus_one(ptr %A, i32 %start) {
; CHECK-NEXT: [[NONPOS:%.*]] = icmp slt i32 [[START:%.*]], 2
; CHECK-NEXT: br i1 [[NONPOS]], label [[EXIT:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1
; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -711,9 +706,9 @@ define void @zext_preinc_offset_constant_minus_one(ptr %A, i32 %start) {
; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -811,14 +806,14 @@ define void @sext_postinc_offset_constant_minus_one(ptr %A, i32 %start) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regression should be resolved by #70967. I didn't make it a dependent review as all this one does is shift around where we have that problem. As you can see in some of the test changes above, it sometimes removes the fallback and sometimes adds it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change above landed. I did realize we need one other change as well, to fully handle these cases: #71557

; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1
; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -854,14 +849,14 @@ define void @sext_preinc_offset_constant_minus_one(ptr %A, i32 %start) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1
; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
Expand Down