Skip to content

Commit f264da4

Browse files
authored
[lsr][term-fold] Restrict transform to low cost expansions (#74747)
This is a follow up to an item I noted in my submission comment for e947f95. I don't have a real world example where this is triggering unprofitably, but avoiding the transform when we estimate the loop to be short running from profiling seems quite reasonable. It's also now come up as a possibility in a regression twice in two days, so I'd like to get this in to close out the possibility if nothing else. The original review dropped the threshold for short trip count loops. I will return to that in a separate review if this lands.
1 parent db68e92 commit f264da4

File tree

2 files changed

+16
-17
lines changed

2 files changed

+16
-17
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6762,7 +6762,7 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
67626762

67636763
static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
67646764
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
6765-
const LoopInfo &LI) {
6765+
const LoopInfo &LI, const TargetTransformInfo &TTI) {
67666766
if (!L->isInnermost()) {
67676767
LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
67686768
return std::nullopt;
@@ -6820,6 +6820,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
68206820
PHINode *ToHelpFold = nullptr;
68216821
const SCEV *TermValueS = nullptr;
68226822
bool MustDropPoison = false;
6823+
auto InsertPt = L->getLoopPreheader()->getTerminator();
68236824
for (PHINode &PN : L->getHeader()->phis()) {
68246825
if (ToFold == &PN)
68256826
continue;
@@ -6861,6 +6862,15 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
68616862
continue;
68626863
}
68636864

6865+
if (Expander.isHighCostExpansion(TermValueSLocal, L,
6866+
2*SCEVCheapExpansionBudget,
6867+
&TTI, InsertPt)) {
6868+
LLVM_DEBUG(
6869+
dbgs() << "Is too expensive to expand terminating value for phi node"
6870+
<< PN << "\n");
6871+
continue;
6872+
}
6873+
68646874
// The candidate IV may have been otherwise dead and poison from the
68656875
// very first iteration. If we can't disprove that, we can't use the IV.
68666876
if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
@@ -6986,7 +6996,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
69866996
}();
69876997

69886998
if (EnableFormTerm) {
6989-
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) {
6999+
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
69907000
auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
69917001

69927002
Changed = true;

llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -599,26 +599,15 @@ define void @expensive_expand_unknown_tc2(ptr %a, i32 %offset, i32 %n, i32 %step
599599
; CHECK-NEXT: entry:
600600
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
601601
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
602-
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[STEP:%.*]], i32 [[N:%.*]])
603-
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[SMAX]], [[STEP]]
604-
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 1)
605-
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
606-
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[STEP]], i32 1)
607-
; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], [[UMAX]]
608-
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[UMIN]], [[TMP2]]
609-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
610-
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
611-
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
612-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
613-
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP7]], 84
614-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
615602
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
616603
; CHECK: for.body:
617604
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
605+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
618606
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
607+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], [[STEP:%.*]]
619608
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
620-
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
621-
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
609+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp sge i32 [[LSR_IV_NEXT]], [[N:%.*]]
610+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
622611
; CHECK: for.end:
623612
; CHECK-NEXT: ret void
624613
;

0 commit comments

Comments
 (0)