Skip to content

Commit 28865da

Browse files
preames and nikic authored
[LSR][term-fold] Adjust expansion budget based on trip count (#80304)
Follow-up to #74747. This change extends the previously added fixed expansion threshold by scaling down the cost allowed for an expansion for a loop with either a small known trip count or a profile which indicates the trip count is likely small. The goal here is to improve code generation for a loop nest where the outer loop has a high trip count, and the inner loop runs only a handful of iterations. --------- Co-authored-by: Nikita Popov <[email protected]>
1 parent b629414 commit 28865da

File tree

2 files changed

+25
-22
lines changed

2 files changed

+25
-22
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -6813,6 +6813,18 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
68136813
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
68146814
return std::nullopt;
68156815

6816+
// Inserting instructions in the preheader has a runtime cost, scale
6817+
// the allowed cost with the loop's trip count as best we can.
6818+
const unsigned ExpansionBudget = [&]() {
6819+
unsigned Budget = 2 * SCEVCheapExpansionBudget;
6820+
if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
6821+
return std::min(Budget, SmallTC);
6822+
if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
6823+
return std::min(Budget, *SmallTC);
6824+
// Unknown trip count, assume long running by default.
6825+
return Budget;
6826+
}();
6827+
68166828
const SCEV *BECount = SE.getBackedgeTakenCount(L);
68176829
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
68186830
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
@@ -6862,8 +6874,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
68626874
continue;
68636875
}
68646876

6865-
if (Expander.isHighCostExpansion(TermValueSLocal, L,
6866-
2*SCEVCheapExpansionBudget,
6877+
if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
68676878
&TTI, InsertPt)) {
68686879
LLVM_DEBUG(
68696880
dbgs() << "Is too expensive to expand terminating value for phi node"

llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

+12-20
Original file line numberDiff line numberDiff line change
@@ -482,20 +482,15 @@ define void @profiled_short_tc(ptr %a, i32 %offset, i32 %n) {
482482
; CHECK-NEXT: entry:
483483
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
484484
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
485-
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
486-
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
487-
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
488-
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
489-
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
490-
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 84
491-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
492485
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
493486
; CHECK: for.body:
494487
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
488+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
495489
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
490+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
496491
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
497-
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
498-
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]]
492+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], [[N:%.*]]
493+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]]
499494
; CHECK: for.end:
500495
; CHECK-NEXT: ret void
501496
;
@@ -637,16 +632,15 @@ define void @small_tc_trivial_loop(ptr %a, i32 %offset) {
637632
; CHECK-NEXT: entry:
638633
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
639634
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
640-
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
641-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 84
642-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
643635
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
644636
; CHECK: for.body:
645637
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
638+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
646639
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
640+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
647641
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
648-
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
649-
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
642+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 1
643+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
650644
; CHECK: for.end:
651645
; CHECK-NEXT: ret void
652646
;
@@ -673,17 +667,15 @@ define void @small_tc_below_threshold(ptr %a, i32 %offset) {
673667
; CHECK-NEXT: entry:
674668
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
675669
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
676-
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
677-
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
678-
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 84
679-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
680670
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
681671
; CHECK: for.body:
682672
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
673+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
683674
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
675+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
684676
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
685-
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
686-
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
677+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 2
678+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
687679
; CHECK: for.end:
688680
; CHECK-NEXT: ret void
689681
;

0 commit comments

Comments
 (0)