Skip to content

Commit f63c971

Browse files
committed
[LSR] Use simplest constant threshold for the moment
1 parent 4d57ba1 commit f63c971

File tree

2 files changed

+6
-25
lines changed

2 files changed

+6
-25
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

+2 -10
Original file line number | Diff line number | Diff line change
@@ -6813,15 +6813,6 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
68136813
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
68146814
return std::nullopt;
68156815

6816-
// Inserting instructions in the preheader has a runtime cost, scale
6817-
// the allowed cost with the loops trip count as best we can.
6818-
const unsigned ExpansionBudget = [&]() {
6819-
if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
6820-
return std::min(2*SCEVCheapExpansionBudget, *SmallTC);
6821-
// Unknown trip count, assume long running by default.
6822-
return 2*SCEVCheapExpansionBudget;
6823-
}();
6824-
68256816
const SCEV *BECount = SE.getBackedgeTakenCount(L);
68266817
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
68276818
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
@@ -6871,7 +6862,8 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
68716862
continue;
68726863
}
68736864

6874-
if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
6865+
if (Expander.isHighCostExpansion(TermValueSLocal, L,
6866+
2*SCEVCheapExpansionBudget,
68756867
&TTI, InsertPt)) {
68766868
LLVM_DEBUG(
68776869
dbgs() << "Is too expensive to expand terminating value for phi node"

llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

+4 -15
Original file line number | Diff line number | Diff line change
@@ -599,26 +599,15 @@ define void @expensive_expand_unknown_tc2(ptr %a, i32 %offset, i32 %n, i32 %step
599599
; CHECK-NEXT: entry:
600600
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
601601
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
602-
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[STEP:%.*]], i32 [[N:%.*]])
603-
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[SMAX]], [[STEP]]
604-
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 1)
605-
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
606-
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[STEP]], i32 1)
607-
; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], [[UMAX]]
608-
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[UMIN]], [[TMP2]]
609-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
610-
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
611-
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
612-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
613-
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP7]], 84
614-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
615602
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
616603
; CHECK: for.body:
617604
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
605+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
618606
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
607+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], [[STEP:%.*]]
619608
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
620-
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
621-
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
609+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp sge i32 [[LSR_IV_NEXT]], [[N:%.*]]
610+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
622611
; CHECK: for.end:
623612
; CHECK-NEXT: ret void
624613
;

0 commit comments

Comments
 (0)