Skip to content

Commit 0659000

Browse files
committed
[LICM] Don't duplicate instructions just because they're free
D37076 makes LICM duplicate instructions into exit blocks if the instruction is free. For GEPs, the motivation appears to be that this allows the GEP to be folded into addressing modes, while non-foldable users outside the loop might prevent this. TBH I don't think LICM is the place to do this (why doesn't CGP apply this heuristic itself?) but at least I understand the motivation. However, the transform is also applied to all other "free" instructions, which are just that (removed during lowering and not "folded" in some way). For such instructions, this transform seems somewhere between useless, counter-productive (undoing CSE/GVN) and actively incorrect. For example, this transform can duplicate freeze instructions, which is illegal. This patch limits the transform to just foldable GEPs, though we might want to drop it from LICM entirely as a followup. This is a small compile-time improvement, because querying TTI cost model for every single instruction is expensive. Differential Revision: https://reviews.llvm.org/D149136
1 parent 78d8d01 commit 0659000

File tree

5 files changed

+47
-48
lines changed

5 files changed

+47
-48
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,10 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
149149
"enable memory promotion."));
150150

151151
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
152-
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
153-
const LoopSafetyInfo *SafetyInfo,
154-
TargetTransformInfo *TTI, bool &FreeInLoop,
155-
bool LoopNestMode);
152+
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
153+
const LoopSafetyInfo *SafetyInfo,
154+
TargetTransformInfo *TTI,
155+
bool &FoldableInLoop, bool LoopNestMode);
156156
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
157157
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
158158
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
@@ -582,14 +582,15 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
582582
// outside of the loop. In this case, it doesn't even matter if the
583583
// operands of the instruction are loop invariant.
584584
//
585-
bool FreeInLoop = false;
585+
bool FoldableInLoop = false;
586586
bool LoopNestMode = OutermostLoop != nullptr;
587587
if (!I.mayHaveSideEffects() &&
588-
isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
589-
SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
588+
isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
589+
SafetyInfo, TTI, FoldableInLoop,
590+
LoopNestMode) &&
590591
canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
591592
if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
592-
if (!FreeInLoop) {
593+
if (!FoldableInLoop) {
593594
++II;
594595
salvageDebugInfo(I);
595596
eraseInstruction(I, *SafetyInfo, MSSAU);
@@ -1338,13 +1339,12 @@ static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
13381339
return true;
13391340
}
13401341

1341-
/// Return true if the instruction is free in the loop.
1342-
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1342+
/// Return true if the instruction is foldable in the loop.
1343+
static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
13431344
const TargetTransformInfo *TTI) {
1344-
InstructionCost CostI =
1345-
TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1346-
13471345
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1346+
InstructionCost CostI =
1347+
TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
13481348
if (CostI != TargetTransformInfo::TCC_Free)
13491349
return false;
13501350
// For a GEP, we cannot simply use getInstructionCost because currently
@@ -1361,7 +1361,7 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
13611361
return true;
13621362
}
13631363

1364-
return CostI == TargetTransformInfo::TCC_Free;
1364+
return false;
13651365
}
13661366

13671367
/// Return true if the only users of this instruction are outside of
@@ -1370,12 +1370,12 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
13701370
///
13711371
/// We also return true if the instruction could be folded away in lowering.
13721372
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1373-
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1374-
const LoopSafetyInfo *SafetyInfo,
1375-
TargetTransformInfo *TTI, bool &FreeInLoop,
1376-
bool LoopNestMode) {
1373+
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1374+
const LoopSafetyInfo *SafetyInfo,
1375+
TargetTransformInfo *TTI,
1376+
bool &FoldableInLoop, bool LoopNestMode) {
13771377
const auto &BlockColors = SafetyInfo->getBlockColors();
1378-
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1378+
bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
13791379
for (const User *U : I.users()) {
13801380
const Instruction *UI = cast<Instruction>(U);
13811381
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
@@ -1402,8 +1402,8 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
14021402
}
14031403

14041404
if (CurLoop->contains(UI)) {
1405-
if (IsFree) {
1406-
FreeInLoop = true;
1405+
if (IsFoldable) {
1406+
FoldableInLoop = true;
14071407
continue;
14081408
}
14091409
return false;

llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,17 +126,17 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
126126
; CHECK: # %bb.0:
127127
; CHECK-NEXT: sync
128128
; CHECK-NEXT: li 6, 0
129-
; CHECK-NEXT: lwz 7, 0(3)
129+
; CHECK-NEXT: lwz 5, 0(3)
130130
; CHECK-NEXT: b .LBB2_2
131131
; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
132132
; CHECK-NEXT: #
133133
; CHECK-NEXT: cmplw 5, 7
134-
; CHECK-NEXT: mr 7, 5
135134
; CHECK-NEXT: beq 0, .LBB2_7
136135
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
137136
; CHECK-NEXT: # =>This Loop Header: Depth=1
138137
; CHECK-NEXT: # Child Loop BB2_5 Depth 2
139-
; CHECK-NEXT: addi 5, 7, 1
138+
; CHECK-NEXT: mr 7, 5
139+
; CHECK-NEXT: addi 5, 5, 1
140140
; CHECK-NEXT: cmplw 7, 4
141141
; CHECK-NEXT: bc 12, 0, .LBB2_4
142142
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
@@ -169,18 +169,18 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
169169
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
170170
; CHECK: # %bb.0:
171171
; CHECK-NEXT: sync
172-
; CHECK-NEXT: ld 7, 0(3)
172+
; CHECK-NEXT: ld 5, 0(3)
173173
; CHECK-NEXT: li 6, 0
174174
; CHECK-NEXT: b .LBB3_2
175175
; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
176176
; CHECK-NEXT: #
177177
; CHECK-NEXT: cmpld 5, 7
178-
; CHECK-NEXT: mr 7, 5
179178
; CHECK-NEXT: beq 0, .LBB3_7
180179
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
181180
; CHECK-NEXT: # =>This Loop Header: Depth=1
182181
; CHECK-NEXT: # Child Loop BB3_5 Depth 2
183-
; CHECK-NEXT: addi 5, 7, 1
182+
; CHECK-NEXT: mr 7, 5
183+
; CHECK-NEXT: addi 5, 5, 1
184184
; CHECK-NEXT: cmpld 7, 4
185185
; CHECK-NEXT: bc 12, 0, .LBB3_4
186186
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
@@ -334,19 +334,19 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
334334
; CHECK-LABEL: atomicrmw_udec_wrap_i32:
335335
; CHECK: # %bb.0:
336336
; CHECK-NEXT: sync
337-
; CHECK-NEXT: lwz 6, 0(3)
337+
; CHECK-NEXT: lwz 5, 0(3)
338338
; CHECK-NEXT: b .LBB6_2
339339
; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
340340
; CHECK-NEXT: #
341341
; CHECK-NEXT: cmplw 5, 6
342-
; CHECK-NEXT: mr 6, 5
343342
; CHECK-NEXT: beq 0, .LBB6_7
344343
; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
345344
; CHECK-NEXT: # =>This Loop Header: Depth=1
346345
; CHECK-NEXT: # Child Loop BB6_5 Depth 2
346+
; CHECK-NEXT: mr 6, 5
347347
; CHECK-NEXT: cmpwi 6, 0
348348
; CHECK-NEXT: cmplw 1, 6, 4
349-
; CHECK-NEXT: addi 5, 6, -1
349+
; CHECK-NEXT: addi 5, 5, -1
350350
; CHECK-NEXT: cror 20, 2, 5
351351
; CHECK-NEXT: bc 12, 20, .LBB6_4
352352
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
@@ -379,19 +379,18 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
379379
; CHECK-LABEL: atomicrmw_udec_wrap_i64:
380380
; CHECK: # %bb.0:
381381
; CHECK-NEXT: sync
382-
; CHECK-NEXT: ld 6, 0(3)
382+
; CHECK-NEXT: ld 5, 0(3)
383383
; CHECK-NEXT: b .LBB7_2
384384
; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
385385
; CHECK-NEXT: #
386386
; CHECK-NEXT: cmpld 5, 6
387-
; CHECK-NEXT: mr 6, 5
388387
; CHECK-NEXT: beq 0, .LBB7_7
389388
; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
390389
; CHECK-NEXT: # =>This Loop Header: Depth=1
391390
; CHECK-NEXT: # Child Loop BB7_5 Depth 2
392-
; CHECK-NEXT: cmpdi 6, 0
391+
; CHECK-NEXT: mr. 6, 5
393392
; CHECK-NEXT: cmpld 1, 6, 4
394-
; CHECK-NEXT: addi 5, 6, -1
393+
; CHECK-NEXT: addi 5, 5, -1
395394
; CHECK-NEXT: cror 20, 2, 5
396395
; CHECK-NEXT: bc 12, 20, .LBB7_4
397396
; CHECK-NEXT: # %bb.3: # %atomicrmw.start

llvm/test/Transforms/LICM/pr23608.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@ define void @fn1() {
2525
; NO_ASSUME-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0
2626
; NO_ASSUME-NEXT: br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]]
2727
; NO_ASSUME: bb13:
28-
; NO_ASSUME-NEXT: [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ]
29-
; NO_ASSUME-NEXT: [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64
30-
; NO_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr
28+
; NO_ASSUME-NEXT: [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ]
29+
; NO_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr
3130
; NO_ASSUME-NEXT: call void @__msan_warning_noreturn()
3231
; NO_ASSUME-NEXT: unreachable
3332
; NO_ASSUME: bb15:
@@ -54,9 +53,8 @@ define void @fn1() {
5453
; USE_ASSUME-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0
5554
; USE_ASSUME-NEXT: br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]]
5655
; USE_ASSUME: bb13:
57-
; USE_ASSUME-NEXT: [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ]
58-
; USE_ASSUME-NEXT: [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64
59-
; USE_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr
56+
; USE_ASSUME-NEXT: [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ]
57+
; USE_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr
6058
; USE_ASSUME-NEXT: call void @__msan_warning_noreturn()
6159
; USE_ASSUME-NEXT: unreachable
6260
; USE_ASSUME: bb15:

llvm/test/Transforms/LICM/sink-foldable.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,11 @@ define ptr @test3(i64 %j, ptr readonly %P, ptr readnone %Q) {
188188
; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[ARRAYIDX0]], [[FOR_BODY]] ]
189189
; CHECK-NEXT: br label [[RETURN]]
190190
; CHECK: loopexit1:
191-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[IF_END]] ]
191+
; CHECK-NEXT: [[TRUNC_LCSSA1:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ]
192192
; CHECK-NEXT: [[P_ADDR_LCSSA:%.*]] = phi ptr [ [[P_ADDR]], [[IF_END]] ]
193-
; CHECK-NEXT: [[TRUNC_LE:%.*]] = trunc i64 [[ADD_LCSSA]] to i32
194-
; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LE]]
195-
; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LE]])
193+
; CHECK-NEXT: [[TRUNC_LCSSA:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ]
194+
; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LCSSA1]]
195+
; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LCSSA]])
196196
; CHECK-NEXT: br label [[RETURN]]
197197
; CHECK: return:
198198
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[P1]], [[LOOPEXIT0]] ], [ [[ARRAYIDX1_LE]], [[LOOPEXIT1]] ], [ null, [[ENTRY:%.*]] ]

llvm/test/Transforms/LICM/sinking.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,6 +1001,7 @@ out:
10011001
declare void @use.i32(i32)
10021002
declare void @use.i64(i64)
10031003

1004+
; Don't duplicate freeze just because it's free.
10041005
define i32 @duplicate_freeze(i1 %c, i32 %x) {
10051006
; CHECK-LABEL: @duplicate_freeze(
10061007
; CHECK-NEXT: entry:
@@ -1010,8 +1011,8 @@ define i32 @duplicate_freeze(i1 %c, i32 %x) {
10101011
; CHECK-NEXT: call void @use.i32(i32 [[FR]])
10111012
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
10121013
; CHECK: exit:
1013-
; CHECK-NEXT: [[FR_LE:%.*]] = freeze i32 [[X]]
1014-
; CHECK-NEXT: ret i32 [[FR_LE]]
1014+
; CHECK-NEXT: [[FR_LCSSA:%.*]] = phi i32 [ [[FR]], [[LOOP]] ]
1015+
; CHECK-NEXT: ret i32 [[FR_LCSSA]]
10151016
;
10161017
entry:
10171018
br label %loop
@@ -1025,6 +1026,7 @@ exit:
10251026
ret i32 %fr
10261027
}
10271028

1029+
; Don't duplicate ptrtoint just because it's free.
10281030
define i64 @duplicate_ptrtoint(i1 %c, ptr %p) {
10291031
; CHECK-LABEL: @duplicate_ptrtoint(
10301032
; CHECK-NEXT: entry:
@@ -1034,8 +1036,8 @@ define i64 @duplicate_ptrtoint(i1 %c, ptr %p) {
10341036
; CHECK-NEXT: call void @use.i64(i64 [[PI]])
10351037
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
10361038
; CHECK: exit:
1037-
; CHECK-NEXT: [[PI_LE:%.*]] = ptrtoint ptr [[P]] to i64
1038-
; CHECK-NEXT: ret i64 [[PI_LE]]
1039+
; CHECK-NEXT: [[PI_LCSSA:%.*]] = phi i64 [ [[PI]], [[LOOP]] ]
1040+
; CHECK-NEXT: ret i64 [[PI_LCSSA]]
10391041
;
10401042
entry:
10411043
br label %loop

0 commit comments

Comments
 (0)