Skip to content

Commit 2cdc6f2

Browse files
Djordje Todorovic
authored and committed
Reland "[LICM] Hoist LOAD without sinking the STORE"
When doing load/store promotion within LICM, if we cannot prove that it is safe to sink the store, we won't hoist the load, even though we can prove the load could be dereferenced and moved outside the loop. This patch implements the load promotion by moving the load into the loop preheader and inserting the proper PHI in the loop. The store is kept as is in the loop. By doing this, we avoid loading from the memory location in each iteration.

Please consider this small example:

    loop {
      var = *ptr;
      if (var) break;
      *ptr = var + 1;
    }

After this patch, it will be:

    var0 = *ptr;
    loop {
      var1 = phi (var0, var2);
      if (var1) break;
      var2 = var1 + 1;
      *ptr = var2;
    }

This addresses some problems from [0].

[0] https://bugs.llvm.org/show_bug.cgi?id=51193

Differential revision: https://reviews.llvm.org/D113289
1 parent 47616c8 commit 2cdc6f2

File tree

9 files changed

+65
-26
lines changed

9 files changed

+65
-26
lines changed

llvm/include/llvm/Transforms/Utils/SSAUpdater.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,10 @@ class LoadAndStorePromoter {
169169

170170
/// Called to update debug info associated with the instruction.
171171
virtual void updateDebugInfo(Instruction *I) const {}
172+
173+
/// Return false if a sub-class wants to keep one of the loads/stores
174+
/// after the SSA construction.
175+
virtual bool shouldDelete(Instruction *I) const { return true; }
172176
};
173177

174178
} // end namespace llvm

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 32 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter {
18601860
bool UnorderedAtomic;
18611861
AAMDNodes AATags;
18621862
ICFLoopSafetyInfo &SafetyInfo;
1863+
bool CanInsertStoresInExitBlocks;
18631864

18641865
// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
18651866
// (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1886,12 +1887,13 @@ class LoopPromoter : public LoadAndStorePromoter {
18861887
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
18871888
MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
18881889
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1889-
ICFLoopSafetyInfo &SafetyInfo)
1890+
ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
18901891
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
18911892
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
18921893
PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
18931894
Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1894-
SafetyInfo(SafetyInfo) {}
1895+
SafetyInfo(SafetyInfo),
1896+
CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
18951897

18961898
bool isInstInList(Instruction *I,
18971899
const SmallVectorImpl<Instruction *> &) const override {
@@ -1903,7 +1905,7 @@ class LoopPromoter : public LoadAndStorePromoter {
19031905
return PointerMustAliases.count(Ptr);
19041906
}
19051907

1906-
void doExtraRewritesBeforeFinalDeletion() override {
1908+
void insertStoresInLoopExitBlocks() {
19071909
// Insert stores after in the loop exit blocks. Each exit block gets a
19081910
// store of the live-out values that feed them. Since we've already told
19091911
// the SSA updater about the defs in the loop and the preheader
@@ -1937,10 +1939,21 @@ class LoopPromoter : public LoadAndStorePromoter {
19371939
}
19381940
}
19391941

1942+
void doExtraRewritesBeforeFinalDeletion() override {
1943+
if (CanInsertStoresInExitBlocks)
1944+
insertStoresInLoopExitBlocks();
1945+
}
1946+
19401947
void instructionDeleted(Instruction *I) const override {
19411948
SafetyInfo.removeInstruction(I);
19421949
MSSAU->removeMemoryAccess(I);
19431950
}
1951+
1952+
bool shouldDelete(Instruction *I) const override {
1953+
if (isa<StoreInst>(I))
1954+
return CanInsertStoresInExitBlocks;
1955+
return true;
1956+
}
19441957
};
19451958

19461959
bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
@@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars(
20392052

20402053
bool DereferenceableInPH = false;
20412054
bool SafeToInsertStore = false;
2055+
bool FoundLoadToPromote = false;
20422056

20432057
SmallVector<Instruction *, 64> LoopUses;
20442058

@@ -2086,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars(
20862100

20872101
SawUnorderedAtomic |= Load->isAtomic();
20882102
SawNotAtomic |= !Load->isAtomic();
2103+
FoundLoadToPromote = true;
20892104

20902105
Align InstAlignment = Load->getAlign();
20912106

@@ -2197,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars(
21972212
}
21982213
}
21992214

2200-
// If we've still failed to prove we can sink the store, give up.
2201-
if (!SafeToInsertStore)
2215+
// If we've still failed to prove we can sink the store, hoist the load
2216+
// only, if possible.
2217+
if (!SafeToInsertStore && !FoundLoadToPromote)
2218+
// If we cannot hoist the load either, give up.
22022219
return false;
22032220

2204-
// Otherwise, this is safe to promote, lets do it!
2205-
LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2206-
<< '\n');
2221+
// Lets do the promotion!
2222+
if (SafeToInsertStore)
2223+
LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2224+
<< '\n');
2225+
else
2226+
LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2227+
<< '\n');
2228+
22072229
ORE->emit([&]() {
22082230
return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
22092231
LoopUses[0])
@@ -2222,7 +2244,8 @@ bool llvm::promoteLoopAccessesToScalars(
22222244
SSAUpdater SSA(&NewPHIs);
22232245
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
22242246
InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
2225-
Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2247+
Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
2248+
SafeToInsertStore);
22262249

22272250
// Set up the preheader to have a definition of the value. It is the live-out
22282251
// value from the preheader that uses in the loop will use.

llvm/lib/Transforms/Utils/SSAUpdater.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
446446
// Now that everything is rewritten, delete the old instructions from the
447447
// function. They should all be dead now.
448448
for (Instruction *User : Insts) {
449+
if (!shouldDelete(User))
450+
continue;
451+
449452
// If this is a load that still has uses, then the load must have been added
450453
// as a live value in the SSAUpdate data structure for a block (e.g. because
451454
// the loaded value was stored later). In this case, we need to recursively

llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
55
; RUN: opt -O2 -S < %s | FileCheck %s
66

77
; CHECK-LABEL: main
8-
; CHECK: if.end
9-
; CHECK: store
108
; CHECK: memset
119
; CHECK: if.then
1210
; CHECK: store
13-
; CHECK: memset
11+
; CHECK: if.end
12+
; CHECK: store
13+
; CHECK: store
1414

1515
@d = common global i32 0, align 4
1616
@b = common global i32 0, align 4

llvm/test/Transforms/LICM/hoist-load-without-store.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,11 @@ define dso_local void @f(i32* nocapture %ptr, i32 %n) {
1818
; CHECK-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]]
1919
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]]
2020
; CHECK: for.body.lr.ph:
21+
; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
2122
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
2223
; CHECK: for.body:
23-
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
24-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
24+
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ]
25+
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
2526
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
2627
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]]
2728
; CHECK: if.end:

llvm/test/Transforms/LICM/promote-capture.ll

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -111,17 +111,19 @@ define void @test_captured_before_loop(i32 %len) {
111111
; CHECK-NEXT: [[COUNT:%.*]] = alloca i32, align 4
112112
; CHECK-NEXT: store i32 0, i32* [[COUNT]], align 4
113113
; CHECK-NEXT: call void @capture(i32* [[COUNT]])
114+
; CHECK-NEXT: [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4
114115
; CHECK-NEXT: br label [[LOOP:%.*]]
115116
; CHECK: loop:
116-
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
117+
; CHECK-NEXT: [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ]
118+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ]
117119
; CHECK-NEXT: [[COND:%.*]] = call i1 @cond(i32 [[I]])
118120
; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
119121
; CHECK: if:
120-
; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[COUNT]], align 4
121-
; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C]], 1
122+
; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C_INC2]], 1
122123
; CHECK-NEXT: store i32 [[C_INC]], i32* [[COUNT]], align 4
123124
; CHECK-NEXT: br label [[LATCH]]
124125
; CHECK: latch:
126+
; CHECK-NEXT: [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ]
125127
; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
126128
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
127129
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]

llvm/test/Transforms/LICM/scalar-promote-memmodel.ll

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,21 @@ define void @bar(i32 %n, i32 %b) nounwind uwtable ssp {
1111
; CHECK-LABEL: @bar(
1212
; CHECK-NEXT: entry:
1313
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0
14+
; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, i32* @g, align 4
1415
; CHECK-NEXT: br label [[FOR_COND:%.*]]
1516
; CHECK: for.cond:
16-
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC:%.*]] ]
17+
; CHECK-NEXT: [[INC2:%.*]] = phi i32 [ [[G_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ]
18+
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC5:%.*]], [[FOR_INC]] ]
1719
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
1820
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
1921
; CHECK: for.body:
2022
; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]]
2123
; CHECK: if.then:
22-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4
23-
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
24+
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[INC2]], 1
2425
; CHECK-NEXT: store i32 [[INC]], i32* @g, align 4
2526
; CHECK-NEXT: br label [[FOR_INC]]
2627
; CHECK: for.inc:
28+
; CHECK-NEXT: [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ]
2729
; CHECK-NEXT: [[INC5]] = add nsw i32 [[I_0]], 1
2830
; CHECK-NEXT: br label [[FOR_COND]]
2931
; CHECK: for.end:

llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -314,17 +314,19 @@ define i32 @test7bad() {
314314
; CHECK-NEXT: entry:
315315
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
316316
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
317+
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
317318
; CHECK-NEXT: br label [[LOOP:%.*]]
318319
; CHECK: loop:
319-
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
320-
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[LOCAL]], align 4
321-
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X]])
320+
; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
321+
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
322+
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
322323
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0
323324
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
324325
; CHECK: if:
325326
; CHECK-NEXT: store i32 [[X2]], ptr [[LOCAL]], align 4
326327
; CHECK-NEXT: br label [[ELSE]]
327328
; CHECK: else:
329+
; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
328330
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
329331
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
330332
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]

llvm/test/Transforms/LICM/scalar-promote.ll

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -315,17 +315,19 @@ define i32 @test7bad() {
315315
; CHECK-NEXT: entry:
316316
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
317317
; CHECK-NEXT: call void @capture(i32* [[LOCAL]])
318+
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4
318319
; CHECK-NEXT: br label [[LOOP:%.*]]
319320
; CHECK: loop:
320-
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
321-
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[LOCAL]], align 4
322-
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X]])
321+
; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
322+
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
323+
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
323324
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0
324325
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
325326
; CHECK: if:
326327
; CHECK-NEXT: store i32 [[X2]], i32* [[LOCAL]], align 4
327328
; CHECK-NEXT: br label [[ELSE]]
328329
; CHECK: else:
330+
; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
329331
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
330332
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
331333
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]

0 commit comments

Comments
 (0)