Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 7034870

Browse files
committed
[SimplifyCFG] don't sink common insts too soon (PR34603)
This should solve: https://bugs.llvm.org/show_bug.cgi?id=34603 ...by preventing SimplifyCFG from altering redundant instructions before early-cse has a chance to run. It changes the default (canonical-forming) behavior of SimplifyCFG, so we're only doing the sinking transform later in the optimization pipeline. Differential Revision: https://reviews.llvm.org/D38566 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320749 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a40d3af commit 7034870

File tree

15 files changed

+55
-27
lines changed

15 files changed

+55
-27
lines changed

include/llvm/Transforms/Scalar.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1);
267267
//
268268
FunctionPass *createCFGSimplificationPass(
269269
unsigned Threshold = 1, bool ForwardSwitchCond = false,
270-
bool ConvertSwitch = false, bool KeepLoops = true,
270+
bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false,
271271
std::function<bool(const Function &)> Ftor = nullptr);
272272

273273
//===----------------------------------------------------------------------===//

include/llvm/Transforms/Scalar/SimplifyCFG.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class SimplifyCFGPass : public PassInfoMixin<SimplifyCFGPass> {
3939
: SimplifyCFGPass(SimplifyCFGOptions()
4040
.forwardSwitchCondToPhi(false)
4141
.convertSwitchToLookupTable(false)
42-
.needCanonicalLoops(true)) {}
42+
.needCanonicalLoops(true)
43+
.sinkCommonInsts(false)) {}
4344

4445

4546
/// Construct a pass with optional optimizations.

include/llvm/Transforms/Utils/Local.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,20 @@ struct SimplifyCFGOptions {
6363
bool ForwardSwitchCondToPhi;
6464
bool ConvertSwitchToLookupTable;
6565
bool NeedCanonicalLoop;
66+
bool SinkCommonInsts;
6667
AssumptionCache *AC;
6768

6869
SimplifyCFGOptions(unsigned BonusThreshold = 1,
6970
bool ForwardSwitchCond = false,
7071
bool SwitchToLookup = false, bool CanonicalLoops = true,
72+
bool SinkCommon = false,
7173
AssumptionCache *AssumpCache = nullptr)
7274
: BonusInstThreshold(BonusThreshold),
7375
ForwardSwitchCondToPhi(ForwardSwitchCond),
7476
ConvertSwitchToLookupTable(SwitchToLookup),
75-
NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {}
77+
NeedCanonicalLoop(CanonicalLoops),
78+
SinkCommonInsts(SinkCommon),
79+
AC(AssumpCache) {}
7680

7781
// Support 'builder' pattern to set members by name at construction time.
7882
SimplifyCFGOptions &bonusInstThreshold(int I) {
@@ -91,6 +95,10 @@ struct SimplifyCFGOptions {
9195
NeedCanonicalLoop = B;
9296
return *this;
9397
}
98+
SimplifyCFGOptions &sinkCommonInsts(bool B) {
99+
SinkCommonInsts = B;
100+
return *this;
101+
}
94102
SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
95103
AC = Cache;
96104
return *this;

lib/Passes/PassBuilder.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -747,21 +747,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
747747
// Cleanup after the loop optimization passes.
748748
OptimizePM.addPass(InstCombinePass());
749749

750-
751750
// Now that we've formed fast to execute loop structures, we do further
752751
// optimizations. These are run afterward as they might block doing complex
753752
// analyses and transforms such as what are needed for loop vectorization.
754753

755-
// Optimize parallel scalar instruction chains into SIMD instructions.
756-
OptimizePM.addPass(SLPVectorizerPass());
757-
758-
// Cleanup after all of the vectorizers. Simplification passes like CVP and
754+
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
759755
// GVN, loop transforms, and others have already run, so it's now better to
760756
// convert to more optimized IR using more aggressive simplify CFG options.
757+
// The extra sinking transform can create larger basic blocks, so do this
758+
// before SLP vectorization.
761759
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
762-
forwardSwitchCondToPhi(true).
763-
convertSwitchToLookupTable(true).
764-
needCanonicalLoops(false)));
760+
forwardSwitchCondToPhi(true).
761+
convertSwitchToLookupTable(true).
762+
needCanonicalLoops(false).
763+
sinkCommonInsts(true)));
764+
765+
// Optimize parallel scalar instruction chains into SIMD instructions.
766+
OptimizePM.addPass(SLPVectorizerPass());
767+
765768
OptimizePM.addPass(InstCombinePass());
766769

767770
// Unroll small loops to hide loop backedge latency and saturate any parallel

lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() {
365365
// determine whether it succeeded. We can exploit existing control-flow in
366366
// ldrex/strex loops to simplify this, but it needs tidying up.
367367
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
368-
addPass(createCFGSimplificationPass(1, true, true, false));
368+
addPass(createCFGSimplificationPass(1, true, true, false, true));
369369

370370
// Run LoopDataPrefetch
371371
//

lib/Target/ARM/ARMTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ void ARMPassConfig::addIRPasses() {
385385
// ldrex/strex loops to simplify this, but it needs tidying up.
386386
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
387387
addPass(createCFGSimplificationPass(
388-
1, false, false, true, [this](const Function &F) {
388+
1, false, false, true, true, [this](const Function &F) {
389389
const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
390390
return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
391391
}));

lib/Transforms/IPO/PassManagerBuilder.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager(
630630
addInstructionCombiningPass(MPM);
631631
}
632632

633+
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
634+
// GVN, loop transforms, and others have already run, so it's now better to
635+
// convert to more optimized IR using more aggressive simplify CFG options.
636+
// The extra sinking transform can create larger basic blocks, so do this
637+
// before SLP vectorization.
638+
MPM.add(createCFGSimplificationPass(1, true, true, false, true));
639+
633640
if (RunSLPAfterLoopVectorization && SLPVectorize) {
634641
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
635642
if (OptLevel > 1 && ExtraVectorizerPasses) {
@@ -638,9 +645,6 @@ void PassManagerBuilder::populateModulePassManager(
638645
}
639646

640647
addExtensionsToPM(EP_Peephole, MPM);
641-
// Switches to lookup tables and other transforms that may not be considered
642-
// canonical by other IR passes.
643-
MPM.add(createCFGSimplificationPass(1, true, true, false));
644648
addInstructionCombiningPass(MPM);
645649

646650
if (!DisableUnrollLoops) {

lib/Transforms/Scalar/SimplifyCFGPass.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ static cl::opt<bool> UserForwardSwitchCond(
6161
"forward-switch-cond", cl::Hidden, cl::init(false),
6262
cl::desc("Forward switch condition to phi ops (default = false)"));
6363

64+
static cl::opt<bool> UserSinkCommonInsts(
65+
"sink-common-insts", cl::Hidden, cl::init(false),
66+
cl::desc("Sink common instructions (default = false)"));
67+
68+
6469
STATISTIC(NumSimpl, "Number of blocks simplified");
6570

6671
/// If we have more than one empty (other than phi node) return blocks,
@@ -205,6 +210,9 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) {
205210
Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
206211
? UserKeepLoops
207212
: Opts.NeedCanonicalLoop;
213+
Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
214+
? UserSinkCommonInsts
215+
: Opts.SinkCommonInsts;
208216
}
209217

210218
PreservedAnalyses SimplifyCFGPass::run(Function &F,
@@ -226,6 +234,7 @@ struct CFGSimplifyPass : public FunctionPass {
226234

227235
CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
228236
bool ConvertSwitch = false, bool KeepLoops = true,
237+
bool SinkCommon = false,
229238
std::function<bool(const Function &)> Ftor = nullptr)
230239
: FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
231240

@@ -246,6 +255,10 @@ struct CFGSimplifyPass : public FunctionPass {
246255

247256
Options.NeedCanonicalLoop =
248257
UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
258+
259+
Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
260+
? UserSinkCommonInsts
261+
: SinkCommon;
249262
}
250263

251264
bool runOnFunction(Function &F) override {
@@ -276,7 +289,8 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
276289
FunctionPass *
277290
llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
278291
bool ConvertSwitch, bool KeepLoops,
292+
bool SinkCommon,
279293
std::function<bool(const Function &)> Ftor) {
280294
return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
281-
KeepLoops, std::move(Ftor));
295+
KeepLoops, SinkCommon, std::move(Ftor));
282296
}

lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5728,7 +5728,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
57285728
BasicBlock *BB = BI->getParent();
57295729
BasicBlock *Succ = BI->getSuccessor(0);
57305730

5731-
if (SinkCommon && SinkThenElseCodeToEnd(BI))
5731+
if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
57325732
return true;
57335733

57345734
// If the Terminator is the only non-phi instruction, simplify the block.

test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -simplifycfg -S < %s | FileCheck %s
1+
; RUN: opt -simplifycfg -sink-common-insts -S < %s | FileCheck %s
22

33
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
44
target triple = "x86_64-unknown-linux-gnu"

test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,8 @@
197197
; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
198198
; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
199199
; CHECK-O-NEXT: Running pass: InstCombinePass
200-
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
201200
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
201+
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
202202
; CHECK-O-NEXT: Running pass: InstCombinePass
203203
; CHECK-O-NEXT: Running pass: LoopUnrollPass
204204
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy

test/Other/new-pm-thinlto-defaults.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@
185185
; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
186186
; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
187187
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
188-
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
189188
; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
189+
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
190190
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
191191
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
192192
; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy

test/Transforms/PhaseOrdering/simplifycfg-options.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,8 @@ define double @max_of_loads(double* %x, double* %y, i64 %i) {
7676
; ALL-NEXT: [[XI:%.*]] = load double, double* [[XI_PTR]], align 8
7777
; ALL-NEXT: [[YI:%.*]] = load double, double* [[YI_PTR]], align 8
7878
; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[XI]], [[YI]]
79-
; ALL-NEXT: [[Y_SINK:%.*]] = select i1 [[CMP]], double* [[X]], double* [[Y]]
80-
; ALL-NEXT: [[YI_PTR_AGAIN:%.*]] = getelementptr double, double* [[Y_SINK]], i64 [[I]]
81-
; ALL-NEXT: [[YI_AGAIN:%.*]] = load double, double* [[YI_PTR_AGAIN]], align 8
82-
; ALL-NEXT: ret double [[YI_AGAIN]]
79+
; ALL-NEXT: [[XI_YI:%.*]] = select i1 [[CMP]], double [[XI]], double [[YI]]
80+
; ALL-NEXT: ret double [[XI_YI]]
8381
;
8482
entry:
8583
%xi_ptr = getelementptr double, double* %x, i64 %i

test/Transforms/SimplifyCFG/no-md-sink.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -simplifycfg -S | FileCheck %s
1+
; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck %s
22

33
define i1 @test1(i1 zeroext %flag, i8* %y) #0 {
44
entry:

test/Transforms/SimplifyCFG/sink-common-code.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -simplifycfg -S | FileCheck -enable-var-scope %s
1+
; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck -enable-var-scope %s
22

33
define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
44
entry:

0 commit comments

Comments
 (0)