Skip to content

Commit 1f93b16

Browse files
Support repeated machine outlining
Summary: This change allows machine outlining to be applied N times, where N is specified by a compiler option. By default, N is 1. The motivation is that repeated machine outlining can further reduce code size. Please refer to the presentation "Improving Swift Binary Size via Link Time Optimization" from the 2019 LLVM Developers' Meeting. Reviewers: aschwaighofer, tellenbach, paquette Reviewed By: paquette Subscribers: tellenbach, hiraditya, llvm-commits, jinlin Tags: #llvm Differential Revision: https://reviews.llvm.org/D71027
1 parent 08ab8c9 commit 1f93b16

File tree

2 files changed

+192
-3
lines changed

2 files changed

+192
-3
lines changed

llvm/lib/CodeGen/MachineOutliner.cpp

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ static cl::opt<bool> EnableLinkOnceODROutlining(
9797
cl::desc("Enable the machine outliner on linkonceodr functions"),
9898
cl::init(false));
9999

100+
// Set the number of times to repeatedly apply outlining.
101+
// Defaults to 1, but more repetitions can save additional size.
102+
static cl::opt<unsigned>
103+
NumRepeat("machine-outline-runs", cl::Hidden,
104+
cl::desc("The number of times to apply machine outlining"),
105+
cl::init(1));
106+
100107
namespace {
101108

102109
/// Represents an undefined index in the suffix tree.
@@ -842,6 +849,9 @@ struct MachineOutliner : public ModulePass {
842849
/// linkonceodr linkage.
843850
bool OutlineFromLinkOnceODRs = false;
844851

852+
/// The zero-based index of the current machine outlining run.
853+
unsigned OutlineRepeatedNum = 0;
854+
845855
/// Set to true if the outliner should run on all functions in the module
846856
/// considered safe for outlining.
847857
/// Set to true by default for compatibility with llc's -run-pass option.
@@ -900,9 +910,12 @@ struct MachineOutliner : public ModulePass {
900910
InstructionMapper &Mapper,
901911
unsigned Name);
902912

903-
/// Calls 'doOutline()'.
913+
/// Calls 'runOnceOnModule()' up to NumRepeat times.
904914
bool runOnModule(Module &M) override;
905915

916+
/// Calls 'doOutline()'.
917+
bool runOnceOnModule(Module &M, unsigned Iter);
918+
906919
/// Construct a suffix tree on the instructions in \p M and outline repeated
907920
/// strings from that tree.
908921
bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
@@ -1099,7 +1112,13 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
10991112
// Create the function name. This should be unique.
11001113
// FIXME: We should have a better naming scheme. This should be stable,
11011114
// regardless of changes to the outliner's cost model/traversal order.
1102-
std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
1115+
std::string FunctionName;
1116+
if (OutlineRepeatedNum > 0)
1117+
FunctionName = ("OUTLINED_FUNCTION_" + Twine(OutlineRepeatedNum + 1) + "_" +
1118+
Twine(Name))
1119+
.str();
1120+
else
1121+
FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
11031122

11041123
// Create the function using an IR-level function.
11051124
LLVMContext &C = M.getContext();
@@ -1438,12 +1457,14 @@ void MachineOutliner::emitInstrCountChangedRemark(
14381457
}
14391458
}
14401459

1441-
bool MachineOutliner::runOnModule(Module &M) {
1460+
bool MachineOutliner::runOnceOnModule(Module &M, unsigned Iter) {
14421461
// Check if there's anything in the module. If it's empty, then there's
14431462
// nothing to outline.
14441463
if (M.empty())
14451464
return false;
14461465

1466+
OutlineRepeatedNum = Iter;
1467+
14471468
// Number to append to the current outlined function.
14481469
unsigned OutlinedFunctionNum = 0;
14491470

@@ -1507,3 +1528,23 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
15071528

15081529
return OutlinedSomething;
15091530
}
1531+
1532+
// Apply machine outlining up to NumRepeat times.
1533+
bool MachineOutliner::runOnModule(Module &M) {
1534+
if (NumRepeat < 1)
1535+
report_fatal_error("Expect NumRepeat for machine outlining "
1536+
"to be greater than or equal to 1!\n");
1537+
1538+
bool Changed = false;
1539+
for (unsigned I = 0; I < NumRepeat; I++) {
1540+
if (!runOnceOnModule(M, I)) {
1541+
LLVM_DEBUG(dbgs() << "Stopped outlining at iteration " << I
1542+
<< " because no changes were found.\n";);
1543+
return Changed;
1544+
}
1545+
Changed = true;
1546+
}
1547+
LLVM_DEBUG(dbgs() << "Stopped outlining because iteration is "
1548+
"equal to " << NumRepeat << "\n";);
1549+
return Changed;
1550+
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner -machine-outline-runs=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix TWO-RUNS
2+
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner -machine-outline-runs=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix ONE-RUN
3+
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner -machine-outline-runs=4 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix FOUR-RUNS
4+
5+
# Example of Repeated Instruction Sequence - Iterative Machine Outlining
6+
#
7+
#; define void @"$s12"(...) { define i64 @"$s5"(...) { define void @"$s13"(...) {
8+
# ... ... ...
9+
# %8 = load i1, i1* %7 %8 = load i1, i1* %7
10+
# %9 = load i4, i4*, %6 %9 = load i4, i4*, %6 %9 = load i4, i4*, %6
11+
# store i4 %9, i4* %5 store i4 %9, i4* %5 store i4 %9, i4* %5
12+
# ... ... ...
13+
# } } }
14+
#
15+
# After machine outliner (1st time)
16+
#
17+
# define void @"$s12"(...) { define i64 @"$s5"(...) { define void @"$s13"(...) {
18+
# ... ... ...
19+
# %8 = load i1, i1* %7 %8 = load i1, i1* %7
20+
# call void @outlined_function_1_1 call void @outlined_function_1_1 call void @outlined_function_1_1
21+
# ... ... ...
22+
# } } }
23+
#
24+
# After machine outliner (2nd time)
25+
#
26+
# define void @"$s12"(...) { define i64 @"$s5"(...) { define void @"$s13"(...) {
27+
# ... ... ...
28+
# call void @outlined_function_2_1 call void @outlined_function_1_1 call void @outlined_function_2_1
29+
# ... ... ...
30+
# } } }
31+
#
32+
# Check whether the machine outliner can find further outlining opportunities after machine
33+
# outlining has been performed.
34+
#
35+
--- |
36+
target triple = "aarch64-apple-darwin"
37+
38+
declare void @foo() local_unnamed_addr
39+
40+
declare void @widget() local_unnamed_addr
41+
42+
; Function Attrs: minsize noredzone optsize
43+
define void @baz.14() #0 {
44+
ret void
45+
}
46+
47+
; Function Attrs: minsize noredzone optsize
48+
define void @baz.15() #0 {
49+
ret void
50+
}
51+
52+
; Function Attrs: minsize noredzone optsize
53+
define void @baz.16() #0 {
54+
ret void
55+
}
56+
57+
attributes #0 = { minsize noredzone optsize }
58+
...
59+
---
60+
name: baz.14
61+
tracksRegLiveness: true
62+
stack:
63+
- { id: 0, offset: -8, size: 8 }
64+
- { id: 1, offset: -16, size: 8 }
65+
body: |
66+
bb.0:
67+
liveins: $x0, $x19, $lr
68+
69+
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
70+
frame-setup CFI_INSTRUCTION def_cfa_offset 16
71+
frame-setup CFI_INSTRUCTION offset $w19, -8
72+
frame-setup CFI_INSTRUCTION offset $w30, -16
73+
renamable $x19 = COPY $x0
74+
renamable $x0 = nuw ADDXri $x0, 48, 0
75+
$x1 = ADDXri $sp, 0, 0
76+
dead $w2 = MOVi32imm 33, implicit-def $x2
77+
$x3 = COPY $xzr
78+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
79+
$x0 = COPY killed renamable $x19
80+
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
81+
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
82+
RET_ReallyLR
83+
84+
...
85+
---
86+
name: baz.15
87+
stack:
88+
- { id: 0, offset: -8, size: 8 }
89+
- { id: 1, offset: -16, size: 8 }
90+
body: |
91+
bb.0:
92+
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
93+
frame-setup CFI_INSTRUCTION def_cfa_offset 16
94+
frame-setup CFI_INSTRUCTION offset $w19, -8
95+
frame-setup CFI_INSTRUCTION offset $w30, -16
96+
renamable $x19 = COPY $x0
97+
renamable $x0 = nuw ADDXri killed renamable $x1, 16, 0
98+
$x1 = ADDXri $sp, 0, 0
99+
dead $w2 = MOVi32imm 33, implicit-def $x2
100+
$x3 = COPY $xzr
101+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
102+
$x0 = COPY killed renamable $x19
103+
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
104+
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
105+
RET_ReallyLR
106+
107+
...
108+
---
109+
name: baz.16
110+
tracksRegLiveness: true
111+
stack:
112+
- { id: 0, offset: -8, size: 8 }
113+
- { id: 1, offset: -16, size: 8 }
114+
body: |
115+
bb.0:
116+
liveins: $x0, $x19, $lr
117+
118+
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
119+
frame-setup CFI_INSTRUCTION def_cfa_offset 16
120+
frame-setup CFI_INSTRUCTION offset $w19, -8
121+
frame-setup CFI_INSTRUCTION offset $w30, -16
122+
renamable $x19 = COPY $x0
123+
renamable $x0 = nuw ADDXri $x0, 48, 0
124+
$x1 = ADDXri $sp, 0, 0
125+
dead $w2 = MOVi32imm 33, implicit-def $x2
126+
$x3 = COPY $xzr
127+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
128+
$x0 = COPY killed renamable $x19
129+
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
130+
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
131+
RET_ReallyLR
132+
133+
...
134+
135+
# TWO-RUNS: name: OUTLINED_FUNCTION_2_0
136+
# TWO-RUNS-DAG: bb.0:
137+
# TWO-RUNS-DAG: renamable $x19 = COPY $x0
138+
# TWO-RUNS-NEXT: renamable $x0 = nuw ADDXri $x0, 48, 0
139+
# TWO-RUNS-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp
140+
#
141+
# The machine outliner is expected to stop at the 1st iteration for case ONE-RUN
142+
# since machine-outline-runs is specified as 1.
143+
# ONE-RUN-NOT: [[OUTLINED:OUTLINED_FUNCTION_2_[0-9]+]]
144+
#
145+
# The machine outliner is expected to stop at the 3rd iteration for case FOUR-RUNS
146+
# since the MIR has no change at the 3rd iteration.
147+
# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_3_[0-9]+]]
148+
# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_4_[0-9]+]]

0 commit comments

Comments
 (0)