Skip to content

Commit 7b1c59c

Browse files
author
Raj Barik
committed
Profile-based inlining enabled
1 parent 8a86b94 commit 7b1c59c

File tree

10 files changed

+5071
-3
lines changed

10 files changed

+5071
-3
lines changed

src/cmd/compile/internal/base/flag.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ type CmdFlags struct {
121121
TraceProfile string "help:\"write an execution trace to `file`\""
122122
TrimPath string "help:\"remove `prefix` from recorded source file paths\""
123123
WB bool "help:\"enable write barrier\"" // TODO: remove
124+
ProfileUse string "help:\"read profile from `file`\""
125+
InlineHotThreshold string "help:\"Threshold percentage for determining hot methods for inlining\""
124126

125127
// Configuration derived from flags; not a flag itself.
126128
Cfg struct {

src/cmd/compile/internal/gc/main.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"cmd/compile/internal/ir"
1717
"cmd/compile/internal/logopt"
1818
"cmd/compile/internal/noder"
19+
"cmd/compile/internal/pgo"
1920
"cmd/compile/internal/pkginit"
2021
"cmd/compile/internal/reflectdata"
2122
"cmd/compile/internal/ssa"
@@ -232,10 +233,28 @@ func Main(archInit func(*ssagen.ArchInfo)) {
232233
typecheck.AllImportedBodies()
233234
}
234235

236+
// Read the CPU profile and build the cross-package pprof graph and the per-package weighted call graph. The pprof graph is built only once.
237+
base.Timer.Start("fe", "profileuse")
238+
if base.Flag.ProfileUse != "" {
239+
if pgo.PProfGraph == nil {
240+
pgo.PProfGraph = pgo.BuildGlobalPProfGraph(base.Flag.ProfileUse, &pgo.Options{
241+
CallTree: false,
242+
SampleValue: func(v []int64) int64 { return v[1] },
243+
})
244+
}
245+
pgo.WeightedCG = pgo.BuildWeightedCallGraphPerPkg()
246+
}
247+
235248
// Inlining
236249
base.Timer.Start("fe", "inlining")
237250
if base.Flag.LowerL != 0 {
251+
if pgo.WeightedCG != nil {
252+
inline.InlinePrologue()
253+
}
238254
inline.InlinePackage()
255+
if pgo.WeightedCG != nil {
256+
inline.InlineEpilogue()
257+
}
239258
}
240259
noder.MakeWrappers(typecheck.Target) // must happen after inlining
241260

src/cmd/compile/internal/inline/inl.go

Lines changed: 135 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ package inline
2929
import (
3030
"fmt"
3131
"go/constant"
32+
"strconv"
3233
"strings"
3334

3435
"cmd/compile/internal/base"
3536
"cmd/compile/internal/ir"
3637
"cmd/compile/internal/logopt"
38+
"cmd/compile/internal/pgo"
3739
"cmd/compile/internal/typecheck"
3840
"cmd/compile/internal/types"
3941
"cmd/internal/obj"
@@ -42,17 +44,98 @@ import (
4244

4345
// Inlining budget parameters, gathered in one place
4446
const (
45-
inlineMaxBudget = 80
46-
inlineExtraAppendCost = 0
47+
inlineMaxBudget = 80
48+
// Budget increased due to hotness.
49+
inlineHotCalleeMaxBudget = 160
50+
inlineExtraAppendCost = 0
4751
// default is to inline if there's at most one call. -l=4 overrides this by using 1 instead.
4852
inlineExtraCallCost = 57 // 57 was benchmarked to provided most benefit with no bad surprises; see https://github.com/golang/go/issues/19348#issuecomment-439370742
4953
inlineExtraPanicCost = 1 // do not penalize inlining panics.
5054
inlineExtraThrowCost = inlineMaxBudget // with current (2018-05/1.11) code, inlining runtime.throw does not help.
5155

5256
inlineBigFunctionNodes = 5000 // Functions with this many nodes are considered "big".
5357
inlineBigFunctionMaxCost = 20 // Max cost of inlinee when inlining into a "big" function.
58+
5459
)
5560

61+
var (
62+
// Per-caller set of hot call sites. It is replaced with a fresh map for every caller, leaving the previous map to the GC for cleanup.
63+
listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
64+
65+
// List of all hot call sites.
66+
candHotEdgeMap = make(map[string]struct{})
67+
68+
// List of inlined call sites.
69+
inlinedCallSites = make(map[pgo.CallSiteInfo]struct{})
70+
71+
// Threshold for Hot callsite inlining.
72+
inlineHotThresholdPercent = float64(2)
73+
)
74+
75+
// weightInPercentage converts profile weights to a percentage.
76+
func weightInPercentage(value int64, total int64) float64 {
77+
var ratio float64
78+
// percentage is computed as (weight/totalweights) * 100
79+
// e.g. if the edge weight is 30 and the sum of all edge weights is 126
80+
// the percentage will be 23.8%
81+
if total != 0 {
82+
ratio = (float64(value) / float64(total)) * 100
83+
}
84+
return ratio
85+
}
86+
87+
// InlinePrologue records the hot callsites from ir-graph.
88+
func InlinePrologue() {
89+
if s, err := strconv.ParseFloat(base.Flag.InlineHotThreshold, 64); err == nil {
90+
inlineHotThresholdPercent = s
91+
if base.Flag.LowerM != 0 {
92+
fmt.Printf("hot-thres=%v\n", inlineHotThresholdPercent)
93+
}
94+
}
95+
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
96+
for _, f := range list {
97+
name := ir.PkgFuncName(f)
98+
if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
99+
nodeweight := weightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight)
100+
101+
for _, e := range pgo.WeightedCG.OutEdges[n] {
102+
if e.Weight != 0 {
103+
weightpercent := weightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight)
104+
if weightpercent > inlineHotThresholdPercent {
105+
splits := strings.Split(e.CallSite, ":")
106+
line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
107+
lineno := fmt.Sprintf("%v", line2)
108+
canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
109+
candHotEdgeMap[canonicalName] = struct{}{}
110+
if base.Flag.LowerM != 0 {
111+
fmt.Printf("hot-inline cand=%v\n", canonicalName)
112+
}
113+
}
114+
}
115+
}
116+
if nodeweight > inlineHotThresholdPercent {
117+
n.HotNode = true
118+
if base.Flag.LowerM != 0 {
119+
fmt.Printf("hot-node=%v\n", name)
120+
}
121+
}
122+
}
123+
}
124+
})
125+
}
126+
127+
// InlineEpilogue updates IRGraph after inlining.
128+
func InlineEpilogue() {
129+
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
130+
for _, f := range list {
131+
name := ir.PkgFuncName(f)
132+
if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
133+
pgo.RedirectEdges(n, inlinedCallSites)
134+
}
135+
}
136+
})
137+
}
138+
56139
// InlinePackage finds functions that can be inlined and clones them before walk expands them.
57140
func InlinePackage() {
58141
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
@@ -81,6 +164,9 @@ func CanInline(fn *ir.Func) {
81164
base.Fatalf("CanInline no nname %+v", fn)
82165
}
83166

167+
// Initialize an empty list of hot callsites for this caller.
168+
listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
169+
84170
var reason string // reason, if any, that the function was not inlined
85171
if base.Flag.LowerM > 1 || logopt.Enabled() {
86172
defer func() {
@@ -252,6 +338,23 @@ func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
252338
return true
253339
}
254340
if v.budget < 0 {
341+
if pgo.WeightedCG != nil {
342+
// If the cost of hot function is greater than inlineHotCalleeMaxBudget,
343+
// the inliner won't inline this function.
344+
if inlineMaxBudget-v.budget < inlineHotCalleeMaxBudget {
345+
// In some cases, there are no hot call edges in the function.
346+
// However, if the function itself is hot, it is still an inline
347+
// candidate.
348+
if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
349+
if n.HotNode == true {
350+
if base.Flag.LowerM != 0 {
351+
fmt.Printf("hot-node enabled increased budget for func=%v\n", ir.PkgFuncName(fn))
352+
}
353+
return false
354+
}
355+
}
356+
}
357+
}
255358
v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
256359
return true
257360
}
@@ -315,6 +418,20 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
315418
}
316419
}
317420

421+
// Determine whether the call edge to this callee is hot.
422+
if pgo.WeightedCG != nil && ir.CurFunc != nil {
423+
if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
424+
lineno := fmt.Sprintf("%v", ir.Line(n))
425+
splits := strings.Split(lineno, ":")
426+
l, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
427+
linenum := fmt.Sprintf("%v", l)
428+
canonicalName := ir.PkgFuncName(ir.CurFunc) + "-" + linenum + "-" + ir.PkgFuncName(fn)
429+
if _, o := candHotEdgeMap[canonicalName]; o {
430+
listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
431+
}
432+
}
433+
}
434+
318435
if ir.IsIntrinsicCall(n) {
319436
// Treat like any other node.
320437
break
@@ -716,7 +833,15 @@ func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlCalls *[]*ir.Inlin
716833
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
717834
fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
718835
}
719-
return n
836+
837+
// If the call site is hot and the callee's cost does not exceed inlineHotCalleeMaxBudget, inline it; otherwise bail.
838+
if _, ok := listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
839+
if fn.Inl.Cost > inlineHotCalleeMaxBudget {
840+
return n
841+
}
842+
} else {
843+
return n
844+
}
720845
}
721846

722847
if fn == ir.CurFunc {
@@ -817,6 +942,13 @@ func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlCalls *[]*ir.Inlin
817942
fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
818943
}
819944

945+
if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
946+
inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
947+
}
948+
if base.Flag.LowerM != 0 {
949+
fmt.Printf("Line %v: is definitely inlined\n", ir.Line(n))
950+
}
951+
820952
res := NewInline(n, fn, inlIndex)
821953
if res == nil {
822954
return n

0 commit comments

Comments
 (0)