Skip to content

Commit 03e649f

Browse files
author
Raj Barik
committed
Enables PGO in Go and performs profile-guided inlining
1 parent 600db8a commit 03e649f

File tree

11 files changed

+2434
-7
lines changed

11 files changed

+2434
-7
lines changed

api/go1.19.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,13 @@ pkg net/url, type URL struct, OmitHost bool #46059
244244
pkg os/exec, method (*Cmd) Environ() []string #50599
245245
pkg os/exec, type Cmd struct, Err error #43724
246246
pkg os/exec, var ErrDot error #43724
247+
pkg pgo/inline, func A() #43724
248+
pkg pgo/inline, func D(uint) int #43724
249+
pkg pgo/inline, func N(uint) *BS #43724
250+
pkg pgo/inline, func T(uint64) uint #43724
251+
pkg pgo/inline, type BS struct #43724
252+
pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724
253+
pkg pgo/inline, method (*BS) S(uint) *BS #43724
247254
pkg regexp/syntax, const ErrNestingDepth = "expression nests too deeply" #51684
248255
pkg regexp/syntax, const ErrNestingDepth ErrorCode #51684
249256
pkg runtime/debug, func SetMemoryLimit(int64) int64 #48409

src/cmd/compile/internal/base/flag.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ type CmdFlags struct {
121121
TraceProfile string "help:\"write an execution trace to `file`\""
122122
TrimPath string "help:\"remove `prefix` from recorded source file paths\""
123123
WB bool "help:\"enable write barrier\"" // TODO: remove
124+
ProfileUse string "help:\"read profile from `file`\""
125+
InlineHotThreshold string "help:\"threshold percentage for determining hot methods and callsites for inlining\""
126+
InlineHotBudget int "help:\"inline budget for hot methods\""
124127

125128
// Configuration derived from flags; not a flag itself.
126129
Cfg struct {

src/cmd/compile/internal/gc/main.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"cmd/compile/internal/ir"
1717
"cmd/compile/internal/logopt"
1818
"cmd/compile/internal/noder"
19+
"cmd/compile/internal/pgo"
1920
"cmd/compile/internal/pkginit"
2021
"cmd/compile/internal/reflectdata"
2122
"cmd/compile/internal/ssa"
@@ -29,6 +30,7 @@ import (
2930
"flag"
3031
"fmt"
3132
"internal/buildcfg"
33+
"internal/profile"
3234
"log"
3335
"os"
3436
"runtime"
@@ -232,10 +234,28 @@ func Main(archInit func(*ssagen.ArchInfo)) {
232234
typecheck.AllImportedBodies()
233235
}
234236

237+
// Read cpu profile file and build cross-package pprof-graph and per-package weighted-call-graph.
238+
base.Timer.Start("fe", "profileuse")
239+
if base.Flag.ProfileUse != "" {
240+
if pgo.PProfGraph == nil {
241+
pgo.PProfGraph = pgo.BuildPProfGraph(base.Flag.ProfileUse, &profile.Options{
242+
CallTree: false,
243+
SampleValue: func(v []int64) int64 { return v[1] },
244+
})
245+
}
246+
pgo.WeightedCG = pgo.BuildWeightedCallGraph()
247+
}
248+
235249
// Inlining
236250
base.Timer.Start("fe", "inlining")
237251
if base.Flag.LowerL != 0 {
252+
if pgo.WeightedCG != nil {
253+
inline.InlinePrologue()
254+
}
238255
inline.InlinePackage()
256+
if pgo.WeightedCG != nil {
257+
inline.InlineEpilogue()
258+
}
239259
}
240260
noder.MakeWrappers(typecheck.Target) // must happen after inlining
241261

src/cmd/compile/internal/inline/inl.go

Lines changed: 143 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ package inline
2929
import (
3030
"fmt"
3131
"go/constant"
32+
"strconv"
3233
"strings"
3334

3435
"cmd/compile/internal/base"
3536
"cmd/compile/internal/ir"
3637
"cmd/compile/internal/logopt"
38+
"cmd/compile/internal/pgo"
3739
"cmd/compile/internal/typecheck"
3840
"cmd/compile/internal/types"
3941
"cmd/internal/obj"
@@ -42,7 +44,8 @@ import (
4244

4345
// Inlining budget parameters, gathered in one place
4446
const (
45-
inlineMaxBudget = 80
47+
inlineMaxBudget = 80
48+
// Budget increased due to hotness.
4649
inlineExtraAppendCost = 0
4750
// default is to inline if there's at most one call. -l=4 overrides this by using 1 instead.
4851
inlineExtraCallCost = 57 // 57 was benchmarked to provided most benefit with no bad surprises; see https://github.com/golang/go/issues/19348#issuecomment-439370742
@@ -51,9 +54,87 @@ const (
5154

5255
inlineBigFunctionNodes = 5000 // Functions with this many nodes are considered "big".
5356
inlineBigFunctionMaxCost = 20 // Max cost of inlinee when inlining into a "big" function.
57+
58+
)
59+
60+
var (
61+
// Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
62+
listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
63+
64+
// List of all hot call sites.
65+
candHotEdgeMap = make(map[string]struct{})
66+
67+
// List of inlined call sites.
68+
inlinedCallSites = make(map[pgo.CallSiteInfo]struct{})
69+
70+
// Threshold for Hot callsite inlining.
71+
inlineHotThresholdPercent = float64(2)
72+
73+
// Budget increased due to hotness.
74+
inlineHotMaxBudget int32 = 160
5475
)
5576

56-
// InlinePackage finds functions that can be inlined and clones them before walk expands them.
77+
// InlinePrologue records the hot callsites from ir-graph.
78+
func InlinePrologue() {
79+
if s, err := strconv.ParseFloat(base.Flag.InlineHotThreshold, 64); err == nil {
80+
inlineHotThresholdPercent = s
81+
if base.Flag.LowerM != 0 {
82+
fmt.Printf("hot-thres=%v\n", inlineHotThresholdPercent)
83+
}
84+
}
85+
86+
if base.Flag.InlineHotBudget != 0 {
87+
inlineHotMaxBudget = int32(base.Flag.InlineHotBudget)
88+
}
89+
90+
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
91+
for _, f := range list {
92+
name := ir.PkgFuncName(f)
93+
if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
94+
nodeweight := pgo.WeightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight)
95+
if nodeweight > inlineHotThresholdPercent {
96+
n.HotNode = true
97+
}
98+
for _, e := range pgo.WeightedCG.OutEdges[n] {
99+
if e.Weight != 0 {
100+
weightpercent := pgo.WeightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight)
101+
if weightpercent > inlineHotThresholdPercent {
102+
splits := strings.Split(e.CallSite, ":")
103+
line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
104+
lineno := fmt.Sprintf("%v", line2)
105+
canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
106+
if _, ok := candHotEdgeMap[canonicalName]; !ok {
107+
candHotEdgeMap[canonicalName] = struct{}{}
108+
}
109+
}
110+
}
111+
}
112+
}
113+
}
114+
})
115+
if base.Flag.LowerM > 4 {
116+
fmt.Printf("hot-cg before inline in dot format:")
117+
pgo.PrintWeightedCallGraphDOT(inlineHotThresholdPercent)
118+
}
119+
}
120+
121+
// InlineEpilogue updates IRGraph after inlining.
122+
func InlineEpilogue() {
123+
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
124+
for _, f := range list {
125+
name := ir.PkgFuncName(f)
126+
if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
127+
pgo.RedirectEdges(n, inlinedCallSites)
128+
}
129+
}
130+
})
131+
if base.Flag.LowerM > 4 {
132+
fmt.Printf("hot-cg after inline in dot:")
133+
pgo.PrintWeightedCallGraphDOT(inlineHotThresholdPercent)
134+
}
135+
}
136+
137+
// InlinePackage finds functions that can be inlined and clones them.
57138
func InlinePackage() {
58139
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
59140
numfns := numNonClosures(list)
@@ -81,6 +162,9 @@ func CanInline(fn *ir.Func) {
81162
base.Fatalf("CanInline no nname %+v", fn)
82163
}
83164

165+
// Initialize an empty list of hot callsites for this caller.
166+
listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
167+
84168
var reason string // reason, if any, that the function was not inlined
85169
if base.Flag.LowerM > 1 || logopt.Enabled() {
86170
defer func() {
@@ -181,10 +265,14 @@ func CanInline(fn *ir.Func) {
181265
budget: inlineMaxBudget,
182266
extraCallCost: cc,
183267
}
268+
savefn := ir.CurFunc
269+
ir.CurFunc = fn
184270
if visitor.tooHairy(fn) {
185271
reason = visitor.reason
272+
ir.CurFunc = savefn
186273
return
187274
}
275+
ir.CurFunc = savefn
188276

189277
n.Func.Inl = &ir.Inline{
190278
Cost: inlineMaxBudget - visitor.budget,
@@ -252,6 +340,19 @@ func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
252340
return true
253341
}
254342
if v.budget < 0 {
343+
if pgo.WeightedCG != nil {
344+
// Find the existing node in WeightedCallGraph.
345+
if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
346+
// If the cost of hot function is greater than inlineHotMaxBudget,
347+
// the inliner won't inline this function.
348+
if inlineMaxBudget-v.budget < inlineHotMaxBudget && n.HotNode == true {
349+
if base.Flag.LowerM > 1 {
350+
fmt.Printf("hot-node enabled increased budget for func=%v\n", ir.PkgFuncName(fn))
351+
}
352+
return false
353+
}
354+
}
355+
}
255356
v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
256357
return true
257358
}
@@ -315,6 +416,23 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
315416
}
316417
}
317418

419+
// Determine if callee edge is a hot callee or not.
420+
if pgo.WeightedCG != nil && ir.CurFunc != nil {
421+
if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
422+
lineno := fmt.Sprintf("%v", ir.Line(n))
423+
splits := strings.Split(lineno, ":")
424+
l, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
425+
linenum := fmt.Sprintf("%v", l)
426+
canonicalName := ir.PkgFuncName(ir.CurFunc) + "-" + linenum + "-" + ir.PkgFuncName(fn)
427+
if _, o := candHotEdgeMap[canonicalName]; o {
428+
if base.Flag.LowerM > 1 {
429+
fmt.Printf("hot-callsite identified at line=%v for func=%v\n", ir.Line(n), ir.PkgFuncName(ir.CurFunc))
430+
}
431+
listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
432+
}
433+
}
434+
}
435+
318436
if ir.IsIntrinsicCall(n) {
319437
// Treat like any other node.
320438
break
@@ -464,10 +582,12 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
464582

465583
v.budget--
466584

467-
// When debugging, don't stop early, to get full cost of inlining this function
468-
if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
469-
v.reason = "too expensive"
470-
return true
585+
if pgo.WeightedCG == nil {
586+
// When debugging, don't stop early, to get full cost of inlining this function
587+
if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
588+
v.reason = "too expensive"
589+
return true
590+
}
471591
}
472592

473593
return ir.DoChildren(n, v.do)
@@ -716,7 +836,18 @@ func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlCalls *[]*ir.Inlin
716836
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
717837
fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
718838
}
719-
return n
839+
840+
// If the callsite is hot and it is under the inlineHotMaxBudget budget, then inline it, or else bail.
841+
if _, ok := listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
842+
if fn.Inl.Cost > inlineHotMaxBudget {
843+
return n
844+
}
845+
if base.Flag.LowerM > 1 {
846+
fmt.Printf("hot-budget check allows inlining at %v\n", ir.Line(n))
847+
}
848+
} else {
849+
return n
850+
}
720851
}
721852

722853
if fn == ir.CurFunc {
@@ -817,7 +948,12 @@ func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlCalls *[]*ir.Inlin
817948
fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
818949
}
819950

951+
if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
952+
inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
953+
}
954+
820955
res := InlineCall(n, fn, inlIndex)
956+
821957
if res == nil {
822958
base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
823959
}

0 commit comments

Comments
 (0)