Skip to content

Commit 746f7e1

Browse files
committed
cmd/compile/internal/inline/inlheur: assign scores to callsites
Assign scores to callsites based on previously computed function properties and callsite properties. This currently works by taking the size score for the function (as computed by CanInline) and then making a series of adjustments, positive or negative, based on various function and callsite properties. NB: much work also remains in deciding what the best score adjustment values are for specific heuristics. I've picked a bunch of arbitrary constants, but they will almost certainly need tuning and tweaking to arrive at something that has good performance. Updates #61502. Change-Id: I887403f95e76d7aa2708494b8686c6026861a6ed Reviewed-on: https://go-review.googlesource.com/c/go/+/511566 Reviewed-by: Matthew Dempsky <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent dc0548f commit 746f7e1

File tree

10 files changed

+574
-73
lines changed

10 files changed

+574
-73
lines changed

src/cmd/compile/internal/inline/inl.go

+10-8
Original file line numberDiff line numberDiff line change
@@ -293,15 +293,10 @@ func CanInline(fn *ir.Func, profile *pgo.Profile) {
293293
base.Fatalf("CanInline no nname %+v", fn)
294294
}
295295

296-
canInline := func(fn *ir.Func) { CanInline(fn, profile) }
297-
298296
var funcProps *inlheur.FuncProps
299-
if goexperiment.NewInliner {
300-
funcProps = inlheur.AnalyzeFunc(fn, canInline)
301-
}
302-
303-
if base.Debug.DumpInlFuncProps != "" {
304-
inlheur.DumpFuncProps(fn, base.Debug.DumpInlFuncProps, canInline)
297+
if goexperiment.NewInliner || inlheur.UnitTesting() {
298+
funcProps = inlheur.AnalyzeFunc(fn,
299+
func(fn *ir.Func) { CanInline(fn, profile) })
305300
}
306301

307302
var reason string // reason, if any, that the function was not inlined
@@ -803,6 +798,13 @@ func isBigFunc(fn *ir.Func) bool {
803798
// InlineCalls/inlnode walks fn's statements and expressions and substitutes any
804799
// calls made to inlineable functions. This is the external entry point.
805800
func InlineCalls(fn *ir.Func, profile *pgo.Profile) {
801+
if goexperiment.NewInliner && !fn.Wrapper() {
802+
inlheur.ScoreCalls(fn)
803+
}
804+
if base.Debug.DumpInlFuncProps != "" && !fn.Wrapper() {
805+
inlheur.DumpFuncProps(fn, base.Debug.DumpInlFuncProps,
806+
func(fn *ir.Func) { CanInline(fn, profile) })
807+
}
806808
savefn := ir.CurFunc
807809
ir.CurFunc = fn
808810
bigCaller := isBigFunc(fn)

src/cmd/compile/internal/inline/inlheur/analyze.go

+30-5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const (
2424
debugTraceParams
2525
debugTraceExprClassify
2626
debugTraceCalls
27+
debugTraceScoring
2728
)
2829

2930
// propAnalyzer interface is used for defining one or more analyzer
@@ -76,6 +77,9 @@ func AnalyzeFunc(fn *ir.Func, canInline func(*ir.Func)) *FuncProps {
7677
base.FatalfAt(fn.Pos(), "%v", err)
7778
}
7879
fpmap[fn] = entry
80+
if fn.Inl != nil && fn.Inl.Properties == "" {
81+
fn.Inl.Properties = entry.props.SerializeToString()
82+
}
7983
return fp
8084
}
8185

@@ -139,12 +143,26 @@ func UnitTesting() bool {
139143
}
140144

141145
// DumpFuncProps computes and caches function properties for the func
142-
// 'fn', or if fn is nil, writes out the cached set of properties to
143-
// the file given in 'dumpfile'. Used for the "-d=dumpinlfuncprops=..."
144-
// command line flag, intended for use primarily in unit testing.
146+
// 'fn' and any closures it contains, or if fn is nil, it writes out the
147+
// cached set of properties to the file given in 'dumpfile'. Used for
148+
// the "-d=dumpinlfuncprops=..." command line flag, intended for use
149+
// primarily in unit testing.
145150
func DumpFuncProps(fn *ir.Func, dumpfile string, canInline func(*ir.Func)) {
146151
if fn != nil {
152+
dmp := func(fn *ir.Func) {
153+
154+
if !goexperiment.NewInliner {
155+
ScoreCalls(fn)
156+
}
157+
captureFuncDumpEntry(fn, canInline)
158+
}
147159
captureFuncDumpEntry(fn, canInline)
160+
dmp(fn)
161+
ir.Visit(fn, func(n ir.Node) {
162+
if clo, ok := n.(*ir.ClosureExpr); ok {
163+
dmp(clo.Func)
164+
}
165+
})
148166
} else {
149167
emitDumpToFile(dumpfile)
150168
}
@@ -185,9 +203,16 @@ func emitDumpToFile(dumpfile string) {
185203
dumpBuffer = nil
186204
}
187205

188-
// captureFuncDumpEntry analyzes function 'fn' and adds a entry
189-
// for it to 'dumpBuffer'. Used for unit testing.
206+
// captureFuncDumpEntry grabs the function properties object for 'fn'
207+
// and enqueues it for later dumping. Used for the
208+
// "-d=dumpinlfuncprops=..." command line flag, intended for use
209+
// primarily in unit testing.
190210
func captureFuncDumpEntry(fn *ir.Func, canInline func(*ir.Func)) {
211+
if debugTrace&debugTraceFuncs != 0 {
212+
fmt.Fprintf(os.Stderr, "=-= capturing dump for %v:\n",
213+
fn.Sym().Name)
214+
}
215+
191216
// avoid capturing compiler-generated equality funcs.
192217
if strings.HasPrefix(fn.Sym().Name, ".eq.") {
193218
return

src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go

+81-3
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
package inlheur
66

77
import (
8+
"cmd/compile/internal/base"
89
"cmd/compile/internal/ir"
910
"cmd/compile/internal/pgo"
1011
"fmt"
1112
"os"
13+
"sort"
1214
"strings"
1315
)
1416

@@ -120,26 +122,102 @@ func (csa *callSiteAnalyzer) determinePanicPathBits(call ir.Node, r CSPropBits)
120122
}
121123

122124
func (csa *callSiteAnalyzer) addCallSite(callee *ir.Func, call *ir.CallExpr) {
125+
flags := csa.flagsForNode(call)
123126
// FIXME: maybe bulk-allocate these?
124127
cs := &CallSite{
125128
Call: call,
126129
Callee: callee,
127130
Assign: csa.containingAssignment(call),
128-
Flags: csa.flagsForNode(call),
129-
Id: uint(len(csa.cstab)),
131+
Flags: flags,
132+
ID: uint(len(csa.cstab)),
130133
}
131134
if _, ok := csa.cstab[call]; ok {
132135
fmt.Fprintf(os.Stderr, "*** cstab duplicate entry at: %s\n",
133136
fmtFullPos(call.Pos()))
134137
fmt.Fprintf(os.Stderr, "*** call: %+v\n", call)
135138
panic("bad")
136139
}
140+
if callee.Inl != nil {
141+
// Set initial score for callsite to the cost computed
142+
// by CanInline; this score will be refined later based
143+
// on heuristics.
144+
cs.Score = int(callee.Inl.Cost)
145+
}
146+
147+
csa.cstab[call] = cs
137148
if debugTrace&debugTraceCalls != 0 {
138149
fmt.Fprintf(os.Stderr, "=-= added callsite: callee=%s call=%v\n",
139150
callee.Sym().Name, callee)
140151
}
152+
}
141153

142-
csa.cstab[call] = cs
154+
// ScoreCalls assigns numeric scores to each of the callsites in
155+
// function 'fn'; the lower the score, the more helpful we think it
156+
// will be to inline.
157+
//
158+
// Unlike a lot of the other inline heuristics machinery, callsite
159+
// scoring can't be done as part of the CanInline call for a function,
160+
// due to the fact that we may be working on a non-trivial SCC. So for
161+
// example with this SCC:
162+
//
163+
// func foo(x int) {           func bar(x int, f func()) {
164+
//   if x != 0 {                  f()
165+
//     bar(x, func(){})           foo(x-1)
166+
//   }                          }
167+
// }
168+
//
169+
// We don't want to perform scoring for the 'foo' call in "bar" until
170+
// after foo has been analyzed, but it's conceivable that CanInline
171+
// might visit bar before foo for this SCC.
172+
func ScoreCalls(fn *ir.Func) {
173+
enableDebugTraceIfEnv()
174+
defer disableDebugTrace()
175+
if debugTrace&debugTraceScoring != 0 {
176+
fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn))
177+
}
178+
179+
fih, ok := fpmap[fn]
180+
if !ok {
181+
// TODO: add an assert/panic here.
182+
return
183+
}
184+
185+
// Sort callsites to avoid any surprises with non-deterministic
186+
// map iteration order (this is probably not needed, but here just
187+
// in case).
188+
csl := make([]*CallSite, 0, len(fih.cstab))
189+
for _, cs := range fih.cstab {
190+
csl = append(csl, cs)
191+
}
192+
sort.Slice(csl, func(i, j int) bool {
193+
return csl[i].ID < csl[j].ID
194+
})
195+
196+
// Score each call site.
197+
for _, cs := range csl {
198+
var cprops *FuncProps
199+
fihcprops := false
200+
desercprops := false
201+
if fih, ok := fpmap[cs.Callee]; ok {
202+
cprops = fih.props
203+
fihcprops = true
204+
} else if cs.Callee.Inl != nil {
205+
cprops = DeserializeFromString(cs.Callee.Inl.Properties)
206+
desercprops = true
207+
} else {
208+
if base.Debug.DumpInlFuncProps != "" {
209+
fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos()))
210+
panic("should never happen")
211+
} else {
212+
continue
213+
}
214+
}
215+
cs.Score, cs.ScoreMask = computeCallSiteScore(cs.Callee, cprops, cs.Call, cs.Flags)
216+
217+
if debugTrace&debugTraceScoring != 0 {
218+
fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d fih=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
219+
}
220+
}
143221
}
144222

145223
func (csa *callSiteAnalyzer) nodeVisitPre(n ir.Node) {

src/cmd/compile/internal/inline/inlheur/callsite.go

+29-13
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,16 @@ import (
2222
// appears in the form of a top-level statement, e.g. "x := foo()"),
2323
// "Flags" contains properties of the call that might be useful for
2424
// making inlining decisions, "Score" is the final score assigned to
25-
// the site, and "Id" is a numeric ID for the site within its
25+
// the site, and "ID" is a numeric ID for the site within its
2626
// containing function.
2727
type CallSite struct {
28-
Callee *ir.Func
29-
Call *ir.CallExpr
30-
Assign ir.Node
31-
Flags CSPropBits
32-
Score int
33-
Id uint
28+
Callee *ir.Func
29+
Call *ir.CallExpr
30+
Assign ir.Node
31+
Flags CSPropBits
32+
Score int
33+
ScoreMask scoreAdjustTyp
34+
ID uint
3435
}
3536

3637
// CallSiteTab is a table of call sites, keyed by call expr.
@@ -53,8 +54,19 @@ const (
5354

5455
// encodedCallSiteTab is a table keyed by "encoded" callsite
5556
// (stringified src.XPos plus call site ID) mapping to a value of call
56-
// property bits.
57-
type encodedCallSiteTab map[string]CSPropBits
57+
// property bits and score.
58+
type encodedCallSiteTab map[string]propsAndScore
59+
60+
type propsAndScore struct {
61+
props CSPropBits
62+
score int
63+
mask scoreAdjustTyp
64+
}
65+
66+
func (pas propsAndScore) String() string {
67+
return fmt.Sprintf("P=%s|S=%d|M=%s", pas.props.String(),
68+
pas.score, pas.mask.String())
69+
}
5870

5971
func (cst CallSiteTab) merge(other CallSiteTab) error {
6072
for k, v := range other {
@@ -80,17 +92,21 @@ func fmtFullPos(p src.XPos) string {
8092

8193
func encodeCallSiteKey(cs *CallSite) string {
8294
var sb strings.Builder
83-
// FIXME: rewrite line offsets relative to function start
95+
// FIXME: maybe rewrite line offsets relative to function start?
8496
sb.WriteString(fmtFullPos(cs.Call.Pos()))
85-
fmt.Fprintf(&sb, "|%d", cs.Id)
97+
fmt.Fprintf(&sb, "|%d", cs.ID)
8698
return sb.String()
8799
}
88100

89101
func buildEncodedCallSiteTab(tab CallSiteTab) encodedCallSiteTab {
90102
r := make(encodedCallSiteTab)
91103
for _, cs := range tab {
92104
k := encodeCallSiteKey(cs)
93-
r[k] = cs.Flags
105+
r[k] = propsAndScore{
106+
props: cs.Flags,
107+
score: cs.Score,
108+
mask: cs.ScoreMask,
109+
}
94110
}
95111
return r
96112
}
@@ -109,7 +125,7 @@ func dumpCallSiteComments(w io.Writer, tab CallSiteTab, ecst encodedCallSiteTab)
109125
sort.Strings(tags)
110126
for _, s := range tags {
111127
v := ecst[s]
112-
fmt.Fprintf(w, "// callsite: %s flagstr %q flagval %d\n", s, v.String(), v)
128+
fmt.Fprintf(w, "// callsite: %s flagstr %q flagval %d score %d mask %d maskstr %q\n", s, v.props.String(), v.props, v.score, v.mask, v.mask.String())
113129
}
114130
fmt.Fprintf(w, "// %s\n", csDelimiter)
115131
}

src/cmd/compile/internal/inline/inlheur/funcprops_test.go

+26-14
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ func TestFuncProperties(t *testing.T) {
7272
continue
7373
}
7474
if eidx >= len(eentries) {
75-
t.Errorf("missing expected entry for %s, skipping",
76-
dentry.fname)
75+
t.Errorf("testcase %s missing expected entry for %s, skipping", tc, dentry.fname)
7776
continue
7877
}
7978
eentry := eentries[eidx]
@@ -124,20 +123,18 @@ func compareEntries(t *testing.T, tc string, dentry *fnInlHeur, dcsites encodedC
124123
// Compare call sites.
125124
for k, ve := range ecsites {
126125
if vd, ok := dcsites[k]; !ok {
127-
t.Errorf("missing expected callsite %q in func %q",
128-
dfn, k)
126+
t.Errorf("testcase %q missing expected callsite %q in func %q", tc, k, dfn)
129127
continue
130128
} else {
131129
if vd != ve {
132-
t.Errorf("callsite %q in func %q: got %s want %s",
133-
k, dfn, vd.String(), ve.String())
130+
t.Errorf("testcase %q callsite %q in func %q: got %+v want %+v",
131+
tc, k, dfn, vd.String(), ve.String())
134132
}
135133
}
136134
}
137135
for k := range dcsites {
138136
if _, ok := ecsites[k]; !ok {
139-
t.Errorf("unexpected extra callsite %q in func %q",
140-
dfn, k)
137+
t.Errorf("testcase %q unexpected extra callsite %q in func %q", tc, k, dfn)
141138
}
142139
}
143140
}
@@ -276,13 +273,12 @@ func (dr *dumpReader) readEntry() (fnInlHeur, encodedCallSiteTab, error) {
276273
if line == csDelimiter {
277274
break
278275
}
279-
// expected format: "// callsite: <expanded pos> flagstr <desc> flagval <flags>"
276+
// expected format: "// callsite: <expanded pos> flagstr <desc> flagval <flags> score <score> mask <scoremask> maskstr <scoremaskstring>"
280277
fields := strings.Fields(line)
281-
if len(fields) != 6 {
282-
return fih, nil, fmt.Errorf("malformed callsite %s line %d: %s",
283-
dr.p, dr.ln, line)
278+
if len(fields) != 12 {
279+
return fih, nil, fmt.Errorf("malformed callsite (nf=%d) %s line %d: %s", len(fields), dr.p, dr.ln, line)
284280
}
285-
if fields[2] != "flagstr" || fields[4] != "flagval" {
281+
if fields[2] != "flagstr" || fields[4] != "flagval" || fields[6] != "score" || fields[8] != "mask" || fields[10] != "maskstr" {
286282
return fih, nil, fmt.Errorf("malformed callsite %s line %d: %s",
287283
dr.p, dr.ln, line)
288284
}
@@ -293,7 +289,23 @@ func (dr *dumpReader) readEntry() (fnInlHeur, encodedCallSiteTab, error) {
293289
return fih, nil, fmt.Errorf("bad flags val %s line %d: %q err=%v",
294290
dr.p, dr.ln, line, err)
295291
}
296-
callsites[tag] = CSPropBits(flags)
292+
scorestr := fields[7]
293+
score, err2 := strconv.Atoi(scorestr)
294+
if err2 != nil {
295+
return fih, nil, fmt.Errorf("bad score val %s line %d: %q err=%v",
296+
dr.p, dr.ln, line, err2)
297+
}
298+
maskstr := fields[9]
299+
mask, err3 := strconv.Atoi(maskstr)
300+
if err3 != nil {
301+
return fih, nil, fmt.Errorf("bad mask val %s line %d: %q err=%v",
302+
dr.p, dr.ln, line, err3)
303+
}
304+
callsites[tag] = propsAndScore{
305+
props: CSPropBits(flags),
306+
score: score,
307+
mask: scoreAdjustTyp(mask),
308+
}
297309
}
298310

299311
// Consume function delimiter.

0 commit comments

Comments
 (0)