Skip to content

Commit 19ca233

Browse files
committed
cmd/compile/internal/inline: build call site table
Build up a table of (potentially) inlinable call sites during inline heuristic analysis, and introduce a framework for analyzing each call site to collect applicable flags (for example, is call nested in loop). This patch doesn't include any of the flag analysis, just the machinery to collect the callsites and a regression test harness. Updates #61502. Change-Id: Ieaf4a008ac9868e9762c63f5b59bd264dc71ab30 Reviewed-on: https://go-review.googlesource.com/c/go/+/511564 Reviewed-by: Matthew Dempsky <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent d2024a0 commit 19ca233

File tree

9 files changed

+609
-114
lines changed

9 files changed

+609
-114
lines changed

src/cmd/compile/internal/inline/inlheur/analyze.go

+26-9
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ const (
2323
debugTraceResults
2424
debugTraceParams
2525
debugTraceExprClassify
26+
debugTraceCalls
2627
)
2728

2829
// propAnalyzer interface is used for defining one or more analyzer
@@ -40,13 +41,19 @@ type propAnalyzer interface {
4041
setResults(fp *FuncProps)
4142
}
4243

43-
// fnInlHeur contains inline heuristics state information about
44-
// a specific Go function being analyzed/considered by the inliner.
44+
// fnInlHeur contains inline heuristics state information about a
45+
// specific Go function being analyzed/considered by the inliner. Note
46+
// that in addition to constructing a fnInlHeur object by analyzing a
47+
// specific *ir.Func, there is also code in the test harness
48+
// (funcprops_test.go) that builds up fnInlHeur's by reading in and
49+
// parsing a dump. This is the reason why we have file/fname/line
50+
// fields below instead of just an *ir.Func field.
4551
type fnInlHeur struct {
4652
fname string
4753
file string
4854
line uint
4955
props *FuncProps
56+
cstab CallSiteTab
5057
}
5158

5259
var fpmap = map[*ir.Func]fnInlHeur{}
@@ -55,13 +62,18 @@ func AnalyzeFunc(fn *ir.Func, canInline func(*ir.Func)) *FuncProps {
5562
if fih, ok := fpmap[fn]; ok {
5663
return fih.props
5764
}
58-
fp := computeFuncProps(fn, canInline)
65+
fp, fcstab := computeFuncProps(fn, canInline)
5966
file, line := fnFileLine(fn)
6067
entry := fnInlHeur{
6168
fname: fn.Sym().Name,
6269
file: file,
6370
line: line,
6471
props: fp,
72+
cstab: fcstab,
73+
}
74+
// Merge this function's call sites into the package level table.
75+
if err := cstab.merge(fcstab); err != nil {
76+
base.FatalfAt(fn.Pos(), "%v", err)
6577
}
6678
fpmap[fn] = entry
6779
return fp
@@ -70,7 +82,7 @@ func AnalyzeFunc(fn *ir.Func, canInline func(*ir.Func)) *FuncProps {
7082
// computeFuncProps examines the Go function 'fn' and computes for it
7183
// a function "properties" object, to be used to drive inlining
7284
// heuristics. See comments on the FuncProps type for more info.
73-
func computeFuncProps(fn *ir.Func, canInline func(*ir.Func)) *FuncProps {
85+
func computeFuncProps(fn *ir.Func, canInline func(*ir.Func)) (*FuncProps, CallSiteTab) {
7486
enableDebugTraceIfEnv()
7587
if debugTrace&debugTraceFuncs != 0 {
7688
fmt.Fprintf(os.Stderr, "=-= starting analysis of func %v:\n%+v\n",
@@ -85,8 +97,10 @@ func computeFuncProps(fn *ir.Func, canInline func(*ir.Func)) *FuncProps {
8597
for _, a := range analyzers {
8698
a.setResults(fp)
8799
}
100+
// Now build up a partial table of callsites for this func.
101+
cstab := computeCallSiteTable(fn)
88102
disableDebugTrace()
89-
return fp
103+
return fp, cstab
90104
}
91105

92106
func runAnalyzersOnFunction(fn *ir.Func, analyzers []propAnalyzer) {
@@ -164,7 +178,7 @@ func emitDumpToFile(dumpfile string) {
164178
}
165179
prevline = entry.line
166180
atl := atline[entry.line]
167-
if err := dumpFnPreamble(outf, &entry, idx, atl); err != nil {
181+
if err := dumpFnPreamble(outf, &entry, nil, idx, atl); err != nil {
168182
base.Fatalf("function props dump: %v\n", err)
169183
}
170184
}
@@ -211,11 +225,11 @@ func dumpFilePreamble(w io.Writer) {
211225
fmt.Fprintf(w, "// %s\n", preambleDelimiter)
212226
}
213227

214-
// dumpFilePreamble writes out a function-level preamble for a given
228+
// dumpFnPreamble writes out a function-level preamble for a given
215229
// Go function as part of a function properties dump. See the
216230
// README.txt file in testdata/props for more on the format of
217231
// this preamble.
218-
func dumpFnPreamble(w io.Writer, fih *fnInlHeur, idx, atl uint) error {
232+
func dumpFnPreamble(w io.Writer, fih *fnInlHeur, ecst encodedCallSiteTab, idx, atl uint) error {
219233
fmt.Fprintf(w, "// %s %s %d %d %d\n",
220234
fih.file, fih.fname, fih.line, idx, atl)
221235
// emit props as comments, followed by delimiter
@@ -224,7 +238,9 @@ func dumpFnPreamble(w io.Writer, fih *fnInlHeur, idx, atl uint) error {
224238
if err != nil {
225239
return fmt.Errorf("marshall error %v\n", err)
226240
}
227-
fmt.Fprintf(w, "// %s\n// %s\n", string(data), fnDelimiter)
241+
fmt.Fprintf(w, "// %s\n", string(data))
242+
dumpCallSiteComments(w, fih.cstab, ecst)
243+
fmt.Fprintf(w, "// %s\n", fnDelimiter)
228244
return nil
229245
}
230246

@@ -245,6 +261,7 @@ func sortFnInlHeurSlice(sl []fnInlHeur) []fnInlHeur {
245261
const preambleDelimiter = "<endfilepreamble>"
246262
const fnDelimiter = "<endfuncpreamble>"
247263
const comDelimiter = "<endpropsdump>"
264+
const csDelimiter = "<endcallsites>"
248265

249266
// dumpBuffer stores up function properties dumps when
250267
// "-d=dumpinlfuncprops=..." is in effect.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package inlheur
6+
7+
import (
8+
"cmd/compile/internal/ir"
9+
"cmd/compile/internal/pgo"
10+
"fmt"
11+
"os"
12+
)
13+
14+
type callSiteAnalyzer struct {
15+
cstab CallSiteTab
16+
nstack []ir.Node
17+
}
18+
19+
func makeCallSiteAnalyzer(fn *ir.Func) *callSiteAnalyzer {
20+
return &callSiteAnalyzer{
21+
cstab: make(CallSiteTab),
22+
}
23+
}
24+
25+
func computeCallSiteTable(fn *ir.Func) CallSiteTab {
26+
if debugTrace&debugTraceCalls != 0 {
27+
fmt.Fprintf(os.Stderr, "=-= making callsite table for func %v:\n",
28+
fn.Sym().Name)
29+
}
30+
csa := makeCallSiteAnalyzer(fn)
31+
var doNode func(ir.Node) bool
32+
doNode = func(n ir.Node) bool {
33+
csa.nodeVisitPre(n)
34+
ir.DoChildren(n, doNode)
35+
csa.nodeVisitPost(n)
36+
return false
37+
}
38+
doNode(fn)
39+
return csa.cstab
40+
}
41+
42+
func (csa *callSiteAnalyzer) flagsForNode(call *ir.CallExpr) CSPropBits {
43+
return 0
44+
}
45+
46+
func (csa *callSiteAnalyzer) addCallSite(callee *ir.Func, call *ir.CallExpr) {
47+
// FIXME: maybe bulk-allocate these?
48+
cs := &CallSite{
49+
Call: call,
50+
Callee: callee,
51+
Assign: csa.containingAssignment(call),
52+
Flags: csa.flagsForNode(call),
53+
Id: uint(len(csa.cstab)),
54+
}
55+
if _, ok := csa.cstab[call]; ok {
56+
fmt.Fprintf(os.Stderr, "*** cstab duplicate entry at: %s\n",
57+
fmtFullPos(call.Pos()))
58+
fmt.Fprintf(os.Stderr, "*** call: %+v\n", call)
59+
panic("bad")
60+
}
61+
if debugTrace&debugTraceCalls != 0 {
62+
fmt.Fprintf(os.Stderr, "=-= added callsite: callee=%s call=%v\n",
63+
callee.Sym().Name, callee)
64+
}
65+
66+
csa.cstab[call] = cs
67+
}
68+
69+
func (csa *callSiteAnalyzer) nodeVisitPre(n ir.Node) {
70+
switch n.Op() {
71+
case ir.OCALLFUNC:
72+
ce := n.(*ir.CallExpr)
73+
callee := pgo.DirectCallee(ce.X)
74+
if callee != nil && callee.Inl != nil {
75+
csa.addCallSite(callee, ce)
76+
}
77+
}
78+
csa.nstack = append(csa.nstack, n)
79+
}
80+
81+
func (csa *callSiteAnalyzer) nodeVisitPost(n ir.Node) {
82+
csa.nstack = csa.nstack[:len(csa.nstack)-1]
83+
}
84+
85+
// containingAssignment returns the top-level assignment statement
86+
// for a statement level function call "n". Examples:
87+
//
88+
// x := foo()
89+
// x, y := bar(z, baz())
90+
// if blah() { ...
91+
//
92+
// Here the top-level assignment statement for the foo() call is the
93+
// statement assigning to "x"; the top-level assignment for "bar()"
94+
// call is the assignment to x,y. For the baz() and blah() calls,
95+
// there is no top level assignment statement.
96+
//
97+
// The unstated goal here is that we want to use the containing assignment
98+
// to establish a connection between a given call and the variables
99+
// to which its results/returns are being assigned.
100+
//
101+
// Note that for the "bar" command above, the front end sometimes
102+
// decomposes this into two assignments, the first one assigning the
103+
// call to a pair of auto-temps, then the second one assigning the
104+
// auto-temps to the user-visible vars. This helper will return the
105+
// second (outer) of these two.
106+
func (csa *callSiteAnalyzer) containingAssignment(n ir.Node) ir.Node {
107+
parent := csa.nstack[len(csa.nstack)-1]
108+
109+
// assignsOnlyAutoTemps returns TRUE of the specified OAS2FUNC
110+
// node assigns only auto-temps.
111+
assignsOnlyAutoTemps := func(x ir.Node) bool {
112+
alst := x.(*ir.AssignListStmt)
113+
oa2init := alst.Init()
114+
if len(oa2init) == 0 {
115+
return false
116+
}
117+
for _, v := range oa2init {
118+
d := v.(*ir.Decl)
119+
if !ir.IsAutoTmp(d.X) {
120+
return false
121+
}
122+
}
123+
return true
124+
}
125+
126+
// Simple case: x := foo()
127+
if parent.Op() == ir.OAS {
128+
return parent
129+
}
130+
131+
// Multi-return case: x, y := bar()
132+
if parent.Op() == ir.OAS2FUNC {
133+
// Hack city: if the result vars are auto-temps, try looking
134+
// for an outer assignment in the tree. The code shape we're
135+
// looking for here is:
136+
//
137+
// OAS1({x,y},OCONVNOP(OAS2FUNC({auto1,auto2},OCALLFUNC(bar))))
138+
//
139+
if assignsOnlyAutoTemps(parent) {
140+
par2 := csa.nstack[len(csa.nstack)-2]
141+
if par2.Op() == ir.OAS2 {
142+
return par2
143+
}
144+
if par2.Op() == ir.OCONVNOP {
145+
par3 := csa.nstack[len(csa.nstack)-3]
146+
if par3.Op() == ir.OAS2 {
147+
return par3
148+
}
149+
}
150+
}
151+
}
152+
153+
return nil
154+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package inlheur
6+
7+
import (
8+
"cmd/compile/internal/base"
9+
"cmd/compile/internal/ir"
10+
"cmd/internal/src"
11+
"fmt"
12+
"io"
13+
"path/filepath"
14+
"sort"
15+
"strings"
16+
)
17+
18+
// CallSite records useful information about a potentially inlinable
19+
// (direct) function call. "Callee" is the target of the call, "Call"
20+
// is the ir node corresponding to the call itself, "Assign" is
21+
// the top-level assignment statement containing the call (if the call
22+
// appears in the form of a top-level statement, e.g. "x := foo()"),
23+
// "Flags" contains properties of the call that might be useful for
24+
// making inlining decisions, "Score" is the final score assigned to
25+
// the site, and "Id" is a numeric ID for the site within its
26+
// containing function.
27+
type CallSite struct {
28+
Callee *ir.Func
29+
Call *ir.CallExpr
30+
Assign ir.Node
31+
Flags CSPropBits
32+
Score int
33+
Id uint
34+
}
35+
36+
// CallSiteTab is a table of call sites, keyed by call expr.
37+
// Ideally it would be nice to key the table by src.XPos, but
38+
// this results in collisions for calls on very long lines (the
39+
// front end saturates column numbers at 255). We also wind up
40+
// with many calls that share the same auto-generated pos.
41+
type CallSiteTab map[*ir.CallExpr]*CallSite
42+
43+
// Package-level table of callsites.
44+
var cstab = CallSiteTab{}
45+
46+
type CSPropBits uint32
47+
48+
const (
49+
CallSiteInLoop CSPropBits = 1 << iota
50+
CallSiteOnPanicPath
51+
CallSiteInInitFunc
52+
)
53+
54+
// encodedCallSiteTab is a table keyed by "encoded" callsite
55+
// (stringified src.XPos plus call site ID) mapping to a value of call
56+
// property bits.
57+
type encodedCallSiteTab map[string]CSPropBits
58+
59+
func (cst CallSiteTab) merge(other CallSiteTab) error {
60+
for k, v := range other {
61+
if prev, ok := cst[k]; ok {
62+
return fmt.Errorf("internal error: collision during call site table merge, fn=%s callsite=%s", prev.Callee.Sym().Name, fmtFullPos(prev.Call.Pos()))
63+
}
64+
cst[k] = v
65+
}
66+
return nil
67+
}
68+
69+
func fmtFullPos(p src.XPos) string {
70+
var sb strings.Builder
71+
sep := ""
72+
base.Ctxt.AllPos(p, func(pos src.Pos) {
73+
fmt.Fprintf(&sb, sep)
74+
sep = "|"
75+
file := filepath.Base(pos.Filename())
76+
fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
77+
})
78+
return sb.String()
79+
}
80+
81+
func encodeCallSiteKey(cs *CallSite) string {
82+
var sb strings.Builder
83+
// FIXME: rewrite line offsets relative to function start
84+
sb.WriteString(fmtFullPos(cs.Call.Pos()))
85+
fmt.Fprintf(&sb, "|%d", cs.Id)
86+
return sb.String()
87+
}
88+
89+
func buildEncodedCallSiteTab(tab CallSiteTab) encodedCallSiteTab {
90+
r := make(encodedCallSiteTab)
91+
for _, cs := range tab {
92+
k := encodeCallSiteKey(cs)
93+
r[k] = cs.Flags
94+
}
95+
return r
96+
}
97+
98+
// dumpCallSiteComments emits comments into the dump file for the
99+
// callsites in the function of interest. If "ecst" is non-nil, we use
100+
// that, otherwise generated a fresh encodedCallSiteTab from "tab".
101+
func dumpCallSiteComments(w io.Writer, tab CallSiteTab, ecst encodedCallSiteTab) {
102+
if ecst == nil {
103+
ecst = buildEncodedCallSiteTab(tab)
104+
}
105+
tags := make([]string, 0, len(ecst))
106+
for k := range ecst {
107+
tags = append(tags, k)
108+
}
109+
sort.Strings(tags)
110+
for _, s := range tags {
111+
v := ecst[s]
112+
fmt.Fprintf(w, "// callsite: %s flagstr %q flagval %d\n", s, v.String(), v)
113+
}
114+
fmt.Fprintf(w, "// %s\n", csDelimiter)
115+
}

0 commit comments

Comments
 (0)