Skip to content

Commit 6546e8e

Browse files
committed
gopls/internal/lsp/source/typerefs: new package to analyze syntax deps
Add a new typerefs package that analyzes type-affecting references between declarations. This can be used to compute the set of packages that may influence each other's types, for the purpose of precise pruning. This CL includes both an implementation of the references analysis, and a standalone implementation of the whole-graph reachability analysis. The latter is intended to be temporary, as for various reasons it is more convenient for the memoization and computation of graph data to live in the gopls/internal/lsp/cache package. For golang/go#57987 Change-Id: Ia1355ff4dfe476319e6f4f8f06b2a79dd7ba554f Reviewed-on: https://go-review.googlesource.com/c/tools/+/479296 Run-TryBot: Robert Findley <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Alan Donovan <[email protected]> gopls-CI: kokoro <[email protected]>
1 parent 04059e1 commit 6546e8e

File tree

8 files changed

+1832
-5
lines changed

8 files changed

+1832
-5
lines changed

gopls/internal/lsp/cache/parse.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,14 @@ func parseGoImpl(ctx context.Context, fset *token.FileSet, fh source.FileHandle,
5151
if ctx.Err() != nil {
5252
return nil, ctx.Err()
5353
}
54-
pgf, _ := parseGoSrc(ctx, fset, fh.URI(), content, mode)
54+
pgf, _ := ParseGoSrc(ctx, fset, fh.URI(), content, mode)
5555
return pgf, nil
5656
}
5757

58-
// parseGoSrc parses a buffer of Go source, repairing the tree if necessary.
59-
func parseGoSrc(ctx context.Context, fset *token.FileSet, uri span.URI, src []byte, mode parser.Mode) (res *source.ParsedGoFile, fixes []fixType) {
58+
// ParseGoSrc parses a buffer of Go source, repairing the tree if necessary.
59+
//
60+
// The provided ctx is used only for logging.
61+
func ParseGoSrc(ctx context.Context, fset *token.FileSet, uri span.URI, src []byte, mode parser.Mode) (res *source.ParsedGoFile, fixes []fixType) {
6062
file, err := parser.ParseFile(fset, uri.Filename(), src, mode)
6163
var parseErr scanner.ErrorList
6264
if err != nil {

gopls/internal/lsp/cache/parse_cache.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*
166166
// inside of parseGoSrc without exceeding the allocated space.
167167
base, nextBase := c.allocateSpace(2*len(content) + parsePadding)
168168

169-
pgf, fixes1 := parseGoSrc(ctx, fileSetWithBase(base), uri, content, mode)
169+
pgf, fixes1 := ParseGoSrc(ctx, fileSetWithBase(base), uri, content, mode)
170170
file := pgf.Tok
171171
if file.Base()+file.Size()+1 > nextBase {
172172
// The parsed file exceeds its allocated space, likely due to multiple
@@ -178,7 +178,7 @@ func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*
178178
// there, as parseGoSrc will repeat them.
179179
actual := file.Base() + file.Size() - base // actual size consumed, after re-parsing
180180
base2, nextBase2 := c.allocateSpace(actual)
181-
pgf2, fixes2 := parseGoSrc(ctx, fileSetWithBase(base2), uri, content, mode)
181+
pgf2, fixes2 := ParseGoSrc(ctx, fileSetWithBase(base2), uri, content, mode)
182182

183183
// In golang/go#59097 we observed that this panic condition was hit.
184184
// One bug was found and fixed, but record more information here in
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package typerefs extracts from Go syntax a graph of symbol-level
6+
// dependencies, for the purpose of precise invalidation of package data.
7+
//
8+
// # Background
9+
//
10+
// The goal of this analysis is to determine, for each package P, a nearly
11+
// minimal set of packages that could affect the type checking of P. This set
12+
// may contain false positives, but the smaller this set the better we can
13+
// invalidate and prune packages in gopls.
14+
//
15+
// More precisely, for each package P we define the set of "reachable" packages
16+
// from P as the set of packages that may affect the (deep) export data of the
17+
// direct dependencies of P. By this definition, the complement of this set
18+
// cannot affect any information derived from type checking P (e.g.
19+
// diagnostics, cross references, or method sets). Therefore we need not
20+
// invalidate any results for P when a package in the complement of this set
21+
// changes.
22+
//
23+
// # Computing references
24+
//
25+
// For a given declaration D, references are computed based on identifiers or
26+
// dotted identifiers referenced in the declaration of D, that may affect
27+
// the type of D. However, these references reflect only local knowledge of the
28+
// package and its dependency metadata, and do not depend on any analysis of
29+
// the dependencies themselves.
30+
//
31+
// Specifically, if a referring identifier I appears in the declaration, we
32+
// record an edge from D to each object possibly referenced by I. We search for
33+
// references within type syntax, but do not actually type-check, so we can't
34+
// reliably determine whether an expression is a type or a term, or whether a
35+
// function is a builtin or generic. For example, the type of x in var x =
36+
// p.F(W) only depends on W if p.F is a builtin or generic function, which we
37+
// cannot know without type-checking package p. So we may over-approximate in
38+
// this way.
39+
//
40+
// - If I is declared in the current package, record a reference to its
41+
// declaration.
42+
// - Else, if there are any dot-imported imports in the current file and I is
43+
// exported, record a (possibly dangling) edge to the corresponding
44+
// declaration in each dot-imported package.
45+
//
46+
// If a dotted identifier q.I appears in the declaration, we
47+
// perform a similar operation:
48+
// - If q is declared in the current package, we record a reference to that
49+
// object. It may be a var or const that has a field or method I.
50+
// - Else, if q is a valid import name based on imports in the current file
51+
// and the provided metadata for dependency package names, record a
52+
// reference to the object I in that package.
53+
// - Additionally, handle the case where Q is exported, and Q.I may refer to
54+
// a field or method in a dot-imported package.
55+
//
56+
// That is essentially the entire algorithm, though there is some subtlety to
57+
// visiting the set of identifiers or dotted identifiers that may affect the
58+
// declaration type. See the visitDeclOrSpec function for the details of this
59+
// analysis. Notably, we also skip identifiers that refer to type parameters in
60+
// generic declarations.
61+
//
62+
// # API
63+
//
64+
// The main entry point for this analysis is the [Refs] function, which
65+
// implements the aforementioned syntactic analysis for a set of files
66+
// constituting a package.
67+
//
68+
// These references use shared state to efficiently represent references, by
69+
// way of the [PackageIndex] and [PackageSet] types.
70+
//
71+
// The [BuildPackageGraph] constructor implements a whole-graph analysis similar
72+
// to that which will be implemented by gopls, but for various reasons the
73+
// logic for this analysis will eventually live in the
74+
// [golang.org/x/tools/gopls/internal/lsp/cache] package. Nevertheless,
75+
// BuildPackageGraph and its test serve to verify the syntactic analysis, and
76+
// may serve as a proving ground for new optimizations of the whole-graph analysis.
77+
//
78+
// # Comparison with export data
79+
//
80+
// At first it may seem that the simplest way to implement this analysis would
81+
// be to consider the types.Packages of the dependencies of P, for example
82+
// during export. After all, it makes sense that the type checked packages
83+
// themselves could describe their dependencies. However, this does not work as
84+
// type information does not describe certain syntactic relationships.
85+
//
86+
// For example, the following scenarios cause type information to miss
87+
// syntactic relationships:
88+
//
89+
// Named type forwarding:
90+
//
91+
// package a; type A b.B
92+
// package b; type B int
93+
//
94+
// Aliases:
95+
//
96+
// package a; func A(f b.B)
97+
// package b; type B = func()
98+
//
99+
// Initializers:
100+
//
101+
// package a; var A = b.B()
102+
// package b; func B() string { return "hi" }
103+
//
104+
// Use of the unsafe package:
105+
//
106+
// package a; type A [unsafe.Sizeof(B{})]int
107+
// package b; type B struct { f1, f2, f3 int }
108+
//
109+
// In all of these examples, types do not contain information about the edge
110+
// between the a.A and b.B declarations.
111+
package typerefs
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package typerefs
6+
7+
import (
8+
"fmt"
9+
"math/bits"
10+
"sort"
11+
"strings"
12+
"sync"
13+
14+
"golang.org/x/tools/gopls/internal/lsp/source"
15+
)
16+
17+
// PackageIndex stores common data to enable efficient representation of
18+
// references and package sets.
//
// Each distinct package ID is assigned a small integer index (a packageIdx)
// the first time it is passed to idx; the id method maps an index back to
// its package ID.
19+
type PackageIndex struct {
20+
// For now, PackageIndex just indexes package ids, to save space and allow for
21+
// faster unions via sparse int vectors.
22+
mu sync.Mutex // held by idx and id while they access ids and m
23+
ids []source.PackageID // package IDs, each stored at the position given by its packageIdx
24+
m map[source.PackageID]packageIdx // inverse of ids: the index assigned to each package ID
25+
}
26+
27+
// packageIdx identifies a package by its position in PackageIndex.ids.
type packageIdx int // for additional type safety: an index in PackageIndex.ids
28+
29+
// NewPackageIndex creates a new PackageIndex instance for use in building
30+
// reference and package sets.
31+
func NewPackageIndex() *PackageIndex {
32+
return &PackageIndex{
33+
m: make(map[source.PackageID]packageIdx),
34+
}
35+
}
36+
37+
// idx returns the packageIdx referencing id, creating one if id is not yet
38+
// tracked by the receiver.
39+
func (r *PackageIndex) idx(id source.PackageID) packageIdx {
40+
r.mu.Lock()
41+
defer r.mu.Unlock()
42+
if i, ok := r.m[id]; ok {
43+
return i
44+
}
45+
i := packageIdx(len(r.ids))
46+
r.m[id] = i
47+
r.ids = append(r.ids, id)
48+
return i
49+
}
50+
51+
// id returns the PackageID for idx.
52+
//
53+
// idx must have been created by this PackageIndex instance.
54+
func (r *PackageIndex) id(idx packageIdx) source.PackageID {
55+
r.mu.Lock()
56+
defer r.mu.Unlock()
57+
return r.ids[idx]
58+
}
59+
60+
// A PackageSet is a set of source.PackageIDs, optimized for in-use memory
61+
// footprint and efficient union operations.
//
// Members are stored by package index rather than by ID: index i is recorded
// as bit i%blockSize of sparse[i/blockSize].
62+
type PackageSet struct {
63+
// PackageSet is a sparse int vector of package indexes from parent.
64+
parent *PackageIndex // index that assigns the package indexes stored in sparse
65+
sparse map[int]blockType // high bits in key, set of low bits in value
66+
}
67+
68+
type blockType = uint // type of each sparse vector element
69+
// blockSize is the number of bits in a blockType, and therefore the number of
// package indexes that a single sparse vector element can record.
const blockSize = bits.UintSize
70+
71+
// New creates a new PackageSet bound to this PackageIndex instance.
72+
//
73+
// PackageSets may only be combined with other PackageSets from the same
74+
// instance.
75+
func (s *PackageIndex) New() *PackageSet {
76+
return &PackageSet{
77+
parent: s,
78+
sparse: make(map[int]blockType),
79+
}
80+
}
81+
82+
// add records a new element in the package set.
83+
//
84+
// For internal use, since it adds by index rather than ID, to avoid lookups.
85+
func (s *PackageSet) add(idx packageIdx) {
86+
i := int(idx)
87+
s.sparse[i/blockSize] |= 1 << (i % blockSize)
88+
}
89+
90+
// Union records all elements from other into the receiver, mutating the
91+
// receiver set but not the argument set. The receiver must not be nil, but the
92+
// argument set may be nil.
93+
//
94+
// Precondition: both package sets were created with the same PackageIndex.
95+
func (s *PackageSet) Union(other *PackageSet) {
96+
if other == nil {
97+
return // e.g. unsafe
98+
}
99+
if other.parent != s.parent {
100+
panic("other set is from a different PackageIndex instance")
101+
}
102+
for k, v := range other.sparse {
103+
if v0 := s.sparse[k]; v0 != v {
104+
s.sparse[k] = v0 | v
105+
}
106+
}
107+
}
108+
109+
// Contains reports whether id is contained in the receiver set.
110+
func (s *PackageSet) Contains(id source.PackageID) bool {
111+
i := int(s.parent.idx(id))
112+
return s.sparse[i/blockSize]&(1<<(i%blockSize)) != 0
113+
}
114+
115+
// Elems calls f for each element of the set in ascending order.
116+
func (s *PackageSet) Elems(f func(source.PackageID)) {
117+
blockIndexes := make([]int, 0, len(s.sparse))
118+
for k := range s.sparse {
119+
blockIndexes = append(blockIndexes, k)
120+
}
121+
sort.Ints(blockIndexes)
122+
for _, i := range blockIndexes {
123+
v := s.sparse[i]
124+
for b := 0; b < blockSize; b++ {
125+
if (v & (1 << b)) != 0 {
126+
f(s.parent.id(packageIdx(i*blockSize + b)))
127+
}
128+
}
129+
}
130+
}
131+
132+
// Len reports the length of the receiver set.
133+
func (s *PackageSet) Len() int { // could be optimized
134+
l := 0
135+
s.Elems(func(source.PackageID) {
136+
l++
137+
})
138+
return l
139+
}
140+
141+
// String returns a human-readable representation of the set: {A, B, ...}.
142+
func (s *PackageSet) String() string {
143+
var ids []string
144+
s.Elems(func(id source.PackageID) {
145+
ids = append(ids, string(id))
146+
})
147+
return fmt.Sprintf("{%s}", strings.Join(ids, ", "))
148+
}

0 commit comments

Comments
 (0)