Skip to content

Commit 6d4ccf2

Browse files
committed
gopls/internal/cache: prime goimports cache asynchronously
Gopls' refresh of the goimports resolver already introduces non-determinism into imports operations: gopls does not observe changes until the asynchronous refresh occurs. This change allows operations to continue to run on the stale resolver until a new resolver is ready. Due to inherent raciness, it's hard to benchmark the impact of this change: one would have to catch gopls during a refresh, which occurs at an automatically adjusted pacing. Also update TODOs. Fixes golang/go#59216 Change-Id: I303df998d804c9a1cd1c0e307872d1d271eed601 Reviewed-on: https://go-review.googlesource.com/c/tools/+/561235 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Alan Donovan <[email protected]>
1 parent 8b6359d commit 6d4ccf2

File tree

4 files changed

+78
-46
lines changed

4 files changed

+78
-46
lines changed

gopls/internal/cache/imports.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -207,21 +207,23 @@ func (s *importsState) refreshProcessEnv() {
207207
start := time.Now()
208208

209209
s.mu.Lock()
210-
env := s.processEnv
211-
if resolver, err := s.processEnv.GetResolver(); err == nil {
212-
resolver.ClearForNewScan()
213-
}
214-
// TODO(rfindley): it's not clear why we're unlocking here. Shouldn't we
215-
// guard the use of env below? In any case, we can prime a separate resolver.
210+
resolver, err := s.processEnv.GetResolver()
216211
s.mu.Unlock()
212+
if err != nil {
213+
return
214+
}
217215

218216
event.Log(s.ctx, "background imports cache refresh starting")
219217

220-
// TODO(rfindley, golang/go#59216): do this priming with a separate resolver,
221-
// and then replace, so that we never have to wait on an unprimed cache.
222-
if err := imports.PrimeCache(context.Background(), env); err == nil {
218+
// Prime the new resolver before updating the processEnv, so that gopls
219+
// doesn't wait on an unprimed cache.
220+
if err := imports.PrimeCache(context.Background(), resolver); err == nil {
223221
event.Log(ctx, fmt.Sprintf("background refresh finished after %v", time.Since(start)))
224222
} else {
225223
event.Log(ctx, fmt.Sprintf("background refresh finished after %v", time.Since(start)), keys.Err.Of(err))
226224
}
225+
226+
s.mu.Lock()
227+
s.processEnv.UpdateResolver(resolver)
228+
s.mu.Unlock()
227229
}

internal/imports/fix.go

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -701,20 +701,21 @@ func ScoreImportPaths(ctx context.Context, env *ProcessEnv, paths []string) (map
701701
return result, nil
702702
}
703703

704-
func PrimeCache(ctx context.Context, env *ProcessEnv) error {
704+
func PrimeCache(ctx context.Context, resolver Resolver) error {
705705
// Fully scan the disk for directories, but don't actually read any Go files.
706706
callback := &scanCallback{
707-
rootFound: func(gopathwalk.Root) bool {
708-
return true
707+
rootFound: func(root gopathwalk.Root) bool {
708+
// See getCandidatePkgs: walking GOROOT is apparently expensive and
709+
// unnecessary.
710+
return root.Type != gopathwalk.RootGOROOT
709711
},
710712
dirFound: func(pkg *pkg) bool {
711713
return false
712714
},
713-
packageNameLoaded: func(pkg *pkg) bool {
714-
return false
715-
},
715+
// packageNameLoaded and exportsLoaded must never be called.
716716
}
717-
return getCandidatePkgs(ctx, callback, "", "", env)
717+
718+
return resolver.scan(ctx, callback)
718719
}
719720

720721
func candidateImportName(pkg *pkg) string {
@@ -1089,7 +1090,12 @@ type Resolver interface {
10891090
// scoreImportPath returns the relevance for an import path.
10901091
scoreImportPath(ctx context.Context, path string) float64
10911092

1092-
ClearForNewScan()
1093+
// ClearForNewScan returns a new Resolver based on the receiver that has
1094+
// cleared its internal caches of directory contents.
1095+
//
1096+
// The new resolver should be primed and then set via
1097+
// [ProcessEnv.UpdateResolver].
1098+
ClearForNewScan() Resolver
10931099
}
10941100

10951101
// A scanCallback controls a call to scan and receives its results.
@@ -1270,11 +1276,8 @@ func newGopathResolver(env *ProcessEnv) *gopathResolver {
12701276
return r
12711277
}
12721278

1273-
func (r *gopathResolver) ClearForNewScan() {
1274-
<-r.scanSema
1275-
r.cache = NewDirInfoCache()
1276-
r.walked = false
1277-
r.scanSema <- struct{}{}
1279+
func (r *gopathResolver) ClearForNewScan() Resolver {
1280+
return newGopathResolver(r.env)
12781281
}
12791282

12801283
func (r *gopathResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) {

internal/imports/mod.go

Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,11 @@ import (
3030
// both caused by populating the cache, albeit in slightly different ways.
3131
//
3232
// A high level list of TODOs:
33-
// - Write an additional benchmark for refreshing the directory state.
34-
// - Split scanning the module cache from other ModuleResolver functionality,
35-
// as it is the source of performance woes (and inconsistency).
36-
// - Allow sharing module cache state across multiple ModuleResolvers.
3733
// - Optimize the scan itself, as there is some redundancy statting and
3834
// reading go.mod files.
39-
// - Make it possible to reuse the current state while running a refresh in
40-
// the background.
41-
// - Fix context cancellation (again): if the context is cancelled while a
42-
// root is being walked, nothing stops that ongoing walk.
35+
// - Invert the relationship between ProcessEnv and Resolver (see the
36+
// docstring of ProcessEnv).
37+
// - Make it easier to use an external resolver implementation.
4338
//
4439
// Smaller TODOs are annotated in the code below.
4540

@@ -72,7 +67,11 @@ type ModuleResolver struct {
7267
modsByDir []*gocommand.ModuleJSON // ...or by the number of path components in their Dir.
7368

7469
// Scanning state, populated by scan
75-
scanSema chan struct{} // prevents concurrent scans and guards scannedRoots
70+
71+
// scanSema prevents concurrent scans, and guards scannedRoots and the cache
72+
// fields below (though the caches themselves are concurrency safe).
73+
// Receive to acquire, send to release.
74+
scanSema chan struct{}
7675
scannedRoots map[gopathwalk.Root]bool // if true, root has been walked
7776

7877
// Caches of directory info, populated by scans and scan callbacks
@@ -86,12 +85,16 @@ type ModuleResolver struct {
8685
otherCache *DirInfoCache
8786
}
8887

88+
// newModuleResolver returns a new module-aware goimports resolver.
89+
//
90+
// Note: use caution when modifying this constructor: changes must also be
91+
// reflected in ModuleResolver.ClearForNewScan.
8992
func newModuleResolver(e *ProcessEnv, moduleCacheCache *DirInfoCache) (*ModuleResolver, error) {
9093
r := &ModuleResolver{
9194
env: e,
9295
scanSema: make(chan struct{}, 1),
9396
}
94-
r.scanSema <- struct{}{}
97+
r.scanSema <- struct{}{} // release
9598

9699
goenv, err := r.env.goEnv()
97100
if err != nil {
@@ -265,10 +268,23 @@ func (r *ModuleResolver) initAllMods() error {
265268
// It preserves the set of roots, but forgets about the set of directories.
266269
// Though it forgets the set of module cache directories, it remembers their
267270
// contents, since they are assumed to be immutable.
268-
func (r *ModuleResolver) ClearForNewScan() {
269-
<-r.scanSema
270-
prevRoots := r.scannedRoots
271-
r.scannedRoots = map[gopathwalk.Root]bool{}
271+
func (r *ModuleResolver) ClearForNewScan() Resolver {
272+
<-r.scanSema // acquire r, to guard scannedRoots
273+
r2 := &ModuleResolver{
274+
env: r.env,
275+
dummyVendorMod: r.dummyVendorMod,
276+
moduleCacheDir: r.moduleCacheDir,
277+
roots: r.roots,
278+
mains: r.mains,
279+
mainByDir: r.mainByDir,
280+
modsByModPath: r.modsByModPath,
281+
282+
scanSema: make(chan struct{}, 1),
283+
scannedRoots: make(map[gopathwalk.Root]bool),
284+
otherCache: NewDirInfoCache(),
285+
moduleCacheCache: r.moduleCacheCache,
286+
}
287+
r2.scanSema <- struct{}{} // r2 must start released
272288
// Invalidate root scans. We don't need to invalidate module cache roots,
273289
// because they are immutable.
274290
// (We don't support a use case where GOMODCACHE is cleaned in the middle of
@@ -278,12 +294,12 @@ func (r *ModuleResolver) ClearForNewScan() {
278294
// Scanning for new directories in GOMODCACHE should be handled elsewhere,
279295
// via a call to ScanModuleCache.
280296
for _, root := range r.roots {
281-
if root.Type == gopathwalk.RootModuleCache && prevRoots[root] {
282-
r.scannedRoots[root] = true
297+
if root.Type == gopathwalk.RootModuleCache && r.scannedRoots[root] {
298+
r2.scannedRoots[root] = true
283299
}
284300
}
285-
r.otherCache = NewDirInfoCache()
286-
r.scanSema <- struct{}{}
301+
r.scanSema <- struct{}{} // release r
302+
return r2
287303
}
288304

289305
// ClearModuleInfo invalidates resolver state that depends on go.mod file
@@ -299,16 +315,27 @@ func (e *ProcessEnv) ClearModuleInfo() {
299315
if r, ok := e.resolver.(*ModuleResolver); ok {
300316
resolver, resolverErr := newModuleResolver(e, e.ModCache)
301317
if resolverErr == nil {
302-
<-r.scanSema // guards caches
318+
<-r.scanSema // acquire (guards caches)
303319
resolver.moduleCacheCache = r.moduleCacheCache
304320
resolver.otherCache = r.otherCache
305-
r.scanSema <- struct{}{}
321+
r.scanSema <- struct{}{} // release
306322
}
307323
e.resolver = resolver
308324
e.resolverErr = resolverErr
309325
}
310326
}
311327

328+
// UpdateResolver sets the resolver for the ProcessEnv to use in imports
329+
// operations. Only for use with the result of [Resolver.ClearForNewScan].
330+
//
331+
// TODO(rfindley): this awkward API is a result of the (arguably) inverted
332+
// relationship between configuration and state described in the doc comment
333+
// for [ProcessEnv].
334+
func (e *ProcessEnv) UpdateResolver(r Resolver) {
335+
e.resolver = r
336+
e.resolverErr = nil
337+
}
338+
312339
// findPackage returns the module and directory from within the main modules
313340
// and their dependencies that contains the package at the given import path,
314341
// or returns nil, "" if no module is in scope.
@@ -580,9 +607,9 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error
580607
select {
581608
case <-ctx.Done():
582609
return
583-
case <-r.scanSema:
610+
case <-r.scanSema: // acquire
584611
}
585-
defer func() { r.scanSema <- struct{}{} }()
612+
defer func() { r.scanSema <- struct{}{} }() // release
586613
// We have the lock on r.scannedRoots, and no other scans can run.
587614
for _, root := range roots {
588615
if ctx.Err() != nil {

internal/imports/mod_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ import _ "rsc.io/sampler"
244244
// Clear out the resolver's cache, since we've changed the environment.
245245
mt.env.Env["GOFLAGS"] = "-mod=vendor"
246246
mt.env.ClearModuleInfo()
247-
mt.env.resolver.ClearForNewScan()
247+
mt.env.UpdateResolver(mt.env.resolver.ClearForNewScan())
248248
mt.assertModuleFoundInDir("rsc.io/sampler", "sampler", `/vendor/`)
249249
}
250250

@@ -1314,7 +1314,7 @@ func BenchmarkModuleResolver_RescanModCache(b *testing.B) {
13141314
b.ResetTimer()
13151315
for i := 0; i < b.N; i++ {
13161316
scanToSlice(resolver, exclude)
1317-
resolver.(*ModuleResolver).ClearForNewScan()
1317+
resolver = resolver.ClearForNewScan()
13181318
}
13191319
}
13201320

0 commit comments

Comments
 (0)