Skip to content

Commit a1ca4fc

Browse files
Bryan C. Millsgopherbot
Bryan C. Mills
authored andcommitted
internal/gopathwalk: rationalize error and symlink handling in walker.walk
filepath.WalkDir may invoke its callback with a nil DirEntry and non-nil error — a detail I missed when converting from fastwalk.Walk to filepath.WalkDir in CL 508506. Even though gopathwalk.Walk ignores filesystem errors, its callback needs to check for that one. Looking into this bug, I also realized that the walk callback used to return only SkipDir or ErrTraverseLink. In retrospect, that makes sense given that Walk and WalkDir don't return errors, so I have documented that property and fixed a few places where it no longer held. To help to simplify the code enough to rule out other similar errors, we also inline walker.shouldTraverse and simplify the case for traversing symlinks. This results in slight changes in the visited package directories when a root contains cyclic symlink paths, but since we never expect symlink cycles to occur in the first place I don't expect that the changes will affect any users in practice. Fixes golang/go#58054. Updates golang/go#58035. Change-Id: Ie8cf1e4b5186b0d7b4650305bfc863330541f080 Reviewed-on: https://go-review.googlesource.com/c/tools/+/539480 LUCI-TryBot-Result: Go LUCI <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Robert Findley <[email protected]> Auto-Submit: Bryan Mills <[email protected]> Run-TryBot: Bryan Mills <[email protected]>
1 parent 7d6e6a9 commit a1ca4fc

File tree

3 files changed

+284
-157
lines changed

3 files changed

+284
-157
lines changed

internal/gopathwalk/walk.go

Lines changed: 108 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,18 @@ type Root struct {
4444
}
4545

4646
// Walk walks Go source directories ($GOROOT, $GOPATH, etc) to find packages.
47-
// For each package found, add will be called (concurrently) with the absolute
47+
// For each package found, add will be called with the absolute
4848
// paths of the containing source directory and the package directory.
49-
// add will be called concurrently.
5049
func Walk(roots []Root, add func(root Root, dir string), opts Options) {
5150
WalkSkip(roots, add, func(Root, string) bool { return false }, opts)
5251
}
5352

5453
// WalkSkip walks Go source directories ($GOROOT, $GOPATH, etc) to find packages.
55-
// For each package found, add will be called (concurrently) with the absolute
54+
// For each package found, add will be called with the absolute
5655
// paths of the containing source directory and the package directory.
57-
// For each directory that will be scanned, skip will be called (concurrently)
56+
// For each directory that will be scanned, skip will be called
5857
// with the absolute paths of the containing source directory and the directory.
5958
// If skip returns false on a directory it will be processed.
60-
// add will be called concurrently.
61-
// skip will be called concurrently.
6259
func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root, dir string) bool, opts Options) {
6360
for _, root := range roots {
6461
walkDir(root, add, skip, opts)
@@ -115,7 +112,8 @@ type walker struct {
115112
skip func(Root, string) bool // The callback that will be invoked for every dir. dir is skipped if it returns true.
116113
opts Options // Options passed to Walk by the user.
117114

118-
ignoredDirs []string
115+
pathSymlinks []os.FileInfo
116+
ignoredDirs []string
119117

120118
added map[string]bool
121119
}
@@ -184,9 +182,24 @@ func (w *walker) shouldSkipDir(dir string) bool {
184182
}
185183

186184
// walk walks through the given path.
185+
//
186+
// Errors are logged if w.opts.Logf is non-nil, but otherwise ignored:
187+
// walk returns only nil or fs.SkipDir.
187188
func (w *walker) walk(path string, d fs.DirEntry, err error) error {
188-
typ := d.Type()
189-
if typ.IsRegular() {
189+
if err != nil {
190+
// We have no way to report errors back through Walk or WalkSkip,
191+
// so just log and ignore them.
192+
if w.opts.Logf != nil {
193+
w.opts.Logf("%v", err)
194+
}
195+
if d == nil {
196+
// Nothing more to do: the error prevents us from knowing
197+
// what path even represents.
198+
return nil
199+
}
200+
}
201+
202+
if d.Type().IsRegular() {
190203
if !strings.HasSuffix(path, ".go") {
191204
return nil
192205
}
@@ -204,118 +217,115 @@ func (w *walker) walk(path string, d fs.DirEntry, err error) error {
204217
}
205218
return nil
206219
}
207-
if typ == os.ModeDir {
220+
221+
if d.IsDir() {
208222
base := filepath.Base(path)
209223
if base == "" || base[0] == '.' || base[0] == '_' ||
210224
base == "testdata" ||
211225
(w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") ||
212226
(!w.opts.ModulesEnabled && base == "node_modules") {
213-
return filepath.SkipDir
227+
return fs.SkipDir
214228
}
215229
if w.shouldSkipDir(path) {
216-
return filepath.SkipDir
230+
return fs.SkipDir
217231
}
218232
return nil
219233
}
220-
if typ == os.ModeSymlink && err == nil {
234+
235+
if d.Type()&os.ModeSymlink != 0 {
221236
// TODO(bcmills): 'go list all' itself ignores symlinks within GOROOT/src
222237
// and GOPATH/src. Do we really need to traverse them here? If so, why?
223238

224-
if os.IsPathSeparator(path[len(path)-1]) {
225-
// The OS was supposed to resolve a directory symlink but didn't.
226-
//
227-
// On macOS this may be caused by a known libc/kernel bug;
228-
// see https://go.dev/issue/59586.
229-
//
230-
// On Windows before Go 1.21, this may be caused by a bug in
231-
// os.Lstat (fixed in https://go.dev/cl/463177).
232-
//
233-
// In either case, we can work around the bug by walking this level
234-
// explicitly: first the symlink target itself, then its contents.
235-
236-
fi, err := os.Stat(path)
237-
if err != nil || !fi.IsDir() {
238-
return nil
239-
}
240-
err = w.walk(path, fs.FileInfoToDirEntry(fi), nil)
241-
if err == filepath.SkipDir {
242-
return nil
243-
} else if err != nil {
244-
return err
245-
}
239+
fi, err := os.Stat(path)
240+
if err != nil || !fi.IsDir() {
241+
// Not a directory. Just walk the file (or broken link) and be done.
242+
return w.walk(path, fs.FileInfoToDirEntry(fi), err)
243+
}
246244

247-
ents, _ := os.ReadDir(path) // ignore error if unreadable
248-
for _, d := range ents {
249-
nextPath := filepath.Join(path, d.Name())
250-
var err error
251-
if d.IsDir() {
252-
err = filepath.WalkDir(nextPath, w.walk)
253-
} else {
254-
err = w.walk(nextPath, d, nil)
255-
if err == filepath.SkipDir {
256-
break
257-
}
258-
}
259-
if err != nil {
260-
return err
261-
}
245+
// Avoid walking symlink cycles: if we have already followed a symlink to
246+
// this directory as a parent of itself, don't follow it again.
247+
//
248+
// This doesn't catch the first time through a cycle, but it also minimizes
249+
// the number of extra stat calls we make if we *don't* encounter a cycle.
250+
// Since we don't actually expect to encounter symlink cycles in practice,
251+
// this seems like the right tradeoff.
252+
for _, parent := range w.pathSymlinks {
253+
if os.SameFile(fi, parent) {
254+
return nil
262255
}
263-
return nil
264256
}
265257

266-
base := filepath.Base(path)
267-
if strings.HasPrefix(base, ".#") {
268-
// Emacs noise.
258+
w.pathSymlinks = append(w.pathSymlinks, fi)
259+
defer func() {
260+
w.pathSymlinks = w.pathSymlinks[:len(w.pathSymlinks)-1]
261+
}()
262+
263+
// On some platforms the OS (or the Go os package) sometimes fails to
264+
// resolve directory symlinks before a trailing slash
265+
// (even though POSIX requires it to do so).
266+
//
267+
// On macOS that failure may be caused by a known libc/kernel bug;
268+
// see https://go.dev/issue/59586.
269+
//
270+
// On Windows before Go 1.21, it may be caused by a bug in
271+
// os.Lstat (fixed in https://go.dev/cl/463177).
272+
//
273+
// Since we need to handle this explicitly on broken platforms anyway,
274+
// it is simplest to just always do that and not rely on POSIX pathname
275+
// resolution to walk the directory (such as by calling WalkDir with
276+
// a trailing slash appended to the path).
277+
//
278+
// Instead, we make a sequence of walk calls — directly and through
279+
// recursive calls to filepath.WalkDir — simulating what WalkDir would do
280+
// if the symlink were a regular directory.
281+
282+
// First we call walk on the path as a directory
283+
// (instead of a symlink).
284+
err = w.walk(path, fs.FileInfoToDirEntry(fi), nil)
285+
if err == fs.SkipDir {
269286
return nil
287+
} else if err != nil {
288+
// This should be impossible, but handle it anyway in case
289+
// walk is changed to return other errors.
290+
return err
270291
}
271-
if w.shouldTraverse(path) {
272-
// Add a trailing separator to traverse the symlink.
273-
nextPath := path + string(filepath.Separator)
274-
return filepath.WalkDir(nextPath, w.walk)
275-
}
276-
}
277-
return nil
278-
}
279-
280-
// shouldTraverse reports whether the symlink fi, found in dir,
281-
// should be followed. It makes sure symlinks were never visited
282-
// before to avoid symlink loops.
283-
func (w *walker) shouldTraverse(path string) bool {
284-
if w.shouldSkipDir(path) {
285-
return false
286-
}
287-
288-
ts, err := os.Stat(path)
289-
if err != nil {
290-
logf := w.opts.Logf
291-
if logf == nil {
292-
logf = log.Printf
293-
}
294-
logf("%v", err)
295-
return false
296-
}
297-
if !ts.IsDir() {
298-
return false
299-
}
300292

301-
// Check for symlink loops by statting each directory component
302-
// and seeing if any are the same file as ts.
303-
for {
304-
parent := filepath.Dir(path)
305-
if parent == path {
306-
// Made it to the root without seeing a cycle.
307-
// Use this symlink.
308-
return true
309-
}
310-
parentInfo, err := os.Stat(parent)
293+
// Now read the directory and walk its entries.
294+
ents, err := os.ReadDir(path)
311295
if err != nil {
312-
return false
296+
// Report the ReadDir error, as filepath.WalkDir would do.
297+
err = w.walk(path, fs.FileInfoToDirEntry(fi), err)
298+
if err == fs.SkipDir {
299+
return nil
300+
} else if err != nil {
301+
return err // Again, should be impossible.
302+
}
303+
// Fall through and iterate over whatever entries we did manage to get.
313304
}
314-
if os.SameFile(ts, parentInfo) {
315-
// Cycle. Don't traverse.
316-
return false
305+
306+
for _, d := range ents {
307+
nextPath := filepath.Join(path, d.Name())
308+
if d.IsDir() {
309+
// We want to walk the whole directory tree rooted at nextPath,
310+
// not just the single entry for the directory.
311+
err := filepath.WalkDir(nextPath, w.walk)
312+
if err != nil && w.opts.Logf != nil {
313+
w.opts.Logf("%v", err)
314+
}
315+
} else {
316+
err := w.walk(nextPath, d, nil)
317+
if err == fs.SkipDir {
318+
// Skip the rest of the entries in the parent directory of nextPath
319+
// (that is, path itself).
320+
break
321+
} else if err != nil {
322+
return err // Again, should be impossible.
323+
}
324+
}
317325
}
318-
path = parent
326+
return nil
319327
}
320328

329+
// Not a file, regular directory, or symlink; skip.
330+
return nil
321331
}

0 commit comments

Comments
 (0)