From 44cea1234dc18352fec89285e94f92693034323d Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 19 Nov 2024 08:34:07 -0500 Subject: [PATCH] cmd/go/internal/fsys: replace file tree with sorted list Replace the tree of nodes with a sorted list of file replacements. The most important property of this representation is that it allows replacing directories: a replacement x -> y where y is a directory could not be implemented before, because it would require making a node for every file in the tree rooted at y, or else it would require unsuccessful lookups for files like x/a/b/c/d/e/f/g/h/i/j/k to try every possible parent in order to discover the x -> y mapping. The sorted list makes it easy to find the x -> y mapping: when you do the binary search for x/a/b/c/d/e/f/g/h/i/j/k, you end up immediately after the x -> y mapping, so stepping backward one entry provides the mapping we need, if it exists. This CL does not allow overlay files to include directories, but now it is possible. This is at least useful for other kinds of experiments (like FIPS). Change-Id: Ief0afaee82e644dab8ae4eafeec20440afee2e36 Reviewed-on: https://go-review.googlesource.com/c/go/+/628701 LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Matloob --- src/cmd/go/internal/fsys/fsys.go | 201 +++++++++++++++----------- src/cmd/go/internal/fsys/fsys_test.go | 97 +++++++++++++ 2 files changed, 213 insertions(+), 85 deletions(-) diff --git a/src/cmd/go/internal/fsys/fsys.go b/src/cmd/go/internal/fsys/fsys.go index 261a1d9f6b..e8df80bb93 100644 --- a/src/cmd/go/internal/fsys/fsys.go +++ b/src/cmd/go/internal/fsys/fsys.go @@ -11,6 +11,7 @@ package fsys import ( + "cmd/go/internal/str" "encoding/json" "errors" "fmt" @@ -88,27 +89,31 @@ type overlayJSON struct { Replace map[string]string } +// overlay is a list of replacements to be applied, sorted by cmp of the from field. +// cmp sorts the filepath.Separator less than any other byte so that x is always +// just before any children x/a, x/b, and so on, before x.go. (This would not +// be the case with byte-wise sorting, which would produce x, x.go, x/a.) +// The sorting lets us find the relevant overlay entry quickly even if it is for a +// parent of the path being searched. +var overlay []replace + +// A replace represents a single replaced path. type replace struct { + // from is the old path being replaced. + // It is an absolute path returned by abs. from string - to string -} - -type node struct { - actual string // empty if a directory - children map[string]*node // path element → file or directory -} - -func (n *node) isDir() bool { - return n.actual == "" && n.children != nil -} -func (n *node) isDeleted() bool { - return n.actual == "" && n.children == nil + // to is the replacement for the old path. + // It is an absolute path returned by abs. + // If it is the empty string, the old path appears deleted. + // Otherwise the old path appears to be the file named by to. + // If to ends in a trailing slash, the overlay code below treats + // it as a directory replacement, akin to a bind mount. + // However, our processing of external overlay maps removes + // such paths by calling abs, except for / or C:\. + to string } -// TODO(matloob): encapsulate these in an io/fs-like interface -var overlay map[string]*node // path -> file or directory node - // cwd returns the current directory, caching it on first use. var cwd = sync.OnceValue(cwdOnce) @@ -143,6 +148,10 @@ func abs(path string) string { return filepath.Join(dir, path) } +func searchcmp(r replace, t string) int { + return cmp(r.from, t) +} + // info is a summary of the known information about a path // being looked up in the virtual file system. type info struct { @@ -156,56 +165,110 @@ type info struct { // stat returns info about the path in the virtual file system. func stat(path string) info { apath := abs(path) - if n, ok := overlay[apath]; ok { - if n.isDir() { - return info{abs: apath, replaced: true, dir: true, actual: path} - } - if n.isDeleted() { - return info{abs: apath, deleted: true} - } - return info{abs: apath, replaced: true, actual: n.actual} + if path == "" { + return info{abs: apath, actual: path} } - // Check whether any parents are replaced by files, - // meaning this path and the directory that contained it - // have been deleted. - prefix := apath - for { - if n, ok := overlay[prefix]; ok { - if n.children == nil { - return info{abs: apath, deleted: true} - } - break + // Binary search for apath to find the nearest relevant entry in the overlay. + i, ok := slices.BinarySearchFunc(overlay, apath, searchcmp) + if ok { + // Exact match; overlay[i].from == apath. + r := overlay[i] + if r.to == "" { + // Deleted. + return info{abs: apath, deleted: true} + } + if strings.HasSuffix(r.to, string(filepath.Separator)) { + // Replacement ends in slash, denoting directory. + // Note that this is impossible in current overlays since we call abs + // and it strips the trailing slashes. But we could support it in the future. + return info{abs: apath, replaced: true, dir: true, actual: path} } - parent := filepath.Dir(prefix) - if parent == prefix { - break + // Replaced file. + return info{abs: apath, replaced: true, actual: r.to} + } + if i < len(overlay) && str.HasFilePathPrefix(overlay[i].from, apath) { + // Replacement for child path; infer existence of parent directory. + return info{abs: apath, replaced: true, dir: true, actual: path} + } + if i > 0 && str.HasFilePathPrefix(apath, overlay[i-1].from) { + // Replacement for parent. + r := overlay[i-1] + if strings.HasSuffix(r.to, string(filepath.Separator)) { + // Parent replaced by directory; apply replacement in our path. + // Note that this is impossible in current overlays since we call abs + // and it strips the trailing slashes. But we could support it in the future. + p := r.to + apath[len(r.from)+1:] + return info{abs: apath, replaced: true, actual: p} } - prefix = parent + // Parent replaced by file; path is deleted. + return info{abs: apath, deleted: true} } - return info{abs: apath, actual: path} } // children returns a sequence of (name, info) -// for all the children of the directory i. +// for all the children of the directory i +// implied by the overlay. func (i *info) children() iter.Seq2[string, info] { return func(yield func(string, info) bool) { - n := overlay[i.abs] - if n == nil { - return - } - for name, c := range n.children { + // Loop looking for next possible child in sorted overlay, + // which is previous child plus "\x00". + target := i.abs + string(filepath.Separator) + "\x00" + for { + // Search for next child: first entry in overlay >= target. + j, _ := slices.BinarySearchFunc(overlay, target, func(r replace, t string) int { + return cmp(r.from, t) + }) + + Loop: + // Skip subdirectories with deleted children (but not direct deleted children). + for j < len(overlay) && overlay[j].to == "" && str.HasFilePathPrefix(overlay[j].from, i.abs) && strings.Contains(overlay[j].from[len(i.abs)+1:], string(filepath.Separator)) { + j++ + } + if j >= len(overlay) { + // Nothing found at all. + return + } + r := overlay[j] + if !str.HasFilePathPrefix(r.from, i.abs) { + // Next entry in overlay is beyond the directory we want; all done. + return + } + + // Found the next child in the directory. + // Yield it and its info. + name := r.from[len(i.abs)+1:] + actual := r.to + dir := false + if j := strings.IndexByte(name, filepath.Separator); j >= 0 { + // Child is multiple levels down, so name must be a directory, + // and there is no actual replacement. + name = name[:j] + dir = true + actual = "" + } + deleted := !dir && r.to == "" ci := info{ abs: filepath.Join(i.abs, name), - deleted: c.isDeleted(), - replaced: c.children != nil || c.actual != "", - dir: c.isDir(), - actual: c.actual, + deleted: deleted, + replaced: !deleted, + dir: dir || strings.HasSuffix(r.to, string(filepath.Separator)), + actual: actual, } if !yield(name, ci) { return } + + // Next target is first name after the one we just returned. + target = ci.abs + "\x00" + + // Optimization: Check whether the very next element + // is the next child. If so, skip the binary search. + if j+1 < len(overlay) && cmp(overlay[j+1].from, target) >= 0 { + j++ + goto Loop + } } } } @@ -246,7 +309,7 @@ func initFromJSON(js []byte) error { return fmt.Errorf("duplicate paths %s and %s in overlay map", old, from) } seen[afrom] = from - list = append(list, replace{from: afrom, to: ojs.Replace[from]}) + list = append(list, replace{from: afrom, to: abs(ojs.Replace[from])}) } slices.SortFunc(list, func(x, y replace) int { return cmp(x.from, y.from) }) @@ -268,39 +331,7 @@ func initFromJSON(js []byte) error { } } - overlay = make(map[string]*node) - for _, r := range list { - n := &node{actual: abs(r.to)} - from := r.from - overlay[from] = n - - for { - dir, base := filepath.Dir(from), filepath.Base(from) - if dir == from { - break - } - dn := overlay[dir] - if dn == nil || dn.isDeleted() { - dn = &node{children: make(map[string]*node)} - overlay[dir] = dn - } - if n.isDeleted() && !dn.isDir() { - break - } - if !dn.isDir() { - panic("fsys inconsistency") - } - dn.children[base] = n - if n.isDeleted() { - // Deletion is recorded now. - // Don't need to create entire parent chain, - // because we don't need to force parents to exist. - break - } - from, n = dir, dn - } - } - + overlay = list return nil } @@ -414,8 +445,8 @@ func Actual(name string) string { // Replaced reports whether the named file has been modified // in the virtual file system compared to the OS file system. func Replaced(name string) bool { - p, ok := overlay[abs(name)] - return ok && !p.isDir() + info := stat(name) + return info.deleted || info.replaced && !info.dir } // Open opens the named file in the virtual file system. diff --git a/src/cmd/go/internal/fsys/fsys_test.go b/src/cmd/go/internal/fsys/fsys_test.go index 7fbe3f1842..ec5488f5e3 100644 --- a/src/cmd/go/internal/fsys/fsys_test.go +++ b/src/cmd/go/internal/fsys/fsys_test.go @@ -14,6 +14,7 @@ import ( "path/filepath" "reflect" "runtime" + "slices" "strings" "sync" "testing" @@ -51,6 +52,102 @@ func initOverlay(t *testing.T, config string) { } } +var statInfoOverlay = `{"Replace": { + "x": "replace/x", + "a/b/c": "replace/c", + "d/e": "" +}}` + +var statInfoTests = []struct { + path string + info info +}{ + {"foo", info{abs: "/tmp/foo", actual: "foo"}}, + {"foo/bar/baz/quux", info{abs: "/tmp/foo/bar/baz/quux", actual: "foo/bar/baz/quux"}}, + {"x", info{abs: "/tmp/x", replaced: true, actual: "/tmp/replace/x"}}, + {"/tmp/x", info{abs: "/tmp/x", replaced: true, actual: "/tmp/replace/x"}}, + {"x/y", info{abs: "/tmp/x/y", deleted: true}}, + {"a", info{abs: "/tmp/a", replaced: true, dir: true, actual: "a"}}, + {"a/b", info{abs: "/tmp/a/b", replaced: true, dir: true, actual: "a/b"}}, + {"a/b/c", info{abs: "/tmp/a/b/c", replaced: true, actual: "/tmp/replace/c"}}, + {"d/e", info{abs: "/tmp/d/e", deleted: true}}, + {"d", info{abs: "/tmp/d", replaced: true, dir: true, actual: "d"}}, +} + +var statInfoChildrenTests = []struct { + path string + children []info +}{ + {"foo", nil}, + {"foo/bar", nil}, + {"foo/bar/baz", nil}, + {"x", nil}, + {"x/y", nil}, + {"a", []info{{abs: "/tmp/a/b", replaced: true, dir: true, actual: ""}}}, + {"a/b", []info{{abs: "/tmp/a/b/c", replaced: true, actual: "/tmp/replace/c"}}}, + {"d", []info{{abs: "/tmp/d/e", deleted: true}}}, + {"d/e", nil}, + {".", []info{ + {abs: "/tmp/a", replaced: true, dir: true, actual: ""}, + // {abs: "/tmp/d", replaced: true, dir: true, actual: ""}, + {abs: "/tmp/x", replaced: true, actual: "/tmp/replace/x"}, + }}, +} + +func TestStatInfo(t *testing.T) { + tmp := "/tmp" + if runtime.GOOS == "windows" { + tmp = `C:\tmp` + } + cwd = sync.OnceValue(func() string { return tmp }) + + winFix := func(s string) string { + if runtime.GOOS == "windows" { + s = strings.ReplaceAll(s, `/tmp`, tmp) // fix tmp + s = strings.ReplaceAll(s, `/`, `\`) // use backslashes + } + return s + } + + overlay := statInfoOverlay + overlay = winFix(overlay) + overlay = strings.ReplaceAll(overlay, `\`, `\\`) // JSON escaping + if err := initFromJSON([]byte(overlay)); err != nil { + t.Fatal(err) + } + + for _, tt := range statInfoTests { + tt.path = winFix(tt.path) + tt.info.abs = winFix(tt.info.abs) + tt.info.actual = winFix(tt.info.actual) + info := stat(tt.path) + if info != tt.info { + t.Errorf("stat(%#q):\nhave %+v\nwant %+v", tt.path, info, tt.info) + } + } + + for _, tt := range statInfoChildrenTests { + tt.path = winFix(tt.path) + for i, info := range tt.children { + info.abs = winFix(info.abs) + info.actual = winFix(info.actual) + tt.children[i] = info + } + parent := stat(winFix(tt.path)) + var children []info + for name, child := range parent.children() { + if name != filepath.Base(child.abs) { + t.Errorf("stat(%#q): child %#q has inconsistent abs %#q", tt.path, name, child.abs) + } + children = append(children, child) + } + slices.SortFunc(children, func(x, y info) int { return cmp(x.abs, y.abs) }) + if !slices.Equal(children, tt.children) { + t.Errorf("stat(%#q) children:\nhave %+v\nwant %+v", tt.path, children, tt.children) + } + } +} + func TestIsDir(t *testing.T) { initOverlay(t, ` { -- 2.48.1