From 5a1c5b8ae741df2d5c53f328c57a84d85ae6c44a Mon Sep 17 00:00:00 2001 From: Michael Matloob Date: Thu, 23 Jun 2022 15:46:29 -0400 Subject: [PATCH] cmd/go: add per-package indexing for modules outside mod cache Packages outside the module cache including the standard library will be indexed individually rather than as a whole module. For #52876 Change-Id: I142dad6a790e9e8eb4dc6430a588fbfa86552e49 Reviewed-on: https://go-review.googlesource.com/c/go/+/413815 Reviewed-by: Michael Matloob Run-TryBot: Michael Matloob Reviewed-by: Bryan Mills TryBot-Result: Gopher Robot --- src/cmd/go/internal/load/pkg.go | 4 +- src/cmd/go/internal/modindex/index_format.txt | 13 +- src/cmd/go/internal/modindex/read.go | 248 ++++++++++++++---- src/cmd/go/internal/modindex/scan.go | 10 +- src/cmd/go/internal/modindex/write.go | 18 +- src/cmd/go/internal/modload/import.go | 6 +- src/cmd/go/internal/modload/load.go | 4 +- src/cmd/go/internal/modload/search.go | 8 +- 8 files changed, 242 insertions(+), 69 deletions(-) diff --git a/src/cmd/go/internal/load/pkg.go b/src/cmd/go/internal/load/pkg.go index 1a7b9d235d..95a06a325d 100644 --- a/src/cmd/go/internal/load/pkg.go +++ b/src/cmd/go/internal/load/pkg.go @@ -878,8 +878,8 @@ func loadPackageData(ctx context.Context, path, parentPath, parentDir, parentRoo buildMode = build.ImportComment } if modroot := modload.PackageModRoot(ctx, r.path); modroot != "" { - if mi, err := modindex.Get(modroot); err == nil { - data.p, data.err = mi.Import(cfg.BuildContext, mi.RelPath(r.dir), buildMode) + if rp, err := modindex.GetPackage(modroot, r.dir); err == nil { + data.p, data.err = rp.Import(cfg.BuildContext, buildMode) goto Happy } else if !errors.Is(err, modindex.ErrNotIndexed) { base.Fatalf("go: %v", err) diff --git a/src/cmd/go/internal/modindex/index_format.txt b/src/cmd/go/internal/modindex/index_format.txt index 3768eea6c7..c74b1d458b 100644 --- a/src/cmd/go/internal/modindex/index_format.txt +++ b/src/cmd/go/internal/modindex/index_format.txt @@ -7,6 +7,8 @@ Strings are written into the string table at the end of the file. Each string is null-terminated. String offsets are relative to the start of the string table. Bools are written as uint32s: 0 for false and 1 for true. +The following is the format for a full module: + “go index v0\n” str uint32 - offset of string table n uint32 - number of packages @@ -40,7 +42,16 @@ for each RawPackage: position - file, offset, line, column - uint32 [string table] -// parseError struct +The following is the format for a single indexed package: + +“go index v0\n” +str uint32 - offset of string table +for the single RawPackage: + [same RawPackage format as above] +[string table] + +The following is the definition of the json-serialized parseError struct: + type parseError struct { ErrorList *scanner.ErrorList // non-nil if the error was an ErrorList, nil otherwise ErrorString string // non-empty for all other cases diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go index ea1ebb07c2..65a1ecf6dc 100644 --- a/src/cmd/go/internal/modindex/read.go +++ b/src/cmd/go/internal/modindex/read.go @@ -23,11 +23,13 @@ import ( "sort" "strings" "sync" + "time" "unsafe" "cmd/go/internal/base" "cmd/go/internal/cache" "cmd/go/internal/cfg" + "cmd/go/internal/fsys" "cmd/go/internal/imports" "cmd/go/internal/par" "cmd/go/internal/str" @@ -39,20 +41,16 @@ import ( // module index. var enabled bool = godebug.Get("goindex") != "0" -// ModuleIndex represents and encoded module index file. It is used to +// Module represents and encoded module index file. It is used to // do the equivalent of build.Import of packages in the module and answer other // questions based on the index file's data. -type ModuleIndex struct { +type Module struct { modroot string od offsetDecoder packages map[string]int // offsets of each package packagePaths []string // paths to package directories relative to modroot; these are the keys of packages } -var fcache par.Cache - -var salt = godebug.Get("goindexsalt") - // moduleHash returns an ActionID corresponding to the state of the module // located at filesystem path modroot. func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) { @@ -75,7 +73,45 @@ func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) { } h := cache.NewHash("moduleIndex") - fmt.Fprintf(h, "module index %s %s %s %v\n", runtime.Version(), salt, indexVersion, modroot) + fmt.Fprintf(h, "module index %s %s %v\n", runtime.Version(), indexVersion, modroot) + return h.Sum(), nil +} + +const modTimeCutoff = 2 * time.Second + +// dirHash returns an ActionID corresponding to the state of the package +// located at filesystem path pkgdir. +func dirHash(pkgdir string) (cache.ActionID, error) { + h := cache.NewHash("moduleIndex") + fmt.Fprintf(h, "package %s %s %v\n", runtime.Version(), indexVersion, pkgdir) + entries, err := fsys.ReadDir(pkgdir) + if err != nil { + // pkgdir might not be a directory. give up on hashing. + return cache.ActionID{}, ErrNotIndexed + } + cutoff := time.Now().Add(-modTimeCutoff) + for _, info := range entries { + if info.IsDir() { + continue + } + + if !info.Mode().IsRegular() { + return cache.ActionID{}, ErrNotIndexed + } + // To avoid problems for very recent files where a new + // write might not change the mtime due to file system + // mtime precision, reject caching if a file was read that + // is less than modTimeCutoff old. + // + // This is the same strategy used for hashing test inputs. + // See hashOpen in cmd/go/internal/test/test.go for the + // corresponding code. + if info.ModTime().After(cutoff) { + return cache.ActionID{}, ErrNotIndexed + } + + fmt.Fprintf(h, "file %v %v %v\n", info.Name(), info.ModTime(), info.Size()) + } return h.Sum(), nil } @@ -83,31 +119,61 @@ var modrootCache par.Cache var ErrNotIndexed = errors.New("not in module index") -// Get returns the ModuleIndex for the module rooted at modroot. +var ( + errDisabled = fmt.Errorf("%w: module indexing disabled", ErrNotIndexed) + errNotFromModuleCache = fmt.Errorf("%w: not from module cache", ErrNotIndexed) +) + +// GetPackage returns the IndexPackage for the package at the given path. +// It will return ErrNotIndexed if the directory should be read without +// using the index, for instance because the index is disabled, or the packgae +// is not in a module. +func GetPackage(modroot, pkgdir string) (*IndexPackage, error) { + mi, err := GetModule(modroot) + if err == nil { + return mi.Package(relPath(pkgdir, modroot)), nil + } + if !errors.Is(err, errNotFromModuleCache) { + return nil, err + } + return openIndexPackage(modroot, pkgdir) +} + +// GetModule returns the Module for the given modroot. // It will return ErrNotIndexed if the directory should be read without // using the index, for instance because the index is disabled, or the packgae // is not in a module. -func Get(modroot string) (*ModuleIndex, error) { - if !enabled || cache.DefaultDir() == "off" || cfg.BuildMod == "vendor" { - return nil, ErrNotIndexed +func GetModule(modroot string) (*Module, error) { + if !enabled || cache.DefaultDir() == "off" { + return nil, errDisabled } if modroot == "" { - panic("modindex.Get called with empty modroot") + panic("modindex.GetPackage called with empty modroot") + } + if cfg.BuildMod == "vendor" { + // Even if the main module is in the module cache, + // its vendored dependencies are not loaded from their + // usual cached locations. + return nil, errNotFromModuleCache } modroot = filepath.Clean(modroot) - isModCache := str.HasFilePathPrefix(modroot, cfg.GOMODCACHE) - return openIndex(modroot, isModCache) + if !str.HasFilePathPrefix(modroot, cfg.GOMODCACHE) { + return nil, errNotFromModuleCache + } + return openIndexModule(modroot, true) } -// openIndex returns the module index for modPath. +var mcache par.Cache + +// openIndexModule returns the module index for modPath. // It will return ErrNotIndexed if the module can not be read // using the index because it contains symlinks. -func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) { +func openIndexModule(modroot string, ismodcache bool) (*Module, error) { type result struct { - mi *ModuleIndex + mi *Module err error } - r := fcache.Do(modroot, func() any { + r := mcache.Do(modroot, func() any { id, err := moduleHash(modroot, ismodcache) if err != nil { return result{nil, err} @@ -133,8 +199,38 @@ func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) { return r.mi, r.err } -// fromBytes returns a *ModuleIndex given the encoded representation. -func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) { +var pcache par.Cache + +func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) { + type result struct { + pkg *IndexPackage + err error + } + r := pcache.Do(pkgdir, func() any { + id, err := dirHash(pkgdir) + if err != nil { + return result{nil, err} + } + data, _, err := cache.Default().GetMmap(id) + if err != nil { + // Couldn't read from index. Assume we couldn't read from + // the index because the package hasn't been indexed yet. + data = indexPackage(modroot, pkgdir) + if err = cache.Default().PutBytes(id, data); err != nil { + return result{nil, err} + } + } + pkg, err := packageFromBytes(modroot, data) + if err != nil { + return result{nil, err} + } + return result{pkg, nil} + }).(result) + return r.pkg, r.err +} + +// fromBytes returns a *Module given the encoded representation. +func fromBytes(moddir string, data []byte) (mi *Module, err error) { if !enabled { panic("use of index") } @@ -184,7 +280,7 @@ func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) { packages[packagePaths[i]] = packageOffsets[i] } - return &ModuleIndex{ + return &Module{ moddir, offsetDecoder{data, st}, packages, @@ -192,21 +288,60 @@ func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) { }, nil } +// packageFromBytes returns a *IndexPackage given the encoded representation. +func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) { + if !enabled { + panic("use of package index when not enabled") + } + + // SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed + // that all its errors satisfy this interface, we'll only check for these errors so that + // we don't suppress panics that could have been produced from other sources. + type addrer interface { + Addr() uintptr + } + + // set PanicOnFault to true so that we can catch errors on the initial reads of the slice, + // in case it's mmapped (the common case). + old := debug.SetPanicOnFault(true) + defer func() { + debug.SetPanicOnFault(old) + if e := recover(); e != nil { + if _, ok := e.(addrer); ok { + // This panic was almost certainly caused by SetPanicOnFault. + err = fmt.Errorf("error reading module index: %v", e) + return + } + // The panic was likely not caused by SetPanicOnFault. + panic(e) + } + }() + + gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'}) + if string(gotVersion) != indexVersion { + return nil, fmt.Errorf("bad index version string: %q", gotVersion) + } + stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:] + st := newStringTable(data[stringTableOffset:]) + d := &decoder{unread, st} + p = decodePackage(d, offsetDecoder{data, st}) + p.modroot = modroot + return p, nil +} + // Returns a list of directory paths, relative to the modroot, for // packages contained in the module index. -func (mi *ModuleIndex) Packages() []string { +func (mi *Module) Packages() []string { return mi.packagePaths } -// RelPath returns the path relative to the module's root. -func (mi *ModuleIndex) RelPath(path string) string { - return str.TrimFilePathPrefix(filepath.Clean(path), mi.modroot) // mi.modroot is already clean +// relPath returns the path relative to the module's root. +func relPath(path, modroot string) string { + return str.TrimFilePathPrefix(filepath.Clean(path), filepath.Clean(modroot)) } -// ImportPackage is the equivalent of build.Import given the information in ModuleIndex. -func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.ImportMode) (p *build.Package, err error) { - rp := mi.indexPackage(relpath) - +// Import is the equivalent of build.Import given the information in Module. +func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) { defer func() { if e := recover(); e != nil { err = fmt.Errorf("error reading module index: %v", e) @@ -218,7 +353,7 @@ func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.Im p = &build.Package{} p.ImportPath = "." - p.Dir = filepath.Join(mi.modroot, rp.dir) + p.Dir = filepath.Join(rp.modroot, rp.dir) var pkgerr error switch ctxt.Compiler { @@ -236,7 +371,7 @@ func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.Im inTestdata := func(sub string) bool { return strings.Contains(sub, "/testdata/") || strings.HasSuffix(sub, "/testdata") || str.HasPathPrefix(sub, "testdata") } - if !inTestdata(relpath) { + if !inTestdata(rp.dir) { // In build.go, p.Root should only be set in the non-local-import case, or in // GOROOT or GOPATH. Since module mode only calls Import with path set to "." // and the module index doesn't apply outside modules, the GOROOT case is @@ -248,8 +383,8 @@ func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.Im if ctxt.GOROOT != "" && str.HasFilePathPrefix(p.Dir, cfg.GOROOTsrc) && p.Dir != cfg.GOROOTsrc { p.Root = ctxt.GOROOT p.Goroot = true - modprefix := str.TrimFilePathPrefix(mi.modroot, cfg.GOROOTsrc) - p.ImportPath = relpath + modprefix := str.TrimFilePathPrefix(rp.modroot, cfg.GOROOTsrc) + p.ImportPath = rp.dir if modprefix != "" { p.ImportPath = filepath.Join(modprefix, p.ImportPath) } @@ -521,20 +656,21 @@ func IsStandardPackage(goroot_, compiler, path string) bool { reldir = str.TrimFilePathPrefix(reldir, "cmd") modroot = filepath.Join(modroot, "cmd") } - mod, err := Get(modroot) - if err != nil { + if _, err := GetPackage(modroot, filepath.Join(modroot, reldir)); err == nil { + // Note that goroot.IsStandardPackage doesn't check that the directory + // actually contains any go files-- merely that it exists. GetPackage + // returning a nil error is enough for us to know the directory exists. + return true + } else if errors.Is(err, ErrNotIndexed) { + // Fall back because package isn't indexable. (Probably because + // a file was modified recently) return goroot.IsStandardPackage(goroot_, compiler, path) } - - pkgs := mod.Packages() - i := sort.SearchStrings(pkgs, reldir) - return i != len(pkgs) && pkgs[i] == reldir + return false } // IsDirWithGoFiles is the equivalent of fsys.IsDirWithGoFiles using the information in the index. -func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) { - rp := mi.indexPackage(relpath) - +func (rp *IndexPackage) IsDirWithGoFiles() (_ bool, err error) { defer func() { if e := recover(); e != nil { err = fmt.Errorf("error reading module index: %v", e) @@ -549,9 +685,7 @@ func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) { } // ScanDir implements imports.ScanDir using the information in the index. -func (mi *ModuleIndex) ScanDir(path string, tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) { - rp := mi.indexPackage(path) - +func (rp *IndexPackage) ScanDir(tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) { // TODO(matloob) dir should eventually be relative to indexed directory // TODO(matloob): skip reading raw package and jump straight to data we need? @@ -639,20 +773,22 @@ func shouldBuild(sf *sourceFile, tags map[string]bool) bool { return true } -// index package holds the information needed to access information in the -// index about a package. -type indexPackage struct { +// IndexPackage holds the information needed to access information in the +// index needed to load a package in a specific directory. +type IndexPackage struct { error error dir string // directory of the package relative to the modroot + modroot string + // Source files sourceFiles []*sourceFile } var errCannotFindPackage = errors.New("cannot find package") -// indexPackage returns an indexPackage constructed using the information in the ModuleIndex. -func (mi *ModuleIndex) indexPackage(path string) *indexPackage { +// Package returns an IndexPackage constructed using the information in the Module. +func (mi *Module) Package(path string) *IndexPackage { defer func() { if e := recover(); e != nil { base.Fatalf("error reading module index: %v", e) @@ -660,12 +796,18 @@ func (mi *ModuleIndex) indexPackage(path string) *indexPackage { }() offset, ok := mi.packages[path] if !ok { - return &indexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))} + return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))} } // TODO(matloob): do we want to lock on the module index? d := mi.od.decoderAt(offset) - rp := new(indexPackage) + p := decodePackage(d, mi.od) + p.modroot = mi.modroot + return p +} + +func decodePackage(d *decoder, od offsetDecoder) *IndexPackage { + rp := new(IndexPackage) if errstr := d.string(); errstr != "" { rp.error = errors.New(errstr) } @@ -675,7 +817,7 @@ func (mi *ModuleIndex) indexPackage(path string) *indexPackage { for i := uint32(0); i < numSourceFiles; i++ { offset := d.uint32() rp.sourceFiles[i] = &sourceFile{ - od: mi.od.offsetDecoderAt(offset), + od: od.offsetDecoderAt(offset), } } return rp diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go index d1f73dbb53..eb84bf8d89 100644 --- a/src/cmd/go/internal/modindex/scan.go +++ b/src/cmd/go/internal/modindex/scan.go @@ -65,7 +65,15 @@ func indexModule(modroot string) ([]byte, error) { if err != nil { return nil, err } - return encodeModule(packages), nil + return encodeModuleBytes(packages), nil +} + +// indexModule indexes the package at the given directory and returns its +// encoded representation. It returns ErrNotIndexed if the package can't +// be indexed. +func indexPackage(modroot, pkgdir string) []byte { + p := importRaw(modroot, relPath(pkgdir, modroot)) + return encodePackageBytes(p) } // rawPackage holds the information from each package that's needed to diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go index 0c3123a46f..3408248bd9 100644 --- a/src/cmd/go/internal/modindex/write.go +++ b/src/cmd/go/internal/modindex/write.go @@ -11,9 +11,9 @@ import ( const indexVersion = "go index v0" -// encodeModule produces the encoded representation of the module index. -// encodeModule may modify the packages slice. -func encodeModule(packages []*rawPackage) []byte { +// encodeModuleBytes produces the encoded representation of the module index. +// encodeModuleBytes may modify the packages slice. +func encodeModuleBytes(packages []*rawPackage) []byte { e := newEncoder() e.Bytes([]byte(indexVersion)) e.Bytes([]byte{'\n'}) @@ -39,6 +39,18 @@ func encodeModule(packages []*rawPackage) []byte { return e.b } +func encodePackageBytes(p *rawPackage) []byte { + e := newEncoder() + e.Bytes([]byte(indexVersion)) + e.Bytes([]byte{'\n'}) + stringTableOffsetPos := e.Pos() // fill this at the end + e.Uint32(0) // string table offset + encodePackage(e, p) + e.IntAt(e.Pos(), stringTableOffsetPos) + e.Bytes(e.stringTable) + return e.b +} + func encodePackage(e *encoder, p *rawPackage) { e.String(p.error) e.String(p.dir) diff --git a/src/cmd/go/internal/modload/import.go b/src/cmd/go/internal/modload/import.go index f7810ca5c6..f2c7592a28 100644 --- a/src/cmd/go/internal/modload/import.go +++ b/src/cmd/go/internal/modload/import.go @@ -657,11 +657,11 @@ func dirInModule(path, mpath, mdir string, isLocal bool) (dir string, haveGoFile // We don't care about build tags, not even "+build ignore". // We're just looking for a plausible directory. res := haveGoFilesCache.Do(dir, func() any { - // modindex.Get will return ErrNotIndexed for any directories which + // modindex.GetPackage will return ErrNotIndexed for any directories which // are reached through a symlink, so that they will be handled by // fsys.IsDirWithGoFiles below. - if mi, err := modindex.Get(mdir); err == nil { - isDirWithGoFiles, err := mi.IsDirWithGoFiles(mi.RelPath(dir)) + if ip, err := modindex.GetPackage(mdir, dir); err == nil { + isDirWithGoFiles, err := ip.IsDirWithGoFiles() return goFilesEntry{isDirWithGoFiles, err} } else if !errors.Is(err, modindex.ErrNotIndexed) { return goFilesEntry{err: err} diff --git a/src/cmd/go/internal/modload/load.go b/src/cmd/go/internal/modload/load.go index b2c3ba2633..ba85dc2438 100644 --- a/src/cmd/go/internal/modload/load.go +++ b/src/cmd/go/internal/modload/load.go @@ -2102,8 +2102,8 @@ func (ld *loader) checkTidyCompatibility(ctx context.Context, rs *Requirements) // may see these legacy imports. We drop them so that the module // search does not look for modules to try to satisfy them. func scanDir(modroot string, dir string, tags map[string]bool) (imports_, testImports []string, err error) { - if mi, mierr := modindex.Get(modroot); mierr == nil { - imports_, testImports, err = mi.ScanDir(mi.RelPath(dir), tags) + if ip, mierr := modindex.GetPackage(modroot, dir); mierr == nil { + imports_, testImports, err = ip.ScanDir(tags) goto Happy } else if !errors.Is(mierr, modindex.ErrNotIndexed) { return nil, nil, mierr diff --git a/src/cmd/go/internal/modload/search.go b/src/cmd/go/internal/modload/search.go index d9d7711d06..856390a0f2 100644 --- a/src/cmd/go/internal/modload/search.go +++ b/src/cmd/go/internal/modload/search.go @@ -195,7 +195,7 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f } modPrefix = mod.Path } - if mi, err := modindex.Get(root); err == nil { + if mi, err := modindex.GetModule(root); err == nil { walkFromIndex(mi, modPrefix, isMatch, treeCanMatch, tags, have, addPkg) continue } else if !errors.Is(err, modindex.ErrNotIndexed) { @@ -213,9 +213,9 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f } // walkFromIndex matches packages in a module using the module index. modroot -// is the module's root directory on disk, index is the ModuleIndex for the +// is the module's root directory on disk, index is the modindex.Module for the // module, and importPathRoot is the module's path prefix. -func walkFromIndex(index *modindex.ModuleIndex, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) { +func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) { loopPackages: for _, reldir := range index.Packages() { // Avoid .foo, _foo, and testdata subdirectory trees. @@ -252,7 +252,7 @@ loopPackages: if !have[name] { have[name] = true if isMatch(name) { - if _, _, err := index.ScanDir(reldir, tags); err != imports.ErrNoGo { + if _, _, err := index.Package(reldir).ScanDir(tags); err != imports.ErrNoGo { addPkg(name) } } -- 2.50.0