cmd/go: make module index loading O(1)

author Russ Cox <rsc@golang.org>

Wed, 6 Jul 2022 13:49:32 +0000 (09:49 -0400)

committer Russ Cox <rsc@golang.org>

Mon, 11 Jul 2022 19:09:00 +0000 (19:09 +0000)
author Russ Cox <rsc@golang.org>
Wed, 6 Jul 2022 13:49:32 +0000 (09:49 -0400)
committer Russ Cox <rsc@golang.org>
Mon, 11 Jul 2022 19:09:00 +0000 (19:09 +0000)
diff --git a/src/cmd/go/internal/modindex/index_test.go b/src/cmd/go/internal/modindex/index_test.go

new file mode 100644 (file)

index 0000000..2c072f9
--- /dev/null
+++ b/src/cmd/go/internal/modindex/index_test.go
@@ -0,0 +1,87 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package modindex
+
+import (
+       "encoding/hex"
+       "encoding/json"
+       "go/build"
+       "internal/diff"
+       "path/filepath"
+       "reflect"
+       "runtime"
+       "testing"
+)
+
+func init() {
+       isTest = true
+       enabled = true // to allow GODEBUG=goindex=0 go test, when things are very broken
+}
+
+func TestIndex(t *testing.T) {
+       src := filepath.Join(runtime.GOROOT(), "src")
+       checkPkg := func(t *testing.T, m *Module, pkg string, data []byte) {
+               p := m.Package(pkg)
+               bp, err := p.Import(build.Default, build.ImportComment)
+               if err != nil {
+                       t.Fatal(err)
+               }
+               bp1, err := build.Default.Import(pkg, filepath.Join(src, pkg), build.ImportComment)
+               if err != nil {
+                       t.Fatal(err)
+               }
+
+               if !reflect.DeepEqual(bp, bp1) {
+                       t.Errorf("mismatch")
+                       t.Logf("index:\n%s", hex.Dump(data))
+
+                       js, err := json.MarshalIndent(bp, "", "\t")
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       js1, err := json.MarshalIndent(bp1, "", "\t")
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       t.Logf("diff:\n%s", diff.Diff("index", js, "correct", js1))
+                       t.FailNow()
+               }
+       }
+
+       // Check packages in increasing complexity, one at a time.
+       pkgs := []string{
+               "crypto",
+               "encoding",
+               "unsafe",
+               "encoding/json",
+               "runtime",
+               "net",
+       }
+       var raws []*rawPackage
+       for _, pkg := range pkgs {
+               raw := importRaw(src, pkg)
+               raws = append(raws, raw)
+               t.Run(pkg, func(t *testing.T) {
+                       data := encodeModuleBytes([]*rawPackage{raw})
+                       m, err := fromBytes(src, data)
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       checkPkg(t, m, pkg, data)
+               })
+       }
+
+       // Check that a multi-package index works too.
+       t.Run("all", func(t *testing.T) {
+               data := encodeModuleBytes(raws)
+               m, err := fromBytes(src, data)
+               if err != nil {
+                       t.Fatal(err)
+               }
+               for _, pkg := range pkgs {
+                       checkPkg(t, m, pkg, data)
+               }
+       })
+}
diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go

index 436bbebb39819f4109ef025eed3b6b61e1790a9f..38ddfec70fb53d7dde07be097ee3e72f854fa040 100644 (file)
--- a/src/cmd/go/internal/modindex/read.go
+++ b/src/cmd/go/internal/modindex/read.go
@@ -15,7 +15,6 @@ import (
         "internal/godebug"
         "internal/goroot"
         "internal/unsafeheader"
-       "math"
         "path"
         "path/filepath"
         "runtime"
@@ -45,10 +44,9 @@ var enabled bool = godebug.Get("goindex") != "0"
  // do the equivalent of build.Import of packages in the module and answer other
  // questions based on the index file's data.
  type Module struct {
-       modroot      string
-       od           offsetDecoder
-       packages     map[string]int // offsets of each package
-       packagePaths []string       // paths to package directories relative to modroot; these are the keys of packages
+       modroot string
+       d       *decoder
+       n       int // number of packages
  }
  
  // moduleHash returns an ActionID corresponding to the state of the module
@@ -236,12 +234,30 @@ func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
         return r.pkg, r.err
  }
  
-// fromBytes returns a *Module given the encoded representation.
-func fromBytes(moddir string, data []byte) (mi *Module, err error) {
-       if !enabled {
-               panic("use of index")
-       }
+var errCorrupt = errors.New("corrupt index")
+
+// protect marks the start of a large section of code that accesses the index.
+// It should be used as:
+//
+//     defer unprotect(protect, &err)
+//
+// It should not be used for trivial accesses which would be
+// dwarfed by the overhead of the defer.
+func protect() bool {
+       return debug.SetPanicOnFault(true)
+}
  
+var isTest = false
+
+// unprotect marks the end of a large section of code that accesses the index.
+// It should be used as:
+//
+//     defer unprotect(protect, &err)
+//
+// end looks for panics due to errCorrupt or bad mmap accesses.
+// When it finds them, it adds explanatory text, consumes the panic, and sets *errp instead.
+// If errp is nil, end adds the explanatory text but then calls base.Fatalf.
+func unprotect(old bool, errp *error) {
         // SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
         // that all its errors satisfy this interface, we'll only check for these errors so that
         // we don't suppress panics that could have been produced from other sources.
@@ -249,97 +265,100 @@ func fromBytes(moddir string, data []byte) (mi *Module, err error) {
                 Addr() uintptr
         }
  
-       // set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
-       // in case it's mmapped (the common case).
-       old := debug.SetPanicOnFault(true)
-       defer func() {
-               debug.SetPanicOnFault(old)
-               if e := recover(); e != nil {
-                       if _, ok := e.(addrer); ok {
-                               // This panic was almost certainly caused by SetPanicOnFault.
-                               err = fmt.Errorf("error reading module index: %v", e)
+       debug.SetPanicOnFault(old)
+
+       if e := recover(); e != nil {
+               if _, ok := e.(addrer); ok || e == errCorrupt {
+                       // This panic was almost certainly caused by SetPanicOnFault or our panic(errCorrupt).
+                       err := fmt.Errorf("error reading module index: %v", e)
+                       if errp != nil {
+                               *errp = err
                                 return
                         }
-                       // The panic was likely not caused by SetPanicOnFault.
-                       panic(e)
+                       if isTest {
+                               panic(err)
+                       }
+                       base.Fatalf("%v", err)
                 }
-       }()
+               // The panic was likely not caused by SetPanicOnFault.
+               panic(e)
+       }
+}
  
-       gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
-       if string(gotVersion) != indexVersion {
-               return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+// fromBytes returns a *Module given the encoded representation.
+func fromBytes(moddir string, data []byte) (m *Module, err error) {
+       if !enabled {
+               panic("use of index")
+       }
+
+       defer unprotect(protect(), &err)
+
+       if !bytes.HasPrefix(data, []byte(indexVersion+"\n")) {
+               return nil, errCorrupt
         }
-       stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
-       st := newStringTable(data[stringTableOffset:])
-       d := decoder{unread, st}
-       numPackages := d.int()
-
-       packagePaths := make([]string, numPackages)
-       for i := range packagePaths {
-               packagePaths[i] = d.string()
+
+       const hdr = len(indexVersion + "\n")
+       d := &decoder{data: data}
+       str := d.intAt(hdr)
+       if str < hdr+8 || len(d.data) < str {
+               return nil, errCorrupt
         }
-       packageOffsets := make([]int, numPackages)
-       for i := range packageOffsets {
-               packageOffsets[i] = d.int()
+       d.data, d.str = data[:str], d.data[str:]
+       // Check that string table looks valid.
+       // First string is empty string (length 0),
+       // and we leave a marker byte 0xFF at the end
+       // just to make sure that the file is not truncated.
+       if len(d.str) == 0 || d.str[0] != 0 || d.str[len(d.str)-1] != 0xFF {
+               return nil, errCorrupt
         }
-       packages := make(map[string]int, numPackages)
-       for i := range packagePaths {
-               packages[packagePaths[i]] = packageOffsets[i]
+
+       n := d.intAt(hdr + 4)
+       if n < 0 || n > (len(d.data)-8)/8 {
+               return nil, errCorrupt
         }
  
-       return &Module{
+       m = &Module{
                 moddir,
-               offsetDecoder{data, st},
-               packages,
-               packagePaths,
-       }, nil
+               d,
+               n,
+       }
+       return m, nil
  }
  
  // packageFromBytes returns a *IndexPackage given the encoded representation.
  func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) {
-       if !enabled {
-               panic("use of package index when not enabled")
+       m, err := fromBytes(modroot, data)
+       if err != nil {
+               return nil, err
         }
-
-       // SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
-       // that all its errors satisfy this interface, we'll only check for these errors so that
-       // we don't suppress panics that could have been produced from other sources.
-       type addrer interface {
-               Addr() uintptr
+       if m.n != 1 {
+               return nil, fmt.Errorf("corrupt single-package index")
         }
+       return m.pkg(0), nil
+}
  
-       // set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
-       // in case it's mmapped (the common case).
-       old := debug.SetPanicOnFault(true)
-       defer func() {
-               debug.SetPanicOnFault(old)
-               if e := recover(); e != nil {
-                       if _, ok := e.(addrer); ok {
-                               // This panic was almost certainly caused by SetPanicOnFault.
-                               err = fmt.Errorf("error reading module index: %v", e)
-                               return
-                       }
-                       // The panic was likely not caused by SetPanicOnFault.
-                       panic(e)
-               }
-       }()
+// pkgDir returns the dir string of the i'th package in the index.
+func (m *Module) pkgDir(i int) string {
+       if i < 0 || i >= m.n {
+               panic(errCorrupt)
+       }
+       return m.d.stringAt(12 + 8 + 8*i)
+}
  
-       gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
-       if string(gotVersion) != indexVersion {
-               return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+// pkgOff returns the offset of the data for the i'th package in the index.
+func (m *Module) pkgOff(i int) int {
+       if i < 0 || i >= m.n {
+               panic(errCorrupt)
         }
-       stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
-       st := newStringTable(data[stringTableOffset:])
-       d := &decoder{unread, st}
-       p = decodePackage(d, offsetDecoder{data, st})
-       p.modroot = modroot
-       return p, nil
+       return m.d.intAt(12 + 8 + 8*i + 4)
  }
  
-// Returns a list of directory paths, relative to the modroot, for
-// packages contained in the module index.
-func (mi *Module) Packages() []string {
-       return mi.packagePaths
+// Walk calls f for each package in the index, passing the path to that package relative to the module root.
+func (m *Module) Walk(f func(path string)) {
+       defer unprotect(protect(), nil)
+       for i := 0; i < m.n; i++ {
+               f(m.pkgDir(i))
+       }
  }
  
  // relPath returns the path relative to the module's root.
@@ -349,11 +368,7 @@ func relPath(path, modroot string) string {
  
  // Import is the equivalent of build.Import given the information in Module.
  func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) {
-       defer func() {
-               if e := recover(); e != nil {
-                       err = fmt.Errorf("error reading module index: %v", e)
-               }
-       }()
+       defer unprotect(protect(), &err)
  
         ctxt := (*Context)(&bctxt)
  
@@ -794,46 +809,44 @@ type IndexPackage struct {
  
  var errCannotFindPackage = errors.New("cannot find package")
  
-// Package returns an IndexPackage constructed using the information in the Module.
-func (mi *Module) Package(path string) *IndexPackage {
-       defer func() {
-               if e := recover(); e != nil {
-                       base.Fatalf("error reading module index: %v", e)
-               }
-       }()
-       offset, ok := mi.packages[path]
+// Package and returns finds the package with the given path (relative to the module root).
+// If the package does not exist, Package returns an IndexPackage that will return an
+// appropriate error from its methods.
+func (m *Module) Package(path string) *IndexPackage {
+       defer unprotect(protect(), nil)
+
+       i, ok := sort.Find(m.n, func(i int) int {
+               return strings.Compare(path, m.pkgDir(i))
+       })
         if !ok {
-               return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
+               return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(m.modroot, path))}
         }
-
-       // TODO(matloob): do we want to lock on the module index?
-       d := mi.od.decoderAt(offset)
-       p := decodePackage(d, mi.od)
-       p.modroot = mi.modroot
-       return p
+       return m.pkg(i)
  }
  
-func decodePackage(d *decoder, od offsetDecoder) *IndexPackage {
-       rp := new(IndexPackage)
-       if errstr := d.string(); errstr != "" {
-               rp.error = errors.New(errstr)
+// pkgAt returns the i'th IndexPackage in m.
+func (m *Module) pkg(i int) *IndexPackage {
+       r := m.d.readAt(m.pkgOff(i))
+       p := new(IndexPackage)
+       if errstr := r.string(); errstr != "" {
+               p.error = errors.New(errstr)
         }
-       rp.dir = d.string()
-       numSourceFiles := d.uint32()
-       rp.sourceFiles = make([]*sourceFile, numSourceFiles)
-       for i := uint32(0); i < numSourceFiles; i++ {
-               offset := d.uint32()
-               rp.sourceFiles[i] = &sourceFile{
-                       od: od.offsetDecoderAt(offset),
+       p.dir = r.string()
+       p.sourceFiles = make([]*sourceFile, r.int())
+       for i := range p.sourceFiles {
+               p.sourceFiles[i] = &sourceFile{
+                       d:   m.d,
+                       pos: r.int(),
                 }
         }
-       return rp
+       p.modroot = m.modroot
+       return p
  }
  
  // sourceFile represents the information of a given source file in the module index.
  type sourceFile struct {
-       od offsetDecoder // od interprets all offsets relative to the start of the source file's data
-
+       d               *decoder // encoding of this source file
+       pos             int      // start of sourceFile encoding in d
         onceReadImports sync.Once
         savedImports    []rawImport // saved imports so that they're only read once
  }
@@ -853,73 +866,67 @@ const (
  )
  
  func (sf *sourceFile) error() string {
-       return sf.od.stringAt(sourceFileError)
+       return sf.d.stringAt(sf.pos + sourceFileError)
  }
  func (sf *sourceFile) parseError() string {
-       return sf.od.stringAt(sourceFileParseError)
+       return sf.d.stringAt(sf.pos + sourceFileParseError)
  }
  func (sf *sourceFile) synopsis() string {
-       return sf.od.stringAt(sourceFileSynopsis)
+       return sf.d.stringAt(sf.pos + sourceFileSynopsis)
  }
  func (sf *sourceFile) name() string {
-       return sf.od.stringAt(sourceFileName)
+       return sf.d.stringAt(sf.pos + sourceFileName)
  }
  func (sf *sourceFile) pkgName() string {
-       return sf.od.stringAt(sourceFilePkgName)
+       return sf.d.stringAt(sf.pos + sourceFilePkgName)
  }
  func (sf *sourceFile) ignoreFile() bool {
-       return sf.od.boolAt(sourceFileIgnoreFile)
+       return sf.d.boolAt(sf.pos + sourceFileIgnoreFile)
  }
  func (sf *sourceFile) binaryOnly() bool {
-       return sf.od.boolAt(sourceFileBinaryOnly)
+       return sf.d.boolAt(sf.pos + sourceFileBinaryOnly)
  }
  func (sf *sourceFile) cgoDirectives() string {
-       return sf.od.stringAt(sourceFileCgoDirectives)
+       return sf.d.stringAt(sf.pos + sourceFileCgoDirectives)
  }
  func (sf *sourceFile) goBuildConstraint() string {
-       return sf.od.stringAt(sourceFileGoBuildConstraint)
+       return sf.d.stringAt(sf.pos + sourceFileGoBuildConstraint)
  }
  
  func (sf *sourceFile) plusBuildConstraints() []string {
-       d := sf.od.decoderAt(sourceFileNumPlusBuildConstraints)
-       n := d.int()
+       pos := sf.pos + sourceFileNumPlusBuildConstraints
+       n := sf.d.intAt(pos)
+       pos += 4
         ret := make([]string, n)
         for i := 0; i < n; i++ {
-               ret[i] = d.string()
+               ret[i] = sf.d.stringAt(pos)
+               pos += 4
         }
         return ret
  }
  
-func importsOffset(numPlusBuildConstraints int) int {
-       // 4 bytes per uin32, add one to advance past numPlusBuildConstraints itself
-       return sourceFileNumPlusBuildConstraints + 4*(numPlusBuildConstraints+1)
-}
-
  func (sf *sourceFile) importsOffset() int {
-       numPlusBuildConstraints := sf.od.intAt(sourceFileNumPlusBuildConstraints)
-       return importsOffset(numPlusBuildConstraints)
-}
-
-func embedsOffset(importsOffset, numImports int) int {
-       // 4 bytes per uint32; 1 to advance past numImports itself, and 5 uint32s per import
-       return importsOffset + 4*(1+(5*numImports))
+       pos := sf.pos + sourceFileNumPlusBuildConstraints
+       n := sf.d.intAt(pos)
+       // each build constraint is 1 uint32
+       return pos + 4 + n*4
  }
  
  func (sf *sourceFile) embedsOffset() int {
-       importsOffset := sf.importsOffset()
-       numImports := sf.od.intAt(importsOffset)
-       return embedsOffset(importsOffset, numImports)
+       pos := sf.importsOffset()
+       n := sf.d.intAt(pos)
+       // each import is 5 uint32s (string + tokpos)
+       return pos + 4 + n*(4*5)
  }
  
  func (sf *sourceFile) imports() []rawImport {
         sf.onceReadImports.Do(func() {
                 importsOffset := sf.importsOffset()
-               d := sf.od.decoderAt(importsOffset)
-               numImports := d.int()
+               r := sf.d.readAt(importsOffset)
+               numImports := r.int()
                 ret := make([]rawImport, numImports)
                 for i := 0; i < numImports; i++ {
-                       ret[i].path = d.string()
-                       ret[i].position = d.tokpos()
+                       ret[i] = rawImport{r.string(), r.tokpos()}
                 }
                 sf.savedImports = ret
         })
@@ -928,132 +935,101 @@ func (sf *sourceFile) imports() []rawImport {
  
  func (sf *sourceFile) embeds() []embed {
         embedsOffset := sf.embedsOffset()
-       d := sf.od.decoderAt(embedsOffset)
-       numEmbeds := d.int()
+       r := sf.d.readAt(embedsOffset)
+       numEmbeds := r.int()
         ret := make([]embed, numEmbeds)
         for i := range ret {
-               pattern := d.string()
-               pos := d.tokpos()
-               ret[i] = embed{pattern, pos}
+               ret[i] = embed{r.string(), r.tokpos()}
         }
         return ret
  }
  
-// A decoder reads from the current position of the file and advances its position as it
-// reads.
-type decoder struct {
-       b  []byte
-       st *stringTable
-}
+func asString(b []byte) string {
+       p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data
  
-func (d *decoder) uint32() uint32 {
-       n := binary.LittleEndian.Uint32(d.b[:4])
-       d.b = d.b[4:]
-       return n
-}
+       var s string
+       hdr := (*unsafeheader.String)(unsafe.Pointer(&s))
+       hdr.Data = p
+       hdr.Len = len(b)
  
-func (d *decoder) int() int {
-       n := d.uint32()
-       if int64(n) > math.MaxInt {
-               base.Fatalf("go: attempting to read a uint32 from the index that overflows int")
-       }
-       return int(n)
+       return s
  }
  
-func (d *decoder) tokpos() token.Position {
-       file := d.string()
-       offset := d.int()
-       line := d.int()
-       column := d.int()
-       return token.Position{
-               Filename: file,
-               Offset:   offset,
-               Line:     line,
-               Column:   column,
-       }
+// A decoder helps decode the index format.
+type decoder struct {
+       data []byte // data after header
+       str  []byte // string table
  }
  
-func (d *decoder) string() string {
-       return d.st.string(d.int())
+// intAt returns the int at the given offset in d.data.
+func (d *decoder) intAt(off int) int {
+       if off < 0 || len(d.data)-off < 4 {
+               panic(errCorrupt)
+       }
+       i := binary.LittleEndian.Uint32(d.data[off : off+4])
+       if int32(i)>>31 != 0 {
+               panic(errCorrupt)
+       }
+       return int(i)
  }
  
-// And offset decoder reads information offset from its position in the file.
-// It's either offset from the beginning of the index, or the beginning of a sourceFile's data.
-type offsetDecoder struct {
-       b  []byte
-       st *stringTable
+// boolAt returns the bool at the given offset in d.data.
+func (d *decoder) boolAt(off int) bool {
+       return d.intAt(off) != 0
  }
  
-func (od *offsetDecoder) uint32At(offset int) uint32 {
-       if offset > len(od.b) {
-               base.Fatalf("go: trying to read from index file at offset higher than file length. This indicates a corrupt offset file in the cache.")
-       }
-       return binary.LittleEndian.Uint32(od.b[offset:])
+// stringTableAt returns the string pointed at by the int at the given offset in d.data.
+func (d *decoder) stringAt(off int) string {
+       return d.stringTableAt(d.intAt(off))
  }
  
-func (od *offsetDecoder) intAt(offset int) int {
-       n := od.uint32At(offset)
-       if int64(n) > math.MaxInt {
-               base.Fatalf("go: attempting to read a uint32 from the index that overflows int")
+// stringTableAt returns the string at the given offset in the string table d.str.
+func (d *decoder) stringTableAt(off int) string {
+       if off < 0 || off >= len(d.str) {
+               panic(errCorrupt)
         }
-       return int(n)
-}
-
-func (od *offsetDecoder) boolAt(offset int) bool {
-       switch v := od.uint32At(offset); v {
-       case 0:
-               return false
-       case 1:
-               return true
-       default:
-               base.Fatalf("go: invalid bool value in index file encoding: %v", v)
+       s := d.str[off:]
+       v, n := binary.Uvarint(s)
+       if n <= 0 || v > uint64(len(s[n:])) {
+               panic(errCorrupt)
         }
-       panic("unreachable")
+       return asString(s[n : n+int(v)])
  }
  
-func (od *offsetDecoder) stringAt(offset int) string {
-       return od.st.string(od.intAt(offset))
+// A reader reads sequential fields from a section of the index format.
+type reader struct {
+       d   *decoder
+       pos int
  }
  
-func (od *offsetDecoder) decoderAt(offset int) *decoder {
-       return &decoder{od.b[offset:], od.st}
+// readAt returns a reader starting at the given position in d.
+func (d *decoder) readAt(pos int) *reader {
+       return &reader{d, pos}
  }
  
-func (od *offsetDecoder) offsetDecoderAt(offset uint32) offsetDecoder {
-       return offsetDecoder{od.b[offset:], od.st}
+// int reads the next int.
+func (r *reader) int() int {
+       i := r.d.intAt(r.pos)
+       r.pos += 4
+       return i
  }
  
-type stringTable struct {
-       b []byte
+// string reads the next string.
+func (r *reader) string() string {
+       return r.d.stringTableAt(r.int())
  }
  
-func newStringTable(b []byte) *stringTable {
-       return &stringTable{b: b}
+// bool reads the next bool.
+func (r *reader) bool() bool {
+       return r.int() != 0
  }
  
-func (st *stringTable) string(pos int) string {
-       if pos == 0 {
-               return ""
-       }
-
-       bb := st.b[pos:]
-       i := bytes.IndexByte(bb, 0)
-
-       if i == -1 {
-               panic("reached end of string table trying to read string")
+// tokpos reads the next token.Position.
+func (r *reader) tokpos() token.Position {
+       return token.Position{
+               Filename: r.string(),
+               Offset:   r.int(),
+               Line:     r.int(),
+               Column:   r.int(),
         }
-       s := asString(bb[:i])
-
-       return s
-}
-
-func asString(b []byte) string {
-       p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data
-
-       var s string
-       hdr := (*unsafeheader.String)(unsafe.Pointer(&s))
-       hdr.Data = p
-       hdr.Len = len(b)
-
-       return s
  }
diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go

index 3408248bd9e6cd216ca00409944e37662e26abcc..7db1fb087052c7d353f6fa8aaf7e2b8936f5bdbb 100644 (file)
--- a/src/cmd/go/internal/modindex/write.go
+++ b/src/cmd/go/internal/modindex/write.go
@@ -1,54 +1,46 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
  package modindex
  
  import (
         "cmd/go/internal/base"
         "encoding/binary"
         "go/token"
-       "math"
         "sort"
-       "strings"
  )
  
-const indexVersion = "go index v0"
+const indexVersion = "go index v1" // 11 bytes (plus \n), to align uint32s in index
  
  // encodeModuleBytes produces the encoded representation of the module index.
  // encodeModuleBytes may modify the packages slice.
  func encodeModuleBytes(packages []*rawPackage) []byte {
         e := newEncoder()
-       e.Bytes([]byte(indexVersion))
-       e.Bytes([]byte{'\n'})
+       e.Bytes([]byte(indexVersion + "\n"))
         stringTableOffsetPos := e.Pos() // fill this at the end
         e.Uint32(0)                     // string table offset
-       e.Int(len(packages))
         sort.Slice(packages, func(i, j int) bool {
                 return packages[i].dir < packages[j].dir
         })
+       e.Int(len(packages))
+       packagesPos := e.Pos()
         for _, p := range packages {
                 e.String(p.dir)
-       }
-       packagesOffsetPos := e.Pos()
-       for range packages {
                 e.Int(0)
         }
         for i, p := range packages {
-               e.IntAt(e.Pos(), packagesOffsetPos+4*i)
+               e.IntAt(e.Pos(), packagesPos+8*i+4)
                 encodePackage(e, p)
         }
         e.IntAt(e.Pos(), stringTableOffsetPos)
         e.Bytes(e.stringTable)
+       e.Bytes([]byte{0xFF}) // end of string table marker
         return e.b
  }
  
  func encodePackageBytes(p *rawPackage) []byte {
-       e := newEncoder()
-       e.Bytes([]byte(indexVersion))
-       e.Bytes([]byte{'\n'})
-       stringTableOffsetPos := e.Pos() // fill this at the end
-       e.Uint32(0)                     // string table offset
-       encodePackage(e, p)
-       e.IntAt(e.Pos(), stringTableOffsetPos)
-       e.Bytes(e.stringTable)
-       return e.b
+       return encodeModuleBytes([]*rawPackage{p})
  }
  
  func encodePackage(e *encoder, p *rawPackage) {
@@ -126,9 +118,6 @@ func (e *encoder) Bytes(b []byte) {
  }
  
  func (e *encoder) String(s string) {
-       if strings.IndexByte(s, 0) >= 0 {
-               base.Fatalf("go: attempting to encode a string containing a null byte")
-       }
         if n, ok := e.strings[s]; ok {
                 e.Int(n)
                 return
@@ -136,8 +125,8 @@ func (e *encoder) String(s string) {
         pos := len(e.stringTable)
         e.strings[s] = pos
         e.Int(pos)
+       e.stringTable = binary.AppendUvarint(e.stringTable, uint64(len(s)))
         e.stringTable = append(e.stringTable, []byte(s)...)
-       e.stringTable = append(e.stringTable, 0)
  }
  
  func (e *encoder) Bool(b bool) {
@@ -152,17 +141,18 @@ func (e *encoder) Uint32(n uint32) {
         e.b = binary.LittleEndian.AppendUint32(e.b, n)
  }
  
-// Int encodes n. Note that all ints are written to the index as uint32s.
+// Int encodes n. Note that all ints are written to the index as uint32s,
+// and to avoid problems on 32-bit systems we require fitting into a 32-bit int.
  func (e *encoder) Int(n int) {
-       if n < 0 || int64(n) > math.MaxUint32 {
-               base.Fatalf("go: attempting to write an int to the index that overflows uint32")
+       if n < 0 || int(int32(n)) != n {
+               base.Fatalf("go: attempting to write an int to the index that overflows int32")
         }
         e.Uint32(uint32(n))
  }
  
  func (e *encoder) IntAt(n int, at int) {
-       if n < 0 || int64(n) > math.MaxUint32 {
-               base.Fatalf("go: attempting to write an int to the index that overflows uint32")
+       if n < 0 || int(int32(n)) != n {
+               base.Fatalf("go: attempting to write an int to the index that overflows int32")
         }
         binary.LittleEndian.PutUint32(e.b[at:], uint32(n))
  }
diff --git a/src/cmd/go/internal/modload/search.go b/src/cmd/go/internal/modload/search.go

index 856390a0f25511c2ff0fd58679017c4db0ce1300..b2ac7f22b1ea9a3cb2e1f22c1663879ac5f42dc0 100644 (file)
--- a/src/cmd/go/internal/modload/search.go
+++ b/src/cmd/go/internal/modload/search.go
@@ -216,21 +216,20 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f
  // is the module's root directory on disk, index is the modindex.Module for the
  // module, and importPathRoot is the module's path prefix.
  func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
-loopPackages:
-       for _, reldir := range index.Packages() {
+       index.Walk(func(reldir string) {
                 // Avoid .foo, _foo, and testdata subdirectory trees.
                 p := reldir
                 for {
                         elem, rest, found := strings.Cut(p, string(filepath.Separator))
                         if strings.HasPrefix(elem, ".") || strings.HasPrefix(elem, "_") || elem == "testdata" {
-                               continue loopPackages
+                               return
                         }
                         if found && elem == "vendor" {
                                 // Ignore this path if it contains the element "vendor" anywhere
                                 // except for the last element (packages named vendor are allowed
                                 // for historical reasons). Note that found is true when this
                                 // isn't the last path element.
-                               continue loopPackages
+                               return
                         }
                         if !found {
                                 // Didn't find the separator, so we're considering the last element.
@@ -241,12 +240,12 @@ loopPackages:
  
                 // Don't use GOROOT/src.
                 if reldir == "" && importPathRoot == "" {
-                       continue
+                       return
                 }
  
                 name := path.Join(importPathRoot, filepath.ToSlash(reldir))
                 if !treeCanMatch(name) {
-                       continue
+                       return
                 }
  
                 if !have[name] {
@@ -257,7 +256,7 @@ loopPackages:
                                 }
                         }
                 }
-       }
+       })
  }
  
  // MatchInModule identifies the packages matching the given pattern within the
author	Russ Cox <rsc@golang.org>
	Wed, 6 Jul 2022 13:49:32 +0000 (09:49 -0400)
committer	Russ Cox <rsc@golang.org>
	Mon, 11 Jul 2022 19:09:00 +0000 (19:09 +0000)
src/cmd/go/internal/modindex/index_test.go	[new file with mode: 0644]	patch \| blob
src/cmd/go/internal/modindex/read.go		patch \| blob \| history
src/cmd/go/internal/modindex/write.go		patch \| blob \| history
src/cmd/go/internal/modload/search.go		patch \| blob \| history