From: Michael Matloob Date: Tue, 5 Apr 2022 22:47:23 +0000 (-0400) Subject: cmd/go: add functions to read index file X-Git-Tag: go1.19beta1~80 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=06261062e9a1875338b04aaea3b6335d73b513a7;p=gostls13.git cmd/go: add functions to read index file The data read is used for three primary functions: ImportPackage, IsDirWithGoFiles and ScanDir. Functions are also provided to get this information from the intermediate package representation to cache the information from reads for non-indexed packages. Change-Id: I5eed629bb0d6ee5b88ab706d06b074475004c081 Reviewed-on: https://go-review.googlesource.com/c/go/+/403975 TryBot-Result: Gopher Robot Reviewed-by: Michael Matloob Reviewed-by: Bryan Mills Run-TryBot: Bryan Mills --- diff --git a/src/cmd/go/internal/cache/cache.go b/src/cmd/go/internal/cache/cache.go index 4ac2b818ff..c30d7c864b 100644 --- a/src/cmd/go/internal/cache/cache.go +++ b/src/cmd/go/internal/cache/cache.go @@ -20,6 +20,7 @@ import ( "time" "cmd/go/internal/lockedfile" + "cmd/go/internal/mmap" ) // An ActionID is a cache action key, the hash of a complete description of a @@ -244,6 +245,24 @@ func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { return data, entry, nil } +// GetMmap looks up the action ID in the cache and returns +// the corresponding output bytes. +// GetMmap should only be used for data that can be expected to fit in memory. +func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) { + entry, err := c.Get(id) + if err != nil { + return nil, entry, err + } + md, err := mmap.Mmap(c.OutputFile(entry.OutputID)) + if err != nil { + return nil, Entry{}, err + } + if int64(len(md.Data)) != entry.Size { + return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} + } + return md.Data, entry, nil +} + // OutputFile returns the name of the cache file storing output with the given OutputID. func (c *Cache) OutputFile(out OutputID) string { file := c.fileName(out, "d") diff --git a/src/cmd/go/internal/imports/build.go b/src/cmd/go/internal/imports/build.go index 0f20a8786e..957113686c 100644 --- a/src/cmd/go/internal/imports/build.go +++ b/src/cmd/go/internal/imports/build.go @@ -234,6 +234,16 @@ func eval(x constraint.Expr, tags map[string]bool, prefer bool) bool { panic(fmt.Sprintf("unexpected constraint expression %T", x)) } +// Eval is like +// +// x.Eval(func(tag string) bool { return matchTag(tag, tags) }) +// +// except that it implements the special case for tags["*"] meaning +// all tags are both true and false at the same time. +func Eval(x constraint.Expr, tags map[string]bool, prefer bool) bool { + return eval(x, tags, prefer) +} + // MatchFile returns false if the name contains a $GOOS or $GOARCH // suffix which does not match the current system. // The recognized name formats are: diff --git a/src/cmd/go/internal/mmap/mmap.go b/src/cmd/go/internal/mmap/mmap.go new file mode 100644 index 0000000000..0cad9caf27 --- /dev/null +++ b/src/cmd/go/internal/mmap/mmap.go @@ -0,0 +1,31 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This package is a lightly modified version of the mmap code +// in github.com/google/codesearch/index. + +// The mmap package provides an abstraction for memory mapping files +// on different platforms. +package mmap + +import ( + "os" +) + +// Data is mmap'ed read-only data from a file. 
+// The backing file is never closed, so Data +// remains valid for the lifetime of the process. +type Data struct { + f *os.File + Data []byte +} + +// Mmap maps the given file into memory. +func Mmap(file string) (Data, error) { + f, err := os.Open(file) + if err != nil { + return Data{}, err + } + return mmapFile(f) +} diff --git a/src/cmd/go/internal/mmap/mmap_other.go b/src/cmd/go/internal/mmap/mmap_other.go new file mode 100644 index 0000000000..269fe8d60a --- /dev/null +++ b/src/cmd/go/internal/mmap/mmap_other.go @@ -0,0 +1,21 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build plan9 || solaris + +package mmap + +import ( + "io" + "os" +) + +// mmapFile on other systems doesn't mmap the file. It just reads everything. +func mmapFile(f *os.File) (Data, error) { + b, err := io.ReadAll(f) + if err != nil { + return Data{}, err + } + return Data{f, b}, nil +} diff --git a/src/cmd/go/internal/mmap/mmap_unix.go b/src/cmd/go/internal/mmap/mmap_unix.go new file mode 100644 index 0000000000..33e839c217 --- /dev/null +++ b/src/cmd/go/internal/mmap/mmap_unix.go @@ -0,0 +1,36 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix && !solaris + +package mmap + +import ( + "fmt" + "io/fs" + "os" + "syscall" +) + +func mmapFile(f *os.File) (Data, error) { + st, err := f.Stat() + if err != nil { + return Data{}, err + } + size := st.Size() + pagesize := int64(os.Getpagesize()) + if int64(int(size+(pagesize-1))) != size+(pagesize-1) { + return Data{}, fmt.Errorf("%s: too large for mmap", f.Name()) + } + n := int(size) + if n == 0 { + return Data{f, nil}, nil + } + mmapLength := int(((size + pagesize - 1) / pagesize) * pagesize) // round up to page size + data, err := syscall.Mmap(int(f.Fd()), 0, mmapLength, syscall.PROT_READ, syscall.MAP_SHARED) + if err != nil { + return Data{}, &fs.PathError{Op: "mmap", Path: f.Name(), Err: err} + } + return Data{f, data[:n]}, nil +} diff --git a/src/cmd/go/internal/mmap/mmap_windows.go b/src/cmd/go/internal/mmap/mmap_windows.go new file mode 100644 index 0000000000..1cf62feca3 --- /dev/null +++ b/src/cmd/go/internal/mmap/mmap_windows.go @@ -0,0 +1,41 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package mmap
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+	"unsafe"
+
+	"internal/syscall/windows"
+)
+
+func mmapFile(f *os.File) (Data, error) {
+	st, err := f.Stat()
+	if err != nil {
+		return Data{}, err
+	}
+	size := st.Size()
+	if size == 0 {
+		return Data{f, nil}, nil
+	}
+	h, err := syscall.CreateFileMapping(syscall.Handle(f.Fd()), nil, syscall.PAGE_READONLY, 0, 0, nil)
+	if err != nil {
+		return Data{}, fmt.Errorf("CreateFileMapping %s: %w", f.Name(), err)
+	}
+
+	addr, err := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, 0)
+	if err != nil {
+		return Data{}, fmt.Errorf("MapViewOfFile %s: %w", f.Name(), err)
+	}
+	var info windows.MemoryBasicInformation
+	err = windows.VirtualQuery(addr, &info, unsafe.Sizeof(info))
+	if err != nil {
+		return Data{}, fmt.Errorf("VirtualQuery %s: %w", f.Name(), err)
+	}
+	data := unsafe.Slice((*byte)(unsafe.Pointer(addr)), int(info.RegionSize))
+	return Data{f, data}, nil
+}
diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go
new file mode 100644
index 0000000000..2579c516d6
--- /dev/null
+++ b/src/cmd/go/internal/modindex/read.go
@@ -0,0 +1,820 @@
+package modindex
+
+import (
+	"bytes"
+	"cmd/go/internal/base"
+	"cmd/go/internal/cache"
+	"cmd/go/internal/cfg"
+	"cmd/go/internal/fsys"
+	"cmd/go/internal/imports"
+	"cmd/go/internal/par"
+	"cmd/go/internal/str"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"go/build"
+	"go/build/constraint"
+	"go/token"
+	"internal/unsafeheader"
+	"io/fs"
+	"math"
+	"os"
+	"path/filepath"
+	"runtime/debug"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"unsafe"
+)
+
+// enabled is used to flag off the behavior of the module index on tip.
+// It will be removed before the release.
+// TODO(matloob): Remove enabled once we have more confidence on the
+// module index.
+var enabled, _ = strconv.ParseBool(os.Getenv("GOINDEX"))
+
+// ModuleIndex represents an encoded module index file. It is used to
+// do the equivalent of build.Import of packages in the module and answer other
+// questions based on the index file's data.
+type ModuleIndex struct {
+	modroot      string
+	od           offsetDecoder
+	packages     map[string]int // offsets of each package
+	packagePaths []string       // paths to package directories relative to modroot; these are the keys of packages
+}
+
+var fcache par.Cache
+
+func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) {
+	h := cache.NewHash("moduleIndex")
+	fmt.Fprintf(h, "module index %s %v", indexVersion, modroot)
+	if ismodcache {
+		return h.Sum(), nil
+	}
+	// walkdir happens in deterministic order.
+	err := fsys.Walk(modroot, func(path string, info fs.FileInfo, err error) error {
+		if modroot == path {
+			// Check for go.mod in root directory, and return ErrNotIndexed
+			// if it doesn't exist. Outside the module cache, it's not a module
+			// if it doesn't have a go.mod file.
+		}
+		if err := moduleWalkErr(modroot, path, info, err); err != nil {
+			return err
+		}
+
+		if info.IsDir() {
+			return nil
+		}
+		fmt.Fprintf(h, "file %v %v\n", info.Name(), info.ModTime())
+		if info.Mode()&fs.ModeSymlink != 0 {
+			targ, err := fsys.Stat(path)
+			if err != nil {
+				return err
+			}
+			fmt.Fprintf(h, "target %v %v\n", targ.Name(), targ.ModTime())
+		}
+		return nil
+	})
+	if err != nil {
+		return cache.ActionID{}, err
+	}
+	return h.Sum(), nil
+}
+
+var modrootCache par.Cache
+
+var ErrNotIndexed = errors.New("not in module index")
+
+// Get returns the ModuleIndex for the module rooted at modroot.
+// It will return ErrNotIndexed if the directory should be read without
+// using the index, for instance because the index is disabled, or the package
+// is not in a module.
+func Get(modroot string) (*ModuleIndex, error) {
+	if !enabled || cache.DefaultDir() == "off" || cfg.BuildMod == "vendor" {
+		return nil, ErrNotIndexed
+	}
+	if modroot == "" {
+		panic("modindex.Get called with empty modroot")
+	}
+	if str.HasFilePathPrefix(modroot, cfg.GOROOT) {
+		// TODO(matloob): add a case for stdlib here.
+		return nil, ErrNotIndexed
+	}
+	isModCache := str.HasFilePathPrefix(modroot, cfg.GOMODCACHE)
+	return openIndex(modroot, isModCache)
+}
+
+// openIndex returns the module index for the module rooted at modroot.
+// It will return ErrNotIndexed if the module cannot be read
+// using the index because it contains symlinks.
+func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) {
+	type result struct {
+		mi  *ModuleIndex
+		err error
+	}
+	r := fcache.Do(modroot, func() any {
+		id, err := moduleHash(modroot, ismodcache)
+		if err != nil {
+			return result{nil, err}
+		}
+		data, _, err := cache.Default().GetMmap(id)
+		if err != nil {
+			// Couldn't read from modindex. Assume we couldn't read from
+			// the index because the module hasn't been indexed yet.
+			data, err = indexModule(modroot)
+			if err != nil {
+				return result{nil, err}
+			}
+			if err = cache.Default().PutBytes(id, data); err != nil {
+				return result{nil, err}
+			}
+		}
+		mi, err := fromBytes(modroot, data)
+		if err != nil {
+			return result{nil, err}
+		}
+		return result{mi, nil}
+	}).(result)
+	return r.mi, r.err
+}
+
+// fromBytes returns a *ModuleIndex given the encoded representation.
+func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) {
+	if !enabled {
+		panic("use of index")
+	}
+
+	// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
+	// that all its errors satisfy this interface, we'll only check for these errors so that
+	// we don't suppress panics that could have been produced from other sources.
+	type addrer interface {
+		Addr() uintptr
+	}
+
+	// set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
+	// in case it's mmapped (the common case).
+	old := debug.SetPanicOnFault(true)
+	defer func() {
+		debug.SetPanicOnFault(old)
+		if e := recover(); e != nil {
+			if _, ok := e.(addrer); ok {
+				// This panic was almost certainly caused by SetPanicOnFault.
+				err = fmt.Errorf("error reading module index: %v", e)
+				return
+			}
+			// The panic was likely not caused by SetPanicOnFault.
+			panic(e)
+		}
+	}()
+
+	gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
+	if string(gotVersion) != indexVersion {
+		return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+	}
+	stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
+	st := newStringTable(data[stringTableOffset:])
+	d := decoder{unread, st}
+	numPackages := d.int()
+
+	packagePaths := make([]string, numPackages)
+	for i := range packagePaths {
+		packagePaths[i] = d.string()
+	}
+	packageOffsets := make([]int, numPackages)
+	for i := range packageOffsets {
+		packageOffsets[i] = d.int()
+	}
+	packages := make(map[string]int, numPackages)
+	for i := range packagePaths {
+		packages[packagePaths[i]] = packageOffsets[i]
+	}
+
+	return &ModuleIndex{
+		moddir,
+		offsetDecoder{data, st},
+		packages,
+		packagePaths,
+	}, nil
+}
+
+// Packages returns a list of directory paths, relative to the modroot, for
+// packages contained in the module index.
+func (mi *ModuleIndex) Packages() []string { + return mi.packagePaths +} + +// RelPath returns the path relative to the module's root. +func (mi *ModuleIndex) RelPath(path string) string { + return filepath.Clean(str.TrimFilePathPrefix(path, mi.modroot)) +} + +// ImportPackage is the equivalent of build.Import given the information in ModuleIndex. +func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.ImportMode) (p *build.Package, err error) { + rp := mi.indexPackage(relpath) + + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf("error reading module index: %v", e) + } + }() + + ctxt := (*Context)(&bctxt) + + p = &build.Package{} + + p.ImportPath = "." + p.Dir = filepath.Join(mi.modroot, rp.dir) + if rp.error != "" { + return p, errors.New(rp.error) + } + + var pkgerr error + switch ctxt.Compiler { + case "gccgo", "gc": + default: + // Save error for end of function. + pkgerr = fmt.Errorf("import %q: unknown compiler %q", p.Dir, ctxt.Compiler) + } + + if p.Dir == "" { + return p, fmt.Errorf("import %q: import of unknown directory", p.Dir) + } + + if mode&build.FindOnly != 0 { + return p, pkgerr + } + + // We need to do a second round of bad file processing. + var badGoError error + badFiles := make(map[string]bool) + badFile := func(name string, err error) { + if badGoError == nil { + badGoError = err + } + if !badFiles[name] { + p.InvalidGoFiles = append(p.InvalidGoFiles, name) + badFiles[name] = true + } + } + + var Sfiles []string // files with ".S"(capital S)/.sx(capital s equivalent for case insensitive filesystems) + var firstFile string + embedPos := make(map[string][]token.Position) + testEmbedPos := make(map[string][]token.Position) + xTestEmbedPos := make(map[string][]token.Position) + importPos := make(map[string][]token.Position) + testImportPos := make(map[string][]token.Position) + xTestImportPos := make(map[string][]token.Position) + allTags := make(map[string]bool) + for _, tf := range rp.sourceFiles { + name := tf.name() + if error := tf.error(); error != "" { + badFile(name, errors.New(tf.error())) + continue + } else if parseError := tf.parseError(); parseError != "" { + badFile(name, parseErrorFromString(tf.parseError())) + // Fall through: we still want to list files with parse errors. + } + + var shouldBuild = true + if !ctxt.goodOSArchFile(name, allTags) && !ctxt.UseAllFiles { + shouldBuild = false + } else if goBuildConstraint := tf.goBuildConstraint(); goBuildConstraint != "" { + x, err := constraint.Parse(goBuildConstraint) + if err != nil { + return p, fmt.Errorf("%s: parsing //go:build line: %v", name, err) + } + shouldBuild = ctxt.eval(x, allTags) + } else if plusBuildConstraints := tf.plusBuildConstraints(); len(plusBuildConstraints) > 0 { + for _, text := range plusBuildConstraints { + if x, err := constraint.Parse(text); err == nil { + if !ctxt.eval(x, allTags) { + shouldBuild = false + } + } + } + } + + ext := nameExt(name) + if !shouldBuild || tf.ignoreFile() { + if ext == ".go" { + p.IgnoredGoFiles = append(p.IgnoredGoFiles, name) + } else if fileListForExt((*Package)(p), ext) != nil { + p.IgnoredOtherFiles = append(p.IgnoredOtherFiles, name) + } + continue + } + + // Going to save the file. For non-Go files, can stop here. 
+ switch ext { + case ".go": + // keep going + case ".S", ".sx": + // special case for cgo, handled at end + Sfiles = append(Sfiles, name) + continue + default: + if list := fileListForExt((*Package)(p), ext); list != nil { + *list = append(*list, name) + } + continue + } + + pkg := tf.pkgName() + if pkg == "documentation" { + p.IgnoredGoFiles = append(p.IgnoredGoFiles, name) + continue + } + isTest := strings.HasSuffix(name, "_test.go") + isXTest := false + if isTest && strings.HasSuffix(tf.pkgName(), "_test") && p.Name != tf.pkgName() { + isXTest = true + pkg = pkg[:len(pkg)-len("_test")] + } + + if !isTest && tf.binaryOnly() { + p.BinaryOnly = true + } + + if p.Name == "" { + p.Name = pkg + firstFile = name + } else if pkg != p.Name { + // TODO(#45999): The choice of p.Name is arbitrary based on file iteration + // order. Instead of resolving p.Name arbitrarily, we should clear out the + // existing Name and mark the existing files as also invalid. + badFile(name, &MultiplePackageError{ + Dir: p.Dir, + Packages: []string{p.Name, pkg}, + Files: []string{firstFile, name}, + }) + } + // Grab the first package comment as docs, provided it is not from a test file. + if p.Doc == "" && !isTest && !isXTest { + if synopsis := tf.synopsis(); synopsis != "" { + p.Doc = synopsis + } + } + + // Record Imports and information about cgo. + isCgo := false + imports := tf.imports() + for _, imp := range imports { + if imp.path == "C" { + if isTest { + badFile(name, fmt.Errorf("use of cgo in test %s not supported", name)) + continue + } + isCgo = true + } + } + if directives := tf.cgoDirectives(); directives != "" { + if err := ctxt.saveCgo(name, (*Package)(p), directives); err != nil { + badFile(name, err) + } + } + + var fileList *[]string + var importMap, embedMap map[string][]token.Position + switch { + case isCgo: + allTags["cgo"] = true + if ctxt.CgoEnabled { + fileList = &p.CgoFiles + importMap = importPos + embedMap = embedPos + } else { + // Ignore Imports and Embeds from cgo files if cgo is disabled. + fileList = &p.IgnoredGoFiles + } + case isXTest: + fileList = &p.XTestGoFiles + importMap = xTestImportPos + embedMap = xTestEmbedPos + case isTest: + fileList = &p.TestGoFiles + importMap = testImportPos + embedMap = testEmbedPos + default: + fileList = &p.GoFiles + importMap = importPos + embedMap = embedPos + } + *fileList = append(*fileList, name) + if importMap != nil { + for _, imp := range imports { + importMap[imp.path] = append(importMap[imp.path], imp.position) + } + } + if embedMap != nil { + for _, e := range tf.embeds() { + embedMap[e.pattern] = append(embedMap[e.pattern], e.position) + } + } + } + + p.EmbedPatterns, p.EmbedPatternPos = cleanDecls(embedPos) + p.TestEmbedPatterns, p.TestEmbedPatternPos = cleanDecls(testEmbedPos) + p.XTestEmbedPatterns, p.XTestEmbedPatternPos = cleanDecls(xTestEmbedPos) + + p.Imports, p.ImportPos = cleanDecls(importPos) + p.TestImports, p.TestImportPos = cleanDecls(testImportPos) + p.XTestImports, p.XTestImportPos = cleanDecls(xTestImportPos) + + for tag := range allTags { + p.AllTags = append(p.AllTags, tag) + } + sort.Strings(p.AllTags) + + if len(p.CgoFiles) > 0 { + p.SFiles = append(p.SFiles, Sfiles...) + sort.Strings(p.SFiles) + } else { + p.IgnoredOtherFiles = append(p.IgnoredOtherFiles, Sfiles...) 
+ sort.Strings(p.IgnoredOtherFiles) + } + + if badGoError != nil { + return p, badGoError + } + if len(p.GoFiles)+len(p.CgoFiles)+len(p.TestGoFiles)+len(p.XTestGoFiles) == 0 { + return p, &build.NoGoError{Dir: p.Dir} + } + return p, pkgerr +} + +// IsDirWithGoFiles is the equivalent of fsys.IsDirWithGoFiles using the information in the +// RawPackage. +func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) { + rp := mi.indexPackage(relpath) + + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf("error reading module index: %v", e) + } + }() + for _, sf := range rp.sourceFiles { + if strings.HasSuffix(sf.name(), ".go") { + return true, nil + } + } + return false, nil +} + +// ScanDir implements imports.ScanDir using the information in the RawPackage. +func (mi *ModuleIndex) ScanDir(path string, tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) { + rp := mi.indexPackage(path) + + // TODO(matloob) dir should eventually be relative to indexed directory + // TODO(matloob): skip reading raw package and jump straight to data we need? + + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf("error reading module index: %v", e) + } + }() + + imports_ := make(map[string]bool) + testImports := make(map[string]bool) + numFiles := 0 + +Files: + for _, sf := range rp.sourceFiles { + name := sf.name() + if strings.HasPrefix(name, "_") || strings.HasPrefix(name, ".") || !strings.HasSuffix(name, ".go") || !imports.MatchFile(name, tags) { + continue + } + + // The following section exists for backwards compatibility reasons: + // scanDir ignores files with import "C" when collecting the list + // of imports unless the "cgo" tag is provided. The following comment + // is copied from the original. + // + // import "C" is implicit requirement of cgo tag. + // When listing files on the command line (explicitFiles=true) + // we do not apply build tag filtering but we still do apply + // cgo filtering, so no explicitFiles check here. + // Why? Because we always have, and it's not worth breaking + // that behavior now. + imps := sf.imports() // TODO(matloob): directly read import paths to avoid the extra strings? + for _, imp := range imps { + if imp.path == "C" && !tags["cgo"] && !tags["*"] { + continue Files + } + } + + if !shouldBuild(sf, tags) { + continue + } + numFiles++ + m := imports_ + if strings.HasSuffix(name, "_test.go") { + m = testImports + } + for _, p := range imps { + m[p.path] = true + } + } + if numFiles == 0 { + return nil, nil, imports.ErrNoGo + } + return keys(imports_), keys(testImports), nil +} + +func keys(m map[string]bool) []string { + list := make([]string, 0, len(m)) + for k := range m { + list = append(list, k) + } + sort.Strings(list) + return list +} + +// implements imports.ShouldBuild in terms of an index sourcefile. +func shouldBuild(sf *sourceFile, tags map[string]bool) bool { + if goBuildConstraint := sf.goBuildConstraint(); goBuildConstraint != "" { + x, err := constraint.Parse(goBuildConstraint) + if err != nil { + return false + } + return imports.Eval(x, tags, true) + } + + plusBuildConstraints := sf.plusBuildConstraints() + for _, text := range plusBuildConstraints { + if x, err := constraint.Parse(text); err == nil { + if imports.Eval(x, tags, true) == false { + return false + } + } + } + + return true +} + +// index package holds the information needed to access information in the +// index about a package. 
+type indexPackage struct { + error string + dir string // directory of the package relative to the modroot + + // Source files + sourceFiles []*sourceFile +} + +// indexPackage returns an indexPackage constructed using the information in the ModuleIndex. +func (mi *ModuleIndex) indexPackage(path string) *indexPackage { + defer func() { + if e := recover(); e != nil { + base.Fatalf("error reading module index: %v", e) + } + }() + offset, ok := mi.packages[path] + if !ok { + return &indexPackage{error: fmt.Sprintf("cannot find package %q in:\n\t%s", path, filepath.Join(mi.modroot, path))} + } + + // TODO(matloob): do we want to lock on the module index? + d := mi.od.decoderAt(offset) + rp := new(indexPackage) + rp.error = d.string() + rp.dir = d.string() + numSourceFiles := d.uint32() + rp.sourceFiles = make([]*sourceFile, numSourceFiles) + for i := uint32(0); i < numSourceFiles; i++ { + offset := d.uint32() + rp.sourceFiles[i] = &sourceFile{ + od: mi.od.offsetDecoderAt(offset), + } + } + return rp +} + +// sourceFile represents the information of a given source file in the module index. +type sourceFile struct { + od offsetDecoder // od interprets all offsets relative to the start of the source file's data + + onceReadImports sync.Once + savedImports []rawImport // saved imports so that they're only read once +} + +// Offsets for fields in the sourceFile. +const ( + sourceFileError = 4 * iota + sourceFileParseError + sourceFileSynopsis + sourceFileName + sourceFilePkgName + sourceFileIgnoreFile + sourceFileBinaryOnly + sourceFileCgoDirectives + sourceFileGoBuildConstraint + sourceFileNumPlusBuildConstraints +) + +func (sf *sourceFile) error() string { + return sf.od.stringAt(sourceFileError) +} +func (sf *sourceFile) parseError() string { + return sf.od.stringAt(sourceFileParseError) +} +func (sf *sourceFile) name() string { + return sf.od.stringAt(sourceFileName) +} +func (sf *sourceFile) synopsis() string { + return sf.od.stringAt(sourceFileSynopsis) +} +func (sf *sourceFile) pkgName() string { + return sf.od.stringAt(sourceFilePkgName) +} +func (sf *sourceFile) ignoreFile() bool { + return sf.od.boolAt(sourceFileIgnoreFile) +} +func (sf *sourceFile) binaryOnly() bool { + return sf.od.boolAt(sourceFileBinaryOnly) +} +func (sf *sourceFile) cgoDirectives() string { + return sf.od.stringAt(sourceFileCgoDirectives) +} +func (sf *sourceFile) goBuildConstraint() string { + return sf.od.stringAt(sourceFileGoBuildConstraint) +} + +func (sf *sourceFile) plusBuildConstraints() []string { + d := sf.od.decoderAt(sourceFileNumPlusBuildConstraints) + n := d.int() + ret := make([]string, n) + for i := 0; i < n; i++ { + ret[i] = d.string() + } + return ret +} + +func importsOffset(numPlusBuildConstraints int) int { + // 4 bytes per uin32, add one to advance past numPlusBuildConstraints itself + return sourceFileNumPlusBuildConstraints + 4*(numPlusBuildConstraints+1) +} + +func (sf *sourceFile) importsOffset() int { + numPlusBuildConstraints := sf.od.intAt(sourceFileNumPlusBuildConstraints) + return importsOffset(numPlusBuildConstraints) +} + +func embedsOffset(importsOffset, numImports int) int { + // 4 bytes per uint32; 1 to advance past numImports itself, and 5 uint32s per import + return importsOffset + 4*(1+(5*numImports)) +} + +func (sf *sourceFile) embedsOffset() int { + importsOffset := sf.importsOffset() + numImports := sf.od.intAt(importsOffset) + return embedsOffset(importsOffset, numImports) +} + +func (sf *sourceFile) imports() []rawImport { + sf.onceReadImports.Do(func() { + importsOffset 
:= sf.importsOffset() + d := sf.od.decoderAt(importsOffset) + numImports := d.int() + ret := make([]rawImport, numImports) + for i := 0; i < numImports; i++ { + ret[i].path = d.string() + ret[i].position = d.tokpos() + } + sf.savedImports = ret + }) + return sf.savedImports +} + +func (sf *sourceFile) embeds() []embed { + embedsOffset := sf.embedsOffset() + d := sf.od.decoderAt(embedsOffset) + numEmbeds := d.int() + ret := make([]embed, numEmbeds) + for i := range ret { + pattern := d.string() + pos := d.tokpos() + ret[i] = embed{pattern, pos} + } + return ret +} + +// A decoder reads from the current position of the file and advances its position as it +// reads. +type decoder struct { + b []byte + st *stringTable +} + +func (d *decoder) uint32() uint32 { + n := binary.LittleEndian.Uint32(d.b[:4]) + d.b = d.b[4:] + return n +} + +func (d *decoder) int() int { + n := d.uint32() + if int64(n) > math.MaxInt { + base.Fatalf("go: attempting to read a uint32 from the index that overflows int") + } + return int(n) +} + +func (d *decoder) tokpos() token.Position { + file := d.string() + offset := d.int() + line := d.int() + column := d.int() + return token.Position{ + Filename: file, + Offset: offset, + Line: line, + Column: column, + } +} + +func (d *decoder) string() string { + return d.st.string(d.int()) +} + +// And offset decoder reads information offset from its position in the file. +// It's either offset from the beginning of the index, or the beginning of a sourceFile's data. +type offsetDecoder struct { + b []byte + st *stringTable +} + +func (od *offsetDecoder) uint32At(offset int) uint32 { + if offset > len(od.b) { + base.Fatalf("go: trying to read from index file at offset higher than file length. This indicates a corrupt offset file in the cache.") + } + return binary.LittleEndian.Uint32(od.b[offset:]) +} + +func (od *offsetDecoder) intAt(offset int) int { + n := od.uint32At(offset) + if int64(n) > math.MaxInt { + base.Fatalf("go: attempting to read a uint32 from the index that overflows int") + } + return int(n) +} + +func (od *offsetDecoder) boolAt(offset int) bool { + switch v := od.uint32At(offset); v { + case 0: + return false + case 1: + return true + default: + base.Fatalf("go: invalid bool value in index file encoding: %v", v) + } + panic("unreachable") +} + +func (od *offsetDecoder) stringAt(offset int) string { + return od.st.string(od.intAt(offset)) +} + +func (od *offsetDecoder) decoderAt(offset int) *decoder { + return &decoder{od.b[offset:], od.st} +} + +func (od *offsetDecoder) offsetDecoderAt(offset uint32) offsetDecoder { + return offsetDecoder{od.b[offset:], od.st} +} + +type stringTable struct { + b []byte +} + +func newStringTable(b []byte) *stringTable { + return &stringTable{b: b} +} + +func (st *stringTable) string(pos int) string { + if pos == 0 { + return "" + } + + bb := st.b[pos:] + i := bytes.IndexByte(bb, 0) + + if i == -1 { + panic("reached end of string table trying to read string") + } + s := asString(bb[:i]) + + return s +} + +func asString(b []byte) string { + p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data + + var s string + hdr := (*unsafeheader.String)(unsafe.Pointer(&s)) + hdr.Data = p + hdr.Len = len(b) + + return s +} diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go index 0904278691..6e42e4ecac 100644 --- a/src/cmd/go/internal/modindex/scan.go +++ b/src/cmd/go/internal/modindex/scan.go @@ -15,24 +15,45 @@ import ( "strings" ) +// moduleWalkErr returns filepath.SkipDir if the directory isn't relevant 
+// when indexing a module or generating a filehash, ErrNotIndexed, +// if the module shouldn't be indexed, and nil otherwise. +func moduleWalkErr(modroot string, path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + // stop at module boundaries + if info.IsDir() && path != modroot { + if fi, err := fsys.Stat(filepath.Join(path, "go.mod")); err == nil && !fi.IsDir() { + return filepath.SkipDir + } + } + if info.Mode()&fs.ModeSymlink != 0 { + if target, err := fsys.Stat(path); err == nil && target.IsDir() { + // return an error to make the module hash invalid. + // Symlink directories in modules are tricky, so we won't index + // modules that contain them. + // TODO(matloob): perhaps don't return this error if the symlink leads to + // a directory with a go.mod file. + return ErrNotIndexed + } + } + return nil +} + // indexModule indexes the module at the given directory and returns its -// encoded representation. +// encoded representation. It returns ErrNotIndexed if the module can't +// be indexed because it contains symlinks. func indexModule(modroot string) ([]byte, error) { var packages []*rawPackage err := fsys.Walk(modroot, func(path string, info fs.FileInfo, err error) error { - if err != nil { + if err := moduleWalkErr(modroot, path, info, err); err != nil { return err } + if !info.IsDir() { return nil } - // stop at module boundaries - if modroot != path { - if fi, err := fsys.Stat(filepath.Join(path, "go.mod")); err == nil && !fi.IsDir() { - return filepath.SkipDir - } - } - // TODO(matloob): what do we do about symlinks rel, err := filepath.Rel(modroot, path) if err != nil { panic(err) @@ -43,7 +64,7 @@ func indexModule(modroot string) ([]byte, error) { if err != nil { return nil, err } - return encodeModule(packages) + return encodeModule(packages), nil } // rawPackage holds the information from each package that's needed to diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go index b3a41cb2bd..0c3123a46f 100644 --- a/src/cmd/go/internal/modindex/write.go +++ b/src/cmd/go/internal/modindex/write.go @@ -9,13 +9,14 @@ import ( "strings" ) -const indexVersion = "go index v0\n" +const indexVersion = "go index v0" // encodeModule produces the encoded representation of the module index. // encodeModule may modify the packages slice. -func encodeModule(packages []*rawPackage) ([]byte, error) { +func encodeModule(packages []*rawPackage) []byte { e := newEncoder() e.Bytes([]byte(indexVersion)) + e.Bytes([]byte{'\n'}) stringTableOffsetPos := e.Pos() // fill this at the end e.Uint32(0) // string table offset e.Int(len(packages)) @@ -35,7 +36,7 @@ func encodeModule(packages []*rawPackage) ([]byte, error) { } e.IntAt(e.Pos(), stringTableOffsetPos) e.Bytes(e.stringTable) - return e.b, nil + return e.b } func encodePackage(e *encoder, p *rawPackage) { diff --git a/src/internal/syscall/windows/memory_windows.go b/src/internal/syscall/windows/memory_windows.go new file mode 100644 index 0000000000..ba30f92c1a --- /dev/null +++ b/src/internal/syscall/windows/memory_windows.go @@ -0,0 +1,16 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package windows + +type MemoryBasicInformation struct { + BaseAddress uintptr + AllocationBase uintptr + AllocationProtect uint32 + PartitionId uint16 + RegionSize uintptr + State uint32 + Protect uint32 + Type uint32 +} diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go index f8965d0bab..b37085e13b 100644 --- a/src/internal/syscall/windows/syscall_windows.go +++ b/src/internal/syscall/windows/syscall_windows.go @@ -154,6 +154,7 @@ const ( //sys MoveFileEx(from *uint16, to *uint16, flags uint32) (err error) = MoveFileExW //sys GetModuleFileName(module syscall.Handle, fn *uint16, len uint32) (n uint32, err error) = kernel32.GetModuleFileNameW //sys SetFileInformationByHandle(handle syscall.Handle, fileInformationClass uint32, buf uintptr, bufsize uint32) (err error) = kernel32.SetFileInformationByHandle +//sys VirtualQuery(address uintptr, buffer *MemoryBasicInformation, length uintptr) (err error) = kernel32.VirtualQuery const ( WSA_FLAG_OVERLAPPED = 0x01 diff --git a/src/internal/syscall/windows/zsyscall_windows.go b/src/internal/syscall/windows/zsyscall_windows.go index aaad4a5b94..962607aba2 100644 --- a/src/internal/syscall/windows/zsyscall_windows.go +++ b/src/internal/syscall/windows/zsyscall_windows.go @@ -66,6 +66,7 @@ var ( procMultiByteToWideChar = modkernel32.NewProc("MultiByteToWideChar") procSetFileInformationByHandle = modkernel32.NewProc("SetFileInformationByHandle") procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") + procVirtualQuery = modkernel32.NewProc("VirtualQuery") procNetShareAdd = modnetapi32.NewProc("NetShareAdd") procNetShareDel = modnetapi32.NewProc("NetShareDel") procNetUserGetLocalGroups = modnetapi32.NewProc("NetUserGetLocalGroups") @@ -257,6 +258,14 @@ func UnlockFileEx(file syscall.Handle, reserved uint32, bytesLow uint32, bytesHi return } +func VirtualQuery(address uintptr, buffer *MemoryBasicInformation, length uintptr) (err error) { + r1, _, e1 := syscall.Syscall(procVirtualQuery.Addr(), 3, uintptr(address), uintptr(unsafe.Pointer(buffer)), uintptr(length)) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func NetShareAdd(serverName *uint16, level uint32, buf *byte, parmErr *uint16) (neterr error) { r0, _, _ := syscall.Syscall6(procNetShareAdd.Addr(), 4, uintptr(unsafe.Pointer(serverName)), uintptr(level), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(parmErr)), 0, 0) if r0 != 0 {
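
A rough usage sketch (not part of the patch): how a caller inside cmd/go might tie the new pieces together, using Get to obtain a *ModuleIndex and falling back to the non-indexed path when ErrNotIndexed is returned. The package name "load", the function importFromIndex, and the bool result signalling the fallback are illustrative assumptions, not code from this CL.

	// Hypothetical glue code inside cmd/go, for illustration only.
	package load

	import (
		"go/build"

		"cmd/go/internal/modindex"
	)

	// importFromIndex loads the package in directory relpath (relative to
	// modroot) through the module index. The second result reports whether
	// the caller should fall back to reading the directory directly.
	func importFromIndex(modroot, relpath string) (*build.Package, bool, error) {
		mi, err := modindex.Get(modroot)
		if err != nil {
			if err == modindex.ErrNotIndexed {
				// Index disabled, vendor mode, GOROOT, or a module with
				// symlinked directories: read the directory the old way.
				return nil, true, nil
			}
			return nil, false, err
		}
		ok, err := mi.IsDirWithGoFiles(relpath)
		if err != nil {
			return nil, false, err
		}
		if !ok {
			return nil, false, &build.NoGoError{Dir: relpath}
		}
		// Equivalent of build.Import for a package inside the indexed module.
		p, err := mi.Import(build.Default, relpath, 0)
		return p, false, err
	}

ScanDir fills the same role for imports.ScanDir when only import lists are needed.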