cmd/go: abstract build cache, support implementations via child process

author Brad Fitzpatrick <bradfitz@golang.org>

Mon, 6 Feb 2023 03:52:06 +0000 (19:52 -0800)

committer Brad Fitzpatrick <bradfitz@golang.org>

Thu, 25 May 2023 00:49:37 +0000 (00:49 +0000)
author Brad Fitzpatrick <bradfitz@golang.org>
Mon, 6 Feb 2023 03:52:06 +0000 (19:52 -0800)
committer Brad Fitzpatrick <bradfitz@golang.org>
Thu, 25 May 2023 00:49:37 +0000 (00:49 +0000)
diff --git a/src/cmd/go/internal/cache/cache.go b/src/cmd/go/internal/cache/cache.go

index 378ae5db003c8fbe5c0ba188d370e06618b30d7b..4a82d27e7abfab0591757f12d43fa288c05b5e9c 100644 (file)
--- a/src/cmd/go/internal/cache/cache.go
+++ b/src/cmd/go/internal/cache/cache.go
@@ -32,8 +32,50 @@ type ActionID [HashSize]byte
  // An OutputID is a cache output key, the hash of an output of a computation.
  type OutputID [HashSize]byte
  
+// Cache is the interface as used by the cmd/go.
+type Cache interface {
+       // Get returns the cache entry for the provided ActionID.
+       // On miss, the error type should be of type *entryNotFoundError.
+       //
+       // After a success call to Get, OutputFile(Entry.OutputID) must
+       // exist on disk for until Close is called (at the end of the process).
+       Get(ActionID) (Entry, error)
+
+       // Put adds an item to the cache.
+       //
+       // The seeker is only used to seek to the beginning. After a call to Put,
+       // the seek position is not guaranteed to be in any particular state.
+       //
+       // As a special case, if the ReadSeeker is of type noVerifyReadSeeker,
+       // the verification from GODEBUG=goverifycache=1 is skipped.
+       //
+       // After a success call to Get, OutputFile(Entry.OutputID) must
+       // exist on disk for until Close is called (at the end of the process).
+       Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error)
+
+       // Close is called at the end of the go process. Implementations can do
+       // cache cleanup work at this phase, or wait for and report any errors from
+       // background cleanup work started earlier. Any cache trimming should in one
+       // process should not violate cause the invariants of this interface to be
+       // violated in another process. Namely, a cache trim from one process should
+       // not delete an ObjectID from disk that was recently Get or Put from
+       // another process. As a rule of thumb, don't trim things used in the last
+       // day.
+       Close() error
+
+       // OutputFile returns the path on disk where OutputID is stored.
+       //
+       // It's only called after a successful get or put call so it doesn't need
+       // to return an error; it's assumed that if the previous get or put succeeded,
+       // it's already on disk.
+       OutputFile(OutputID) string
+
+       // FuzzDir returns where fuzz files are stored.
+       FuzzDir() string
+}
+
  // A Cache is a package cache, backed by a file system directory tree.
-type Cache struct {
+type DiskCache struct {
         dir string
         now func() time.Time
  }
@@ -49,7 +91,7 @@ type Cache struct {
  // to share a cache directory (for example, if the directory were stored
  // in a network file system). File locking is notoriously unreliable in
  // network file systems and may not suffice to protect the cache.
-func Open(dir string) (*Cache, error) {
+func Open(dir string) (*DiskCache, error) {
         info, err := os.Stat(dir)
         if err != nil {
                 return nil, err
@@ -63,7 +105,7 @@ func Open(dir string) (*Cache, error) {
                         return nil, err
                 }
         }
-       c := &Cache{
+       c := &DiskCache{
                 dir: dir,
                 now: time.Now,
         }
@@ -71,7 +113,7 @@ func Open(dir string) (*Cache, error) {
  }
  
  // fileName returns the name of the file corresponding to the given id.
-func (c *Cache) fileName(id [HashSize]byte, key string) string {
+func (c *DiskCache) fileName(id [HashSize]byte, key string) string {
         return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
  }
  
@@ -141,7 +183,7 @@ func initEnv() {
  // returning the corresponding output ID and file size, if any.
  // Note that finding an output ID does not guarantee that the
  // saved file for that output ID is still available.
-func (c *Cache) Get(id ActionID) (Entry, error) {
+func (c *DiskCache) Get(id ActionID) (Entry, error) {
         if verify {
                 return Entry{}, &entryNotFoundError{Err: errVerifyMode}
         }
@@ -151,11 +193,11 @@ func (c *Cache) Get(id ActionID) (Entry, error) {
  type Entry struct {
         OutputID OutputID
         Size     int64
-       Time     time.Time
+       Time     time.Time // when added to cache
  }
  
  // get is Get but does not respect verify mode, so that Put can use it.
-func (c *Cache) get(id ActionID) (Entry, error) {
+func (c *DiskCache) get(id ActionID) (Entry, error) {
         missing := func(reason error) (Entry, error) {
                 return Entry{}, &entryNotFoundError{Err: reason}
         }
@@ -219,7 +261,7 @@ func (c *Cache) get(id ActionID) (Entry, error) {
  
  // GetFile looks up the action ID in the cache and returns
  // the name of the corresponding data file.
-func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
+func GetFile(c Cache, id ActionID) (file string, entry Entry, err error) {
         entry, err = c.Get(id)
         if err != nil {
                 return "", Entry{}, err
@@ -238,7 +280,7 @@ func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
  // GetBytes looks up the action ID in the cache and returns
  // the corresponding output bytes.
  // GetBytes should only be used for data that can be expected to fit in memory.
-func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
+func GetBytes(c Cache, id ActionID) ([]byte, Entry, error) {
         entry, err := c.Get(id)
         if err != nil {
                 return nil, entry, err
@@ -253,7 +295,7 @@ func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
  // GetMmap looks up the action ID in the cache and returns
  // the corresponding output bytes.
  // GetMmap should only be used for data that can be expected to fit in memory.
-func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
+func GetMmap(c Cache, id ActionID) ([]byte, Entry, error) {
         entry, err := c.Get(id)
         if err != nil {
                 return nil, entry, err
@@ -269,7 +311,7 @@ func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
  }
  
  // OutputFile returns the name of the cache file storing output with the given OutputID.
-func (c *Cache) OutputFile(out OutputID) string {
+func (c *DiskCache) OutputFile(out OutputID) string {
         file := c.fileName(out, "d")
         c.used(file)
         return file
@@ -302,7 +344,7 @@ const (
  // mtime is more than an hour old. This heuristic eliminates
  // nearly all of the mtime updates that would otherwise happen,
  // while still keeping the mtimes useful for cache trimming.
-func (c *Cache) used(file string) {
+func (c *DiskCache) used(file string) {
         info, err := os.Stat(file)
         if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
                 return
@@ -310,8 +352,10 @@ func (c *Cache) used(file string) {
         os.Chtimes(file, c.now(), c.now())
  }
  
+func (c *DiskCache) Close() error { return c.Trim() }
+
  // Trim removes old cache entries that are likely not to be reused.
-func (c *Cache) Trim() error {
+func (c *DiskCache) Trim() error {
         now := c.now()
  
         // We maintain in dir/trim.txt the time of the last completed cache trim.
@@ -351,7 +395,7 @@ func (c *Cache) Trim() error {
  }
  
  // trimSubdir trims a single cache subdirectory.
-func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
+func (c *DiskCache) trimSubdir(subdir string, cutoff time.Time) {
         // Read all directory entries from subdir before removing
         // any files, in case removing files invalidates the file offset
         // in the directory scan. Also, ignore error from f.Readdirnames,
@@ -379,7 +423,7 @@ func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
  
  // putIndexEntry adds an entry to the cache recording that executing the action
  // with the given id produces an output with the given output id (hash) and size.
-func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
+func (c *DiskCache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
         // Note: We expect that for one reason or another it may happen
         // that repeating an action produces a different output hash
         // (for example, if the output contains a time stamp or temp dir name).
@@ -433,21 +477,32 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
         return nil
  }
  
+// noVerifyReadSeeker is a io.ReadSeeker wrapper sentinel type
+// that says that Cache.Put should skip the verify check
+// (from GODEBUG=goverifycache=1).
+type noVerifyReadSeeker struct {
+       io.ReadSeeker
+}
+
  // Put stores the given output in the cache as the output for the action ID.
  // It may read file twice. The content of file must not change between the two passes.
-func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
-       return c.put(id, file, true)
+func (c *DiskCache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
+       wrapper, isNoVerify := file.(noVerifyReadSeeker)
+       if isNoVerify {
+               file = wrapper.ReadSeeker
+       }
+       return c.put(id, file, !isNoVerify)
  }
  
  // PutNoVerify is like Put but disables the verify check
  // when GODEBUG=goverifycache=1 is set.
  // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
  // like test output containing times and the like.
-func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
-       return c.put(id, file, false)
+func PutNoVerify(c Cache, id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
+       return c.Put(id, noVerifyReadSeeker{file})
  }
  
-func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
+func (c *DiskCache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
         // Compute output ID.
         h := sha256.New()
         if _, err := file.Seek(0, 0); err != nil {
@@ -470,14 +525,14 @@ func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID
  }
  
  // PutBytes stores the given bytes in the cache as the output for the action ID.
-func (c *Cache) PutBytes(id ActionID, data []byte) error {
+func PutBytes(c Cache, id ActionID, data []byte) error {
         _, _, err := c.Put(id, bytes.NewReader(data))
         return err
  }
  
  // copyFile copies file into the cache, expecting it to have the given
  // output ID and size, if that file is not present already.
-func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
+func (c *DiskCache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
         name := c.fileName(out, "d")
         info, err := os.Stat(name)
         if err == nil && info.Size() == size {
@@ -567,6 +622,6 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
  // They may be removed with 'go clean -fuzzcache'.
  //
  // TODO(#48526): make Trim remove unused files from this directory.
-func (c *Cache) FuzzDir() string {
+func (c *DiskCache) FuzzDir() string {
         return filepath.Join(c.dir, "fuzz")
  }
diff --git a/src/cmd/go/internal/cache/cache_test.go b/src/cmd/go/internal/cache/cache_test.go

index c422920c985b4d105b86b41e163d622a568f496f..a12f1d2ee798ee735594475861c314da3eea97eb 100644 (file)
--- a/src/cmd/go/internal/cache/cache_test.go
+++ b/src/cmd/go/internal/cache/cache_test.go
@@ -130,7 +130,7 @@ func TestVerifyPanic(t *testing.T) {
         }
  
         id := ActionID(dummyID(1))
-       if err := c.PutBytes(id, []byte("abc")); err != nil {
+       if err := PutBytes(c, id, []byte("abc")); err != nil {
                 t.Fatal(err)
         }
  
@@ -140,7 +140,7 @@ func TestVerifyPanic(t *testing.T) {
                         return
                 }
         }()
-       c.PutBytes(id, []byte("def"))
+       PutBytes(c, id, []byte("def"))
         t.Fatal("mismatched Put did not panic in verify mode")
  }
  
@@ -178,9 +178,9 @@ func TestCacheTrim(t *testing.T) {
         }
  
         id := ActionID(dummyID(1))
-       c.PutBytes(id, []byte("abc"))
+       PutBytes(c, id, []byte("abc"))
         entry, _ := c.Get(id)
-       c.PutBytes(ActionID(dummyID(2)), []byte("def"))
+       PutBytes(c, ActionID(dummyID(2)), []byte("def"))
         mtime := now
         checkTime(fmt.Sprintf("%x-a", id), mtime)
         checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime)
diff --git a/src/cmd/go/internal/cache/default.go b/src/cmd/go/internal/cache/default.go

index f39699d5ba4e343b11c8ec0f7858811d3a312407..b5650eac669b46926769c9519dec7f7476fb6e2d 100644 (file)
--- a/src/cmd/go/internal/cache/default.go
+++ b/src/cmd/go/internal/cache/default.go
@@ -12,18 +12,19 @@ import (
  
         "cmd/go/internal/base"
         "cmd/go/internal/cfg"
+       "internal/goexperiment"
  )
  
  // Default returns the default cache to use.
  // It never returns nil.
-func Default() *Cache {
+func Default() Cache {
         defaultOnce.Do(initDefaultCache)
         return defaultCache
  }
  
  var (
         defaultOnce  sync.Once
-       defaultCache *Cache
+       defaultCache Cache
  )
  
  // cacheREADME is a message stored in a README in the cache directory.
@@ -53,11 +54,16 @@ func initDefaultCache() {
                 os.WriteFile(filepath.Join(dir, "README"), []byte(cacheREADME), 0666)
         }
  
-       c, err := Open(dir)
+       diskCache, err := Open(dir)
         if err != nil {
                 base.Fatalf("failed to initialize build cache at %s: %s\n", dir, err)
         }
-       defaultCache = c
+
+       if v := cfg.Getenv("GOCACHEPROG"); v != "" && goexperiment.CacheProg {
+               defaultCache = startCacheProg(v, diskCache)
+       } else {
+               defaultCache = diskCache
+       }
  }
  
  var (
diff --git a/src/cmd/go/internal/cache/prog.go b/src/cmd/go/internal/cache/prog.go

new file mode 100644 (file)

index 0000000..30f69b3
--- /dev/null
+++ b/src/cmd/go/internal/cache/prog.go
@@ -0,0 +1,427 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cache
+
+import (
+       "bufio"
+       "cmd/go/internal/base"
+       "cmd/internal/quoted"
+       "context"
+       "crypto/sha256"
+       "encoding/base64"
+       "encoding/json"
+       "errors"
+       "fmt"
+       "io"
+       "log"
+       "os"
+       "os/exec"
+       "sync"
+       "sync/atomic"
+       "time"
+)
+
+// ProgCache implements Cache via JSON messages over stdin/stdout to a child
+// helper process which can then implement whatever caching policy/mechanism it
+// wants.
+//
+// See https://github.com/golang/go/issues/59719
+type ProgCache struct {
+       cmd    *exec.Cmd
+       stdout io.ReadCloser  // from the child process
+       stdin  io.WriteCloser // to the child process
+       bw     *bufio.Writer  // to stdin
+       jenc   *json.Encoder  // to bw
+
+       // can are the commands that the child process declared that it supports.
+       // This is effectively the versioning mechanism.
+       can map[ProgCmd]bool
+
+       // fuzzDirCache is another Cache implementation to use for the FuzzDir
+       // method. In practice this is the default GOCACHE disk-based
+       // implementation.
+       //
+       // TODO(bradfitz): maybe this isn't ideal. But we'd need to extend the Cache
+       // interface and the fuzzing callers to be less disk-y to do more here.
+       fuzzDirCache Cache
+
+       closing      atomic.Bool
+       ctx          context.Context    // valid until Close via ctxClose
+       ctxCancel    context.CancelFunc // called on Close
+       readLoopDone chan struct{}      // closed when readLoop returns
+
+       mu         sync.Mutex // guards following fields
+       nextID     int64
+       inFlight   map[int64]chan<- *ProgResponse
+       outputFile map[OutputID]string // object => abs path on disk
+
+       // writeMu serializes writing to the child process.
+       // It must never be held at the same time as mu.
+       writeMu sync.Mutex
+}
+
+// ProgCmd is a command that can be issued to a child process.
+//
+// If the interface needs to grow, we can add new commands or new versioned
+// commands like "get2".
+type ProgCmd string
+
+const (
+       cmdGet   = ProgCmd("get")
+       cmdPut   = ProgCmd("put")
+       cmdClose = ProgCmd("close")
+)
+
+// ProgRequest is the JSON-encoded message that's sent from cmd/go to
+// the GOCACHEPROG child process over stdin. Each JSON object is on its
+// own line. A ProgRequest of Type "put" with BodySize > 0 will be followed
+// by a line containing a base64-encoded JSON string literal of the body.
+type ProgRequest struct {
+       // ID is a unique number per process across all requests.
+       // It must be echoed in the ProgResponse from the child.
+       ID int64
+
+       // Command is the type of request.
+       // The cmd/go tool will only send commands that were declared
+       // as supported by the child.
+       Command ProgCmd
+
+       // ActionID is non-nil for get and puts.
+       ActionID []byte `json:",omitempty"` // or nil if not used
+
+       // ObjectID is set for Type "put" and "output-file".
+       ObjectID []byte `json:",omitempty"` // or nil if not used
+
+       // Body is the body for "put" requests. It's sent after the JSON object
+       // as a base64-encoded JSON string when BodySize is non-zero.
+       // It's sent as a separate JSON value instead of being a struct field
+       // send in this JSON object so large values can be streamed in both directions.
+       // The base64 string body of a ProgRequest will always be written
+       // immediately after the JSON object and a newline.
+       Body io.Reader `json:"-"`
+
+       // BodySize is the number of bytes of Body. If zero, the body isn't written.
+       BodySize int64 `json:",omitempty"`
+}
+
+// ProgResponse is the JSON response from the child process to cmd/go.
+//
+// With the exception of the first protocol message that the child writes to its
+// stdout with ID==0 and KnownCommands populated, these are only sent in
+// response to a ProgRequest from cmd/go.
+//
+// ProgResponses can be sent in any order. The ID must match the request they're
+// replying to.
+type ProgResponse struct {
+       ID  int64  // that corresponds to ProgRequest; they can be answered out of order
+       Err string `json:",omitempty"` // if non-empty, the error
+
+       // KnownCommands is included in the first message that cache helper program
+       // writes to stdout on startup (with ID==0). It includes the
+       // ProgRequest.Command types that are supported by the program.
+       //
+       // This lets us extend the protocol gracefully over time (adding "get2",
+       // etc), or fail gracefully when needed. It also lets us verify the program
+       // wants to be a cache helper.
+       KnownCommands []ProgCmd `json:",omitempty"`
+
+       // For Get requests.
+
+       Miss     bool       `json:",omitempty"` // cache miss
+       OutputID []byte     `json:",omitempty"`
+       Size     int64      `json:",omitempty"` // in bytes
+       Time     *time.Time `json:",omitempty"` // an Entry.Time; when the object was added to the docs
+
+       // DiskPath is the absolute path on disk of the ObjectID corresponding
+       // a "get" request's ActionID (on cache hit) or a "put" request's
+       // provided ObjectID.
+       DiskPath string `json:",omitempty"`
+}
+
+// startCacheProg starts the prog binary (with optional space-separated flags)
+// and returns a Cache implementation that talks to it.
+//
+// It blocks a few seconds to wait for the child process to successfully start
+// and advertise its capabilities.
+func startCacheProg(progAndArgs string, fuzzDirCache Cache) Cache {
+       if fuzzDirCache == nil {
+               panic("missing fuzzDirCache")
+       }
+       args, err := quoted.Split(progAndArgs)
+       if err != nil {
+               base.Fatalf("GOCACHEPROG args: %v", err)
+       }
+       var prog string
+       if len(args) > 0 {
+               prog = args[0]
+               args = args[1:]
+       }
+
+       ctx, ctxCancel := context.WithCancel(context.Background())
+
+       cmd := exec.CommandContext(ctx, prog, args...)
+       out, err := cmd.StdoutPipe()
+       if err != nil {
+               base.Fatalf("StdoutPipe to GOCACHEPROG: %v", err)
+       }
+       in, err := cmd.StdinPipe()
+       if err != nil {
+               base.Fatalf("StdinPipe to GOCACHEPROG: %v", err)
+       }
+       cmd.Stderr = os.Stderr
+       cmd.Cancel = in.Close
+
+       if err := cmd.Start(); err != nil {
+               base.Fatalf("error starting GOCACHEPROG program %q: %v", prog, err)
+       }
+
+       pc := &ProgCache{
+               ctx:          ctx,
+               ctxCancel:    ctxCancel,
+               fuzzDirCache: fuzzDirCache,
+               cmd:          cmd,
+               stdout:       out,
+               stdin:        in,
+               bw:           bufio.NewWriter(in),
+               inFlight:     make(map[int64]chan<- *ProgResponse),
+               outputFile:   make(map[OutputID]string),
+               readLoopDone: make(chan struct{}),
+       }
+
+       // Register our interest in the initial protocol message from the child to
+       // us, saying what it can do.
+       capResc := make(chan *ProgResponse, 1)
+       pc.inFlight[0] = capResc
+
+       pc.jenc = json.NewEncoder(pc.bw)
+       go pc.readLoop(pc.readLoopDone)
+
+       // Give the child process a few seconds to report its capabilities. This
+       // should be instant and not require any slow work by the program.
+       timer := time.NewTicker(5 * time.Second)
+       defer timer.Stop()
+       for {
+               select {
+               case <-timer.C:
+                       log.Printf("# still waiting for GOCACHEPROG %v ...", prog)
+               case capRes := <-capResc:
+                       can := map[ProgCmd]bool{}
+                       for _, cmd := range capRes.KnownCommands {
+                               can[cmd] = true
+                       }
+                       if len(can) == 0 {
+                               base.Fatalf("GOCACHEPROG %v declared no supported commands", prog)
+                       }
+                       pc.can = can
+                       return pc
+               }
+       }
+}
+
+func (c *ProgCache) readLoop(readLoopDone chan<- struct{}) {
+       defer close(readLoopDone)
+       jd := json.NewDecoder(c.stdout)
+       for {
+               res := new(ProgResponse)
+               if err := jd.Decode(res); err != nil {
+                       if c.closing.Load() {
+                               return // quietly
+                       }
+                       if errors.Is(err, io.EOF) {
+                               c.mu.Lock()
+                               inFlight := len(c.inFlight)
+                               c.mu.Unlock()
+                               base.Fatalf("GOCACHEPROG exited pre-Close with %v pending requests", inFlight)
+                       }
+                       base.Fatalf("error reading JSON from GOCACHEPROG: %v", err)
+               }
+               c.mu.Lock()
+               ch, ok := c.inFlight[res.ID]
+               delete(c.inFlight, res.ID)
+               c.mu.Unlock()
+               if ok {
+                       ch <- res
+               } else {
+                       base.Fatalf("GOCACHEPROG sent response for unknown request ID %v", res.ID)
+               }
+       }
+}
+
+func (c *ProgCache) send(ctx context.Context, req *ProgRequest) (*ProgResponse, error) {
+       resc := make(chan *ProgResponse, 1)
+       if err := c.writeToChild(req, resc); err != nil {
+               return nil, err
+       }
+       select {
+       case res := <-resc:
+               if res.Err != "" {
+                       return nil, errors.New(res.Err)
+               }
+               return res, nil
+       case <-ctx.Done():
+               return nil, ctx.Err()
+       }
+}
+
+func (c *ProgCache) writeToChild(req *ProgRequest, resc chan<- *ProgResponse) (err error) {
+       c.mu.Lock()
+       c.nextID++
+       req.ID = c.nextID
+       c.inFlight[req.ID] = resc
+       c.mu.Unlock()
+
+       defer func() {
+               if err != nil {
+                       c.mu.Lock()
+                       delete(c.inFlight, req.ID)
+                       c.mu.Unlock()
+               }
+       }()
+
+       c.writeMu.Lock()
+       defer c.writeMu.Unlock()
+
+       if err := c.jenc.Encode(req); err != nil {
+               return err
+       }
+       if err := c.bw.WriteByte('\n'); err != nil {
+               return err
+       }
+       if req.Body != nil && req.BodySize > 0 {
+               if err := c.bw.WriteByte('"'); err != nil {
+                       return err
+               }
+               e := base64.NewEncoder(base64.StdEncoding, c.bw)
+               wrote, err := io.Copy(e, req.Body)
+               if err != nil {
+                       return err
+               }
+               if err := e.Close(); err != nil {
+                       return nil
+               }
+               if wrote != req.BodySize {
+                       return fmt.Errorf("short write writing body to GOCACHEPROG for action %x, object %x: wrote %v; expected %v",
+                               req.ActionID, req.ObjectID, wrote, req.BodySize)
+               }
+               if _, err := c.bw.WriteString("\"\n"); err != nil {
+                       return err
+               }
+       }
+       if err := c.bw.Flush(); err != nil {
+               return err
+       }
+       return nil
+}
+
+func (c *ProgCache) Get(a ActionID) (Entry, error) {
+       if !c.can[cmdGet] {
+               // They can't do a "get". Maybe they're a write-only cache.
+               //
+               // TODO(bradfitz,bcmills): figure out the proper error type here. Maybe
+               // errors.ErrUnsupported? Is entryNotFoundError even appropriate? There
+               // might be places where we rely on the fact that a recent Put can be
+               // read through a corresponding Get. Audit callers and check, and document
+               // error types on the Cache interface.
+               return Entry{}, &entryNotFoundError{}
+       }
+       res, err := c.send(c.ctx, &ProgRequest{
+               Command:  cmdGet,
+               ActionID: a[:],
+       })
+       if err != nil {
+               return Entry{}, err // TODO(bradfitz): or entryNotFoundError? Audit callers.
+       }
+       if res.Miss {
+               return Entry{}, &entryNotFoundError{}
+       }
+       e := Entry{
+               Size: res.Size,
+       }
+       if res.Time != nil {
+               e.Time = *res.Time
+       } else {
+               e.Time = time.Now()
+       }
+       if res.DiskPath == "" {
+               return Entry{}, &entryNotFoundError{errors.New("GOCACHEPROG didn't populate DiskPath on get hit")}
+       }
+       if copy(e.OutputID[:], res.OutputID) != len(res.OutputID) {
+               return Entry{}, &entryNotFoundError{errors.New("incomplete ProgResponse OutputID")}
+       }
+       c.noteOutputFile(e.OutputID, res.DiskPath)
+       return e, nil
+}
+
+func (c *ProgCache) noteOutputFile(o OutputID, diskPath string) {
+       c.mu.Lock()
+       defer c.mu.Unlock()
+       c.outputFile[o] = diskPath
+}
+
+func (c *ProgCache) OutputFile(o OutputID) string {
+       c.mu.Lock()
+       defer c.mu.Unlock()
+       return c.outputFile[o]
+}
+
+func (c *ProgCache) Put(a ActionID, file io.ReadSeeker) (_ OutputID, size int64, _ error) {
+       // Compute output ID.
+       h := sha256.New()
+       if _, err := file.Seek(0, 0); err != nil {
+               return OutputID{}, 0, err
+       }
+       size, err := io.Copy(h, file)
+       if err != nil {
+               return OutputID{}, 0, err
+       }
+       var out OutputID
+       h.Sum(out[:0])
+
+       if _, err := file.Seek(0, 0); err != nil {
+               return OutputID{}, 0, err
+       }
+
+       if !c.can[cmdPut] {
+               // Child is a read-only cache. Do nothing.
+               return out, size, nil
+       }
+
+       res, err := c.send(c.ctx, &ProgRequest{
+               Command:  cmdPut,
+               ActionID: a[:],
+               ObjectID: out[:],
+               Body:     file,
+               BodySize: size,
+       })
+       if err != nil {
+               return OutputID{}, 0, err
+       }
+       if res.DiskPath == "" {
+               return OutputID{}, 0, errors.New("GOCACHEPROG didn't return DiskPath in put response")
+       }
+       c.noteOutputFile(out, res.DiskPath)
+       return out, size, err
+}
+
+func (c *ProgCache) Close() error {
+       c.closing.Store(true)
+       var err error
+
+       // First write a "close" message to the child so it can exit nicely
+       // and clean up if it wants. Only after that exchange do we cancel
+       // the context that kills the process.
+       if c.can[cmdClose] {
+               _, err = c.send(c.ctx, &ProgRequest{Command: cmdClose})
+       }
+       c.ctxCancel()
+       <-c.readLoopDone
+       return err
+}
+
+func (c *ProgCache) FuzzDir() string {
+       // TODO(bradfitz): figure out what to do here. For now just use the
+       // disk-based default.
+       return c.fuzzDirCache.FuzzDir()
+}
diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go

index 2ad5301d9e1a2dd65f9c192237dafdd0ecb7698b..1c53e8314f7eaf69ae855e47cc71884474ade219 100644 (file)
--- a/src/cmd/go/internal/modindex/read.go
+++ b/src/cmd/go/internal/modindex/read.go
@@ -178,7 +178,7 @@ func openIndexModule(modroot string, ismodcache bool) (*Module, error) {
                 if err != nil {
                         return nil, err
                 }
-               data, _, err := cache.Default().GetMmap(id)
+               data, _, err := cache.GetMmap(cache.Default(), id)
                 if err != nil {
                         // Couldn't read from modindex. Assume we couldn't read from
                         // the index because the module hasn't been indexed yet.
@@ -186,7 +186,7 @@ func openIndexModule(modroot string, ismodcache bool) (*Module, error) {
                         if err != nil {
                                 return nil, err
                         }
-                       if err = cache.Default().PutBytes(id, data); err != nil {
+                       if err = cache.PutBytes(cache.Default(), id, data); err != nil {
                                 return nil, err
                         }
                 }
@@ -207,12 +207,12 @@ func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
                 if err != nil {
                         return nil, err
                 }
-               data, _, err := cache.Default().GetMmap(id)
+               data, _, err := cache.GetMmap(cache.Default(), id)
                 if err != nil {
                         // Couldn't read from index. Assume we couldn't read from
                         // the index because the package hasn't been indexed yet.
                         data = indexPackage(modroot, pkgdir)
-                       if err = cache.Default().PutBytes(id, data); err != nil {
+                       if err = cache.PutBytes(cache.Default(), id, data); err != nil {
                                 return nil, err
                         }
                 }
diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go

index 97f2dbdbe6cd32c64d09d4353546cda1b20e8547..31ae79c80d285396803e4e89d2f28dda83f7e1c2 100644 (file)
--- a/src/cmd/go/internal/test/test.go
+++ b/src/cmd/go/internal/test/test.go
@@ -1580,7 +1580,7 @@ func (c *runCache) tryCacheWithID(b *work.Builder, a *work.Action, id string) bo
  
         // Load list of referenced environment variables and files
         // from last run of testID, and compute hash of that content.
-       data, entry, err := cache.Default().GetBytes(testID)
+       data, entry, err := cache.GetBytes(cache.Default(), testID)
         if !bytes.HasPrefix(data, testlogMagic) || data[len(data)-1] != '\n' {
                 if cache.DebugTest {
                         if err != nil {
@@ -1601,7 +1601,7 @@ func (c *runCache) tryCacheWithID(b *work.Builder, a *work.Action, id string) bo
  
         // Parse cached result in preparation for changing run time to "(cached)".
         // If we can't parse the cached result, don't use it.
-       data, entry, err = cache.Default().GetBytes(testAndInputKey(testID, testInputsID))
+       data, entry, err = cache.GetBytes(cache.Default(), testAndInputKey(testID, testInputsID))
         if len(data) == 0 || data[len(data)-1] != '\n' {
                 if cache.DebugTest {
                         if err != nil {
@@ -1813,15 +1813,15 @@ func (c *runCache) saveOutput(a *work.Action) {
                 if cache.DebugTest {
                         fmt.Fprintf(os.Stderr, "testcache: %s: save test ID %x => input ID %x => %x\n", a.Package.ImportPath, c.id1, testInputsID, testAndInputKey(c.id1, testInputsID))
                 }
-               cache.Default().PutNoVerify(c.id1, bytes.NewReader(testlog))
-               cache.Default().PutNoVerify(testAndInputKey(c.id1, testInputsID), bytes.NewReader(a.TestOutput.Bytes()))
+               cache.PutNoVerify(cache.Default(), c.id1, bytes.NewReader(testlog))
+               cache.PutNoVerify(cache.Default(), testAndInputKey(c.id1, testInputsID), bytes.NewReader(a.TestOutput.Bytes()))
         }
         if c.id2 != (cache.ActionID{}) {
                 if cache.DebugTest {
                         fmt.Fprintf(os.Stderr, "testcache: %s: save test ID %x => input ID %x => %x\n", a.Package.ImportPath, c.id2, testInputsID, testAndInputKey(c.id2, testInputsID))
                 }
-               cache.Default().PutNoVerify(c.id2, bytes.NewReader(testlog))
-               cache.Default().PutNoVerify(testAndInputKey(c.id2, testInputsID), bytes.NewReader(a.TestOutput.Bytes()))
+               cache.PutNoVerify(cache.Default(), c.id2, bytes.NewReader(testlog))
+               cache.PutNoVerify(cache.Default(), testAndInputKey(c.id2, testInputsID), bytes.NewReader(a.TestOutput.Bytes()))
         }
  }
  
diff --git a/src/cmd/go/internal/work/buildid.go b/src/cmd/go/internal/work/buildid.go

index ea3240412c8c4b1783e56892766c2432200b03f3..a1d7599cdd9238336e63d3cb6b6d446c1103cdd0 100644 (file)
--- a/src/cmd/go/internal/work/buildid.go
+++ b/src/cmd/go/internal/work/buildid.go
@@ -515,7 +515,7 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string,
         }
  
         // Check to see if the action output is cached.
-       if file, _, err := c.GetFile(actionHash); err == nil {
+       if file, _, err := cache.GetFile(c, actionHash); err == nil {
                 if buildID, err := buildid.ReadFile(file); err == nil {
                         if printOutput {
                                 showStdout(b, c, a.actionID, "stdout")
@@ -560,8 +560,8 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string,
         return false
  }
  
-func showStdout(b *Builder, c *cache.Cache, actionID cache.ActionID, key string) error {
-       stdout, stdoutEntry, err := c.GetBytes(cache.Subkey(actionID, key))
+func showStdout(b *Builder, c cache.Cache, actionID cache.ActionID, key string) error {
+       stdout, stdoutEntry, err := cache.GetBytes(c, cache.Subkey(actionID, key))
         if err != nil {
                 return err
         }
@@ -610,7 +610,7 @@ func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error {
         // Cache output from compile/link, even if we don't do the rest.
         switch a.Mode {
         case "build":
-               c.PutBytes(cache.Subkey(a.actionID, "stdout"), a.output)
+               cache.PutBytes(c, cache.Subkey(a.actionID, "stdout"), a.output)
         case "link":
                 // Even though we don't cache the binary, cache the linker text output.
                 // We might notice that an installed binary is up-to-date but still
@@ -619,7 +619,7 @@ func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error {
                 // to make it easier to find when that's all we have.
                 for _, a1 := range a.Deps {
                         if p1 := a1.Package; p1 != nil && p1.Name == "main" {
-                               c.PutBytes(cache.Subkey(a1.actionID, "link-stdout"), a.output)
+                               cache.PutBytes(c, cache.Subkey(a1.actionID, "link-stdout"), a.output)
                                 break
                         }
                 }
diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go

index 2756b701cf06387b17136c4797fc5d691dca289c..a832b6c359bc9dfb37f51e23c5e88b2e93133270 100644 (file)
--- a/src/cmd/go/internal/work/exec.go
+++ b/src/cmd/go/internal/work/exec.go
@@ -76,7 +76,7 @@ func (b *Builder) Do(ctx context.Context, root *Action) {
                 // If we're doing real work, take time at the end to trim the cache.
                 c := cache.Default()
                 defer func() {
-                       if err := c.Trim(); err != nil {
+                       if err := c.Close(); err != nil {
                                 base.Fatalf("go: failed to trim cache: %v", err)
                         }
                 }()
@@ -993,7 +993,7 @@ func (b *Builder) checkDirectives(a *Action) error {
         return nil
  }
  
-func (b *Builder) cacheObjdirFile(a *Action, c *cache.Cache, name string) error {
+func (b *Builder) cacheObjdirFile(a *Action, c cache.Cache, name string) error {
         f, err := os.Open(a.Objdir + name)
         if err != nil {
                 return err
@@ -1003,15 +1003,15 @@ func (b *Builder) cacheObjdirFile(a *Action, c *cache.Cache, name string) error
         return err
  }
  
-func (b *Builder) findCachedObjdirFile(a *Action, c *cache.Cache, name string) (string, error) {
-       file, _, err := c.GetFile(cache.Subkey(a.actionID, name))
+func (b *Builder) findCachedObjdirFile(a *Action, c cache.Cache, name string) (string, error) {
+       file, _, err := cache.GetFile(c, cache.Subkey(a.actionID, name))
         if err != nil {
                 return "", fmt.Errorf("loading cached file %s: %w", name, err)
         }
         return file, nil
  }
  
-func (b *Builder) loadCachedObjdirFile(a *Action, c *cache.Cache, name string) error {
+func (b *Builder) loadCachedObjdirFile(a *Action, c cache.Cache, name string) error {
         cached, err := b.findCachedObjdirFile(a, c, name)
         if err != nil {
                 return err
@@ -1047,12 +1047,12 @@ func (b *Builder) cacheSrcFiles(a *Action, srcfiles []string) {
                         return
                 }
         }
-       c.PutBytes(cache.Subkey(a.actionID, "srcfiles"), buf.Bytes())
+       cache.PutBytes(c, cache.Subkey(a.actionID, "srcfiles"), buf.Bytes())
  }
  
  func (b *Builder) loadCachedVet(a *Action) error {
         c := cache.Default()
-       list, _, err := c.GetBytes(cache.Subkey(a.actionID, "srcfiles"))
+       list, _, err := cache.GetBytes(c, cache.Subkey(a.actionID, "srcfiles"))
         if err != nil {
                 return fmt.Errorf("reading srcfiles list: %w", err)
         }
@@ -1076,7 +1076,7 @@ func (b *Builder) loadCachedVet(a *Action) error {
  
  func (b *Builder) loadCachedCompiledGoFiles(a *Action) error {
         c := cache.Default()
-       list, _, err := c.GetBytes(cache.Subkey(a.actionID, "srcfiles"))
+       list, _, err := cache.GetBytes(c, cache.Subkey(a.actionID, "srcfiles"))
         if err != nil {
                 return fmt.Errorf("reading srcfiles list: %w", err)
         }
@@ -1279,7 +1279,7 @@ func (b *Builder) vet(ctx context.Context, a *Action) error {
  
         if vcfg.VetxOnly && !cfg.BuildA {
                 c := cache.Default()
-               if file, _, err := c.GetFile(key); err == nil {
+               if file, _, err := cache.GetFile(c, key); err == nil {
                         a.built = file
                         return nil
                 }
@@ -2918,7 +2918,7 @@ func (b *Builder) gccSupportsFlag(compiler []string, flag string) bool {
         var flagID cache.ActionID
         if cacheOK {
                 flagID = cache.Subkey(compilerID, "gccSupportsFlag "+flag)
-               if data, _, err := cache.Default().GetBytes(flagID); err == nil {
+               if data, _, err := cache.GetBytes(cache.Default(), flagID); err == nil {
                         supported := string(data) == "true"
                         b.flagCache[key] = supported
                         return supported
@@ -2950,7 +2950,7 @@ func (b *Builder) gccSupportsFlag(compiler []string, flag string) bool {
                 if supported {
                         s = "true"
                 }
-               cache.Default().PutBytes(flagID, []byte(s))
+               cache.PutBytes(cache.Default(), flagID, []byte(s))
         }
  
         b.flagCache[key] = supported
@@ -3002,7 +3002,7 @@ func (b *Builder) gccCompilerID(compiler string) (id cache.ActionID, ok bool) {
         h := cache.NewHash("gccCompilerID")
         fmt.Fprintf(h, "gccCompilerID %q", exe)
         key := h.Sum()
-       data, _, err := cache.Default().GetBytes(key)
+       data, _, err := cache.GetBytes(cache.Default(), key)
         if err == nil && len(data) > len(id) {
                 stats := strings.Split(string(data[:len(data)-len(id)]), "\x00")
                 if len(stats)%2 != 0 {
@@ -3050,7 +3050,7 @@ func (b *Builder) gccCompilerID(compiler string) (id cache.ActionID, ok bool) {
         }
         buf.Write(id[:])
  
-       cache.Default().PutBytes(key, buf.Bytes())
+       cache.PutBytes(cache.Default(), key, buf.Bytes())
         if b.gccCompilerIDCache == nil {
                 b.gccCompilerIDCache = make(map[string]cache.ActionID)
         }
diff --git a/src/internal/cfg/cfg.go b/src/internal/cfg/cfg.go

index f4adea2a25ff8efde4a3fca0a7c50e71c1aa5f4b..2af0ec707868947bda6c966a3156625241db12c7 100644 (file)
--- a/src/internal/cfg/cfg.go
+++ b/src/internal/cfg/cfg.go
@@ -38,6 +38,7 @@ const KnownEnv = `
         GOARM
         GOBIN
         GOCACHE
+       GOCACHEPROG
         GOENV
         GOEXE
         GOEXPERIMENT
diff --git a/src/internal/goexperiment/exp_cacheprog_off.go b/src/internal/goexperiment/exp_cacheprog_off.go

new file mode 100644 (file)

index 0000000..29aa869
--- /dev/null
+++ b/src/internal/goexperiment/exp_cacheprog_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.cacheprog
+// +build !goexperiment.cacheprog
+
+package goexperiment
+
+const CacheProg = false
+const CacheProgInt = 0
diff --git a/src/internal/goexperiment/exp_cacheprog_on.go b/src/internal/goexperiment/exp_cacheprog_on.go

new file mode 100644 (file)

index 0000000..121b299
--- /dev/null
+++ b/src/internal/goexperiment/exp_cacheprog_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.cacheprog
+// +build goexperiment.cacheprog
+
+package goexperiment
+
+const CacheProg = true
+const CacheProgInt = 1
diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go

index 8758505173b5d484dd7f91c13e6aa9447e0bede2..ae3cbaf89fa5dd646300ce26b4e710c591c17a99 100644 (file)
--- a/src/internal/goexperiment/flags.go
+++ b/src/internal/goexperiment/flags.go
@@ -105,4 +105,8 @@ type Flags struct {
         // LoopVar changes loop semantics so that each iteration gets its own
         // copy of the iteration variable.
         LoopVar bool
+
+       // CacheProg adds support to cmd/go to use a child process to implement
+       // the build cache; see https://github.com/golang/go/issues/59719.
+       CacheProg bool
  }
author	Brad Fitzpatrick <bradfitz@golang.org>
	Mon, 6 Feb 2023 03:52:06 +0000 (19:52 -0800)
committer	Brad Fitzpatrick <bradfitz@golang.org>
	Thu, 25 May 2023 00:49:37 +0000 (00:49 +0000)
src/cmd/go/internal/cache/cache.go		patch \| blob \| history
src/cmd/go/internal/cache/cache_test.go		patch \| blob \| history
src/cmd/go/internal/cache/default.go		patch \| blob \| history
src/cmd/go/internal/cache/prog.go	[new file with mode: 0644]	patch \| blob
src/cmd/go/internal/modindex/read.go		patch \| blob \| history
src/cmd/go/internal/test/test.go		patch \| blob \| history
src/cmd/go/internal/work/buildid.go		patch \| blob \| history
src/cmd/go/internal/work/exec.go		patch \| blob \| history
src/internal/cfg/cfg.go		patch \| blob \| history
src/internal/goexperiment/exp_cacheprog_off.go	[new file with mode: 0644]	patch \| blob
src/internal/goexperiment/exp_cacheprog_on.go	[new file with mode: 0644]	patch \| blob
src/internal/goexperiment/flags.go		patch \| blob \| history