internal/exportdata, cmd/compile/internal/noder: merge export data handling

author Tim King <taking@google.com>

Thu, 14 Nov 2024 20:04:39 +0000 (12:04 -0800)

committer Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

Fri, 22 Nov 2024 00:04:39 +0000 (00:04 +0000)
author Tim King <taking@google.com>
Thu, 14 Nov 2024 20:04:39 +0000 (12:04 -0800)
committer Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Fri, 22 Nov 2024 00:04:39 +0000 (00:04 +0000)
diff --git a/src/cmd/compile/internal/importer/gcimporter.go b/src/cmd/compile/internal/importer/gcimporter.go

index 9af257730d8a1a57c81aaa54fc0596e30d87469a..e0aec9823189495e789eeb2c84faaf1a0e47280e 100644 (file)
--- a/src/cmd/compile/internal/importer/gcimporter.go
+++ b/src/cmd/compile/internal/importer/gcimporter.go
@@ -11,10 +11,8 @@ import (
         "fmt"
         "internal/exportdata"
         "internal/pkgbits"
-       "internal/saferio"
         "io"
         "os"
-       "strings"
  
         "cmd/compile/internal/types2"
  )
@@ -75,50 +73,15 @@ func Import(packages map[string]*types2.Package, path, srcDir string, lookup fun
         defer rc.Close()
  
         buf := bufio.NewReader(rc)
-       hdr, size, err := exportdata.FindExportData(buf)
+       data, err := exportdata.ReadUnified(buf)
         if err != nil {
+               err = fmt.Errorf("import %q: %v", path, err)
                 return
         }
+       s := string(data)
  
-       switch hdr {
-       case "$$\n":
-               err = fmt.Errorf("import %q: old textual export format no longer supported (recompile package)", path)
-
-       case "$$B\n":
-               var exportFormat byte
-               if exportFormat, err = buf.ReadByte(); err != nil {
-                       return
-               }
-               size--
-
-               // The unified export format starts with a 'u'; the indexed export
-               // format starts with an 'i'; and the older binary export format
-               // starts with a 'c', 'd', or 'v' (from "version"). Select
-               // appropriate importer.
-               switch exportFormat {
-               case 'u':
-                       // exported strings may contain "\n$$\n" - search backwards
-                       var data []byte
-                       var r io.Reader = buf
-                       if size >= 0 {
-                               if data, err = saferio.ReadData(r, uint64(size)); err != nil {
-                                       return
-                               }
-                       } else if data, err = io.ReadAll(r); err != nil {
-                               return
-                       }
-                       s := string(data)
-                       s = s[:strings.LastIndex(s, "\n$$\n")]
-
-                       input := pkgbits.NewPkgDecoder(id, s)
-                       pkg = ReadPackage(nil, packages, input)
-               default:
-                       err = fmt.Errorf("import %q: binary export format %q is no longer supported (recompile package)", path, exportFormat)
-               }
-
-       default:
-               err = fmt.Errorf("import %q: unknown export data header: %q", path, hdr)
-       }
+       input := pkgbits.NewPkgDecoder(id, s)
+       pkg = ReadPackage(nil, packages, input)
  
         return
  }
diff --git a/src/cmd/compile/internal/noder/import.go b/src/cmd/compile/internal/noder/import.go

index 964b01ec42438095c48f889b37e5eed3ea64e444..910988f0612ca99ed1b5ba26f09d7957e54b51f3 100644 (file)
--- a/src/cmd/compile/internal/noder/import.go
+++ b/src/cmd/compile/internal/noder/import.go
@@ -8,6 +8,7 @@ import (
         "errors"
         "fmt"
         "internal/buildcfg"
+       "internal/exportdata"
         "internal/pkgbits"
         "os"
         pathpkg "path"
@@ -22,7 +23,6 @@ import (
         "cmd/compile/internal/typecheck"
         "cmd/compile/internal/types"
         "cmd/compile/internal/types2"
-       "cmd/internal/archive"
         "cmd/internal/bio"
         "cmd/internal/goobj"
         "cmd/internal/objabi"
@@ -207,7 +207,7 @@ func readImportFile(path string, target *ir.Package, env *types2.Context, packag
         }
         defer f.Close()
  
-       r, end, err := findExportData(f)
+       data, err := readExportData(f)
         if err != nil {
                 return
         }
@@ -216,94 +216,63 @@ func readImportFile(path string, target *ir.Package, env *types2.Context, packag
                 fmt.Printf("importing %s (%s)\n", path, f.Name())
         }
  
-       c, err := r.ReadByte()
-       if err != nil {
-               return
-       }
+       pr := pkgbits.NewPkgDecoder(pkg1.Path, data)
  
-       pos := r.Offset()
-
-       // Map export data section into memory as a single large
-       // string. This reduces heap fragmentation and allows returning
-       // individual substrings very efficiently.
-       var data string
-       data, err = base.MapFile(r.File(), pos, end-pos)
-       if err != nil {
-               return
-       }
-
-       switch c {
-       case 'u':
-               // TODO(mdempsky): This seems a bit clunky.
-               data = strings.TrimSuffix(data, "\n$$\n")
-
-               pr := pkgbits.NewPkgDecoder(pkg1.Path, data)
-
-               // Read package descriptors for both types2 and compiler backend.
-               readPackage(newPkgReader(pr), pkg1, false)
-               pkg2 = importer.ReadPackage(env, packages, pr)
-
-       default:
-               // Indexed format is distinguished by an 'i' byte,
-               // whereas previous export formats started with 'c', 'd', or 'v'.
-               err = fmt.Errorf("unexpected package format byte: %v", c)
-               return
-       }
+       // Read package descriptors for both types2 and compiler backend.
+       readPackage(newPkgReader(pr), pkg1, false)
+       pkg2 = importer.ReadPackage(env, packages, pr)
  
-       err = addFingerprint(path, f, end)
+       err = addFingerprint(path, data)
         return
  }
  
-// findExportData returns a *bio.Reader positioned at the start of the
-// binary export data section, and a file offset for where to stop
-// reading.
-func findExportData(f *os.File) (r *bio.Reader, end int64, err error) {
-       r = bio.NewReader(f)
+// readExportData returns the contents of GC-created unified export data.
+func readExportData(f *os.File) (data string, err error) {
+       r := bio.NewReader(f)
  
-       // check object header
-       line, err := r.ReadString('\n')
+       sz, err := exportdata.FindPackageDefinition(r.Reader)
         if err != nil {
                 return
         }
+       end := r.Offset() + int64(sz)
  
-       // Is the first line an archive file signature?
-       if line != "!<arch>\n" {
-               err = fmt.Errorf("not the start of an archive file (%q)", line)
+       abihdr, _, err := exportdata.ReadObjectHeaders(r.Reader)
+       if err != nil {
                 return
         }
  
-       // package export block should be first
-       sz := int64(archive.ReadHeader(r.Reader, "__.PKGDEF"))
-       if sz <= 0 {
-               err = errors.New("not a package file")
+       if expect := objabi.HeaderString(); abihdr != expect {
+               err = fmt.Errorf("object is [%s] expected [%s]", abihdr, expect)
                 return
         }
-       end = r.Offset() + sz
-       line, err = r.ReadString('\n')
+
+       _, err = exportdata.ReadExportDataHeader(r.Reader)
         if err != nil {
                 return
         }
  
-       if !strings.HasPrefix(line, "go object ") {
-               err = fmt.Errorf("not a go object file: %s", line)
-               return
-       }
-       if expect := objabi.HeaderString(); line != expect {
-               err = fmt.Errorf("object is [%s] expected [%s]", line, expect)
+       pos := r.Offset()
+
+       // Map export data section (+ end-of-section marker) into memory
+       // as a single large string. This reduces heap fragmentation and
+       // allows returning individual substrings very efficiently.
+       var mapped string
+       mapped, err = base.MapFile(r.File(), pos, end-pos)
+       if err != nil {
                 return
         }
  
-       // process header lines
-       for !strings.HasPrefix(line, "$$") {
-               line, err = r.ReadString('\n')
-               if err != nil {
-                       return
-               }
-       }
+       // check for end-of-section marker "\n$$\n" and remove it
+       const marker = "\n$$\n"
  
-       // Expect $$B\n to signal binary import format.
-       if line != "$$B\n" {
-               err = errors.New("old export format no longer supported (recompile package)")
+       var ok bool
+       data, ok = strings.CutSuffix(mapped, marker)
+       if !ok {
+               cutoff := data // include last 10 bytes in error message
+               if len(cutoff) >= 10 {
+                       cutoff = cutoff[len(cutoff)-10:]
+               }
+               err = fmt.Errorf("expected $$ marker, but found %q (recompile package)", cutoff)
                 return
         }
  
@@ -312,24 +281,16 @@ func findExportData(f *os.File) (r *bio.Reader, end int64, err error) {
  
  // addFingerprint reads the linker fingerprint included at the end of
  // the exportdata.
-func addFingerprint(path string, f *os.File, end int64) error {
-       const eom = "\n$$\n"
+func addFingerprint(path string, data string) error {
         var fingerprint goobj.FingerprintType
  
-       var buf [len(fingerprint) + len(eom)]byte
-       if _, err := f.ReadAt(buf[:], end-int64(len(buf))); err != nil {
-               return err
-       }
-
-       // Caller should have given us the end position of the export data,
-       // which should end with the "\n$$\n" marker. As a consistency check
-       // to make sure we're reading at the right offset, make sure we
-       // found the marker.
-       if s := string(buf[len(fingerprint):]); s != eom {
-               return fmt.Errorf("expected $$ marker, but found %q", s)
+       pos := len(data) - len(fingerprint)
+       if pos < 0 {
+               return fmt.Errorf("missing linker fingerprint in exportdata, but found %q", data)
         }
+       buf := []byte(data[pos:])
  
-       copy(fingerprint[:], buf[:])
+       copy(fingerprint[:], buf)
         base.Ctxt.AddImport(path, fingerprint)
  
         return nil
diff --git a/src/go/internal/gcimporter/gcimporter.go b/src/go/internal/gcimporter/gcimporter.go

index 451afe6fd5a7b39f7c6316461a1f63c34cc16abd..ed5e5dcacdc2320af22579873419dbaa487e820c 100644 (file)
--- a/src/go/internal/gcimporter/gcimporter.go
+++ b/src/go/internal/gcimporter/gcimporter.go
@@ -12,10 +12,8 @@ import (
         "go/types"
         "internal/exportdata"
         "internal/pkgbits"
-       "internal/saferio"
         "io"
         "os"
-       "strings"
  )
  
  // Import imports a gc-generated package given its import path and srcDir, adds
@@ -72,49 +70,15 @@ func Import(fset *token.FileSet, packages map[string]*types.Package, path, srcDi
         defer rc.Close()
  
         buf := bufio.NewReader(rc)
-       hdr, size, err := exportdata.FindExportData(buf)
+       data, err := exportdata.ReadUnified(buf)
         if err != nil {
+               err = fmt.Errorf("import %q: %v", path, err)
                 return
         }
+       s := string(data)
  
-       switch hdr {
-       case "$$\n":
-               err = fmt.Errorf("import %q: old textual export format no longer supported (recompile package)", path)
-
-       case "$$B\n":
-               var exportFormat byte
-               if exportFormat, err = buf.ReadByte(); err != nil {
-                       return
-               }
-               size--
-
-               // The unified export format starts with a 'u'; the indexed export
-               // format starts with an 'i'; and the older binary export format
-               // starts with a 'c', 'd', or 'v' (from "version"). Select
-               // appropriate importer.
-               switch exportFormat {
-               case 'u':
-                       var data []byte
-                       var r io.Reader = buf
-                       if size >= 0 {
-                               if data, err = saferio.ReadData(r, uint64(size)); err != nil {
-                                       return
-                               }
-                       } else if data, err = io.ReadAll(r); err != nil {
-                               return
-                       }
-                       s := string(data)
-                       s = s[:strings.LastIndex(s, "\n$$\n")]
-
-                       input := pkgbits.NewPkgDecoder(id, s)
-                       pkg = readUnifiedPackage(fset, nil, packages, input)
-               default:
-                       err = fmt.Errorf("import %q: binary export format %q is no longer supported (recompile package)", path, exportFormat)
-               }
-
-       default:
-               err = fmt.Errorf("import %q: unknown export data header: %q", path, hdr)
-       }
+       input := pkgbits.NewPkgDecoder(id, s)
+       pkg = readUnifiedPackage(fset, nil, packages, input)
  
         return
  }
diff --git a/src/internal/exportdata/exportdata.go b/src/internal/exportdata/exportdata.go

index 5cd7cb18c25c583509c623a5aa322830db59c9fc..27675923b528d8519b485b8af33cdcd622ae1cfd 100644 (file)
--- a/src/internal/exportdata/exportdata.go
+++ b/src/internal/exportdata/exportdata.go
@@ -6,12 +6,16 @@
  // and reading gc-generated object files.
  package exportdata
  
+// This file should be kept in sync with src/cmd/compile/internal/gc/obj.go .
+
  import (
         "bufio"
         "bytes"
         "errors"
         "fmt"
         "go/build"
+       "internal/saferio"
+       "io"
         "os"
         "os/exec"
         "path/filepath"
@@ -19,13 +23,100 @@ import (
         "sync"
  )
  
-// FindExportData positions the reader r at the beginning of the
-// export data section of an underlying GC-created object/archive
-// file by reading from it. The reader must be positioned at the
-// start of the file before calling this function. The hdr result
-// is the string before the export data, either "$$" or "$$B".
-func FindExportData(r *bufio.Reader) (hdr string, size int, err error) {
-       // TODO(taking): Merge with cmd/compile/internal/noder.findExportData.
+// ReadUnified reads the contents of the unified export data from a reader r
+// that contains the contents of a GC-created archive file.
+//
+// On success, the reader will be positioned after the end-of-section marker "\n$$\n".
+//
+// Supported GC-created archive files have 4 layers of nesting:
+//   - An archive file containing a package definition file.
+//   - The package definition file contains headers followed by a data section.
+//     Headers are lines (≤ 4kb) that do not start with "$$".
+//   - The data section starts with "$$B\n" followed by export data followed
+//     by an end of section marker "\n$$\n". (The section start "$$\n" is no
+//     longer supported.)
+//   - The export data starts with a format byte ('u') followed by the <data> in
+//     the given format. (See ReadExportDataHeader for older formats.)
+//
+// Putting this together, the bytes in a GC-created archive file are expected
+// to look like the following.
+// See cmd/internal/archive for more details on ar file headers.
+//
+// | !<arch>\n             | ar file signature
+// | __.PKGDEF...size...\n | ar header for __.PKGDEF including size.
+// | go object <...>\n     | objabi header
+// | <optional headers>\n  | other headers such as build id
+// | $$B\n                 | binary format marker
+// | u<data>\n             | unified export <data>
+// | $$\n                  | end-of-section marker
+// | [optional padding]    | padding byte (0x0A) if size is odd
+// | [ar file header]      | other ar files
+// | [ar file data]        |
+func ReadUnified(r *bufio.Reader) (data []byte, err error) {
+       // We historically guaranteed headers at the default buffer size (4096) work.
+       // This ensures we can use ReadSlice throughout.
+       const minBufferSize = 4096
+       r = bufio.NewReaderSize(r, minBufferSize)
+
+       size, err := FindPackageDefinition(r)
+       if err != nil {
+               return
+       }
+       n := size
+
+       objapi, headers, err := ReadObjectHeaders(r)
+       if err != nil {
+               return
+       }
+       n -= len(objapi)
+       for _, h := range headers {
+               n -= len(h)
+       }
+
+       hdrlen, err := ReadExportDataHeader(r)
+       if err != nil {
+               return
+       }
+       n -= hdrlen
+
+       // size also includes the end of section marker. Remove that many bytes from the end.
+       const marker = "\n$$\n"
+       n -= len(marker)
+
+       if n < 0 { // must stop here: uint64(n) below would wrap to a huge read length
+               return nil, fmt.Errorf("invalid size (%d) in the archive file: %d bytes remain without section headers (recompile package)", size, n)
+       }
+
+       // Read the n bytes of export data from r.
+       data, err = saferio.ReadData(r, uint64(n))
+       if err != nil {
+               return
+       }
+
+       // Check for marker at the end.
+       var suffix [len(marker)]byte
+       _, err = io.ReadFull(r, suffix[:])
+       if err != nil {
+               return
+       }
+       if s := string(suffix[:]); s != marker {
+               err = fmt.Errorf("read %q instead of end-of-section marker (%q)", s, marker)
+               return
+       }
+
+       return
+}
+
+// FindPackageDefinition positions the reader r at the beginning of a package
+// definition file ("__.PKGDEF") within a GC-created archive by reading
+// from it, and returns the size of the package definition file in the archive.
+//
+// The reader must be positioned at the start of the archive file before calling
+// this function, and "__.PKGDEF" is assumed to be the first file in the archive.
+//
+// See cmd/internal/archive for details on the archive format.
+func FindPackageDefinition(r *bufio.Reader) (size int, err error) {
+       // Uses ReadSlice to limit risk of malformed inputs.
  
         // Read first line to make sure this is an object file.
         line, err := r.ReadSlice('\n')
@@ -47,31 +138,96 @@ func FindExportData(r *bufio.Reader) (hdr string, size int, err error) {
                 return
         }
  
-       // Read first line of __.PKGDEF data, so that line
-       // is once again the first line of the input.
+       return
+}
+
+// ReadObjectHeaders reads object headers from the reader. Object headers are
+// the lines that precede the first line starting with "$$". The first header
+// is the objabi header. On success, the reader will be positioned at the beginning
+// of that "$$" line, which is left unread.
+//
+// It returns an error if any header does not fit in r.Size() bytes.
+func ReadObjectHeaders(r *bufio.Reader) (objapi string, headers []string, err error) {
+       // line is a temporary buffer for headers.
+       // Use bounded reads (ReadSlice, Peek) to limit risk of malformed inputs.
+       var line []byte
+
+       // the objabi header should be the first line; it is returned as objapi
         if line, err = r.ReadSlice('\n'); err != nil {
                 err = fmt.Errorf("can't find export data (%v)", err)
                 return
         }
+       objapi = string(line)
+
+       // the objabi header must begin with "go object ".
+       if !strings.HasPrefix(objapi, "go object ") {
+               err = fmt.Errorf("not a go object file: %s", objapi)
+               return
+       }
+
+       // process remaining object header lines
+       for {
+               // stop at the first line starting with "$$"; Peek does not consume it
+               line, err = r.Peek(2)
+               if err != nil {
+                       return
+               }
+               if string(line) == "$$" {
+                       return // stop
+               }
+
+               // read next header line, including its trailing '\n'
+               line, err = r.ReadSlice('\n')
+               if err != nil {
+                       return
+               }
+               headers = append(headers, string(line))
+       }
+}
  
-       // Now at __.PKGDEF in archive. line should begin with "go object ".
-       if !strings.HasPrefix(string(line), "go object ") {
-               err = fmt.Errorf("not a Go object file")
+// ReadExportDataHeader reads the data section start line and format byte from r.
+// It returns the number of bytes read, or an error if the format is no longer
+// supported or it failed to read.
+//
+// The only currently supported format is binary export data in the
+// unified export format.
+func ReadExportDataHeader(r *bufio.Reader) (n int, err error) {
+       // Read the section start line: "$$B\n", or the unsupported "$$\n".
+       line, err := r.ReadSlice('\n')
+       if err != nil {
                 return
         }
-       size -= len(line)
  
-       // Skip over object header to export data.
-       // Begins after first line starting with $$.
-       for line[0] != '$' {
-               if line, err = r.ReadSlice('\n'); err != nil {
-                       err = fmt.Errorf("can't find export data (%v)", err)
+       hdr := string(line)
+       switch hdr {
+       case "$$\n":
+               err = fmt.Errorf("old textual export format no longer supported (recompile package)")
+               return
+
+       case "$$B\n":
+               var format byte
+               format, err = r.ReadByte()
+               if err != nil {
+                       return
+               }
+               // The unified export format starts with a 'u'.
+               switch format {
+               case 'u':
+               default:
+                       // Older no longer supported export formats include:
+                       // indexed export format which started with an 'i'; and
+                       // the older binary export format which started with a 'c',
+                       // 'd', or 'v' (from "version").
+                       err = fmt.Errorf("binary export format %q is no longer supported (recompile package)", format)
                         return
                 }
-               size -= len(line)
+
+       default:
+               err = fmt.Errorf("unknown export data header: %q", hdr)
+               return
         }
-       hdr = string(line)
  
+       n = len(hdr) + 1 // start line plus the one format byte ('u')
         return
  }
author	Tim King <taking@google.com>
	Thu, 14 Nov 2024 20:04:39 +0000 (12:04 -0800)
committer	Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
	Fri, 22 Nov 2024 00:04:39 +0000 (00:04 +0000)
src/cmd/compile/internal/importer/gcimporter.go		patch \| blob \| history
src/cmd/compile/internal/noder/import.go		patch \| blob \| history
src/go/internal/gcimporter/gcimporter.go		patch \| blob \| history
src/internal/exportdata/exportdata.go		patch \| blob \| history