From: Tim King Date: Thu, 14 Nov 2024 20:04:39 +0000 (-0800) Subject: internal/exportdata, cmd/compile/internal/noder: merge export data handling X-Git-Tag: go1.24rc1~113 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0edea47f264a4185d78e00e1e9e977d99f5c997b;p=gostls13.git internal/exportdata, cmd/compile/internal/noder: merge export data handling Unify how go/types, types2, and noder read in unified export data from GC-created files. This splits FindExportData into smaller pieces for improved code sharing. - FindPackageDefinition finds the package definition file in the ar archive. - ReadObjectHeaders reads the object headers. - ReadExportDataHeader reads the export data format header. There is a new convenience wrapper ReadUnified that combines all of these. This documents the expected archive contents. Updates noder and the importers to use these. This also adjusts when end-of-section marker ("\n$$\n") checking happens. Change-Id: Iec2179b0a1ae7f69eb12d077018f731116a77f13 Reviewed-on: https://go-review.googlesource.com/c/go/+/628155 Reviewed-by: Robert Griesemer LUCI-TryBot-Result: Go LUCI Commit-Queue: Tim King --- diff --git a/src/cmd/compile/internal/importer/gcimporter.go b/src/cmd/compile/internal/importer/gcimporter.go index 9af257730d..e0aec98231 100644 --- a/src/cmd/compile/internal/importer/gcimporter.go +++ b/src/cmd/compile/internal/importer/gcimporter.go @@ -11,10 +11,8 @@ import ( "fmt" "internal/exportdata" "internal/pkgbits" - "internal/saferio" "io" "os" - "strings" "cmd/compile/internal/types2" ) @@ -75,50 +73,15 @@ func Import(packages map[string]*types2.Package, path, srcDir string, lookup fun defer rc.Close() buf := bufio.NewReader(rc) - hdr, size, err := exportdata.FindExportData(buf) + data, err := exportdata.ReadUnified(buf) if err != nil { + err = fmt.Errorf("import %q: %v", path, err) return } + s := string(data) - switch hdr { - case "$$\n": - err = fmt.Errorf("import %q: old textual export format no longer supported (recompile package)", path) - - case "$$B\n": - var exportFormat byte - if exportFormat, err = buf.ReadByte(); err != nil { - return - } - size-- - - // The unified export format starts with a 'u'; the indexed export - // format starts with an 'i'; and the older binary export format - // starts with a 'c', 'd', or 'v' (from "version"). Select - // appropriate importer. - switch exportFormat { - case 'u': - // exported strings may contain "\n$$\n" - search backwards - var data []byte - var r io.Reader = buf - if size >= 0 { - if data, err = saferio.ReadData(r, uint64(size)); err != nil { - return - } - } else if data, err = io.ReadAll(r); err != nil { - return - } - s := string(data) - s = s[:strings.LastIndex(s, "\n$$\n")] - - input := pkgbits.NewPkgDecoder(id, s) - pkg = ReadPackage(nil, packages, input) - default: - err = fmt.Errorf("import %q: binary export format %q is no longer supported (recompile package)", path, exportFormat) - } - - default: - err = fmt.Errorf("import %q: unknown export data header: %q", path, hdr) - } + input := pkgbits.NewPkgDecoder(id, s) + pkg = ReadPackage(nil, packages, input) return } diff --git a/src/cmd/compile/internal/noder/import.go b/src/cmd/compile/internal/noder/import.go index 964b01ec42..910988f061 100644 --- a/src/cmd/compile/internal/noder/import.go +++ b/src/cmd/compile/internal/noder/import.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "internal/buildcfg" + "internal/exportdata" "internal/pkgbits" "os" pathpkg "path" @@ -22,7 +23,6 @@ import ( "cmd/compile/internal/typecheck" "cmd/compile/internal/types" "cmd/compile/internal/types2" - "cmd/internal/archive" "cmd/internal/bio" "cmd/internal/goobj" "cmd/internal/objabi" @@ -207,7 +207,7 @@ func readImportFile(path string, target *ir.Package, env *types2.Context, packag } defer f.Close() - r, end, err := findExportData(f) + data, err := readExportData(f) if err != nil { return } @@ -216,94 +216,63 @@ func readImportFile(path string, target *ir.Package, env *types2.Context, packag fmt.Printf("importing %s (%s)\n", path, f.Name()) } - c, err := r.ReadByte() - if err != nil { - return - } + pr := pkgbits.NewPkgDecoder(pkg1.Path, data) - pos := r.Offset() - - // Map export data section into memory as a single large - // string. This reduces heap fragmentation and allows returning - // individual substrings very efficiently. - var data string - data, err = base.MapFile(r.File(), pos, end-pos) - if err != nil { - return - } - - switch c { - case 'u': - // TODO(mdempsky): This seems a bit clunky. - data = strings.TrimSuffix(data, "\n$$\n") - - pr := pkgbits.NewPkgDecoder(pkg1.Path, data) - - // Read package descriptors for both types2 and compiler backend. - readPackage(newPkgReader(pr), pkg1, false) - pkg2 = importer.ReadPackage(env, packages, pr) - - default: - // Indexed format is distinguished by an 'i' byte, - // whereas previous export formats started with 'c', 'd', or 'v'. - err = fmt.Errorf("unexpected package format byte: %v", c) - return - } + // Read package descriptors for both types2 and compiler backend. + readPackage(newPkgReader(pr), pkg1, false) + pkg2 = importer.ReadPackage(env, packages, pr) - err = addFingerprint(path, f, end) + err = addFingerprint(path, data) return } -// findExportData returns a *bio.Reader positioned at the start of the -// binary export data section, and a file offset for where to stop -// reading. -func findExportData(f *os.File) (r *bio.Reader, end int64, err error) { - r = bio.NewReader(f) +// readExportData returns the contents of GC-created unified export data. +func readExportData(f *os.File) (data string, err error) { + r := bio.NewReader(f) - // check object header - line, err := r.ReadString('\n') + sz, err := exportdata.FindPackageDefinition(r.Reader) if err != nil { return } + end := r.Offset() + int64(sz) - // Is the first line an archive file signature? - if line != "!\n" { - err = fmt.Errorf("not the start of an archive file (%q)", line) + abihdr, _, err := exportdata.ReadObjectHeaders(r.Reader) + if err != nil { return } - // package export block should be first - sz := int64(archive.ReadHeader(r.Reader, "__.PKGDEF")) - if sz <= 0 { - err = errors.New("not a package file") + if expect := objabi.HeaderString(); abihdr != expect { + err = fmt.Errorf("object is [%s] expected [%s]", abihdr, expect) return } - end = r.Offset() + sz - line, err = r.ReadString('\n') + + _, err = exportdata.ReadExportDataHeader(r.Reader) if err != nil { return } - if !strings.HasPrefix(line, "go object ") { - err = fmt.Errorf("not a go object file: %s", line) - return - } - if expect := objabi.HeaderString(); line != expect { - err = fmt.Errorf("object is [%s] expected [%s]", line, expect) + pos := r.Offset() + + // Map export data section (+ end-of-section marker) into memory + // as a single large string. This reduces heap fragmentation and + // allows returning individual substrings very efficiently. + var mapped string + mapped, err = base.MapFile(r.File(), pos, end-pos) + if err != nil { return } - // process header lines - for !strings.HasPrefix(line, "$$") { - line, err = r.ReadString('\n') - if err != nil { - return - } - } + // check for end-of-section marker "\n$$\n" and remove it + const marker = "\n$$\n" - // Expect $$B\n to signal binary import format. - if line != "$$B\n" { - err = errors.New("old export format no longer supported (recompile package)") + var ok bool + data, ok = strings.CutSuffix(mapped, marker) + if !ok { + cutoff := data // include last 10 bytes in error message + if len(cutoff) >= 10 { + cutoff = cutoff[len(cutoff)-10:] + } + err = fmt.Errorf("expected $$ marker, but found %q (recompile package)", cutoff) return } @@ -312,24 +281,16 @@ func findExportData(f *os.File) (r *bio.Reader, end int64, err error) { // addFingerprint reads the linker fingerprint included at the end of // the exportdata. -func addFingerprint(path string, f *os.File, end int64) error { - const eom = "\n$$\n" +func addFingerprint(path string, data string) error { var fingerprint goobj.FingerprintType - var buf [len(fingerprint) + len(eom)]byte - if _, err := f.ReadAt(buf[:], end-int64(len(buf))); err != nil { - return err - } - - // Caller should have given us the end position of the export data, - // which should end with the "\n$$\n" marker. As a consistency check - // to make sure we're reading at the right offset, make sure we - // found the marker. - if s := string(buf[len(fingerprint):]); s != eom { - return fmt.Errorf("expected $$ marker, but found %q", s) + pos := len(data) - len(fingerprint) + if pos < 0 { + return fmt.Errorf("missing linker fingerprint in exportdata, but found %q", data) } + buf := []byte(data[pos:]) - copy(fingerprint[:], buf[:]) + copy(fingerprint[:], buf) base.Ctxt.AddImport(path, fingerprint) return nil diff --git a/src/go/internal/gcimporter/gcimporter.go b/src/go/internal/gcimporter/gcimporter.go index 451afe6fd5..ed5e5dcacd 100644 --- a/src/go/internal/gcimporter/gcimporter.go +++ b/src/go/internal/gcimporter/gcimporter.go @@ -12,10 +12,8 @@ import ( "go/types" "internal/exportdata" "internal/pkgbits" - "internal/saferio" "io" "os" - "strings" ) // Import imports a gc-generated package given its import path and srcDir, adds @@ -72,49 +70,15 @@ func Import(fset *token.FileSet, packages map[string]*types.Package, path, srcDi defer rc.Close() buf := bufio.NewReader(rc) - hdr, size, err := exportdata.FindExportData(buf) + data, err := exportdata.ReadUnified(buf) if err != nil { + err = fmt.Errorf("import %q: %v", path, err) return } + s := string(data) - switch hdr { - case "$$\n": - err = fmt.Errorf("import %q: old textual export format no longer supported (recompile package)", path) - - case "$$B\n": - var exportFormat byte - if exportFormat, err = buf.ReadByte(); err != nil { - return - } - size-- - - // The unified export format starts with a 'u'; the indexed export - // format starts with an 'i'; and the older binary export format - // starts with a 'c', 'd', or 'v' (from "version"). Select - // appropriate importer. - switch exportFormat { - case 'u': - var data []byte - var r io.Reader = buf - if size >= 0 { - if data, err = saferio.ReadData(r, uint64(size)); err != nil { - return - } - } else if data, err = io.ReadAll(r); err != nil { - return - } - s := string(data) - s = s[:strings.LastIndex(s, "\n$$\n")] - - input := pkgbits.NewPkgDecoder(id, s) - pkg = readUnifiedPackage(fset, nil, packages, input) - default: - err = fmt.Errorf("import %q: binary export format %q is no longer supported (recompile package)", path, exportFormat) - } - - default: - err = fmt.Errorf("import %q: unknown export data header: %q", path, hdr) - } + input := pkgbits.NewPkgDecoder(id, s) + pkg = readUnifiedPackage(fset, nil, packages, input) return } diff --git a/src/internal/exportdata/exportdata.go b/src/internal/exportdata/exportdata.go index 5cd7cb18c2..27675923b5 100644 --- a/src/internal/exportdata/exportdata.go +++ b/src/internal/exportdata/exportdata.go @@ -6,12 +6,16 @@ // and reading gc-generated object files. package exportdata +// This file should be kept in sync with src/cmd/compile/internal/gc/obj.go . + import ( "bufio" "bytes" "errors" "fmt" "go/build" + "internal/saferio" + "io" "os" "os/exec" "path/filepath" @@ -19,13 +23,100 @@ import ( "sync" ) -// FindExportData positions the reader r at the beginning of the -// export data section of an underlying GC-created object/archive -// file by reading from it. The reader must be positioned at the -// start of the file before calling this function. The hdr result -// is the string before the export data, either "$$" or "$$B". -func FindExportData(r *bufio.Reader) (hdr string, size int, err error) { - // TODO(taking): Merge with cmd/compile/internal/noder.findExportData. +// ReadUnified reads the contents of the unified export data from a reader r +// that contains the contents of a GC-created archive file. +// +// On success, the reader will be positioned after the end-of-section marker "\n$$\n". +// +// Supported GC-created archive files have 4 layers of nesting: +// - An archive file containing a package definition file. +// - The package definition file contains headers followed by a data section. +// Headers are lines (≤ 4kb) that do not start with "$$". +// - The data section starts with "$$B\n" followed by export data followed +// by an end of section marker "\n$$\n". (The section start "$$\n" is no +// longer supported.) +// - The export data starts with a format byte ('u') followed by the in +// the given format. (See ReadExportDataHeader for older formats.) +// +// Putting this together, the bytes in a GC-created archive files are expected +// to look like the following. +// See cmd/internal/archive for more details on ar file headers. +// +// | \n | ar file signature +// | __.PKGDEF...size...\n | ar header for __.PKGDEF including size. +// | go object <...>\n | objabi header +// | \n | other headers such as build id +// | $$B\n | binary format marker +// | u\n | unified export +// | $$\n | end-of-section marker +// | [optional padding] | padding byte (0x0A) if size is odd +// | [ar file header] | other ar files +// | [ar file data] | +func ReadUnified(r *bufio.Reader) (data []byte, err error) { + // We historically guaranteed headers at the default buffer size (4096) work. + // This ensures we can use ReadSlice throughout. + const minBufferSize = 4096 + r = bufio.NewReaderSize(r, minBufferSize) + + size, err := FindPackageDefinition(r) + if err != nil { + return + } + n := size + + objapi, headers, err := ReadObjectHeaders(r) + if err != nil { + return + } + n -= len(objapi) + for _, h := range headers { + n -= len(h) + } + + hdrlen, err := ReadExportDataHeader(r) + if err != nil { + return + } + n -= hdrlen + + // size also includes the end of section marker. Remove that many bytes from the end. + const marker = "\n$$\n" + n -= len(marker) + + if n < 0 { + err = fmt.Errorf("invalid size (%d) in the archive file: %d bytes remain without section headers (recompile package)", size, n) + } + + // Read n bytes from buf. + data, err = saferio.ReadData(r, uint64(n)) + if err != nil { + return + } + + // Check for marker at the end. + var suffix [len(marker)]byte + _, err = io.ReadFull(r, suffix[:]) + if err != nil { + return + } + if s := string(suffix[:]); s != marker { + err = fmt.Errorf("read %q instead of end-of-section marker (%q)", s, marker) + return + } + + return +} + +// FindPackageDefinition positions the reader r at the beginning of a package +// definition file ("__.PKGDEF") within a GC-created archive by reading +// from it, and returns the size of the package definition file in the archive. +// +// The reader must be positioned at the start of the archive file before calling +// this function, and "__.PKGDEF" is assumed to be the first file in the archive. +// +// See cmd/internal/archive for details on the archive format. +func FindPackageDefinition(r *bufio.Reader) (size int, err error) { + // Uses ReadSlice to limit risk of malformed inputs. // Read first line to make sure this is an object file. line, err := r.ReadSlice('\n') @@ -47,31 +138,96 @@ func FindExportData(r *bufio.Reader) (hdr string, size int, err error) { return } - // Read first line of __.PKGDEF data, so that line - // is once again the first line of the input. + return +} + +// ReadObjectHeaders reads object headers from the reader. Object headers are +// lines that do not start with an end-of-section marker "$$". The first header +// is the objabi header. On success, the reader will be positioned at the beginning +// of the end-of-section marker. +// +// It returns an error if any header does not fit in r.Size() bytes. +func ReadObjectHeaders(r *bufio.Reader) (objapi string, headers []string, err error) { + // line is a temporary buffer for headers. + // Use bounded reads (ReadSlice, Peek) to limit risk of malformed inputs. + var line []byte + + // objapi header should be the first line if line, err = r.ReadSlice('\n'); err != nil { err = fmt.Errorf("can't find export data (%v)", err) return } + objapi = string(line) + + // objapi header begins with "go object ". + if !strings.HasPrefix(objapi, "go object ") { + err = fmt.Errorf("not a go object file: %s", objapi) + return + } + + // process remaining object header lines + for { + // check for an end of section marker "$$" + line, err = r.Peek(2) + if err != nil { + return + } + if string(line) == "$$" { + return // stop + } + + // read next header + line, err = r.ReadSlice('\n') + if err != nil { + return + } + headers = append(headers, string(line)) + } +} - // Now at __.PKGDEF in archive. line should begin with "go object ". - if !strings.HasPrefix(string(line), "go object ") { - err = fmt.Errorf("not a Go object file") +// ReadExportDataHeader reads the export data header and format from r. +// It returns the number of bytes read, or an error if the format is no longer +// supported or it failed to read. +// +// The only currently supported format is binary export data in the +// unified export format. +func ReadExportDataHeader(r *bufio.Reader) (n int, err error) { + // Read export data header. + line, err := r.ReadSlice('\n') + if err != nil { return } - size -= len(line) - // Skip over object header to export data. - // Begins after first line starting with $$. - for line[0] != '$' { - if line, err = r.ReadSlice('\n'); err != nil { - err = fmt.Errorf("can't find export data (%v)", err) + hdr := string(line) + switch hdr { + case "$$\n": + err = fmt.Errorf("old textual export format no longer supported (recompile package)") + return + + case "$$B\n": + var format byte + format, err = r.ReadByte() + if err != nil { + return + } + // The unified export format starts with a 'u'. + switch format { + case 'u': + default: + // Older no longer supported export formats include: + // indexed export format which started with an 'i'; and + // the older binary export format which started with a 'c', + // 'd', or 'v' (from "version"). + err = fmt.Errorf("binary export format %q is no longer supported (recompile package)", format) return } - size -= len(line) + + default: + err = fmt.Errorf("unknown export data header: %q", hdr) + return } - hdr = string(line) + n = len(hdr) + 1 // + 1 is for 'u' return }