cmd/compile/internal/gc: escape binary export data so it contains no '$'

author Robert Griesemer <gri@golang.org>

Fri, 23 Oct 2015 23:01:09 +0000 (16:01 -0700)

committer Robert Griesemer <gri@golang.org>

Mon, 26 Oct 2015 17:27:17 +0000 (17:27 +0000)
author Robert Griesemer <gri@golang.org>
Fri, 23 Oct 2015 23:01:09 +0000 (16:01 -0700)
committer Robert Griesemer <gri@golang.org>
Mon, 26 Oct 2015 17:27:17 +0000 (17:27 +0000)
diff --git a/src/cmd/compile/internal/gc/bexport.go b/src/cmd/compile/internal/gc/bexport.go

index 1b2a520f0bf14a5aa695b34ba0308a98b5c29c98..cb15af08ea506bd870152188f8f5f0ea61d66825 100644 (file)
--- a/src/cmd/compile/internal/gc/bexport.go
+++ b/src/cmd/compile/internal/gc/bexport.go
@@ -828,10 +828,8 @@ func (p *exporter) string(s string) {
                 p.tracef("%q ", s)
         }
         p.rawInt64(int64(len(s)))
-       w, err := obj.Bwritestring(p.out, s)
-       p.written += w
-       if w != len(s) || err != nil {
-               Fatalf("write error: %v (wrote %d bytes of %d)", err, w, len(s))
+       for i := 0; i < len(s); i++ {
+               p.byte(s[i])
         }
  }
  
@@ -843,22 +841,39 @@ func (p *exporter) marker(m byte) {
         p.rawInt64(int64(p.written))
  }
  
-func (p *exporter) byte(b byte) {
-       obj.Bputc(p.out, b)
-       p.written++
-}
-
  // rawInt64 should only be used by low-level encoders
  func (p *exporter) rawInt64(x int64) {
         var tmp [binary.MaxVarintLen64]byte
         n := binary.PutVarint(tmp[:], x)
-       w, err := p.out.Write(tmp[:n])
-       p.written += w
-       if err != nil {
-               Fatalf("write error: %v", err)
+       for i := 0; i < n; i++ {
+               p.byte(tmp[i])
         }
  }
  
+// byte is the bottleneck interface to write to p.out.
+// byte escapes b as follows (any encoding does that
+// hides '$'):
+//
+//     '$'  => '|' 'S'
+//     '|'  => '|' '|'
+//
+// Necessary so other tools can find the end of the
+// export data by searching for "$$".
+func (p *exporter) byte(b byte) {
+       switch b {
+       case '$':
+               // write '$' as '|' 'S'
+               b = 'S'
+               fallthrough
+       case '|':
+               // write '|' as '|' '|'
+               obj.Bputc(p.out, '|')
+               p.written++
+       }
+       obj.Bputc(p.out, b)
+       p.written++
+}
+
  // tracef is like fmt.Printf but it rewrites the format string
  // to take care of indentation.
  func (p *exporter) tracef(format string, args ...interface{}) {
diff --git a/src/cmd/compile/internal/gc/bimport.go b/src/cmd/compile/internal/gc/bimport.go

index 962d4a765144c6eb174e4a6d11e4b1be476a56d2..08e5dd7e16c1e0ecb57ef9128add39504a60ea23 100644 (file)
--- a/src/cmd/compile/internal/gc/bimport.go
+++ b/src/cmd/compile/internal/gc/bimport.go
@@ -573,10 +573,8 @@ func (p *importer) string() string {
                 } else {
                         p.buf = p.buf[:n]
                 }
-               r := obj.Bread(p.in, p.buf)
-               p.read += r
-               if r != n {
-                       Fatalf("read error: read %d bytes of %d", r, n)
+               for i := 0; i < n; i++ {
+                       p.buf[i] = p.byte()
                 }
                 return string(p.buf)
         }
@@ -595,15 +593,6 @@ func (p *importer) marker(want byte) {
         }
  }
  
-func (p *importer) byte() byte {
-       if c := obj.Bgetc(p.in); c >= 0 {
-               p.read++
-               return byte(c)
-       }
-       Fatalf("read error")
-       return 0
-}
-
  // rawInt64 should only be used by low-level decoders
  func (p *importer) rawInt64() int64 {
         i, err := binary.ReadVarint(p)
@@ -617,3 +606,29 @@ func (p *importer) rawInt64() int64 {
  func (p *importer) ReadByte() (byte, error) {
         return p.byte(), nil
  }
+
+// byte is the bottleneck interface for reading from p.in.
+// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
+func (p *importer) byte() byte {
+       c := obj.Bgetc(p.in)
+       p.read++
+       if c < 0 {
+               Fatalf("read error")
+       }
+       if c == '|' {
+               c = obj.Bgetc(p.in)
+               p.read++
+               if c < 0 {
+                       Fatalf("read error")
+               }
+               switch c {
+               case 'S':
+                       c = '$'
+               case '|':
+                       // nothing to do
+               default:
+                       Fatalf("unexpected escape sequence in export data")
+               }
+       }
+       return byte(c)
+}
diff --git a/src/cmd/compile/internal/gc/export.go b/src/cmd/compile/internal/gc/export.go

index d38810b8095a62fc284164f53930172597454471..f69fa9069920850f67bdc4f8243ac9f93dde963b 100644 (file)
--- a/src/cmd/compile/internal/gc/export.go
+++ b/src/cmd/compile/internal/gc/export.go
@@ -375,13 +375,9 @@ func dumpexport() {
                         if n, err := bout.Write(copy.Bytes()); n != size || err != nil {
                                 Fatalf("error writing export data: got %d bytes, want %d bytes, err = %v", n, size, err)
                         }
-
-                       // verify there's no "\n$$\n" inside the export data
-                       // TODO(gri) fragile - the end marker needs to be fixed
-                       // TODO(gri) investigate if exporting a string containing "\n$$\n"
-                       //           causes problems (old and new format)
-                       if bytes.Index(copy.Bytes(), []byte("\n$$\n")) >= 0 {
-                               Fatalf("export data contains end marker in its midst")
+                       // export data must contain no '$' so that we can find the end by searching for "$$"
+                       if bytes.IndexByte(copy.Bytes(), '$') >= 0 {
+                               Fatalf("export data contains $")
                         }
  
                         // verify that we can read the copied export data back in
diff --git a/src/go/internal/gcimporter/bimport.go b/src/go/internal/gcimporter/bimport.go

index 5358f4dfc9bfb7ddc79785b44743303557c1b944..ce36218d19fa11178b7410da021833301d1ce995 100644 (file)
--- a/src/go/internal/gcimporter/bimport.go
+++ b/src/go/internal/gcimporter/bimport.go
@@ -20,27 +20,24 @@ import (
  // If data is obviously malformed, an error is returned but in
  // general it is not recommended to call BImportData on untrusted data.
  func BImportData(imports map[string]*types.Package, data []byte, path string) (int, *types.Package, error) {
-       // determine low-level encoding format
-       read := 0
-       var format byte = 'm' // missing format
-       if len(data) > 0 {
-               format = data[0]
-               data = data[1:]
-               read++
+       p := importer{
+               imports: imports,
+               data:    data,
         }
-       if format != 'c' && format != 'd' {
-               return read, nil, fmt.Errorf("invalid encoding format in export data: got %q; want 'c' or 'd'", format)
+       p.buf = p.bufarray[:]
+
+       // read low-level encoding format
+       switch format := p.byte(); format {
+       case 'c':
+               // compact format - nothing to do
+       case 'd':
+               p.debugFormat = true
+       default:
+               return p.read, nil, fmt.Errorf("invalid encoding format in export data: got %q; want 'c' or 'd'", format)
         }
  
         // --- generic export data ---
  
-       p := importer{
-               imports:     imports,
-               data:        data,
-               debugFormat: format == 'd',
-               read:        read,
-       }
-
         if v := p.string(); v != "v0" {
                 return p.read, nil, fmt.Errorf("unknown version: %s", v)
         }
@@ -103,6 +100,8 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
                 _ = p.typ().(*types.Named)
         }
  
+       // ignore compiler-specific import data
+
         // complete interfaces
         for _, typ := range p.typList {
                 if it, ok := typ.(*types.Interface); ok {
@@ -122,10 +121,12 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
  }
  
  type importer struct {
-       imports map[string]*types.Package
-       data    []byte
-       pkgList []*types.Package
-       typList []types.Type
+       imports  map[string]*types.Package
+       data     []byte
+       buf      []byte   // for reading strings
+       bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib
+       pkgList  []*types.Package
+       typList  []types.Type
  
         debugFormat bool
         read        int // bytes read
@@ -440,7 +441,7 @@ func exported(name string) bool {
  }
  
  func (p *importer) value() constant.Value {
-       switch kind := constant.Kind(p.int()); kind {
+       switch tag := p.tagOrIndex(); tag {
         case falseTag:
                 return constant.MakeBool(false)
         case trueTag:
@@ -456,7 +457,7 @@ func (p *importer) value() constant.Value {
         case stringTag:
                 return constant.MakeString(p.string())
         default:
-               panic(fmt.Sprintf("unexpected value kind %d", kind))
+               panic(fmt.Sprintf("unexpected value tag %d", tag))
         }
  }
  
@@ -517,7 +518,11 @@ func (p *importer) tagOrIndex() int {
  }
  
  func (p *importer) int() int {
-       return int(p.int64())
+       x := p.int64()
+       if int64(int(x)) != x {
+               panic("exported integer too large")
+       }
+       return int(x)
  }
  
  func (p *importer) int64() int64 {
@@ -533,21 +538,25 @@ func (p *importer) string() string {
                 p.marker('s')
         }
  
-       var b []byte
         if n := int(p.rawInt64()); n > 0 {
-               b = p.data[:n]
-               p.data = p.data[n:]
-               p.read += n
+               if cap(p.buf) < n {
+                       p.buf = make([]byte, n)
+               } else {
+                       p.buf = p.buf[:n]
+               }
+               for i := 0; i < n; i++ {
+                       p.buf[i] = p.byte()
+               }
+               return string(p.buf)
         }
-       return string(b)
+
+       return ""
  }
  
  func (p *importer) marker(want byte) {
-       if got := p.data[0]; got != want {
+       if got := p.byte(); got != want {
                 panic(fmt.Sprintf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read))
         }
-       p.data = p.data[1:]
-       p.read++
  
         pos := p.read
         if n := int(p.rawInt64()); n != pos {
@@ -557,12 +566,41 @@ func (p *importer) marker(want byte) {
  
  // rawInt64 should only be used by low-level decoders
  func (p *importer) rawInt64() int64 {
-       i, n := binary.Varint(p.data)
-       p.data = p.data[n:]
-       p.read += n
+       i, err := binary.ReadVarint(p)
+       if err != nil {
+               panic(fmt.Sprintf("read error: %v", err))
+       }
         return i
  }
  
+// needed for binary.ReadVarint in rawInt64
+func (p *importer) ReadByte() (byte, error) {
+       return p.byte(), nil
+}
+
+// byte is the bottleneck interface for reading p.data.
+// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
+func (p *importer) byte() byte {
+       b := p.data[0]
+       r := 1
+       if b == '|' {
+               b = p.data[1]
+               r = 2
+               switch b {
+               case 'S':
+                       b = '$'
+               case '|':
+                       // nothing to do
+               default:
+                       panic("unexpected escape sequence in export data")
+               }
+       }
+       p.data = p.data[r:]
+       p.read += r
+       return b
+
+}
+
  // ----------------------------------------------------------------------------
  // Export format
author	Robert Griesemer <gri@golang.org>
	Fri, 23 Oct 2015 23:01:09 +0000 (16:01 -0700)
committer	Robert Griesemer <gri@golang.org>
	Mon, 26 Oct 2015 17:27:17 +0000 (17:27 +0000)
src/cmd/compile/internal/gc/bexport.go		patch \| blob \| history
src/cmd/compile/internal/gc/bimport.go		patch \| blob \| history
src/cmd/compile/internal/gc/export.go		patch \| blob \| history
src/go/internal/gcimporter/bimport.go		patch \| blob \| history