go/build: use custom file readers to avoid I/O

author Russ Cox <rsc@golang.org>

Fri, 14 Sep 2012 16:22:45 +0000 (12:22 -0400)

committer Russ Cox <rsc@golang.org>

Fri, 14 Sep 2012 16:22:45 +0000 (12:22 -0400)
author Russ Cox <rsc@golang.org>
Fri, 14 Sep 2012 16:22:45 +0000 (12:22 -0400)
committer Russ Cox <rsc@golang.org>
Fri, 14 Sep 2012 16:22:45 +0000 (12:22 -0400)
diff --git a/src/pkg/go/build/build.go b/src/pkg/go/build/build.go

index 53daa6db2fb13105127c172d99de3efa2aa0fcb3..43ad4531ed266f7b7a7c08d9583547673a8b9575 100644 (file)
--- a/src/pkg/go/build/build.go
+++ b/src/pkg/go/build/build.go
@@ -512,7 +512,13 @@ Found:
                 if err != nil {
                         return p, err
                 }
-               data, err := ioutil.ReadAll(f)
+
+               var data []byte
+               if strings.HasSuffix(filename, ".go") {
+                       data, err = readImports(f, false)
+               } else {
+                       data, err = readComments(f)
+               }
                 f.Close()
                 if err != nil {
                         return p, fmt.Errorf("read %s: %v", filename, err)
diff --git a/src/pkg/go/build/read.go b/src/pkg/go/build/read.go

new file mode 100644 (file)

index 0000000..c8079df
--- /dev/null
+++ b/src/pkg/go/build/read.go
@@ -0,0 +1,238 @@
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package build
+
+import (
+       "bufio"
+       "errors"
+       "io"
+)
+
+type importReader struct { // scanner state shared by readComments and readImports
+       b    *bufio.Reader // input; readByte takes one byte at a time from it
+       buf  []byte        // every byte read so far, appended by readByte; this is what the readers return
+       peek byte          // byte found by peekByte but not yet consumed; 0 means nothing is pending
+       err  error         // error recorded so far, if any; io.EOF is tracked in eof instead
+       eof  bool          // set by readByte once the input has returned io.EOF
+       nerr int           // counts peekByte calls made after err was set, to catch a caller that loops
+}
+
+func isIdent(c byte) bool { // reports whether c is accepted as part of an identifier or keyword
+       return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= 0x80 // ASCII letter, digit, or underscore; any byte >= 0x80 is accepted without decoding
+}
+
+var (
+       errSyntax = errors.New("syntax error")            // recorded by syntaxError; readImports can be asked to suppress it
+       errNUL    = errors.New("unexpected NUL in input") // recorded by readByte when it reads a zero byte
+)
+
+// syntaxError records a syntax error, but only if no other error (I/O, NUL, or an earlier syntax error) has already been recorded.
+func (r *importReader) syntaxError() {
+       if r.err == nil {
+               r.err = errSyntax
+       }
+}
+
+// readByte reads the next byte from the input, saves it in buf, and returns it.
+// On EOF it sets r.eof and returns 0; on any other error, or on a NUL byte, it records the error in r.err (unless one is already recorded) and returns 0.
+func (r *importReader) readByte() byte {
+       c, err := r.b.ReadByte()
+       if err == nil {
+               r.buf = append(r.buf, c) // saved before the NUL check, so a NUL byte still ends up in buf
+               if c == 0 {
+                       err = errNUL
+               }
+       }
+       if err != nil {
+               if err == io.EOF {
+                       r.eof = true // EOF is not stored in r.err
+               } else if r.err == nil {
+                       r.err = err // keep only the first error
+               }
+               c = 0
+       }
+       return c
+}
+
+// peekByte returns the next byte from the input reader but does not advance beyond it.
+// If skipSpace is set, peekByte first skips leading spaces, semicolons, and comments. Once r.err is set it returns 0 without reading.
+func (r *importReader) peekByte(skipSpace bool) byte {
+       if r.err != nil {
+               if r.nerr++; r.nerr > 10000 { // this many calls after an error means the caller never noticed r.err
+                       panic("go/build: import reader looping")
+               }
+               return 0
+       }
+
+       // Use r.peek as first input byte.
+       // Don't just return r.peek here: it might have been left by peekByte(false)
+       // and this might be peekByte(true).
+       c := r.peek
+       if c == 0 { // nothing pending, so read a fresh byte
+               c = r.readByte()
+       }
+       for r.err == nil && !r.eof {
+               if skipSpace {
+                       // For the purposes of this reader, semicolons are never necessary to
+                       // understand the input and are treated as spaces.
+                       switch c {
+                       case ' ', '\f', '\t', '\r', '\n', ';':
+                               c = r.readByte()
+                               continue
+
+                       case '/':
+                               c = r.readByte()
+                               if c == '/' { // line comment: read through the end of the line
+                                       for c != '\n' && r.err == nil && !r.eof {
+                                               c = r.readByte()
+                                       }
+                               } else if c == '*' { // block comment: read through its closing delimiter
+                                       var c1 byte
+                                       for (c != '*' || c1 != '/') && r.err == nil { // slide the two-byte window (c, c1) until it holds the terminator
+                                               if r.eof {
+                                                       r.syntaxError() // comment never closed
+                                               }
+                                               c, c1 = c1, r.readByte()
+                                       }
+                               } else {
+                                       r.syntaxError() // a slash that does not start a comment
+                               }
+                               c = r.readByte() // step past the last byte examined
+                               continue
+                       }
+               }
+               break // c is not skippable, or skipSpace is false
+       }
+       r.peek = c // remember it so the next peekByte or nextByte sees the same byte
+       return r.peek
+}
+
+// nextByte is like peekByte but advances beyond the returned byte, so the following peekByte reads new input.
+func (r *importReader) nextByte(skipSpace bool) byte {
+       c := r.peekByte(skipSpace)
+       r.peek = 0 // mark the byte as consumed
+       return c
+}
+
+// readKeyword skips leading space and comments, then reads the given keyword from the input.
+// If the keyword is not present, or is immediately followed by another identifier byte, readKeyword records a syntax error.
+func (r *importReader) readKeyword(kw string) {
+       r.peekByte(true) // skip space and comments; the first non-space byte is left pending in r.peek
+       for i := 0; i < len(kw); i++ {
+               if r.nextByte(false) != kw[i] {
+                       r.syntaxError()
+                       return
+               }
+       }
+       if isIdent(r.peekByte(false)) { // the keyword must not be a prefix of a longer identifier
+               r.syntaxError()
+       }
+}
+
+// readIdent skips leading space and comments, then reads an identifier (any run of isIdent bytes) from the input.
+// If an identifier is not present, readIdent records a syntax error.
+func (r *importReader) readIdent() {
+       c := r.peekByte(true)
+       if !isIdent(c) {
+               r.syntaxError()
+               return
+       }
+       for isIdent(r.peekByte(false)) {
+               r.peek = 0 // consume the identifier byte; the first non-identifier byte stays pending
+       }
+}
+
+// readString reads a quoted string literal (raw or interpreted) from the input, skipping leading space and comments.
+// If a string literal is not present, or it is not terminated, readString records a syntax error.
+func (r *importReader) readString() {
+       switch r.nextByte(true) {
+       case '`':
+               for r.err == nil {
+                       if r.nextByte(false) == '`' {
+                               break
+                       }
+                       if r.eof {
+                               r.syntaxError() // input ended inside the raw string
+                       }
+               }
+       case '"':
+               for r.err == nil {
+                       c := r.nextByte(false)
+                       if c == '"' {
+                               break
+                       }
+                       if r.eof || c == '\n' {
+                               r.syntaxError() // input or line ended inside the interpreted string
+                       }
+                       if c == '\\' {
+                               r.nextByte(false) // skip the escaped byte so an escaped quote does not end the string
+                       }
+               }
+       default:
+               r.syntaxError() // next token is not a string literal
+       }
+}
+
+// readImport reads an import clause - optional identifier or dot followed by quoted string -
+// from the input. Any syntax error is recorded in r.err by the helpers it calls.
+func (r *importReader) readImport() {
+       c := r.peekByte(true)
+       if c == '.' {
+               r.peek = 0 // consume the dot of a dot import
+       } else if isIdent(c) {
+               r.readIdent() // import name, including the blank identifier
+       }
+       r.readString()
+}
+
+// readComments is like ioutil.ReadAll, except that it only reads the leading
+// block of comments in the file, together with the spaces and semicolons around them. It returns the bytes read and any recorded error.
+func readComments(f io.Reader) ([]byte, error) {
+       r := &importReader{b: bufio.NewReader(f)}
+       r.peekByte(true) // skips spaces and comments, accumulating every byte in r.buf
+       if r.err == nil && !r.eof {
+               // Didn't reach EOF, so must have found a non-space byte. Remove it.
+               r.buf = r.buf[:len(r.buf)-1]
+       }
+       return r.buf, r.err
+}
+
+// readImports is like ioutil.ReadAll, except that it expects a Go file as input
+// and stops reading the input once the imports have completed. If reportSyntaxError is false, a syntax error is not returned: the rest of the input is read and returned instead.
+func readImports(f io.Reader, reportSyntaxError bool) ([]byte, error) {
+       r := &importReader{b: bufio.NewReader(f)}
+
+       r.readKeyword("package")
+       r.readIdent()
+       for r.peekByte(true) == 'i' { // cheap test for another import declaration; readKeyword verifies the whole word
+               r.readKeyword("import")
+               if r.peekByte(true) == '(' {
+                       r.nextByte(false) // consume the opening parenthesis
+                       for r.peekByte(true) != ')' && r.err == nil {
+                               r.readImport()
+                       }
+                       r.nextByte(false) // consume the closing parenthesis (returns 0 if an error was recorded)
+               } else {
+                       r.readImport()
+               }
+       }
+
+       // If we stopped successfully before EOF, we read a byte that told us we were done.
+       // Return all but that last byte, which would cause a syntax error if we let it through.
+       if r.err == nil && !r.eof {
+               return r.buf[:len(r.buf)-1], nil
+       }
+
+       // If we stopped for a syntax error, consume the whole file so that
+       // we are sure we don't change the errors that go/parser returns.
+       if r.err == errSyntax && !reportSyntaxError {
+               r.err = nil // clear it so the loop below runs and only a new read error is returned
+               for r.err == nil && !r.eof {
+                       r.readByte()
+               }
+       }
+
+       return r.buf, r.err
+}
diff --git a/src/pkg/go/build/read_test.go b/src/pkg/go/build/read_test.go

new file mode 100644 (file)

index 0000000..2dcc120
--- /dev/null
+++ b/src/pkg/go/build/read_test.go
@@ -0,0 +1,226 @@
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package build
+
+import (
+       "io"
+       "strings"
+       "testing"
+)
+
+const quote = "`" // a back quote, spliced into test inputs that are themselves written as raw strings
+
+type readTest struct {
+       // Test input contains ℙ where the function under test should stop reading; with no ℙ, the whole input is expected back.
+       in  string
+       err string // text expected within the error message; empty means the read must succeed
+}
+
+var readImportsTests = []readTest{ // inputs readImports must accept without error; ℙ, when present, marks where reading should stop
+       {
+               `package p`,
+               "",
+       },
+       {
+               `package p; import "x"`,
+               "",
+       },
+       {
+               `package p; import . "x"`,
+               "",
+       },
+       {
+               `package p; import "x";ℙvar x = 1`,
+               "",
+       },
+       {
+               `package p
+               
+               // comment
+               
+               import "x"
+               import _ "x"
+               import a "x"
+               
+               /* comment */
+               
+               import (
+                       "x" /* comment */
+                       _ "x"
+                       a "x" // comment
+                       ` + quote + `x` + quote + `
+                       _ /*comment*/ ` + quote + `x` + quote + `
+                       a ` + quote + `x` + quote + `
+               )
+               import (
+               )
+               import ()
+               import()import()import()
+               import();import();import()
+               
+               ℙvar x = 1
+               `,
+               "",
+       },
+}
+
+var readCommentsTests = []readTest{ // inputs for readComments; ℙ marks the end of the leading comment block it should return
+       {
+               `ℙpackage p`,
+               "",
+       },
+       {
+               `ℙpackage p; import "x"`,
+               "",
+       },
+       {
+               `ℙpackage p; import . "x"`,
+               "",
+       },
+       {
+               `// foo
+
+               /* bar */
+
+               /* quux */ // baz
+               
+               /*/ zot */
+
+               // asdf
+               ℙHello, world`,
+               "",
+       },
+}
+
+func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, error)) { // runs read on every test input and checks the returned bytes or error against the expectation
+       for i, tt := range tests {
+               var in, testOut string
+               j := strings.Index(tt.in, "ℙ")
+               if j < 0 { // no marker: the whole input should come back
+                       in = tt.in
+                       testOut = tt.in
+               } else {
+                       in = tt.in[:j] + tt.in[j+len("ℙ"):] // feed the input with the marker removed
+                       testOut = tt.in[:j]                 // expect only the part before the marker
+               }
+               r := strings.NewReader(in)
+               buf, err := read(r)
+               if err != nil {
+                       if tt.err == "" {
+                               t.Errorf("#%d: err=%q, expected success (%q)", i, err, string(buf))
+                               continue
+                       }
+                       if !strings.Contains(err.Error(), tt.err) {
+                               t.Errorf("#%d: err=%q, expected %q", i, err, tt.err)
+                               continue
+                       }
+                       continue // failed as expected; the output is not checked
+               }
+               if err == nil && tt.err != "" {
+                       t.Errorf("#%d: success, expected %q", i, tt.err)
+                       continue
+               }
+
+               out := string(buf)
+               if out != testOut {
+                       t.Errorf("#%d: wrong output:\nhave %q\nwant %q\n", i, out, testOut)
+               }
+       }
+}
+
+func TestReadImports(t *testing.T) {
+       testRead(t, readImportsTests, func(r io.Reader) ([]byte, error) { return readImports(r, true) }) // true: syntax errors are reported, so any would fail these cases
+}
+
+func TestReadComments(t *testing.T) {
+       testRead(t, readCommentsTests, readComments) // readComments already has the signature testRead expects
+}
+
+var readFailuresTests = []readTest{ // inputs readImports must reject, each paired with text expected in the error
+       {
+               `package`,
+               "syntax error",
+       },
+       {
+               "package p\n\x00\nimport `math`\n",
+               "unexpected NUL in input",
+       },
+       {
+               `package p; import`,
+               "syntax error",
+       },
+       {
+               `package p; import "`,
+               "syntax error",
+       },
+       {
+               "package p; import ` \n\n",
+               "syntax error",
+       },
+       {
+               `package p; import "x`,
+               "syntax error",
+       },
+       {
+               `package p; import _`,
+               "syntax error",
+       },
+       {
+               `package p; import _ "`,
+               "syntax error",
+       },
+       {
+               `package p; import _ "x`,
+               "syntax error",
+       },
+       {
+               `package p; import .`,
+               "syntax error",
+       },
+       {
+               `package p; import . "`,
+               "syntax error",
+       },
+       {
+               `package p; import . "x`,
+               "syntax error",
+       },
+       {
+               `package p; import (`,
+               "syntax error",
+       },
+       {
+               `package p; import ("`,
+               "syntax error",
+       },
+       {
+               `package p; import ("x`,
+               "syntax error",
+       },
+       {
+               `package p; import ("x"`,
+               "syntax error",
+       },
+}
+
+func TestReadFailures(t *testing.T) {
+       // Errors should be reported (true arg to readImports): every input must fail with its listed error text.
+       testRead(t, readFailuresTests, func(r io.Reader) ([]byte, error) { return readImports(r, true) })
+}
+
+func TestReadFailuresIgnored(t *testing.T) {
+       // Syntax errors should not be reported (false arg to readImports).
+       // Instead, entire file should be the output and no error.
+       // Copy the table and clear the expected error on every case except the NUL one, which must still fail.
+       tests := make([]readTest, len(readFailuresTests))
+       copy(tests, readFailuresTests) // work on a copy so readFailuresTests itself is not modified
+       for i := range tests {
+               tt := &tests[i]
+               if !strings.Contains(tt.err, "NUL") {
+                       tt.err = ""
+               }
+       }
+       testRead(t, tests, func(r io.Reader) ([]byte, error) { return readImports(r, false) })
+}
author	Russ Cox <rsc@golang.org>
	Fri, 14 Sep 2012 16:22:45 +0000 (12:22 -0400)
committer	Russ Cox <rsc@golang.org>
	Fri, 14 Sep 2012 16:22:45 +0000 (12:22 -0400)
src/pkg/go/build/build.go		patch \| blob \| history
src/pkg/go/build/read.go	[new file with mode: 0644]	patch \| blob
src/pkg/go/build/read_test.go	[new file with mode: 0644]	patch \| blob