cmd/go: ignore UTF8 BOM when reading source code

author unbyte <i@shangyes.net>

Tue, 15 Jun 2021 17:11:05 +0000 (17:11 +0000)

committer Bryan C. Mills <bcmills@google.com>

Tue, 15 Jun 2021 18:42:11 +0000 (18:42 +0000)
author unbyte <i@shangyes.net>
Tue, 15 Jun 2021 17:11:05 +0000 (17:11 +0000)
committer Bryan C. Mills <bcmills@google.com>
Tue, 15 Jun 2021 18:42:11 +0000 (18:42 +0000)
diff --git a/src/cmd/go/internal/imports/read.go b/src/cmd/go/internal/imports/read.go

index 5e270781d77bfeaca88908daf97d95d287ce7308..70d5190450502d042c2a2d0ed3d17105d50e6dbc 100644 (file)
--- a/src/cmd/go/internal/imports/read.go
+++ b/src/cmd/go/internal/imports/read.go
@@ -8,6 +8,7 @@ package imports
  
  import (
         "bufio"
+       "bytes"
         "errors"
         "io"
         "unicode/utf8"
@@ -22,6 +23,19 @@ type importReader struct {
         nerr int
  }
  
+var bom = []byte{0xef, 0xbb, 0xbf}
+
+// newImportReader returns an importReader that reads from b, first dropping a leading UTF-8 BOM if present.
+func newImportReader(b *bufio.Reader) *importReader {
+       // Skip the BOM: per https://golang.org/ref/spec#Source_code_representation,
+       // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
+       // if it is the first Unicode code point in the source text.
+       if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
+               b.Discard(3) // error ignored: Peek above already returned these 3 bytes
+       }
+       return &importReader{b: b}
+}
+
  func isIdent(c byte) bool {
         return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
  }
@@ -201,7 +215,7 @@ func (r *importReader) readImport(imports *[]string) {
  // ReadComments is like io.ReadAll, except that it only reads the leading
  // block of comments in the file.
  func ReadComments(f io.Reader) ([]byte, error) {
-       r := &importReader{b: bufio.NewReader(f)}
+       r := newImportReader(bufio.NewReader(f))
         r.peekByte(true)
         if r.err == nil && !r.eof {
                 // Didn't reach EOF, so must have found a non-space byte. Remove it.
@@ -213,7 +227,7 @@ func ReadComments(f io.Reader) ([]byte, error) {
  // ReadImports is like io.ReadAll, except that it expects a Go file as input
  // and stops reading the input once the imports have completed.
  func ReadImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) {
-       r := &importReader{b: bufio.NewReader(f)}
+       r := newImportReader(bufio.NewReader(f))
  
         r.readKeyword("package")
         r.readIdent()
diff --git a/src/cmd/go/internal/imports/read_test.go b/src/cmd/go/internal/imports/read_test.go

index 6ea356f1ff05acbd8aee3e9de9b5564cb1537dd3..6a1a6524a116d50c8036baa0b0ff3d60e08ccf1a 100644 (file)
--- a/src/cmd/go/internal/imports/read_test.go
+++ b/src/cmd/go/internal/imports/read_test.go
@@ -66,6 +66,10 @@ var readImportsTests = []readTest{
                 `,
                 "",
         },
+       {
+               "\ufeff𝔻" + `package p; import "x";ℙvar x = 1`,
+               "",
+       },
  }
  
  var readCommentsTests = []readTest{
@@ -81,6 +85,10 @@ var readCommentsTests = []readTest{
                 `ℙpackage p; import . "x"`,
                 "",
         },
+       {
+               "\ufeff𝔻" + `ℙpackage p; import . "x"`,
+               "",
+       },
         {
                 `// foo
  
@@ -90,6 +98,19 @@ var readCommentsTests = []readTest{
                 
                 /*/ zot */
  
+               // asdf
+               ℙHello, world`,
+               "",
+       },
+       {
+               "\ufeff𝔻" + `// foo
+
+               /* bar */
+
+               /* quux */ // baz
+
+               /*/ zot */
+
                 // asdf
                 ℙHello, world`,
                 "",
@@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
                         in = tt.in[:j] + tt.in[j+len("ℙ"):]
                         testOut = tt.in[:j]
                 }
+               d := strings.Index(tt.in, "𝔻")
+               if d >= 0 {
+                       in = in[:d] + in[d+len("𝔻"):]
+                       testOut = testOut[d+len("𝔻"):]
+               }
                 r := strings.NewReader(in)
                 buf, err := read(r)
                 if err != nil {
diff --git a/src/cmd/go/testdata/script/build_ignore_leading_bom.txt b/src/cmd/go/testdata/script/build_ignore_leading_bom.txt

new file mode 100644 (file)

index 0000000..37141f3
--- /dev/null
+++ b/src/cmd/go/testdata/script/build_ignore_leading_bom.txt
@@ -0,0 +1,27 @@
+# Per https://golang.org/ref/spec#Source_code_representation:
+# a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
+# if it is the first Unicode code point in the source text.
+
+go list -f 'Imports: {{.Imports}} EmbedFiles: {{.EmbedFiles}}' .
+stdout '^Imports: \[embed m/hello\] EmbedFiles: \[.*file\]$'
+
+-- go.mod --
+module m
+
+go 1.16
+-- m.go --
+package main
+
+import (
+       _ "embed"
+
+       "m/hello"
+)
+
+//go:embed file
+var s string
+
+-- hello/hello.go --
+package hello
+
+-- file --
diff --git a/src/go/build/read.go b/src/go/build/read.go

index aa7c6ee59eb6d3350e262b3bb6f928646d40d10c..b98c7938a855993454b5e374ff99d9b9ebd04b95 100644 (file)
--- a/src/go/build/read.go
+++ b/src/go/build/read.go
@@ -6,6 +6,7 @@ package build
  
  import (
         "bufio"
+       "bytes"
         "errors"
         "fmt"
         "go/ast"
@@ -28,9 +29,19 @@ type importReader struct {
         pos  token.Position
  }
  
+var bom = []byte{0xef, 0xbb, 0xbf}
+
  func newImportReader(name string, r io.Reader) *importReader {
+       b := bufio.NewReader(r)
+       // Remove leading UTF-8 BOM.
+       // Per https://golang.org/ref/spec#Source_code_representation:
+       // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
+       // if it is the first Unicode code point in the source text.
+       if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
+               b.Discard(3)
+       }
         return &importReader{
-               b: bufio.NewReader(r),
+               b: b,
                 pos: token.Position{
                         Filename: name,
                         Line:     1,
diff --git a/src/go/build/read_test.go b/src/go/build/read_test.go

index 32e6bae008841bb4db5b7fb456729f3e6267828e..1e5e1c2de2e6d110b539058fe64711c97547ee89 100644 (file)
--- a/src/go/build/read_test.go
+++ b/src/go/build/read_test.go
@@ -66,6 +66,10 @@ var readGoInfoTests = []readTest{
                 `,
                 "",
         },
+       {
+               "\ufeff𝔻" + `package p; import "x";ℙvar x = 1`,
+               "",
+       },
  }
  
  var readCommentsTests = []readTest{
@@ -81,6 +85,10 @@ var readCommentsTests = []readTest{
                 `ℙpackage p; import . "x"`,
                 "",
         },
+       {
+               "\ufeff𝔻" + `ℙpackage p; import . "x"`,
+               "",
+       },
         {
                 `// foo
  
@@ -90,6 +98,19 @@ var readCommentsTests = []readTest{
  
                 /*/ zot */
  
+               // asdf
+               ℙHello, world`,
+               "",
+       },
+       {
+               "\ufeff𝔻" + `// foo
+
+               /* bar */
+
+               /* quux */ // baz
+
+               /*/ zot */
+
                 // asdf
                 ℙHello, world`,
                 "",
@@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
                         in = tt.in[:j] + tt.in[j+len("ℙ"):]
                         testOut = tt.in[:j]
                 }
+               d := strings.Index(tt.in, "𝔻")
+               if d >= 0 {
+                       in = in[:d] + in[d+len("𝔻"):]
+                       testOut = testOut[d+len("𝔻"):]
+               }
                 r := strings.NewReader(in)
                 buf, err := read(r)
                 if err != nil {
@@ -264,6 +290,12 @@ var readEmbedTests = []struct {
                  test:3:14:y
                  test:3:16:z`,
         },
+       {
+               "\ufeffpackage p\nimport \"embed\"\n//go:embed x y z\nvar files embed.FS",
+               `test:3:12:x
+                test:3:14:y
+                test:3:16:z`,
+       },
         {
                 "package p\nimport \"embed\"\nvar s = \"/*\"\n//go:embed x\nvar files embed.FS",
                 `test:4:12:x`,
@@ -292,6 +324,10 @@ var readEmbedTests = []struct {
                 "package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
                 "",
         },
+       {
+               "\ufeffpackage p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
+               "",
+       },
  }
  
  func TestReadEmbed(t *testing.T) {
author	unbyte <i@shangyes.net>
	Tue, 15 Jun 2021 17:11:05 +0000 (17:11 +0000)
committer	Bryan C. Mills <bcmills@google.com>
	Tue, 15 Jun 2021 18:42:11 +0000 (18:42 +0000)
src/cmd/go/internal/imports/read.go		patch \| blob \| history
src/cmd/go/internal/imports/read_test.go		patch \| blob \| history
src/cmd/go/testdata/script/build_ignore_leading_bom.txt	[new file with mode: 0644]	patch \| blob
src/go/build/read.go		patch \| blob \| history
src/go/build/read_test.go		patch \| blob \| history