cmd/compile/internal/syntax: implement comment reporting in scanner

author Robert Griesemer <gri@golang.org>

Thu, 18 Jan 2018 05:42:51 +0000 (21:42 -0800)

committer Robert Griesemer <gri@golang.org>

Mon, 12 Feb 2018 22:57:57 +0000 (22:57 +0000)
author Robert Griesemer <gri@golang.org>
Thu, 18 Jan 2018 05:42:51 +0000 (21:42 -0800)
committer Robert Griesemer <gri@golang.org>
Mon, 12 Feb 2018 22:57:57 +0000 (22:57 +0000)
diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go

index 0a872e03ef9b0961f1095412ec90233cef4d31c4..25bbbeec69a9b79deefe5a67b6db239264ff1f78 100644 (file)
--- a/src/cmd/compile/internal/syntax/parser.go
+++ b/src/cmd/compile/internal/syntax/parser.go
@@ -38,24 +38,31 @@ func (p *parser) init(base *src.PosBase, r io.Reader, errh ErrorHandler, pragh P
         p.mode = mode
         p.scanner.init(
                 r,
-               // Error and pragma handlers for scanner.
-               // Because the (line, col) positions passed to these
-               // handlers are always at or after the current reading
-               // position, it is save to use the most recent position
+               // Error and directive handler for scanner.
+               // Because the (line, col) position passed to the
+               // handler is always at or after the current reading
+               // position, it is safe to use the most recent position
                 // base to compute the corresponding Pos value.
                 func(line, col uint, msg string) {
-                       p.error_at(p.pos_at(line, col), msg)
-               },
-               func(line, col uint, text string) {
-                       const prefix = "line "
-                       if strings.HasPrefix(text, prefix) {
-                               p.updateBase(line, col+uint(len(prefix)), text[len(prefix):])
+                       if msg[0] != '/' {
+                               p.error_at(p.pos_at(line, col), msg)
                                 return
                         }
-                       if pragh != nil {
+
+                       // otherwise it must be a comment containing a line or go: directive
+                       text := commentText(msg)
+                       col += 2 // text starts after // or /*
+                       if strings.HasPrefix(text, "line ") {
+                               p.updateBase(line, col+5, text[5:])
+                               return
+                       }
+
+                       // go: directive (but be conservative and test)
+                       if pragh != nil && strings.HasPrefix(text, "go:") {
                                 p.pragma |= pragh(p.pos_at(line, col), text)
                         }
                 },
+               directives,
         )
  
         p.first = nil
@@ -109,6 +116,20 @@ func (p *parser) updateBase(line, col uint, text string) {
         p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n) /*uint(n2)*/)
  }
  
+func commentText(s string) string {
+       if s[:2] == "/*" {
+               return s[2 : len(s)-2] // lop off /* and */
+       }
+
+       // line comment (does not include newline)
+       // (on Windows, lines end in \r\n, so the comment text may end in \r)
+       i := len(s)
+       if s[i-1] == '\r' {
+               i--
+       }
+       return s[2:i] // lop off // and \r at end, if any
+}
+
  func trailingDigits(text string) (uint, uint, bool) {
         // Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
         i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go

index be406d9753030d810ca4edb1bc08ede53df36f9e..1e0ff2e3cc27c7f64ba12b2575f973848f2bc1d1 100644 (file)
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -19,9 +19,17 @@ import (
         "unicode/utf8"
  )
  
+// The mode flags below control which comments are reported
+// by calling the error handler. If no flag is set, comments
+// are ignored.
+const (
+       comments   uint = 1 << iota // call handler for all comments
+       directives                  // call handler for directives only
+)
+
  type scanner struct {
         source
-       pragh  func(line, col uint, msg string)
+       mode   uint
         nlsemi bool // if set '\n' and EOF translate to ';'
  
         // current token, valid after calling next()
@@ -33,25 +41,32 @@ type scanner struct {
         prec      int      // valid if tok is _Operator, _AssignOp, or _IncOp
  }
  
-func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string)) {
+func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) {
         s.source.init(src, errh)
-       s.pragh = pragh
+       s.mode = mode
         s.nlsemi = false
  }
  
  // next advances the scanner by reading the next token.
  //
-// If a read, source encoding, or lexical error occurs, next
-// calls the error handler installed with init. The handler
-// must exist.
+// If a read, source encoding, or lexical error occurs, next calls
+// the installed error handler with the respective error position
+// and message. The error message is guaranteed to be non-empty and
+// never starts with a '/'. The error handler must exist.
+//
+// If the scanner mode includes the comments flag and a comment
+// (including comments containing directives) is encountered, the
+// error handler is also called with each comment position and text
+// (including opening /* or // and closing */, but without a newline
+// at the end of line comments). Comment text always starts with a /
+// which can be used to distinguish these handler calls from errors.
  //
-// If a //line or //go: directive is encountered at the start
-// of a line, next calls the directive handler pragh installed
-// with init, if not nil.
+// If the scanner mode includes the directives (but not the comments)
+// flag, only comments containing a //line, /*line, or //go: directive
+// are reported, in the same way as regular comments. Directives in
+// //-style comments are only recognized if they are at the beginning
+// of a line.
  //
-// The (line, col) position passed to the error and directive
-// handler is always at or after the current source reading
-// position.
  func (s *scanner) next() {
         nlsemi := s.nlsemi
         s.nlsemi = false
@@ -565,6 +580,10 @@ func (s *scanner) rawString() {
         s.tok = _Literal
  }
  
+func (s *scanner) comment(text string) {
+       s.errh(s.line, s.col, text)
+}
+
  func (s *scanner) skipLine(r rune) {
         for r >= 0 {
                 if r == '\n' {
@@ -578,14 +597,20 @@ func (s *scanner) skipLine(r rune) {
  func (s *scanner) lineComment() {
         r := s.getr()
  
+       if s.mode&comments != 0 {
+               s.startLit()
+               s.skipLine(r)
+               s.comment("//" + string(s.stopLit()))
+               return
+       }
+
         // directives must start at the beginning of the line (s.col == colbase)
-       if s.col != colbase || s.pragh == nil || (r != 'g' && r != 'l') {
+       if s.mode&directives == 0 || s.col != colbase || (r != 'g' && r != 'l') {
                 s.skipLine(r)
                 return
         }
-       // s.col == colbase && s.pragh != nil && (r == 'g' || r == 'l')
  
-       // recognize directives
+       // recognize go: or line directives
         prefix := "go:"
         if r == 'l' {
                 prefix = "line "
@@ -598,38 +623,43 @@ func (s *scanner) lineComment() {
                 r = s.getr()
         }
  
-       // directive text without line ending (which may be "\r\n" if Windows),
+       // directive text
         s.startLit()
         s.skipLine(r)
-       text := s.stopLit()
-       if i := len(text) - 1; i >= 0 && text[i] == '\r' {
-               text = text[:i]
-       }
-
-       s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after //
+       s.comment("//" + prefix + string(s.stopLit()))
  }
  
-func (s *scanner) skipComment(r rune) {
+func (s *scanner) skipComment(r rune) bool {
         for r >= 0 {
                 for r == '*' {
                         r = s.getr()
                         if r == '/' {
-                               return
+                               return true
                         }
                 }
                 r = s.getr()
         }
         s.errh(s.line, s.col, "comment not terminated")
+       return false
  }
  
  func (s *scanner) fullComment() {
         r := s.getr()
  
-       if s.pragh == nil || r != 'l' {
+       if s.mode&comments != 0 {
+               s.startLit()
+               if s.skipComment(r) {
+                       s.comment("/*" + string(s.stopLit()))
+               } else {
+                       s.killLit() // not a complete comment - ignore
+               }
+               return
+       }
+
+       if s.mode&directives == 0 || r != 'l' {
                 s.skipComment(r)
                 return
         }
-       // s.pragh != nil && r == 'l'
  
         // recognize line directive
         const prefix = "line "
@@ -641,15 +671,13 @@ func (s *scanner) fullComment() {
                 r = s.getr()
         }
  
-       // directive text without comment ending
+       // directive text
         s.startLit()
-       s.skipComment(r)
-       text := s.stopLit()
-       if i := len(text) - 2; i >= 0 && text[i] == '*' && text[i+1] == '/' {
-               text = text[:i]
+       if s.skipComment(r) {
+               s.comment("/*" + prefix + string(s.stopLit()))
+       } else {
+               s.killLit() // not a complete comment - ignore
         }
-
-       s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after /*
  }
  
  func (s *scanner) escape(quote rune) bool {
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go

index 160bcbee267060a6d9afeb829a7853e3ab98e9e8..4bfe5871fabe13bad73e2ec91c9ade18d6cf2f0c 100644 (file)
--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@@ -24,7 +24,7 @@ func TestScanner(t *testing.T) {
         defer src.Close()
  
         var s scanner
-       s.init(src, nil, nil)
+       s.init(src, nil, 0)
         for {
                 s.next()
                 if s.tok == _EOF {
@@ -53,7 +53,7 @@ func TestTokens(t *testing.T) {
  
         // scan source
         var got scanner
-       got.init(&buf, nil, nil)
+       got.init(&buf, nil, 0)
         got.next()
         for i, want := range sampleTokens {
                 nlsemi := false
@@ -263,6 +263,66 @@ var sampleTokens = [...]struct {
         {_Var, "var", 0, 0},
  }
  
+func TestComments(t *testing.T) {
+       type comment struct {
+               line, col uint // 0-based
+               text      string
+       }
+
+       for _, test := range []struct {
+               src  string
+               want comment
+       }{
+               // no comments
+               {"no comment here", comment{0, 0, ""}},
+               {" /", comment{0, 0, ""}},
+               {"\n /*/", comment{0, 0, ""}},
+
+               //-style comments
+               {"// line comment\n", comment{0, 0, "// line comment"}},
+               {"package p // line comment\n", comment{0, 10, "// line comment"}},
+               {"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}},
+               {"\n\n//\n", comment{2, 0, "//"}},
+               {"//", comment{0, 0, "//"}},
+
+               /*-style comments */
+               {"/* regular comment */", comment{0, 0, "/* regular comment */"}},
+               {"package p /* regular comment", comment{0, 0, ""}},
+               {"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
+               {"\n\n/**/", comment{2, 0, "/**/"}},
+               {"/*", comment{0, 0, ""}},
+       } {
+               var s scanner
+               var got comment
+               s.init(strings.NewReader(test.src),
+                       func(line, col uint, msg string) {
+                               if msg[0] != '/' {
+                                       // error
+                                       if msg != "comment not terminated" {
+                                               t.Errorf("%q: %s", test.src, msg)
+                                       }
+                                       return
+                               }
+                               got = comment{line - linebase, col - colbase, msg} // keep last one
+                       }, comments)
+
+               for {
+                       s.next()
+                       if s.tok == _EOF {
+                               break
+                       }
+               }
+
+               want := test.want
+               if got.line != want.line || got.col != want.col {
+                       t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col)
+               }
+               if got.text != want.text {
+                       t.Errorf("%q: got %q; want %q", test.src, got.text, want.text)
+               }
+       }
+}
+
  func TestScanErrors(t *testing.T) {
         for _, test := range []struct {
                 src, msg  string
@@ -354,7 +414,7 @@ func TestScanErrors(t *testing.T) {
                                 // TODO(gri) make this use position info
                                 t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
                         }
-               }, nil)
+               }, 0)
  
                 for {
                         s.next()
@@ -373,7 +433,7 @@ func TestIssue21938(t *testing.T) {
         s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
  
         var got scanner
-       got.init(strings.NewReader(s), nil, nil)
+       got.init(strings.NewReader(s), nil, 0)
         got.next()
  
         if got.tok != _Literal || got.lit != ".5" {
diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go

index 4e3551225aad7f53d50979044e4b25a939be3ef5..62eb0fdc30140988ca4333183b75c775cbd94f3d 100644 (file)
--- a/src/cmd/compile/internal/syntax/source.go
+++ b/src/cmd/compile/internal/syntax/source.go
@@ -124,7 +124,8 @@ redo:
         // EOF
         if s.r == s.w {
                 if s.ioerr != io.EOF {
-                       s.error(s.ioerr.Error())
+                       // ensure we never start with a '/' (e.g., rooted path) in the error message
+                       s.error("I/O error: " + s.ioerr.Error())
                 }
                 return -1
         }
@@ -201,6 +202,10 @@ func (s *source) stopLit() []byte {
         if len(s.lit) > 0 {
                 lit = append(s.lit, lit...)
         }
-       s.suf = -1 // no pending literal
+       s.killLit()
         return lit
  }
+
+func (s *source) killLit() {
+       s.suf = -1 // no pending literal
+}
author	Robert Griesemer <gri@golang.org>
	Thu, 18 Jan 2018 05:42:51 +0000 (21:42 -0800)
committer	Robert Griesemer <gri@golang.org>
	Mon, 12 Feb 2018 22:57:57 +0000 (22:57 +0000)
src/cmd/compile/internal/syntax/parser.go		patch \| blob \| history
src/cmd/compile/internal/syntax/scanner.go		patch \| blob \| history
src/cmd/compile/internal/syntax/scanner_test.go		patch \| blob \| history
src/cmd/compile/internal/syntax/source.go		patch \| blob \| history