Reviewed in and cherry-picked from https://go-review.googlesource.com/#/c/33873/.
- simplify error handling in source.go
(move handling of first error into parser, where it belongs)
- clean up error handling in scanner.go
- move pragma and position base handling from scanner
to parser where it belongs
- have separate error methods in parser to avoid confusion
with handlers from scanner.go and source.go
- (source.go) and (scanner.go, source.go, tokens.go)
may be stand-alone packages if so desired, which means
these files are now less entangled and easier to maintain
Change-Id: I81510fc7ef943b78eaa49092c0eab2075a05878c
Reviewed-on: https://go-review.googlesource.com/34235
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
import (
"fmt"
"io"
+ "strconv"
"strings"
)
const gcCompat = true
type parser struct {
+ base *PosBase
+ errh ErrorHandler
scanner
+ first error // first error encountered
+ pragma Pragma // pragma flags
+
fnest int // function nesting level (for error handling)
xnest int // expression nesting level (for complit ambiguity resolution)
indent []byte // tracing support
}
func (p *parser) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
- p.scanner.init(filename, src, errh, pragh)
+ p.base = NewFileBase(filename)
+ p.errh = errh
+ p.scanner.init(src, p.error_at, func(line, col uint, text string) {
+ if strings.HasPrefix(text, "line ") {
+ p.updateBase(line, col, text[5:])
+ }
+ if pragh != nil {
+ p.pragma |= pragh(line, text)
+ }
+ }, gcCompat)
+
+ p.first = nil
+ p.pragma = 0
p.fnest = 0
p.xnest = 0
p.indent = nil
}
+func (p *parser) updateBase(line, col uint, text string) {
+ // Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
+ i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
+ if i < 0 {
+ return
+ }
+ nstr := text[i+1:]
+ n, err := strconv.Atoi(nstr)
+ if err != nil || n <= 0 || n > lineMax {
+ p.error_at(line, col+uint(i+1), "invalid line number: "+nstr)
+ return
+ }
+ p.base = NewLinePragmaBase(MakePos(p.base.Pos().Base(), line, col), text[:i], uint(n))
+}
+
func (p *parser) got(tok token) bool {
if p.tok == tok {
p.next()
// ----------------------------------------------------------------------------
// Error handling
-// syntax_error reports a syntax error at the current line.
-func (p *parser) syntax_error(msg string) {
- p.syntax_error_at(p.line, p.col, msg)
+// error_at reports an error at the given position.
+func (p *parser) error_at(line, col uint, msg string) {
+ err := Error{line, col, msg}
+ if p.first == nil {
+ p.first = err
+ }
+ if p.errh == nil {
+ panic(p.first)
+ }
+ p.errh(err)
+}
+
+// error reports a (non-syntax) error at the current token position.
+func (p *parser) error(msg string) {
+ p.error_at(p.line, p.col, msg)
}
-// Like syntax_error, but reports error at given line rather than current lexer line.
+// syntax_error_at reports a syntax error at the given position.
func (p *parser) syntax_error_at(line, col uint, msg string) {
if trace {
defer p.trace("syntax_error (" + msg + ")")()
p.error_at(line, col, "syntax error: unexpected "+tok+msg)
}
+// syntax_error reports a syntax error at the current token position.
+func (p *parser) syntax_error(msg string) {
+ p.syntax_error_at(p.line, p.col, msg)
+}
+
// The stopset contains keywords that start a statement.
// They are good synchronization points in case of syntax
// errors and (usually) shouldn't be skipped over.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// This file implements scanner, a lexical tokenizer for
+// Go source. After initialization, consecutive calls of
+// next advance the scanner one token at a time.
+//
+// This file, source.go, and tokens.go are self-contained
+// (go tool compile scanner.go source.go tokens.go compiles)
+// and thus could be made into its own package.
+
package syntax
import (
"fmt"
"io"
- "strconv"
- "strings"
"unicode"
"unicode/utf8"
)
type scanner struct {
source
- nlsemi bool // if set '\n' and EOF translate to ';'
- pragma Pragma
+ pragh func(line, col uint, msg string)
+ gcCompat bool // TODO(gri) remove this eventually (only here so we can build w/o parser)
+ nlsemi bool // if set '\n' and EOF translate to ';'
// current token, valid after calling next()
- base *PosBase
line, col uint
tok token
lit string // valid if tok is _Name or _Literal
kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
-
- pragh PragmaHandler
}
-func (s *scanner) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
+func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string), gcCompat bool) {
s.source.init(src, errh)
- s.nlsemi = false
- s.base = NewFileBase(filename)
s.pragh = pragh
+ s.gcCompat = gcCompat
+ s.nlsemi = false
}
func (s *scanner) next() {
}
func (s *scanner) isCompatRune(c rune, start bool) bool {
- if !gcCompat || c < utf8.RuneSelf {
+ if !s.gcCompat || c < utf8.RuneSelf {
return false
}
if start && unicode.IsNumber(c) {
break
}
if r < 0 {
- s.error_at(s.line, s.col, "string not terminated")
+ s.errh(s.line, s.col, "string not terminated")
break
}
}
break
}
if r < 0 {
- s.error_at(s.line, s.col, "string not terminated")
+ s.errh(s.line, s.col, "string not terminated")
break
}
}
}
func (s *scanner) lineComment() {
- // recognize pragmas
- prefix := ""
r := s.getr()
- switch r {
- case 'g':
- if s.pragh == nil {
- s.skipLine(r)
- return
- }
- prefix = "go:"
- case 'l':
- prefix = "line "
- default:
+ if s.pragh == nil || (r != 'g' && r != 'l') {
s.skipLine(r)
return
}
+ // s.pragh != nil && (r == 'g' || r == 'l')
+ // recognize pragmas
+ prefix := "go:"
+ if r == 'l' {
+ prefix = "line "
+ }
for _, m := range prefix {
if r != m {
s.skipLine(r)
r = s.getr()
}
- // pragma text without prefix and line ending (which may be "\r\n" if Windows)
+ // pragma text without line ending (which may be "\r\n" if Windows)
s.startLit()
s.skipLine(r)
- text := strings.TrimSuffix(string(s.stopLit()), "\r")
-
- // process //line filename:line pragma
- if prefix[0] == 'l' {
- // Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
- i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
- if i < 0 {
- return
- }
- nstr := text[i+1:]
- n, err := strconv.Atoi(nstr)
- if err != nil || n <= 0 || n > lineMax {
- s.error_at(s.line0, s.col0-uint(len(nstr)), "invalid line number: "+nstr)
- return
- }
- s.base = NewLinePragmaBase(MakePos(s.base.Pos().Base(), s.line, s.col), text[:i], uint(n))
- // TODO(gri) Return here once we rely exclusively
- // on node positions for line number information,
- // and remove //line pragma handling elsewhere.
- if s.pragh == nil {
- return
- }
+ text := s.stopLit()
+ if i := len(text) - 1; i >= 0 && text[i] == '\r' {
+ text = text[:i]
}
- s.pragma |= s.pragh(s.line, prefix+text)
+ s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since pragma text starts after //
}
func (s *scanner) fullComment() {
}
}
if r < 0 {
- s.error_at(s.line, s.col, "comment not terminated")
+ s.errh(s.line, s.col, "comment not terminated")
return
}
}
if c < 0 {
return true // complain in caller about EOF
}
- if gcCompat {
+ if s.gcCompat {
name := "hex"
if base == 8 {
name = "octal"
defer src.Close()
var s scanner
- s.init("parser.go", src, nil, nil)
+ s.init(src, nil, nil, false)
for {
s.next()
if s.tok == _EOF {
// scan source
var got scanner
- got.init("", &bytesReader{buf}, nil, nil)
+ got.init(&bytesReader{buf}, nil, nil, false)
got.next()
for i, want := range sampleTokens {
nlsemi := false
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 19},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 19},
- {`//line :`, "invalid line number: ", 1, 9},
- {`//line :x`, "invalid line number: x", 1, 9},
- {`//line foo :`, "invalid line number: ", 1, 13},
- {`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
- {`/**///line foo:x`, "invalid line number: x", 1, 16},
- {`//line foo:0`, "invalid line number: 0", 1, 12},
- {fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
+ // TODO(gri) move these test cases into an appropriate parser test
+ // {`//line :`, "invalid line number: ", 1, 9},
+ // {`//line :x`, "invalid line number: x", 1, 9},
+ // {`//line foo :`, "invalid line number: ", 1, 13},
+ // {`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
+ // {`/**///line foo:x`, "invalid line number: x", 1, 16},
+ // {`//line foo:0`, "invalid line number: 0", 1, 12},
+ // {fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 3, 1},
} {
var s scanner
nerrors := 0
- s.init("", &bytesReader{[]byte(test.src)}, func(err error) {
+ s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
nerrors++
// only check the first error
- e := err.(Error) // we know it's an Error
if nerrors == 1 {
- if e.Msg != test.msg {
- t.Errorf("%q: got msg = %q; want %q", test.src, e.Msg, test.msg)
+ if msg != test.msg {
+ t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
}
- if e.Line != test.line {
- t.Errorf("%q: got line = %d; want %d", test.src, e.Line, test.line)
+ if line != test.line {
+ t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
}
- if e.Col != test.col {
- t.Errorf("%q: got col = %d; want %d", test.src, e.Col, test.col)
+ if col != test.col {
+ t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
}
} else if nerrors > 1 {
// TODO(gri) make this use position info
- t.Errorf("%q: got unexpected %q at line = %d", test.src, e.Msg, e.Line)
+ t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
}
- }, nil)
+ }, nil, true)
for {
s.next()
// Contiguous sequences of runes (literals) are extracted
// directly as []byte without the need to re-encode the
// runes in UTF-8 (as would be necessary with bufio.Reader).
+//
+// This file is self-contained (go tool compile source.go
+// compiles) and thus could be made into its own package.
package syntax
// suf r0 r w
type source struct {
- src io.Reader
- errh ErrorHandler
- first error // first error encountered
+ src io.Reader
+ errh func(line, pos uint, msg string)
// source buffer
buf [4 << 10]byte
r0, r, w int // previous/current read and write buf positions, excluding sentinel
line0, line uint // previous/current line
col0, col uint // previous/current column
- err error // pending io error
+ ioerr error // pending io error
// literal buffer
lit []byte // literal prefix
suf int // literal suffix; suf >= 0 means we are scanning a literal
}
-func (s *source) init(src io.Reader, errh ErrorHandler) {
+// init initializes source to read from src and to report errors via errh.
+// errh must not be nil.
+func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) {
s.src = src
s.errh = errh
- s.first = nil
s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0
s.r0, s.r, s.w = 0, 0, 0
s.line0, s.line = 1, 1
s.col0, s.col = 1, 1
- s.err = nil
+ s.ioerr = nil
s.lit = s.lit[:0]
s.suf = -1
}
-func (s *source) error(msg string) {
- s.error_at(s.line0, s.col0, msg)
-}
-
-func (s *source) error_at(line, col uint, msg string) {
- err := Error{line, col, msg}
- if s.first == nil {
- s.first = err
- }
- if s.errh == nil {
- panic(s.first)
- }
- s.errh(err)
-}
-
// ungetr ungets the most recently read rune.
func (s *source) ungetr() {
s.r, s.line, s.col = s.r0, s.line0, s.col0
s.col0--
}
+func (s *source) error(msg string) {
+ s.errh(s.line0, s.col0, msg)
+}
+
+// getr reads and returns the next rune.
+// If an error occurs, the error handler provided to init
+// is called with position (line and column) information
+// and error message before getr returns.
func (s *source) getr() rune {
redo:
s.r0, s.line0, s.col0 = s.r, s.line, s.col
// in the buffer. Measure and optimize if necessary.
// make sure we have at least one rune in buffer, or we are at EOF
- for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
+ for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) {
s.fill() // s.w-s.r < len(s.buf) => buffer is not full
}
// EOF
if s.r == s.w {
- if s.err != io.EOF {
- s.error(s.err.Error())
+ if s.ioerr != io.EOF {
+ s.error(s.ioerr.Error())
}
return -1
}
if n > 0 || err != nil {
s.buf[s.w] = utf8.RuneSelf // sentinel
if err != nil {
- s.err = err
+ s.ioerr = err
}
return
}
}
- s.err = io.ErrNoProgress
+ s.ioerr = io.ErrNoProgress
}
func (s *source) startLit() {