type scanner struct {
source
- pragh func(line, col uint, msg string)
- gcCompat bool // TODO(gri) remove this eventually (only here so we can build w/o parser)
- nlsemi bool // if set '\n' and EOF translate to ';'
+ pragh func(line, col uint, msg string)
+ nlsemi bool // if set '\n' and EOF translate to ';'
// current token, valid after calling next()
line, col uint
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
}
-func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string), gcCompat bool) {
+// init prepares s for scanning: it initializes the embedded source with
+// src and errh, stores pragh in s.pragh, and resets nlsemi to false.
+func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string)) {
s.source.init(src, errh)
s.pragh = pragh
- s.gcCompat = gcCompat
s.nlsemi = false
}
// token start
s.line, s.col = s.source.line0, s.source.col0
- if isLetter(c) || c >= utf8.RuneSelf && (unicode.IsLetter(c) || s.isCompatRune(c, true)) {
+ if isLetter(c) || c >= utf8.RuneSelf && s.isIdentRune(c, true) {
s.ident()
return
}
default:
s.tok = 0
- s.error(fmt.Sprintf("illegal character %#U", c))
+ s.error(fmt.Sprintf("invalid character %#U", c))
goto redo
}
// general case
if c >= utf8.RuneSelf {
- for unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || s.isCompatRune(c, false) {
+ for s.isIdentRune(c, false) {
c = s.getr()
}
}
s.tok = _Name
}
-func (s *scanner) isCompatRune(c rune, start bool) bool {
- if !s.gcCompat || c < utf8.RuneSelf {
- return false
- }
- if start && unicode.IsNumber(c) {
- s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c))
- } else {
+// isIdentRune reports whether c is to be consumed as part of an identifier.
+// first is true when c would be the first rune of the identifier.
+//
+// Letters and '_' are accepted silently. A digit is accepted as well, but
+// an error is reported if first is set. Any other rune >= utf8.RuneSelf is
+// reported as an invalid identifier character and still accepted (the
+// result is true), so an identifier scan that loops on this result
+// continues past it. All remaining runes yield false without an error.
+func (s *scanner) isIdentRune(c rune, first bool) bool {
+ switch {
+ case unicode.IsLetter(c) || c == '_':
+ // ok
+ case unicode.IsDigit(c):
+ if first {
+ s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c))
+ }
+ case c >= utf8.RuneSelf:
s.error(fmt.Sprintf("invalid identifier character %#U", c))
+ default:
+ return false
}
return true
}
if c < 0 {
return true // complain in caller about EOF
}
- if s.gcCompat {
- name := "hex"
- if base == 8 {
- name = "octal"
- }
- s.error(fmt.Sprintf("non-%s character in escape sequence: %c", name, c))
- } else {
- if c != quote {
- s.error(fmt.Sprintf("illegal character %#U in escape sequence", c))
- } else {
- s.error("escape sequence incomplete")
- }
+ kind := "hex"
+ if base == 8 {
+ kind = "octal"
}
+ s.error(fmt.Sprintf("non-%s character in escape sequence: %c", kind, c))
s.ungetr()
return false
}
defer src.Close()
var s scanner
- s.init(src, nil, nil, false)
+ s.init(src, nil, nil)
for {
s.next()
if s.tok == _EOF {
// scan source
var got scanner
- got.init(&bytesReader{buf}, nil, nil, false)
+ got.init(&bytesReader{buf}, nil, nil)
got.next()
for i, want := range sampleTokens {
nlsemi := false
// token-level errors
{"x + ~y", "bitwise complement operator is ^", 1, 5},
- {"foo$bar = 0", "illegal character U+0024 '$'", 1, 4},
+ {"foo$bar = 0", "invalid character U+0024 '$'", 1, 4},
{"const x = 0xyz", "malformed hex constant", 1, 13},
{"0123456789", "malformed octal constant", 1, 11},
{"0123456789. /* foobar", "comment not terminated", 1, 13}, // valid float constant
// TODO(gri) make this use position info
t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
}
- }, nil, true)
+ }, nil)
for {
s.next()