]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: towards simpler and faster lexing: always use getr
authorRobert Griesemer <gri@golang.org>
Tue, 23 Feb 2016 07:07:30 +0000 (23:07 -0800)
committerRobert Griesemer <gri@golang.org>
Wed, 24 Feb 2016 04:36:25 +0000 (04:36 +0000)
Always reading runes (rather than bytes) has negligible overhead
(a simple if at the moment - it can be eliminated eventually) but
simplifies the lexer logic and opens up the door for speedups.
In the process remove many int conversions that are now not needed
anymore.

Also, because identifiers are now more easily recognized, remove
talph label and move identifier lexing "in place".

Also, instead of accepting all chars < 0x80 and then check for
"frogs", only permit valid characters in the first place. Removes
an extra call for common simple tokens and leads to simpler logic.

`time go build -a net/http` (best of 5 runs) seems 1% faster.
Assuming this is in the noise, there is no noticeable performance
degradation with this change.

Change-Id: I3454c9bf8b91808188cf7a5f559341749da9a1eb
Reviewed-on: https://go-review.googlesource.com/19847
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/compile/internal/gc/lex.go
test/fixedbugs/issue11610.go

index 6298bbbbba93217aefdb5b9fbfdd788231bc6a0f..ccbbeed2cc8df8eba1d888c0dfbfd798bbd9d5dc 100644 (file)
@@ -37,6 +37,8 @@ var (
        Debug_wb     int
 )
 
+const BOM = 0xFEFF
+
 // Debug arguments.
 // These can be specified with the -d flag, as in "-d nil"
 // to set the debug_checknil variable. In general the list passed
@@ -310,7 +312,6 @@ func Main() {
        dclcontext = PEXTERN
        nerrors = 0
        lexlineno = 1
-       const BOM = 0xFEFF
 
        loadsys()
 
@@ -575,10 +576,14 @@ func addidir(dir string) {
        }
 }
 
+func isDriveLetter(b byte) bool {
+       return 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z'
+}
+
 // is this path a local name?  begins with ./ or ../ or /
 func islocalname(name string) bool {
        return strings.HasPrefix(name, "/") ||
-               Ctxt.Windows != 0 && len(name) >= 3 && isAlpha(int(name[0])) && name[1] == ':' && name[2] == '/' ||
+               Ctxt.Windows != 0 && len(name) >= 3 && isDriveLetter(name[0]) && name[1] == ':' && name[2] == '/' ||
                strings.HasPrefix(name, "./") || name == "." ||
                strings.HasPrefix(name, "../") || name == ".."
 }
@@ -829,20 +834,17 @@ func importfile(f *Val, indent []byte) {
        }
 }
 
-func isSpace(c int) bool {
+func isSpace(c rune) bool {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r'
 }
 
-func isAlpha(c int) bool {
-       return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
+func isLetter(c rune) bool {
+       return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
 }
 
-func isDigit(c int) bool {
+func isDigit(c rune) bool {
        return '0' <= c && c <= '9'
 }
-func isAlnum(c int) bool {
-       return isAlpha(c) || isDigit(c)
-}
 
 func plan9quote(s string) string {
        if s == "" {
@@ -856,23 +858,11 @@ func plan9quote(s string) string {
        return s
 }
 
-func isfrog(c int) bool {
-       // complain about possibly invisible control characters
-       if c < ' ' {
-               return !isSpace(c) // exclude good white space
-       }
-
-       if 0x7f <= c && c <= 0xa0 { // DEL, unicode block including unbreakable space.
-               return true
-       }
-       return false
-}
-
 type lexer struct {
        // source
        bin    *obj.Biobuf
-       peekc  int
-       peekc1 int // second peekc for ...
+       peekr1 rune
+       peekr2 rune // second peekc for ...
 
        nlsemi bool // if set, '\n' and EOF translate to ';'
 
@@ -932,7 +922,7 @@ const (
 )
 
 func (l *lexer) next() {
-       var c1 int
+       var c1 rune
        var op Op
        var escflag int
        var v int64
@@ -947,33 +937,73 @@ func (l *lexer) next() {
 
 l0:
        // skip white space
-       c := l.getc()
+       c := l.getr()
        for isSpace(c) {
                if c == '\n' && nlsemi {
-                       l.ungetc(c)
+                       // TODO(gri) we may be able avoid the ungetr and simply use lexlineno-1 below
+                       l.ungetr(c) // for correct line number
                        if Debug['x'] != 0 {
                                fmt.Printf("lex: implicit semi\n")
                        }
+                       lineno = lexlineno
                        l.tok = ';'
                        return
                }
-               c = l.getc()
+               c = l.getr()
        }
 
        // start of token
        lineno = lexlineno
 
-       if c >= utf8.RuneSelf {
-               // all multibyte runes are alpha
+       // identifiers and keywords
+       // (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
+       if isLetter(c) || c >= utf8.RuneSelf {
                cp = &lexbuf
                cp.Reset()
-               goto talph
-       }
 
-       if isAlpha(c) {
-               cp = &lexbuf
-               cp.Reset()
-               goto talph
+               // accelerate common case (7bit ASCII)
+               for isLetter(c) || isDigit(c) {
+                       cp.WriteByte(byte(c))
+                       c = l.getr()
+               }
+
+               // general case
+               for {
+                       if c >= utf8.RuneSelf {
+                               if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
+                                       if cp.Len() == 0 && unicode.IsDigit(c) {
+                                               Yyerror("identifier cannot begin with digit %#U", c)
+                                       }
+                               } else {
+                                       Yyerror("invalid identifier character %#U", c)
+                               }
+                               cp.WriteRune(c)
+                       } else if isLetter(c) || isDigit(c) {
+                               cp.WriteByte(byte(c))
+                       } else {
+                               break
+                       }
+                       c = l.getr()
+               }
+
+               cp = nil
+               l.ungetr(c)
+
+               s = LookupBytes(lexbuf.Bytes())
+               if s.Lexical == LIGNORE {
+                       goto l0
+               }
+
+               if Debug['x'] != 0 {
+                       fmt.Printf("lex: %s %s\n", s, lexname(rune(s.Lexical)))
+               }
+               l.sym_ = s
+               switch s.Lexical {
+               case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
+                       l.nlsemi = true
+               }
+               l.tok = int32(s.Lexical)
+               return
        }
 
        if isDigit(c) {
@@ -982,7 +1012,7 @@ l0:
                if c != '0' {
                        for {
                                cp.WriteByte(byte(c))
-                               c = l.getc()
+                               c = l.getr()
                                if isDigit(c) {
                                        continue
                                }
@@ -1000,11 +1030,11 @@ l0:
                }
 
                cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
                if c == 'x' || c == 'X' {
                        for {
                                cp.WriteByte(byte(c))
-                               c = l.getc()
+                               c = l.getr()
                                if isDigit(c) {
                                        continue
                                }
@@ -1037,7 +1067,7 @@ l0:
                                c1 = 1 // not octal
                        }
                        cp.WriteByte(byte(c))
-                       c = l.getc()
+                       c = l.getr()
                }
 
                if c == '.' {
@@ -1057,8 +1087,7 @@ l0:
 
        switch c {
        case EOF:
-               lineno = prevlineno
-               l.ungetc(EOF)
+               l.ungetr(EOF) // return EOF again in future next call
                // Treat EOF as "end of line" for the purposes
                // of inserting a semicolon.
                if nlsemi {
@@ -1071,13 +1100,8 @@ l0:
                l.tok = -1
                return
 
-       case '_':
-               cp = &lexbuf
-               cp.Reset()
-               goto talph
-
        case '.':
-               c1 = l.getc()
+               c1 = l.getr()
                if isDigit(c1) {
                        cp = &lexbuf
                        cp.Reset()
@@ -1087,13 +1111,13 @@ l0:
                }
 
                if c1 == '.' {
-                       c1 = l.getc()
+                       c1 = l.getr()
                        if c1 == '.' {
                                c = LDDD
                                goto lx
                        }
 
-                       l.ungetc(c1)
+                       l.ungetr(c1)
                        c1 = '.'
                }
 
@@ -1127,7 +1151,7 @@ l0:
                cp.Reset()
 
                for {
-                       c = int(l.getr())
+                       c = l.getr()
                        if c == '\r' {
                                continue
                        }
@@ -1139,7 +1163,7 @@ l0:
                        if c == '`' {
                                break
                        }
-                       cp.WriteRune(rune(c))
+                       cp.WriteRune(c)
                }
 
                goto strlit
@@ -1153,7 +1177,7 @@ l0:
 
                if !l.escchar('\'', &escflag, &v) {
                        Yyerror("missing '")
-                       l.ungetc(int(v))
+                       l.ungetr(rune(v))
                }
 
                x := new(Mpint)
@@ -1163,25 +1187,25 @@ l0:
                if Debug['x'] != 0 {
                        fmt.Printf("lex: codepoint literal\n")
                }
-               litbuf = "string literal"
+               litbuf = "rune literal"
                l.nlsemi = true
                l.tok = LLITERAL
                return
 
        case '/':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '*' {
                        nl := false
                        for {
-                               c = int(l.getr())
+                               c = l.getr()
                                if c == '\n' {
                                        nl = true
                                }
                                for c == '*' {
-                                       c = int(l.getr())
+                                       c = l.getr()
                                        if c == '/' {
                                                if nl {
-                                                       l.ungetc('\n')
+                                                       l.ungetr('\n')
                                                }
                                                goto l0
                                        }
@@ -1202,11 +1226,11 @@ l0:
                        c = l.getlinepragma()
                        for {
                                if c == '\n' || c == EOF {
-                                       l.ungetc(c)
+                                       l.ungetr(c)
                                        goto l0
                                }
 
-                               c = int(l.getr())
+                               c = l.getr()
                        }
                }
 
@@ -1216,31 +1240,31 @@ l0:
                }
 
        case ':':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '=' {
-                       c = int(LCOLAS)
+                       c = LCOLAS
                        goto lx
                }
 
        case '*':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '=' {
                        op = OMUL
                        goto asop
                }
 
        case '%':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '=' {
                        op = OMOD
                        goto asop
                }
 
        case '+':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '+' {
                        l.nlsemi = true
-                       c = int(LINC)
+                       c = LINC
                        goto lx
                }
 
@@ -1250,10 +1274,10 @@ l0:
                }
 
        case '-':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '-' {
                        l.nlsemi = true
-                       c = int(LDEC)
+                       c = LDEC
                        goto lx
                }
 
@@ -1263,10 +1287,10 @@ l0:
                }
 
        case '>':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '>' {
-                       c = int(LRSH)
-                       c1 = l.getc()
+                       c = LRSH
+                       c1 = l.getr()
                        if c1 == '=' {
                                op = ORSH
                                goto asop
@@ -1276,17 +1300,17 @@ l0:
                }
 
                if c1 == '=' {
-                       c = int(LGE)
+                       c = LGE
                        goto lx
                }
 
-               c = int(LGT)
+               c = LGT
 
        case '<':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '<' {
-                       c = int(LLSH)
-                       c1 = l.getc()
+                       c = LLSH
+                       c1 = l.getr()
                        if c1 == '=' {
                                op = OLSH
                                goto asop
@@ -1296,41 +1320,41 @@ l0:
                }
 
                if c1 == '=' {
-                       c = int(LLE)
+                       c = LLE
                        goto lx
                }
 
                if c1 == '-' {
-                       c = int(LCOMM)
+                       c = LCOMM
                        goto lx
                }
 
-               c = int(LLT)
+               c = LLT
 
        case '=':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '=' {
-                       c = int(LEQ)
+                       c = LEQ
                        goto lx
                }
 
        case '!':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '=' {
-                       c = int(LNE)
+                       c = LNE
                        goto lx
                }
 
        case '&':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '&' {
-                       c = int(LANDAND)
+                       c = LANDAND
                        goto lx
                }
 
                if c1 == '^' {
-                       c = int(LANDNOT)
-                       c1 = l.getc()
+                       c = LANDNOT
+                       c1 = l.getr()
                        if c1 == '=' {
                                op = OANDNOT
                                goto asop
@@ -1345,9 +1369,9 @@ l0:
                }
 
        case '|':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '|' {
-                       c = int(LOROR)
+                       c = LOROR
                        goto lx
                }
 
@@ -1357,21 +1381,32 @@ l0:
                }
 
        case '^':
-               c1 = l.getc()
+               c1 = l.getr()
                if c1 == '=' {
                        op = OXOR
                        goto asop
                }
 
+       case '(', '[', '{', ',', ';':
+               goto lx
+
        case ')', ']', '}':
                l.nlsemi = true
                goto lx
 
+       case '#', '$', '?', '@', '\\':
+               if importpkg != nil {
+                       goto lx
+               }
+               fallthrough
+
        default:
-               goto lx
+               // anything else is illegal
+               Yyerror("syntax error: illegal character %#U", c)
+               goto l0
        }
 
-       l.ungetc(c1)
+       l.ungetr(c1)
 
 lx:
        if Debug['x'] != 0 {
@@ -1381,17 +1416,8 @@ lx:
                        fmt.Printf("%v lex: TOKEN '%c'\n", Ctxt.Line(int(lexlineno)), c)
                }
        }
-       if isfrog(c) {
-               Yyerror("illegal character 0x%x", uint(c))
-               goto l0
-       }
-
-       if importpkg == nil && (c == '#' || c == '$' || c == '?' || c == '@' || c == '\\') {
-               Yyerror("%s: unexpected %c", "syntax error", c)
-               goto l0
-       }
 
-       l.tok = int32(c)
+       l.tok = c
        return
 
 asop:
@@ -1402,52 +1428,9 @@ asop:
        l.tok = LASOP
        return
 
-       // cp is set to lexbuf and some
-       // prefix has been stored
-talph:
-       for {
-               if c >= utf8.RuneSelf {
-                       l.ungetc(c)
-                       r := rune(l.getr())
-
-                       // 0xb7 ยท is used for internal names
-                       if !unicode.IsLetter(r) && !unicode.IsDigit(r) && (importpkg == nil || r != 0xb7) {
-                               Yyerror("invalid identifier character U+%04x", r)
-                       }
-                       if cp.Len() == 0 && unicode.IsDigit(r) {
-                               Yyerror("identifier cannot begin with digit U+%04x", r)
-                       }
-                       cp.WriteRune(r)
-               } else if !isAlnum(c) && c != '_' {
-                       break
-               } else {
-                       cp.WriteByte(byte(c))
-               }
-               c = l.getc()
-       }
-
-       cp = nil
-       l.ungetc(c)
-
-       s = LookupBytes(lexbuf.Bytes())
-       if s.Lexical == LIGNORE {
-               goto l0
-       }
-
-       if Debug['x'] != 0 {
-               fmt.Printf("lex: %s %s\n", s, lexname(int(s.Lexical)))
-       }
-       l.sym_ = s
-       switch s.Lexical {
-       case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
-               l.nlsemi = true
-       }
-       l.tok = int32(s.Lexical)
-       return
-
 ncu:
        cp = nil
-       l.ungetc(c)
+       l.ungetr(c)
 
        str = lexbuf.String()
        l.val.U = new(Mpint)
@@ -1468,7 +1451,7 @@ ncu:
 casedot:
        for {
                cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
                if !isDigit(c) {
                        break
                }
@@ -1488,10 +1471,10 @@ caseep:
                Yyerror("malformed floating point constant")
        }
        cp.WriteByte(byte(c))
-       c = l.getc()
+       c = l.getr()
        if c == '+' || c == '-' {
                cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
        }
 
        if !isDigit(c) {
@@ -1499,7 +1482,7 @@ caseep:
        }
        for isDigit(c) {
                cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
        }
 
        if c == 'i' {
@@ -1530,7 +1513,7 @@ casei:
 
 caseout:
        cp = nil
-       l.ungetc(c)
+       l.ungetr(c)
 
        str = lexbuf.String()
        l.val.U = newMpflt()
@@ -1571,7 +1554,7 @@ func internString(b []byte) string {
 
 func more(pp *string) bool {
        p := *pp
-       for p != "" && isSpace(int(p[0])) {
+       for p != "" && isSpace(rune(p[0])) {
                p = p[1:]
        }
        *pp = p
@@ -1582,16 +1565,16 @@ func more(pp *string) bool {
 // //line parse.y:15
 // as a discontinuity in sequential line numbers.
 // the next line of input comes from parse.y:15
-func (l *lexer) getlinepragma() int {
+func (l *lexer) getlinepragma() rune {
        var cmd, verb, name string
 
-       c := int(l.getr())
+       c := l.getr()
        if c == 'g' {
                cp := &lexbuf
                cp.Reset()
                cp.WriteByte('g') // already read
                for {
-                       c = int(l.getr())
+                       c = l.getr()
                        if c == EOF || c >= utf8.RuneSelf {
                                return c
                        }
@@ -1683,8 +1666,8 @@ func (l *lexer) getlinepragma() int {
                return c
        }
        for i := 1; i < 5; i++ {
-               c = int(l.getr())
-               if c != int("line "[i]) {
+               c = l.getr()
+               if c != rune("line "[i]) {
                        return c
                }
        }
@@ -1693,7 +1676,7 @@ func (l *lexer) getlinepragma() int {
        cp.Reset()
        linep := 0
        for {
-               c = int(l.getr())
+               c = l.getr()
                if c == EOF {
                        return c
                }
@@ -1746,7 +1729,7 @@ func getimpsym(pp *string) string {
                return ""
        }
        i := 0
-       for i < len(p) && !isSpace(int(p[i])) && p[i] != '"' {
+       for i < len(p) && !isSpace(rune(p[i])) && p[i] != '"' {
                i++
        }
        sym := p[:i]
@@ -1874,79 +1857,72 @@ func pragcgo(text string) {
        }
 }
 
-func (l *lexer) getc() int {
-       c := l.peekc
-       if c != 0 {
-               l.peekc = l.peekc1
-               l.peekc1 = 0
-               goto check
+func (l *lexer) getr() rune {
+       // unread rune != 0 available
+       if r := l.peekr1; r != 0 {
+               l.peekr1 = l.peekr2
+               l.peekr2 = 0
+               if r == '\n' && importpkg == nil {
+                       lexlineno++
+               }
+               return r
        }
 
-loop:
-       c = obj.Bgetc(l.bin)
-       // recognize BOM (U+FEFF): UTF-8 encoding is 0xef 0xbb 0xbf
-       if c == 0xef {
-               buf, err := l.bin.Peek(2)
-               if err != nil {
-                       yyerrorl(int(lexlineno), "illegal UTF-8 sequence ef % x followed by read error (%v)", string(buf), err)
-                       errorexit()
+redo:
+       // common case: 7bit ASCII
+       c := obj.Bgetc(l.bin)
+       if c < utf8.RuneSelf {
+               if c == 0 {
+                       // TODO(gri) do we need lineno = lexlineno here? Why not?
+                       Yyerror("illegal NUL byte")
+                       return 0
                }
-               if buf[0] == 0xbb && buf[1] == 0xbf {
-                       yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
-
-                       // consume BOM bytes
-                       obj.Bgetc(l.bin)
-                       obj.Bgetc(l.bin)
-                       goto loop
+               if c == '\n' && importpkg == nil {
+                       lexlineno++
                }
+               return rune(c)
        }
+       // c >= utf8.RuneSelf
 
-check:
-       if c == 0 {
-               Yyerror("illegal NUL byte")
-               return 0
+       // uncommon case: non-ASCII
+       var buf [utf8.UTFMax]byte
+       buf[0] = byte(c)
+       buf[1] = byte(obj.Bgetc(l.bin))
+       i := 2
+       for ; i < len(buf) && !utf8.FullRune(buf[:i]); i++ {
+               buf[i] = byte(obj.Bgetc(l.bin))
        }
-       if c == '\n' && importpkg == nil {
-               lexlineno++
+
+       r, w := utf8.DecodeRune(buf[:i])
+       if r == utf8.RuneError && w == 1 {
+               lineno = lexlineno
+               // The string conversion here makes a copy for passing
+               // to fmt.Printf, so that buf itself does not escape and
+               // can be allocated on the stack.
+               Yyerror("illegal UTF-8 sequence % x", string(buf[:i+1]))
        }
-       return c
-}
 
-func (l *lexer) ungetc(c int) {
-       l.peekc1 = l.peekc
-       l.peekc = c
-       if c == '\n' && importpkg == nil {
-               lexlineno--
+       if r == BOM {
+               // TODO(gri) can we use Yyerror here? Why not?
+               yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
+               goto redo
        }
-}
 
-func (l *lexer) getr() int32 {
-       var buf [utf8.UTFMax]byte
+       return r
+}
 
-       for i := 0; ; i++ {
-               c := l.getc()
-               if i == 0 && c < utf8.RuneSelf {
-                       return int32(c)
-               }
-               buf[i] = byte(c)
-               if i+1 == len(buf) || utf8.FullRune(buf[:i+1]) {
-                       r, w := utf8.DecodeRune(buf[:i+1])
-                       if r == utf8.RuneError && w == 1 {
-                               lineno = lexlineno
-                               // The string conversion here makes a copy for passing
-                               // to fmt.Printf, so that buf itself does not escape and can
-                               // be allocated on the stack.
-                               Yyerror("illegal UTF-8 sequence % x", string(buf[:i+1]))
-                       }
-                       return int32(r)
-               }
+func (l *lexer) ungetr(r rune) {
+       l.peekr2 = l.peekr1
+       l.peekr1 = r
+       if r == '\n' && importpkg == nil {
+               lexlineno--
        }
 }
 
-func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
+func (l *lexer) escchar(e rune, escflg *int, val *int64) bool {
        *escflg = 0
 
-       c := int(l.getr())
+       c := l.getr()
        switch c {
        case EOF:
                Yyerror("eof in string")
@@ -1968,7 +1944,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
        }
 
        u := 0
-       c = int(l.getr())
+       c = l.getr()
        var i int
        switch c {
        case 'x':
@@ -1997,14 +1973,14 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
                *escflg = 1 // it's a byte
                x := int64(c) - '0'
                for i := 2; i > 0; i-- {
-                       c = l.getc()
+                       c = l.getr()
                        if c >= '0' && c <= '7' {
                                x = x*8 + int64(c) - '0'
                                continue
                        }
 
                        Yyerror("non-octal character in escape sequence: %c", c)
-                       l.ungetc(c)
+                       l.ungetr(c)
                }
 
                if x > 255 {
@@ -2043,7 +2019,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
 hex:
        x := int64(0)
        for ; i > 0; i-- {
-               c = l.getc()
+               c = l.getr()
                if c >= '0' && c <= '9' {
                        x = x*16 + int64(c) - '0'
                        continue
@@ -2060,7 +2036,7 @@ hex:
                }
 
                Yyerror("non-hex character in escape sequence: %c", c)
-               l.ungetc(c)
+               l.ungetr(c)
                break
        }
 
@@ -2377,7 +2353,7 @@ func lexfini() {
        nodfp.Sym = Lookup(".fp")
 }
 
-var lexn = map[int]string{
+var lexn = map[rune]string{
        LANDAND:    "ANDAND",
        LANDNOT:    "ANDNOT",
        LASOP:      "ASOP",
@@ -2424,7 +2400,7 @@ var lexn = map[int]string{
        LVAR:       "VAR",
 }
 
-func lexname(lex int) string {
+func lexname(lex rune) string {
        if s, ok := lexn[lex]; ok {
                return s
        }
index 56f245dee513a430d28808b27115b0fdf712eef4..f32d48048254df25d272daee452a45622f6aab94 100644 (file)
@@ -9,7 +9,7 @@
 
 package a
 import""  // ERROR "import path is empty"
-var?      // ERROR "unexpected \?"
+var?      // ERROR "illegal character U\+003F '\?'"
 
 var x int // ERROR "unexpected var" "cannot declare name"