cmd/compile: towards simpler and faster lexing: always use getr

author Robert Griesemer <gri@golang.org>

Tue, 23 Feb 2016 07:07:30 +0000 (23:07 -0800)

committer Robert Griesemer <gri@golang.org>

Wed, 24 Feb 2016 04:36:25 +0000 (04:36 +0000)
author Robert Griesemer <gri@golang.org>
Tue, 23 Feb 2016 07:07:30 +0000 (23:07 -0800)
committer Robert Griesemer <gri@golang.org>
Wed, 24 Feb 2016 04:36:25 +0000 (04:36 +0000)
diff --git a/src/cmd/compile/internal/gc/lex.go b/src/cmd/compile/internal/gc/lex.go

index 6298bbbbba93217aefdb5b9fbfdd788231bc6a0f..ccbbeed2cc8df8eba1d888c0dfbfd798bbd9d5dc 100644 (file)
--- a/src/cmd/compile/internal/gc/lex.go
+++ b/src/cmd/compile/internal/gc/lex.go
@@ -37,6 +37,8 @@ var (
         Debug_wb     int
  )
  
+const BOM = 0xFEFF
+
  // Debug arguments.
  // These can be specified with the -d flag, as in "-d nil"
  // to set the debug_checknil variable. In general the list passed
@@ -310,7 +312,6 @@ func Main() {
         dclcontext = PEXTERN
         nerrors = 0
         lexlineno = 1
-       const BOM = 0xFEFF
  
         loadsys()
  
@@ -575,10 +576,14 @@ func addidir(dir string) {
         }
  }
  
+func isDriveLetter(b byte) bool {
+       return 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z'
+}
+
  // is this path a local name?  begins with ./ or ../ or /
  func islocalname(name string) bool {
         return strings.HasPrefix(name, "/") ||
-               Ctxt.Windows != 0 && len(name) >= 3 && isAlpha(int(name[0])) && name[1] == ':' && name[2] == '/' ||
+               Ctxt.Windows != 0 && len(name) >= 3 && isDriveLetter(name[0]) && name[1] == ':' && name[2] == '/' ||
                 strings.HasPrefix(name, "./") || name == "." ||
                 strings.HasPrefix(name, "../") || name == ".."
  }
@@ -829,20 +834,17 @@ func importfile(f *Val, indent []byte) {
         }
  }
  
-func isSpace(c int) bool {
+func isSpace(c rune) bool {
         return c == ' ' || c == '\t' || c == '\n' || c == '\r'
  }
  
-func isAlpha(c int) bool {
-       return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
+func isLetter(c rune) bool {
+       return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
  }
  
-func isDigit(c int) bool {
+func isDigit(c rune) bool {
         return '0' <= c && c <= '9'
  }
-func isAlnum(c int) bool {
-       return isAlpha(c) || isDigit(c)
-}
  
  func plan9quote(s string) string {
         if s == "" {
@@ -856,23 +858,11 @@ func plan9quote(s string) string {
         return s
  }
  
-func isfrog(c int) bool {
-       // complain about possibly invisible control characters
-       if c < ' ' {
-               return !isSpace(c) // exclude good white space
-       }
-
-       if 0x7f <= c && c <= 0xa0 { // DEL, unicode block including unbreakable space.
-               return true
-       }
-       return false
-}
-
  type lexer struct {
         // source
         bin    *obj.Biobuf
-       peekc  int
-       peekc1 int // second peekc for ...
+       peekr1 rune
+       peekr2 rune // second peekc for ...
  
         nlsemi bool // if set, '\n' and EOF translate to ';'
  
@@ -932,7 +922,7 @@ const (
  )
  
  func (l *lexer) next() {
-       var c1 int
+       var c1 rune
         var op Op
         var escflag int
         var v int64
@@ -947,33 +937,73 @@ func (l *lexer) next() {
  
  l0:
         // skip white space
-       c := l.getc()
+       c := l.getr()
         for isSpace(c) {
                 if c == '\n' && nlsemi {
-                       l.ungetc(c)
+                       // TODO(gri) we may be able avoid the ungetr and simply use lexlineno-1 below
+                       l.ungetr(c) // for correct line number
                         if Debug['x'] != 0 {
                                 fmt.Printf("lex: implicit semi\n")
                         }
+                       lineno = lexlineno
                         l.tok = ';'
                         return
                 }
-               c = l.getc()
+               c = l.getr()
         }
  
         // start of token
         lineno = lexlineno
  
-       if c >= utf8.RuneSelf {
-               // all multibyte runes are alpha
+       // identifiers and keywords
+       // (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
+       if isLetter(c) || c >= utf8.RuneSelf {
                 cp = &lexbuf
                 cp.Reset()
-               goto talph
-       }
  
-       if isAlpha(c) {
-               cp = &lexbuf
-               cp.Reset()
-               goto talph
+               // accelerate common case (7bit ASCII)
+               for isLetter(c) || isDigit(c) {
+                       cp.WriteByte(byte(c))
+                       c = l.getr()
+               }
+
+               // general case
+               for {
+                       if c >= utf8.RuneSelf {
+                               if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
+                                       if cp.Len() == 0 && unicode.IsDigit(c) {
+                                               Yyerror("identifier cannot begin with digit %#U", c)
+                                       }
+                               } else {
+                                       Yyerror("invalid identifier character %#U", c)
+                               }
+                               cp.WriteRune(c)
+                       } else if isLetter(c) || isDigit(c) {
+                               cp.WriteByte(byte(c))
+                       } else {
+                               break
+                       }
+                       c = l.getr()
+               }
+
+               cp = nil
+               l.ungetr(c)
+
+               s = LookupBytes(lexbuf.Bytes())
+               if s.Lexical == LIGNORE {
+                       goto l0
+               }
+
+               if Debug['x'] != 0 {
+                       fmt.Printf("lex: %s %s\n", s, lexname(rune(s.Lexical)))
+               }
+               l.sym_ = s
+               switch s.Lexical {
+               case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
+                       l.nlsemi = true
+               }
+               l.tok = int32(s.Lexical)
+               return
         }
  
         if isDigit(c) {
@@ -982,7 +1012,7 @@ l0:
                 if c != '0' {
                         for {
                                 cp.WriteByte(byte(c))
-                               c = l.getc()
+                               c = l.getr()
                                 if isDigit(c) {
                                         continue
                                 }
@@ -1000,11 +1030,11 @@ l0:
                 }
  
                 cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
                 if c == 'x' || c == 'X' {
                         for {
                                 cp.WriteByte(byte(c))
-                               c = l.getc()
+                               c = l.getr()
                                 if isDigit(c) {
                                         continue
                                 }
@@ -1037,7 +1067,7 @@ l0:
                                 c1 = 1 // not octal
                         }
                         cp.WriteByte(byte(c))
-                       c = l.getc()
+                       c = l.getr()
                 }
  
                 if c == '.' {
@@ -1057,8 +1087,7 @@ l0:
  
         switch c {
         case EOF:
-               lineno = prevlineno
-               l.ungetc(EOF)
+               l.ungetr(EOF) // return EOF again in future next call
                 // Treat EOF as "end of line" for the purposes
                 // of inserting a semicolon.
                 if nlsemi {
@@ -1071,13 +1100,8 @@ l0:
                 l.tok = -1
                 return
  
-       case '_':
-               cp = &lexbuf
-               cp.Reset()
-               goto talph
-
         case '.':
-               c1 = l.getc()
+               c1 = l.getr()
                 if isDigit(c1) {
                         cp = &lexbuf
                         cp.Reset()
@@ -1087,13 +1111,13 @@ l0:
                 }
  
                 if c1 == '.' {
-                       c1 = l.getc()
+                       c1 = l.getr()
                         if c1 == '.' {
                                 c = LDDD
                                 goto lx
                         }
  
-                       l.ungetc(c1)
+                       l.ungetr(c1)
                         c1 = '.'
                 }
  
@@ -1127,7 +1151,7 @@ l0:
                 cp.Reset()
  
                 for {
-                       c = int(l.getr())
+                       c = l.getr()
                         if c == '\r' {
                                 continue
                         }
@@ -1139,7 +1163,7 @@ l0:
                         if c == '`' {
                                 break
                         }
-                       cp.WriteRune(rune(c))
+                       cp.WriteRune(c)
                 }
  
                 goto strlit
@@ -1153,7 +1177,7 @@ l0:
  
                 if !l.escchar('\'', &escflag, &v) {
                         Yyerror("missing '")
-                       l.ungetc(int(v))
+                       l.ungetr(rune(v))
                 }
  
                 x := new(Mpint)
@@ -1163,25 +1187,25 @@ l0:
                 if Debug['x'] != 0 {
                         fmt.Printf("lex: codepoint literal\n")
                 }
-               litbuf = "string literal"
+               litbuf = "rune literal"
                 l.nlsemi = true
                 l.tok = LLITERAL
                 return
  
         case '/':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '*' {
                         nl := false
                         for {
-                               c = int(l.getr())
+                               c = l.getr()
                                 if c == '\n' {
                                         nl = true
                                 }
                                 for c == '*' {
-                                       c = int(l.getr())
+                                       c = l.getr()
                                         if c == '/' {
                                                 if nl {
-                                                       l.ungetc('\n')
+                                                       l.ungetr('\n')
                                                 }
                                                 goto l0
                                         }
@@ -1202,11 +1226,11 @@ l0:
                         c = l.getlinepragma()
                         for {
                                 if c == '\n' || c == EOF {
-                                       l.ungetc(c)
+                                       l.ungetr(c)
                                         goto l0
                                 }
  
-                               c = int(l.getr())
+                               c = l.getr()
                         }
                 }
  
@@ -1216,31 +1240,31 @@ l0:
                 }
  
         case ':':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '=' {
-                       c = int(LCOLAS)
+                       c = LCOLAS
                         goto lx
                 }
  
         case '*':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '=' {
                         op = OMUL
                         goto asop
                 }
  
         case '%':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '=' {
                         op = OMOD
                         goto asop
                 }
  
         case '+':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '+' {
                         l.nlsemi = true
-                       c = int(LINC)
+                       c = LINC
                         goto lx
                 }
  
@@ -1250,10 +1274,10 @@ l0:
                 }
  
         case '-':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '-' {
                         l.nlsemi = true
-                       c = int(LDEC)
+                       c = LDEC
                         goto lx
                 }
  
@@ -1263,10 +1287,10 @@ l0:
                 }
  
         case '>':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '>' {
-                       c = int(LRSH)
-                       c1 = l.getc()
+                       c = LRSH
+                       c1 = l.getr()
                         if c1 == '=' {
                                 op = ORSH
                                 goto asop
@@ -1276,17 +1300,17 @@ l0:
                 }
  
                 if c1 == '=' {
-                       c = int(LGE)
+                       c = LGE
                         goto lx
                 }
  
-               c = int(LGT)
+               c = LGT
  
         case '<':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '<' {
-                       c = int(LLSH)
-                       c1 = l.getc()
+                       c = LLSH
+                       c1 = l.getr()
                         if c1 == '=' {
                                 op = OLSH
                                 goto asop
@@ -1296,41 +1320,41 @@ l0:
                 }
  
                 if c1 == '=' {
-                       c = int(LLE)
+                       c = LLE
                         goto lx
                 }
  
                 if c1 == '-' {
-                       c = int(LCOMM)
+                       c = LCOMM
                         goto lx
                 }
  
-               c = int(LLT)
+               c = LLT
  
         case '=':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '=' {
-                       c = int(LEQ)
+                       c = LEQ
                         goto lx
                 }
  
         case '!':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '=' {
-                       c = int(LNE)
+                       c = LNE
                         goto lx
                 }
  
         case '&':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '&' {
-                       c = int(LANDAND)
+                       c = LANDAND
                         goto lx
                 }
  
                 if c1 == '^' {
-                       c = int(LANDNOT)
-                       c1 = l.getc()
+                       c = LANDNOT
+                       c1 = l.getr()
                         if c1 == '=' {
                                 op = OANDNOT
                                 goto asop
@@ -1345,9 +1369,9 @@ l0:
                 }
  
         case '|':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '|' {
-                       c = int(LOROR)
+                       c = LOROR
                         goto lx
                 }
  
@@ -1357,21 +1381,32 @@ l0:
                 }
  
         case '^':
-               c1 = l.getc()
+               c1 = l.getr()
                 if c1 == '=' {
                         op = OXOR
                         goto asop
                 }
  
+       case '(', '[', '{', ',', ';':
+               goto lx
+
         case ')', ']', '}':
                 l.nlsemi = true
                 goto lx
  
+       case '#', '$', '?', '@', '\\':
+               if importpkg != nil {
+                       goto lx
+               }
+               fallthrough
+
         default:
-               goto lx
+               // anything else is illegal
+               Yyerror("syntax error: illegal character %#U", c)
+               goto l0
         }
  
-       l.ungetc(c1)
+       l.ungetr(c1)
  
  lx:
         if Debug['x'] != 0 {
@@ -1381,17 +1416,8 @@ lx:
                         fmt.Printf("%v lex: TOKEN '%c'\n", Ctxt.Line(int(lexlineno)), c)
                 }
         }
-       if isfrog(c) {
-               Yyerror("illegal character 0x%x", uint(c))
-               goto l0
-       }
-
-       if importpkg == nil && (c == '#' || c == '$' || c == '?' || c == '@' || c == '\\') {
-               Yyerror("%s: unexpected %c", "syntax error", c)
-               goto l0
-       }
  
-       l.tok = int32(c)
+       l.tok = c
         return
  
  asop:
@@ -1402,52 +1428,9 @@ asop:
         l.tok = LASOP
         return
  
-       // cp is set to lexbuf and some
-       // prefix has been stored
-talph:
-       for {
-               if c >= utf8.RuneSelf {
-                       l.ungetc(c)
-                       r := rune(l.getr())
-
-                       // 0xb7 · is used for internal names
-                       if !unicode.IsLetter(r) && !unicode.IsDigit(r) && (importpkg == nil || r != 0xb7) {
-                               Yyerror("invalid identifier character U+%04x", r)
-                       }
-                       if cp.Len() == 0 && unicode.IsDigit(r) {
-                               Yyerror("identifier cannot begin with digit U+%04x", r)
-                       }
-                       cp.WriteRune(r)
-               } else if !isAlnum(c) && c != '_' {
-                       break
-               } else {
-                       cp.WriteByte(byte(c))
-               }
-               c = l.getc()
-       }
-
-       cp = nil
-       l.ungetc(c)
-
-       s = LookupBytes(lexbuf.Bytes())
-       if s.Lexical == LIGNORE {
-               goto l0
-       }
-
-       if Debug['x'] != 0 {
-               fmt.Printf("lex: %s %s\n", s, lexname(int(s.Lexical)))
-       }
-       l.sym_ = s
-       switch s.Lexical {
-       case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
-               l.nlsemi = true
-       }
-       l.tok = int32(s.Lexical)
-       return
-
  ncu:
         cp = nil
-       l.ungetc(c)
+       l.ungetr(c)
  
         str = lexbuf.String()
         l.val.U = new(Mpint)
@@ -1468,7 +1451,7 @@ ncu:
  casedot:
         for {
                 cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
                 if !isDigit(c) {
                         break
                 }
@@ -1488,10 +1471,10 @@ caseep:
                 Yyerror("malformed floating point constant")
         }
         cp.WriteByte(byte(c))
-       c = l.getc()
+       c = l.getr()
         if c == '+' || c == '-' {
                 cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
         }
  
         if !isDigit(c) {
@@ -1499,7 +1482,7 @@ caseep:
         }
         for isDigit(c) {
                 cp.WriteByte(byte(c))
-               c = l.getc()
+               c = l.getr()
         }
  
         if c == 'i' {
@@ -1530,7 +1513,7 @@ casei:
  
  caseout:
         cp = nil
-       l.ungetc(c)
+       l.ungetr(c)
  
         str = lexbuf.String()
         l.val.U = newMpflt()
@@ -1571,7 +1554,7 @@ func internString(b []byte) string {
  
  func more(pp *string) bool {
         p := *pp
-       for p != "" && isSpace(int(p[0])) {
+       for p != "" && isSpace(rune(p[0])) {
                 p = p[1:]
         }
         *pp = p
@@ -1582,16 +1565,16 @@ func more(pp *string) bool {
  // //line parse.y:15
  // as a discontinuity in sequential line numbers.
  // the next line of input comes from parse.y:15
-func (l *lexer) getlinepragma() int {
+func (l *lexer) getlinepragma() rune {
         var cmd, verb, name string
  
-       c := int(l.getr())
+       c := l.getr()
         if c == 'g' {
                 cp := &lexbuf
                 cp.Reset()
                 cp.WriteByte('g') // already read
                 for {
-                       c = int(l.getr())
+                       c = l.getr()
                         if c == EOF || c >= utf8.RuneSelf {
                                 return c
                         }
@@ -1683,8 +1666,8 @@ func (l *lexer) getlinepragma() int {
                 return c
         }
         for i := 1; i < 5; i++ {
-               c = int(l.getr())
-               if c != int("line "[i]) {
+               c = l.getr()
+               if c != rune("line "[i]) {
                         return c
                 }
         }
@@ -1693,7 +1676,7 @@ func (l *lexer) getlinepragma() int {
         cp.Reset()
         linep := 0
         for {
-               c = int(l.getr())
+               c = l.getr()
                 if c == EOF {
                         return c
                 }
@@ -1746,7 +1729,7 @@ func getimpsym(pp *string) string {
                 return ""
         }
         i := 0
-       for i < len(p) && !isSpace(int(p[i])) && p[i] != '"' {
+       for i < len(p) && !isSpace(rune(p[i])) && p[i] != '"' {
                 i++
         }
         sym := p[:i]
@@ -1874,79 +1857,72 @@ func pragcgo(text string) {
         }
  }
  
-func (l *lexer) getc() int {
-       c := l.peekc
-       if c != 0 {
-               l.peekc = l.peekc1
-               l.peekc1 = 0
-               goto check
+func (l *lexer) getr() rune {
+       // unread rune != 0 available
+       if r := l.peekr1; r != 0 {
+               l.peekr1 = l.peekr2
+               l.peekr2 = 0
+               if r == '\n' && importpkg == nil {
+                       lexlineno++
+               }
+               return r
         }
  
-loop:
-       c = obj.Bgetc(l.bin)
-       // recognize BOM (U+FEFF): UTF-8 encoding is 0xef 0xbb 0xbf
-       if c == 0xef {
-               buf, err := l.bin.Peek(2)
-               if err != nil {
-                       yyerrorl(int(lexlineno), "illegal UTF-8 sequence ef % x followed by read error (%v)", string(buf), err)
-                       errorexit()
+redo:
+       // common case: 7bit ASCII
+       c := obj.Bgetc(l.bin)
+       if c < utf8.RuneSelf {
+               if c == 0 {
+                       // TODO(gri) do we need lineno = lexlineno here? Why not?
+                       Yyerror("illegal NUL byte")
+                       return 0
                 }
-               if buf[0] == 0xbb && buf[1] == 0xbf {
-                       yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
-
-                       // consume BOM bytes
-                       obj.Bgetc(l.bin)
-                       obj.Bgetc(l.bin)
-                       goto loop
+               if c == '\n' && importpkg == nil {
+                       lexlineno++
                 }
+               return rune(c)
         }
+       // c >= utf8.RuneSelf
  
-check:
-       if c == 0 {
-               Yyerror("illegal NUL byte")
-               return 0
+       // uncommon case: non-ASCII
+       var buf [utf8.UTFMax]byte
+       buf[0] = byte(c)
+       buf[1] = byte(obj.Bgetc(l.bin))
+       i := 2
+       for ; i < len(buf) && !utf8.FullRune(buf[:i]); i++ {
+               buf[i] = byte(obj.Bgetc(l.bin))
         }
-       if c == '\n' && importpkg == nil {
-               lexlineno++
+
+       r, w := utf8.DecodeRune(buf[:i])
+       if r == utf8.RuneError && w == 1 {
+               lineno = lexlineno
+               // The string conversion here makes a copy for passing
+               // to fmt.Printf, so that buf itself does not escape and
+               // can be allocated on the stack.
+               Yyerror("illegal UTF-8 sequence % x", string(buf[:i+1]))
         }
-       return c
-}
  
-func (l *lexer) ungetc(c int) {
-       l.peekc1 = l.peekc
-       l.peekc = c
-       if c == '\n' && importpkg == nil {
-               lexlineno--
+       if r == BOM {
+               // TODO(gri) can we use Yyerror here? Why not?
+               yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
+               goto redo
         }
-}
  
-func (l *lexer) getr() int32 {
-       var buf [utf8.UTFMax]byte
+       return r
+}
  
-       for i := 0; ; i++ {
-               c := l.getc()
-               if i == 0 && c < utf8.RuneSelf {
-                       return int32(c)
-               }
-               buf[i] = byte(c)
-               if i+1 == len(buf) || utf8.FullRune(buf[:i+1]) {
-                       r, w := utf8.DecodeRune(buf[:i+1])
-                       if r == utf8.RuneError && w == 1 {
-                               lineno = lexlineno
-                               // The string conversion here makes a copy for passing
-                               // to fmt.Printf, so that buf itself does not escape and can
-                               // be allocated on the stack.
-                               Yyerror("illegal UTF-8 sequence % x", string(buf[:i+1]))
-                       }
-                       return int32(r)
-               }
+func (l *lexer) ungetr(r rune) {
+       l.peekr2 = l.peekr1
+       l.peekr1 = r
+       if r == '\n' && importpkg == nil {
+               lexlineno--
         }
  }
  
-func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
+func (l *lexer) escchar(e rune, escflg *int, val *int64) bool {
         *escflg = 0
  
-       c := int(l.getr())
+       c := l.getr()
         switch c {
         case EOF:
                 Yyerror("eof in string")
@@ -1968,7 +1944,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
         }
  
         u := 0
-       c = int(l.getr())
+       c = l.getr()
         var i int
         switch c {
         case 'x':
@@ -1997,14 +1973,14 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
                 *escflg = 1 // it's a byte
                 x := int64(c) - '0'
                 for i := 2; i > 0; i-- {
-                       c = l.getc()
+                       c = l.getr()
                         if c >= '0' && c <= '7' {
                                 x = x*8 + int64(c) - '0'
                                 continue
                         }
  
                         Yyerror("non-octal character in escape sequence: %c", c)
-                       l.ungetc(c)
+                       l.ungetr(c)
                 }
  
                 if x > 255 {
@@ -2043,7 +2019,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
  hex:
         x := int64(0)
         for ; i > 0; i-- {
-               c = l.getc()
+               c = l.getr()
                 if c >= '0' && c <= '9' {
                         x = x*16 + int64(c) - '0'
                         continue
@@ -2060,7 +2036,7 @@ hex:
                 }
  
                 Yyerror("non-hex character in escape sequence: %c", c)
-               l.ungetc(c)
+               l.ungetr(c)
                 break
         }
  
@@ -2377,7 +2353,7 @@ func lexfini() {
         nodfp.Sym = Lookup(".fp")
  }
  
-var lexn = map[int]string{
+var lexn = map[rune]string{
         LANDAND:    "ANDAND",
         LANDNOT:    "ANDNOT",
         LASOP:      "ASOP",
@@ -2424,7 +2400,7 @@ var lexn = map[int]string{
         LVAR:       "VAR",
  }
  
-func lexname(lex int) string {
+func lexname(lex rune) string {
         if s, ok := lexn[lex]; ok {
                 return s
         }
diff --git a/test/fixedbugs/issue11610.go b/test/fixedbugs/issue11610.go

index 56f245dee513a430d28808b27115b0fdf712eef4..f32d48048254df25d272daee452a45622f6aab94 100644 (file)
--- a/test/fixedbugs/issue11610.go
+++ b/test/fixedbugs/issue11610.go
@@ -9,7 +9,7 @@
  
  package a
  import""  // ERROR "import path is empty"
-var?      // ERROR "unexpected \?"
+var?      // ERROR "illegal character U\+003F '\?'"
  
  var x int // ERROR "unexpected var" "cannot declare name"
author	Robert Griesemer <gri@golang.org>
	Tue, 23 Feb 2016 07:07:30 +0000 (23:07 -0800)
committer	Robert Griesemer <gri@golang.org>
	Wed, 24 Feb 2016 04:36:25 +0000 (04:36 +0000)
src/cmd/compile/internal/gc/lex.go		patch \| blob \| history
test/fixedbugs/issue11610.go		patch \| blob \| history