errorexit()
}
+ // Instead of converting EOF into '\n' in getc and counting it as an extra line
+ // for the line history to work (which then has to be corrected elsewhere),
+ // just add a line here.
+ lexlineno++
+
linehistpop()
obj.Bterm(bin)
}
}
type lexer struct {
- // TODO(gri) move other lexer state here and out of global variables
- // (source, current line number, etc.)
+ // source
+ bin *obj.Biobuf
+ peekc int
+ peekc1 int // second peekc for ...
+
nlsemi bool // if set, '\n' and EOF translate to ';'
// current token
l0:
// skip white space
- c := getc()
+ c := l.getc()
for isSpace(c) {
if c == '\n' && nlsemi {
- ungetc(c)
+ l.ungetc(c)
if Debug['x'] != 0 {
fmt.Printf("lex: implicit semi\n")
}
l.tok = ';'
return
}
- c = getc()
+ c = l.getc()
}
// start of token
if c != '0' {
for {
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
if isDigit(c) {
continue
}
}
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
if c == 'x' || c == 'X' {
for {
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
if isDigit(c) {
continue
}
c1 = 1 // not octal
}
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
}
if c == '.' {
switch c {
case EOF:
lineno = prevlineno
- ungetc(EOF)
+ l.ungetc(EOF)
// Treat EOF as "end of line" for the purposes
// of inserting a semicolon.
if nlsemi {
goto talph
case '.':
- c1 = getc()
+ c1 = l.getc()
if isDigit(c1) {
cp = &lexbuf
cp.Reset()
}
if c1 == '.' {
- c1 = getc()
+ c1 = l.getc()
if c1 == '.' {
c = LDDD
goto lx
}
- ungetc(c1)
+ l.ungetc(c1)
c1 = '.'
}
cp.Reset()
for {
- if escchar('"', &escflag, &v) {
+ if l.escchar('"', &escflag, &v) {
break
}
if v < utf8.RuneSelf || escflag != 0 {
cp.Reset()
for {
- c = int(getr())
+ c = int(l.getr())
if c == '\r' {
continue
}
// '.'
case '\'':
- if escchar('\'', &escflag, &v) {
+ if l.escchar('\'', &escflag, &v) {
Yyerror("empty character literal or unescaped ' in character literal")
v = '\''
}
- if !escchar('\'', &escflag, &v) {
+ if !l.escchar('\'', &escflag, &v) {
Yyerror("missing '")
- ungetc(int(v))
+ l.ungetc(int(v))
}
x := new(Mpint)
return
case '/':
- c1 = getc()
+ c1 = l.getc()
if c1 == '*' {
nl := false
for {
- c = int(getr())
+ c = int(l.getr())
if c == '\n' {
nl = true
}
for c == '*' {
- c = int(getr())
+ c = int(l.getr())
if c == '/' {
if nl {
- ungetc('\n')
+ l.ungetc('\n')
}
goto l0
}
}
if c1 == '/' {
- c = getlinepragma()
+ c = l.getlinepragma()
for {
if c == '\n' || c == EOF {
- ungetc(c)
+ l.ungetc(c)
goto l0
}
- c = int(getr())
+ c = int(l.getr())
}
}
}
case ':':
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
c = int(LCOLAS)
goto lx
}
case '*':
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
op = OMUL
goto asop
}
case '%':
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
op = OMOD
goto asop
}
case '+':
- c1 = getc()
+ c1 = l.getc()
if c1 == '+' {
l.nlsemi = true
c = int(LINC)
}
case '-':
- c1 = getc()
+ c1 = l.getc()
if c1 == '-' {
l.nlsemi = true
c = int(LDEC)
}
case '>':
- c1 = getc()
+ c1 = l.getc()
if c1 == '>' {
c = int(LRSH)
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
op = ORSH
goto asop
c = int(LGT)
case '<':
- c1 = getc()
+ c1 = l.getc()
if c1 == '<' {
c = int(LLSH)
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
op = OLSH
goto asop
c = int(LLT)
case '=':
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
c = int(LEQ)
goto lx
}
case '!':
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
c = int(LNE)
goto lx
}
case '&':
- c1 = getc()
+ c1 = l.getc()
if c1 == '&' {
c = int(LANDAND)
goto lx
if c1 == '^' {
c = int(LANDNOT)
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
op = OANDNOT
goto asop
}
case '|':
- c1 = getc()
+ c1 = l.getc()
if c1 == '|' {
c = int(LOROR)
goto lx
}
case '^':
- c1 = getc()
+ c1 = l.getc()
if c1 == '=' {
op = OXOR
goto asop
goto lx
}
- ungetc(c1)
+ l.ungetc(c1)
lx:
if Debug['x'] != 0 {
talph:
for {
if c >= utf8.RuneSelf {
- ungetc(c)
- r := rune(getr())
+ l.ungetc(c)
+ r := rune(l.getr())
// 0xb7 · is used for internal names
if !unicode.IsLetter(r) && !unicode.IsDigit(r) && (importpkg == nil || r != 0xb7) {
} else {
cp.WriteByte(byte(c))
}
- c = getc()
+ c = l.getc()
}
cp = nil
- ungetc(c)
+ l.ungetc(c)
s = LookupBytes(lexbuf.Bytes())
if s.Lexical == LIGNORE {
ncu:
cp = nil
- ungetc(c)
+ l.ungetc(c)
str = lexbuf.String()
l.val.U = new(Mpint)
casedot:
for {
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
if !isDigit(c) {
break
}
Yyerror("malformed floating point constant")
}
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
if c == '+' || c == '-' {
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
}
if !isDigit(c) {
}
for isDigit(c) {
cp.WriteByte(byte(c))
- c = getc()
+ c = l.getc()
}
if c == 'i' {
caseout:
cp = nil
- ungetc(c)
+ l.ungetc(c)
str = lexbuf.String()
l.val.U = newMpflt()
// internString returns a string with the same contents as b. If
// internedStrings already holds a string with those contents, that stored
// string is returned; otherwise a new string is created from b, recorded in
// internedStrings under itself, and returned.
func internString(b []byte) string {
s, ok := internedStrings[string(b)] // string(b) here doesn't allocate
- if ok {
- return s
+ if !ok {
+ s = string(b) // first occurrence: this copy becomes the stored one
+ internedStrings[s] = s
}
- s = string(b)
- internedStrings[s] = s
return s
}
// //line parse.y:15
// as a discontinuity in sequential line numbers.
// the next line of input comes from parse.y:15
-func getlinepragma() int {
+func (l *lexer) getlinepragma() int {
var cmd, verb, name string
- c := int(getr())
+ c := int(l.getr())
if c == 'g' {
cp := &lexbuf
cp.Reset()
cp.WriteByte('g') // already read
for {
- c = int(getr())
+ c = int(l.getr())
if c == EOF || c >= utf8.RuneSelf {
return c
}
return c
}
for i := 1; i < 5; i++ {
- c = int(getr())
+ c = int(l.getr())
if c != int("line "[i]) {
return c
}
cp.Reset()
linep := 0
for {
- c = int(getr())
+ c = int(l.getr())
if c == EOF {
return c
}
}
}
-func getc() int {
- c := curio.peekc
+func (l *lexer) getc() int {
+ c := l.peekc
if c != 0 {
- curio.peekc = curio.peekc1
- curio.peekc1 = 0
+ l.peekc = l.peekc1
+ l.peekc1 = 0
goto check
}
loop:
- c = obj.Bgetc(curio.bin)
+ c = obj.Bgetc(l.bin)
// recognize BOM (U+FEFF): UTF-8 encoding is 0xef 0xbb 0xbf
if c == 0xef {
- buf, err := curio.bin.Peek(2)
+ buf, err := l.bin.Peek(2)
if err != nil {
yyerrorl(int(lexlineno), "illegal UTF-8 sequence ef % x followed by read error (%v)", string(buf), err)
errorexit()
yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
// consume BOM bytes
- obj.Bgetc(curio.bin)
- obj.Bgetc(curio.bin)
+ obj.Bgetc(l.bin)
+ obj.Bgetc(l.bin)
goto loop
}
}
check:
- switch c {
- case 0:
+ if c == 0 {
Yyerror("illegal NUL byte")
-
- // insert \n at EOF
- case EOF:
- if curio.eofnl || curio.last == '\n' {
- return EOF
- }
- curio.eofnl = true
- c = '\n'
- fallthrough
-
- case '\n':
- if importpkg == nil {
- lexlineno++
- }
+ return 0
+ }
+ if c == '\n' && importpkg == nil {
+ lexlineno++
}
-
- curio.last = c
return c
}
// ungetc pushes c back onto the two-slot lookahead: the character currently
// in peekc (if any) moves to peekc1 and c takes its place, so the next getc
// returns c. When c is '\n' and importpkg is nil, lexlineno is decremented,
// because getc increments it again when the pushed-back newline is re-read.
-func ungetc(c int) {
- curio.peekc1 = curio.peekc
- curio.peekc = c
+func (l *lexer) ungetc(c int) {
+ l.peekc1 = l.peekc
+ l.peekc = c
if c == '\n' && importpkg == nil {
lexlineno--
}
}
-func getr() int32 {
+func (l *lexer) getr() int32 {
var buf [utf8.UTFMax]byte
for i := 0; ; i++ {
- c := getc()
+ c := l.getc()
if i == 0 && c < utf8.RuneSelf {
return int32(c)
}
}
}
-func escchar(e int, escflg *int, val *int64) bool {
+func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
*escflg = 0
- c := int(getr())
+ c := int(l.getr())
switch c {
case EOF:
Yyerror("eof in string")
}
u := 0
- c = int(getr())
+ c = int(l.getr())
var i int
switch c {
case 'x':
'6',
'7':
*escflg = 1 // it's a byte
- l := int64(c) - '0'
+ x := int64(c) - '0'
for i := 2; i > 0; i-- {
- c = getc()
+ c = l.getc()
if c >= '0' && c <= '7' {
- l = l*8 + int64(c) - '0'
+ x = x*8 + int64(c) - '0'
continue
}
Yyerror("non-octal character in escape sequence: %c", c)
- ungetc(c)
+ l.ungetc(c)
}
- if l > 255 {
- Yyerror("octal escape value > 255: %d", l)
+ if x > 255 {
+ Yyerror("octal escape value > 255: %d", x)
}
- *val = l
+ *val = x
return false
case 'a':
return false
hex:
- l := int64(0)
+ x := int64(0)
for ; i > 0; i-- {
- c = getc()
+ c = l.getc()
if c >= '0' && c <= '9' {
- l = l*16 + int64(c) - '0'
+ x = x*16 + int64(c) - '0'
continue
}
if c >= 'a' && c <= 'f' {
- l = l*16 + int64(c) - 'a' + 10
+ x = x*16 + int64(c) - 'a' + 10
continue
}
if c >= 'A' && c <= 'F' {
- l = l*16 + int64(c) - 'A' + 10
+ x = x*16 + int64(c) - 'A' + 10
continue
}
Yyerror("non-hex character in escape sequence: %c", c)
- ungetc(c)
+ l.ungetc(c)
break
}
- if u != 0 && (l > utf8.MaxRune || (0xd800 <= l && l < 0xe000)) {
- Yyerror("invalid Unicode code point in escape sequence: %#x", l)
- l = utf8.RuneError
+ if u != 0 && (x > utf8.MaxRune || (0xd800 <= x && x < 0xe000)) {
+ Yyerror("invalid Unicode code point in escape sequence: %#x", x)
+ x = utf8.RuneError
}
- *val = l
+ *val = x
return false
}
const trace = false // if set, parse tracing can be enabled with -x
+// parse_import parses the export data of an imported package, reading it from bin.
func parse_import(bin *obj.Biobuf, indent []byte) {
- pushedio := curio
- curio = Io{bin: bin}
-
- importparser := parser{indent: indent} // preserve indentation
- importparser.next()
- importparser.import_package()
-
- curio = pushedio
+ newparser(bin, indent).import_package() // indent becomes the new parser's initial tracing indentation
}
-// parse_file sets up a new parser and parses a single Go source file.
+// parse_file parses a single Go source file, reading it from bin.
func parse_file(bin *obj.Biobuf) {
- curio = Io{bin: bin}
-
- fileparser := parser{}
- fileparser.next()
- fileparser.file()
+ newparser(bin, nil).file() // nil: start with no tracing indentation
}
type parser struct {
indent []byte // tracing support
}
+// newparser returns a new parser ready to parse from src.
+// indent is the initial indentation for tracing output.
+func newparser(src *obj.Biobuf, indent []byte) *parser {
+ var p parser
+ p.bin = src // NOTE(review): bin is declared on lexer; presumably promoted via an embedded lexer in parser - confirm
+ p.indent = indent
+ p.next() // advance once up front; presumably loads the first token into p.tok, which methods such as got inspect
+ return &p
+}
+
func (p *parser) got(tok int32) bool {
if p.tok == tok {
p.next()