From: Robert Griesemer Date: Tue, 22 Jan 2019 23:20:50 +0000 (-0800) Subject: cmd/compile/internal/syntax: allow more than one rune "unread" X-Git-Tag: go1.13beta1~1496 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=58365b34bb4d84dda4e2fe73cd9eb7e9267ed86c;p=gostls13.git cmd/compile/internal/syntax: allow more than one rune "unread" Make it possible to "unread" more than one byte before the most recently read rune. Use a better name than ungetr2 and make it slightly more efficient. R=Go1.13 Change-Id: I45d5dfa11e508259a972ca6560d1f78d7a51fe15 Reviewed-on: https://go-review.googlesource.com/c/158957 Reviewed-by: Russ Cox --- diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go index 7db33fb6b9..112afa5eb6 100644 --- a/src/cmd/compile/internal/syntax/scanner.go +++ b/src/cmd/compile/internal/syntax/scanner.go @@ -150,7 +150,7 @@ redo: case '.': c = s.getr() if isDigit(c) { - s.ungetr2() + s.unread(1) s.number('.') break } @@ -160,7 +160,7 @@ redo: s.tok = _DotDotDot break } - s.ungetr2() + s.unread(1) } s.ungetr() s.tok = _Dot diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go index c6168b8594..c671e3c11e 100644 --- a/src/cmd/compile/internal/syntax/source.go +++ b/src/cmd/compile/internal/syntax/source.go @@ -22,6 +22,9 @@ import ( const linebase = 1 const colbase = 1 +// max. number of bytes to unread +const maxunread = 10 + // buf [...read...|...|...unread...|s|...free...] // ^ ^ ^ ^ // | | | | @@ -59,20 +62,21 @@ func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) { s.suf = -1 } -// ungetr ungets the most recently read rune. +// ungetr sets the reading position to a previous reading +// position, usually the one of the most recently read +// rune, but possibly earlier (see unread below). func (s *source) ungetr() { s.r, s.line, s.col = s.r0, s.line0, s.col0 } -// ungetr2 is like ungetr but enables a 2nd ungetr. -// It must not be called if one of the runes seen -// was a newline or had a UTF-8 encoding longer than -// 1 byte. -func (s *source) ungetr2() { - s.ungetr() - // line must not have changed - s.r0-- - s.col0-- +// unread moves the previous reading position to a position +// that is n bytes earlier in the source. The next ungetr +// call will set the reading position to that moved position. +// The "unread" runes must be single byte and not contain any +// newlines; and 0 <= n <= maxunread must hold. +func (s *source) unread(n int) { + s.r0 -= n + s.col0 -= uint(n) } func (s *source) error(msg string) { @@ -142,7 +146,7 @@ redo: // BOM's are only allowed as the first character in a file const BOM = 0xfeff if r == BOM { - if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1) + if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to maxunread) s.error("invalid BOM in the middle of the file") } goto redo @@ -153,20 +157,25 @@ redo: func (s *source) fill() { // Slide unread bytes to beginning but preserve last read char - // (for one ungetr call) plus one extra byte (for a 2nd ungetr - // call, only for ".." character sequence and float literals - // starting with "."). - if s.r0 > 1 { + // (for one ungetr call) plus maxunread extra bytes (for one + // unread call). + if s.r0 > maxunread { + n := s.r0 - maxunread // number of bytes to slide down // save literal prefix, if any - // (We see at most one ungetr call while reading - // a literal, so make sure s.r0 remains in buf.) + // (make sure we keep maxunread bytes and the last + // read char in the buffer) if s.suf >= 0 { - s.lit = append(s.lit, s.buf[s.suf:s.r0]...) - s.suf = 1 // == s.r0 after slide below + // we have a literal + if s.suf < n { + // save literal prefix + s.lit = append(s.lit, s.buf[s.suf:n]...) + s.suf = 0 + } else { + s.suf -= n + } } - n := s.r0 - 1 copy(s.buf[:], s.buf[n:s.w]) - s.r0 = 1 // eqv: s.r0 -= n + s.r0 = maxunread // eqv: s.r0 -= n s.r -= n s.w -= n }