"unicode/utf8"
)
-// runeUnreader is the interface to something that can unread runes.
-// If the object provided to Scan does not satisfy this interface,
-// a local buffer will be used to back up the input, but its contents
-// will be lost when Scan returns.
-type runeUnreader interface {
- UnreadRune() error
-}
-
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
// ss is the internal implementation of ScanState.
type ss struct {
- rr io.RuneReader // where to read input
- buf buffer // token accumulator
- peekRune rune // one-rune lookahead
- prevRune rune // last rune returned by ReadRune
- count int // runes consumed so far.
- atEOF bool // already read EOF
+ rs io.RuneScanner // where to read input
+ buf buffer // token accumulator
+ count int // runes consumed so far.
+ atEOF bool // already read EOF
ssave
}
}
func (s *ss) ReadRune() (r rune, size int, err error) {
- if s.peekRune >= 0 {
- s.count++
- r = s.peekRune
- size = utf8.RuneLen(r)
- s.prevRune = r
- s.peekRune = -1
- return
- }
- if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
+ if s.atEOF || s.count >= s.argLimit {
err = io.EOF
return
}
- r, size, err = s.rr.ReadRune()
+ r, size, err = s.rs.ReadRune()
if err == nil {
s.count++
- s.prevRune = r
+ if s.nlIsEnd && r == '\n' {
+ s.atEOF = true
+ }
} else if err == io.EOF {
s.atEOF = true
}
}
func (s *ss) UnreadRune() error {
- if u, ok := s.rr.(runeUnreader); ok {
- u.UnreadRune()
- } else {
- s.peekRune = s.prevRune
- }
- s.prevRune = -1
+ s.rs.UnreadRune()
+ s.atEOF = false
s.count--
return nil
}
}
// readRune is a structure to enable reading UTF-8 encoded code points
-// from an io.Reader. It is used if the Reader given to the scanner does
-// not already implement io.RuneReader.
+// from an io.Reader. It is used if the Reader given to the scanner does
+// not already implement io.RuneScanner.
type readRune struct {
- reader io.Reader
- buf [utf8.UTFMax]byte // used only inside ReadRune
- pending int // number of bytes in pendBuf; only >0 for bad UTF-8
- pendBuf [utf8.UTFMax]byte // bytes left over
+ reader io.Reader
+ buf [utf8.UTFMax]byte // used only inside ReadRune
+ pending int // number of bytes in pendBuf; only >0 for bad UTF-8
+ pendBuf [utf8.UTFMax]byte // bytes left over
+ peekRune rune // if >=0 next rune; when <0 is ^(previous Rune)
}
// readByte returns the next byte from the input, which may be
r.pending--
return
}
- n, err := io.ReadFull(r.reader, r.pendBuf[0:1])
- if n != 1 {
- return 0, err
+ _, err = r.reader.Read(r.pendBuf[:1])
+ if err != nil {
+ return
}
return r.pendBuf[0], err
}
-// unread saves the bytes for the next read.
-func (r *readRune) unread(buf []byte) {
- copy(r.pendBuf[r.pending:], buf)
- r.pending += len(buf)
-}
-
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r.
func (r *readRune) ReadRune() (rr rune, size int, err error) {
+ if r.peekRune >= 0 {
+ rr = r.peekRune
+ r.peekRune = ^r.peekRune
+ size = utf8.RuneLen(rr)
+ return
+ }
r.buf[0], err = r.readByte()
if err != nil {
- return 0, 0, err
+ return
}
if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
rr = rune(r.buf[0])
size = 1 // Known to be 1.
+ // Flip the bits of the rune so it's available to UnreadRune.
+ r.peekRune = ^rr
return
}
var n int
- for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
+ for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
r.buf[n], err = r.readByte()
if err != nil {
if err == io.EOF {
return
}
}
- rr, size = utf8.DecodeRune(r.buf[0:n])
- if size < n { // an error
- r.unread(r.buf[size:n])
+ rr, size = utf8.DecodeRune(r.buf[:n])
+ if size < n { // an error, save the bytes for the next read
+ copy(r.pendBuf[r.pending:], r.buf[size:n])
+ r.pending += n - size
}
+ // Flip the bits of the rune so it's available to UnreadRune.
+ r.peekRune = ^rr
return
}
+func (r *readRune) UnreadRune() error {
+ if r.peekRune >= 0 {
+ return errors.New("fmt: scanning called UnreadRune with no rune available")
+ }
+ // Reverse bit flip of previously read rune to obtain valid >=0 state.
+ r.peekRune = ^r.peekRune
+ return nil
+}
+
var ssFree = sync.Pool{
New: func() interface{} { return new(ss) },
}
// newScanState allocates a new ss struct or grab a cached one.
func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
s = ssFree.Get().(*ss)
- if rr, ok := r.(io.RuneReader); ok {
- s.rr = rr
+ if rs, ok := r.(io.RuneScanner); ok {
+ s.rs = rs
} else {
- s.rr = &readRune{reader: r}
+ s.rs = &readRune{reader: r, peekRune: -1}
}
s.nlIsSpace = nlIsSpace
s.nlIsEnd = nlIsEnd
- s.prevRune = -1
- s.peekRune = -1
s.atEOF = false
s.limit = hugeWid
s.argLimit = hugeWid
return
}
s.buf = s.buf[:0]
- s.rr = nil
+ s.rs = nil
ssFree.Put(s)
}
}
// doScanf does the real work when scanning with a format string.
-// At the moment, it handles only pointers to basic types.
+// At the moment, it handles only pointers to basic types.
func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
defer errorHandler(&err)
end := len(format) - 1