From 6520da6ed5241b0383d50a286b4be0da70dc352b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20Mo=CC=88hrmann?= Date: Wed, 24 Feb 2016 23:22:40 +0100 Subject: [PATCH] fmt: use public io.RuneScanner interface for ScanState reader MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit All io.Reader that are passed to newScanState in all the standard library tests that implement io.RuneReader also implement io.RuneScanner. Do not check on each call ScanState's UnreadRune that the used RuneReader also implements the UnreadRune method by using a private interface. Instead require the used Reader to implement the public RuneScanner interface. The extra implementation logic for UnreadRune is removed from ScanState. Instead the readRune wrapper is extended to implement UnreadRune for the RuneScanner interface. If the Reader passed to newScanstate does not implement RuneScanner the readRune wrapper is used to implement the missing functionality. Note that a RuneReader that does not implement RuneScanner will also be wrapped by runeRead which was not the case before. Performance with the readRune wrapper is better than without before. Add benchmark to compare performance with and without using the readRune wrapper. name old time/op new time/op delta ScanInts-2 704µs ± 0% 615µs ± 1% -12.73% (p=0.000 n=20+20) ScanRecursiveInt-2 82.6ms ± 0% 51.4ms ± 0% -37.71% (p=0.000 n=20+20) ScanRecursiveIntReaderWrapper-2 85.1ms ± 0% 52.4ms ± 0% -38.36% (p=0.000 n=20+20) Change-Id: I8c6e85db9b87a8171caab12f020b6e256b498e81 Reviewed-on: https://go-review.googlesource.com/19895 Run-TryBot: Rob Pike TryBot-Result: Gobot Gobot Reviewed-by: Rob Pike --- src/fmt/scan.go | 109 ++++++++++++++++++++----------------------- src/fmt/scan_test.go | 12 +++++ 2 files changed, 63 insertions(+), 58 deletions(-) diff --git a/src/fmt/scan.go b/src/fmt/scan.go index 4618ed4a82..99cb1af93b 100644 --- a/src/fmt/scan.go +++ b/src/fmt/scan.go @@ -15,14 +15,6 @@ import ( "unicode/utf8" ) -// runeUnreader is the interface to something that can unread runes. -// If the object provided to Scan does not satisfy this interface, -// a local buffer will be used to back up the input, but its contents -// will be lost when Scan returns. -type runeUnreader interface { - UnreadRune() error -} - // ScanState represents the scanner state passed to custom scanners. // Scanners may do rune-at-a-time scanning or ask the ScanState // to discover the next space-delimited token. @@ -163,12 +155,10 @@ const eof = -1 // ss is the internal implementation of ScanState. type ss struct { - rr io.RuneReader // where to read input - buf buffer // token accumulator - peekRune rune // one-rune lookahead - prevRune rune // last rune returned by ReadRune - count int // runes consumed so far. - atEOF bool // already read EOF + rs io.RuneScanner // where to read input + buf buffer // token accumulator + count int // runes consumed so far. + atEOF bool // already read EOF ssave } @@ -191,23 +181,17 @@ func (s *ss) Read(buf []byte) (n int, err error) { } func (s *ss) ReadRune() (r rune, size int, err error) { - if s.peekRune >= 0 { - s.count++ - r = s.peekRune - size = utf8.RuneLen(r) - s.prevRune = r - s.peekRune = -1 - return - } - if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit { + if s.atEOF || s.count >= s.argLimit { err = io.EOF return } - r, size, err = s.rr.ReadRune() + r, size, err = s.rs.ReadRune() if err == nil { s.count++ - s.prevRune = r + if s.nlIsEnd && r == '\n' { + s.atEOF = true + } } else if err == io.EOF { s.atEOF = true } @@ -246,12 +230,8 @@ func (s *ss) mustReadRune() (r rune) { } func (s *ss) UnreadRune() error { - if u, ok := s.rr.(runeUnreader); ok { - u.UnreadRune() - } else { - s.peekRune = s.prevRune - } - s.prevRune = -1 + s.rs.UnreadRune() + s.atEOF = false s.count-- return nil } @@ -326,13 +306,14 @@ func (s *ss) SkipSpace() { } // readRune is a structure to enable reading UTF-8 encoded code points -// from an io.Reader. It is used if the Reader given to the scanner does -// not already implement io.RuneReader. +// from an io.Reader. It is used if the Reader given to the scanner does +// not already implement io.RuneScanner. type readRune struct { - reader io.Reader - buf [utf8.UTFMax]byte // used only inside ReadRune - pending int // number of bytes in pendBuf; only >0 for bad UTF-8 - pendBuf [utf8.UTFMax]byte // bytes left over + reader io.Reader + buf [utf8.UTFMax]byte // used only inside ReadRune + pending int // number of bytes in pendBuf; only >0 for bad UTF-8 + pendBuf [utf8.UTFMax]byte // bytes left over + peekRune rune // if >=0 next rune; when <0 is ^(previous Rune) } // readByte returns the next byte from the input, which may be @@ -344,33 +325,35 @@ func (r *readRune) readByte() (b byte, err error) { r.pending-- return } - n, err := io.ReadFull(r.reader, r.pendBuf[0:1]) - if n != 1 { - return 0, err + _, err = r.reader.Read(r.pendBuf[:1]) + if err != nil { + return } return r.pendBuf[0], err } -// unread saves the bytes for the next read. -func (r *readRune) unread(buf []byte) { - copy(r.pendBuf[r.pending:], buf) - r.pending += len(buf) -} - // ReadRune returns the next UTF-8 encoded code point from the // io.Reader inside r. func (r *readRune) ReadRune() (rr rune, size int, err error) { + if r.peekRune >= 0 { + rr = r.peekRune + r.peekRune = ^r.peekRune + size = utf8.RuneLen(rr) + return + } r.buf[0], err = r.readByte() if err != nil { - return 0, 0, err + return } if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case rr = rune(r.buf[0]) size = 1 // Known to be 1. + // Flip the bits of the rune so it's available to UnreadRune. + r.peekRune = ^rr return } var n int - for n = 1; !utf8.FullRune(r.buf[0:n]); n++ { + for n = 1; !utf8.FullRune(r.buf[:n]); n++ { r.buf[n], err = r.readByte() if err != nil { if err == io.EOF { @@ -380,13 +363,25 @@ func (r *readRune) ReadRune() (rr rune, size int, err error) { return } } - rr, size = utf8.DecodeRune(r.buf[0:n]) - if size < n { // an error - r.unread(r.buf[size:n]) + rr, size = utf8.DecodeRune(r.buf[:n]) + if size < n { // an error, save the bytes for the next read + copy(r.pendBuf[r.pending:], r.buf[size:n]) + r.pending += n - size } + // Flip the bits of the rune so it's available to UnreadRune. + r.peekRune = ^rr return } +func (r *readRune) UnreadRune() error { + if r.peekRune >= 0 { + return errors.New("fmt: scanning called UnreadRune with no rune available") + } + // Reverse bit flip of previously read rune to obtain valid >=0 state. + r.peekRune = ^r.peekRune + return nil +} + var ssFree = sync.Pool{ New: func() interface{} { return new(ss) }, } @@ -394,15 +389,13 @@ var ssFree = sync.Pool{ // newScanState allocates a new ss struct or grab a cached one. func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) { s = ssFree.Get().(*ss) - if rr, ok := r.(io.RuneReader); ok { - s.rr = rr + if rs, ok := r.(io.RuneScanner); ok { + s.rs = rs } else { - s.rr = &readRune{reader: r} + s.rs = &readRune{reader: r, peekRune: -1} } s.nlIsSpace = nlIsSpace s.nlIsEnd = nlIsEnd - s.prevRune = -1 - s.peekRune = -1 s.atEOF = false s.limit = hugeWid s.argLimit = hugeWid @@ -424,7 +417,7 @@ func (s *ss) free(old ssave) { return } s.buf = s.buf[:0] - s.rr = nil + s.rs = nil ssFree.Put(s) } @@ -1142,7 +1135,7 @@ func (s *ss) advance(format string) (i int) { } // doScanf does the real work when scanning with a format string. -// At the moment, it handles only pointers to basic types. +// At the moment, it handles only pointers to basic types. func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) { defer errorHandler(&err) end := len(format) - 1 diff --git a/src/fmt/scan_test.go b/src/fmt/scan_test.go index ce6f08659a..3302a7ca2d 100644 --- a/src/fmt/scan_test.go +++ b/src/fmt/scan_test.go @@ -1001,6 +1001,18 @@ func BenchmarkScanRecursiveInt(b *testing.B) { } } +func BenchmarkScanRecursiveIntReaderWrapper(b *testing.B) { + b.ResetTimer() + ints := makeInts(intCount) + var r RecursiveInt + for i := b.N - 1; i >= 0; i-- { + buf := newReader(string(ints)) + b.StartTimer() + Fscan(buf, &r) + b.StopTimer() + } +} + // Issue 9124. // %x on bytes couldn't handle non-space bytes terminating the scan. func TestHexBytes(t *testing.T) { -- 2.50.0