bufio: optimize bufio.Reader.ReadString to avoid an allocation and copy

author Alex Gaynor <alex.gaynor@gmail.com>

Mon, 27 Apr 2020 23:10:23 +0000 (23:10 +0000)

committer Ian Lance Taylor <iant@golang.org>

Tue, 28 Apr 2020 00:53:32 +0000 (00:53 +0000)
author Alex Gaynor <alex.gaynor@gmail.com>
Mon, 27 Apr 2020 23:10:23 +0000 (23:10 +0000)
committer Ian Lance Taylor <iant@golang.org>
Tue, 28 Apr 2020 00:53:32 +0000 (00:53 +0000)
diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go

index f0810be3a44e9743542971386338a6176b8aa269..7cbd5424ea6c27ea44532423256edd0b434bb76a 100644 (file)
--- a/src/bufio/bufio.go
+++ b/src/bufio/bufio.go
@@ -11,6 +11,7 @@ import (
         "bytes"
         "errors"
         "io"
+       "strings"
         "unicode/utf8"
  )
  
@@ -419,20 +420,16 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) {
         return
  }
  
-// ReadBytes reads until the first occurrence of delim in the input,
-// returning a slice containing the data up to and including the delimiter.
-// If ReadBytes encounters an error before finding a delimiter,
-// it returns the data read before the error and the error itself (often io.EOF).
-// ReadBytes returns err != nil if and only if the returned data does not end in
-// delim.
-// For simple uses, a Scanner may be more convenient.
-func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
-       // Use ReadSlice to look for array,
-       // accumulating full buffers.
+// collectFragments reads until the first occurrence of delim in the input. It
+// returns (slice of full buffers, remaining bytes before delim, total number
+// of bytes in the combined first two elements, error).
+// The complete result is equal to
+// `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a
+// length of `totalLen`. The result is strucured in this way to allow callers
+// to minimize allocations and copies.
+func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) {
         var frag []byte
-       var full [][]byte
-       var err error
-       n := 0
+       // Use ReadSlice to look for delim, accumulating full buffers.
         for {
                 var e error
                 frag, e = b.ReadSlice(delim)
@@ -447,12 +444,23 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
                 // Make a copy of the buffer.
                 buf := make([]byte, len(frag))
                 copy(buf, frag)
-               full = append(full, buf)
-               n += len(buf)
+               fullBuffers = append(fullBuffers, buf)
+               totalLen += len(buf)
         }
  
-       n += len(frag)
+       totalLen += len(frag)
+       return fullBuffers, frag, totalLen, err
+}
  
+// ReadBytes reads until the first occurrence of delim in the input,
+// returning a slice containing the data up to and including the delimiter.
+// If ReadBytes encounters an error before finding a delimiter,
+// it returns the data read before the error and the error itself (often io.EOF).
+// ReadBytes returns err != nil if and only if the returned data does not end in
+// delim.
+// For simple uses, a Scanner may be more convenient.
+func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
+       full, frag, n, err := b.collectFragments(delim)
         // Allocate new buffer to hold the full pieces and the fragment.
         buf := make([]byte, n)
         n = 0
@@ -472,8 +480,16 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
  // delim.
  // For simple uses, a Scanner may be more convenient.
  func (b *Reader) ReadString(delim byte) (string, error) {
-       bytes, err := b.ReadBytes(delim)
-       return string(bytes), err
+       full, frag, n, err := b.collectFragments(delim)
+       // Allocate new buffer to hold the full pieces and the fragment.
+       var buf strings.Builder
+       buf.Grow(n)
+       // Copy full pieces and fragment in.
+       for _, fb := range full {
+               buf.Write(fb)
+       }
+       buf.Write(frag)
+       return buf.String(), err
  }
  
  // WriteTo implements io.WriterTo.
diff --git a/src/bufio/bufio_test.go b/src/bufio/bufio_test.go

index 4c4522c66042641167f01e2e82f2c5abce3c9036..cb68f3ba2396a5b2e34d2def9dca11a282f6e280 100644 (file)
--- a/src/bufio/bufio_test.go
+++ b/src/bufio/bufio_test.go
@@ -535,6 +535,23 @@ func TestReadWriteRune(t *testing.T) {
         }
  }
  
+func TestReadStringAllocs(t *testing.T) {
+       r := strings.NewReader("       foo       foo        42        42        42        42        42        42        42        42       4.2       4.2       4.2       4.2\n")
+       buf := NewReader(r)
+       allocs := testing.AllocsPerRun(100, func() {
+               r.Seek(0, io.SeekStart)
+               buf.Reset(r)
+
+               _, err := buf.ReadString('\n')
+               if err != nil {
+                       t.Fatal(err)
+               }
+       })
+       if allocs != 1 {
+               t.Errorf("Unexpected number of allocations, got %f, want 1", allocs)
+       }
+}
+
  func TestWriter(t *testing.T) {
         var data [8192]byte
  
@@ -1644,6 +1661,21 @@ func BenchmarkReaderWriteToOptimal(b *testing.B) {
         }
  }
  
+func BenchmarkReaderReadString(b *testing.B) {
+       r := strings.NewReader("       foo       foo        42        42        42        42        42        42        42        42       4.2       4.2       4.2       4.2\n")
+       buf := NewReader(r)
+       b.ReportAllocs()
+       for i := 0; i < b.N; i++ {
+               r.Seek(0, io.SeekStart)
+               buf.Reset(r)
+
+               _, err := buf.ReadString('\n')
+               if err != nil {
+                       b.Fatal(err)
+               }
+       }
+}
+
  func BenchmarkWriterCopyOptimal(b *testing.B) {
         // Optimal case is where the underlying writer implements io.ReaderFrom
         srcBuf := bytes.NewBuffer(make([]byte, 8192))
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go

index fad165cf603f5186a0b02bfc6328dad17d8b2903..45c92c8eb41f3fbb2a1bf0d117cfe0a1dccc11f2 100644 (file)
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -84,7 +84,7 @@ var pkgDeps = map[string][]string{
         },
  
         // L2 adds Unicode and strings processing.
-       "bufio":   {"L0", "unicode/utf8", "bytes"},
+       "bufio":   {"L0", "unicode/utf8", "bytes", "strings"},
         "bytes":   {"L0", "unicode", "unicode/utf8"},
         "path":    {"L0", "unicode/utf8", "strings"},
         "strings": {"L0", "unicode", "unicode/utf8"},
author	Alex Gaynor <alex.gaynor@gmail.com>
	Mon, 27 Apr 2020 23:10:23 +0000 (23:10 +0000)
committer	Ian Lance Taylor <iant@golang.org>
	Tue, 28 Apr 2020 00:53:32 +0000 (00:53 +0000)
src/bufio/bufio.go		patch \| blob \| history
src/bufio/bufio_test.go		patch \| blob \| history
src/go/build/deps_test.go		patch \| blob \| history