step 2 of the great buffer shift.

author Rob Pike <r@golang.org>

Mon, 21 Sep 2009 19:20:15 +0000 (12:20 -0700)

committer Rob Pike <r@golang.org>

Mon, 21 Sep 2009 19:20:15 +0000 (12:20 -0700)
author Rob Pike <r@golang.org>
Mon, 21 Sep 2009 19:20:15 +0000 (12:20 -0700)
committer Rob Pike <r@golang.org>
Mon, 21 Sep 2009 19:20:15 +0000 (12:20 -0700)
diff --git a/src/pkg/base64/base64_test.go b/src/pkg/base64/base64_test.go

index 8d0c67cf7fdd47f30a8baee81daca35b9b9094e9..6c2b4343fa8dfb7ce0eb3b787f558ab6c9fe7807 100644 (file)
--- a/src/pkg/base64/base64_test.go
+++ b/src/pkg/base64/base64_test.go
@@ -112,7 +112,7 @@ func TestDecode(t *testing.T) {
  
  func TestDecoder(t *testing.T) {
         for _, p := range pairs {
-               decoder := NewDecoder(StdEncoding, strings.NewBuffer(p.encoded));
+               decoder := NewDecoder(StdEncoding, strings.NewBufferString(p.encoded));
                 dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded)));
                 count, err := decoder.Read(dbuf);
                 if err != nil && err != os.EOF {
@@ -129,7 +129,7 @@ func TestDecoder(t *testing.T) {
  
  func TestDecoderBuffering(t *testing.T) {
         for bs := 1; bs <= 12; bs++ {
-               decoder := NewDecoder(StdEncoding, strings.NewBuffer(bigtest.encoded));
+               decoder := NewDecoder(StdEncoding, strings.NewBufferString(bigtest.encoded));
                 buf := make([]byte, len(bigtest.decoded) + 12);
                 var total int;
                 for total = 0; total < len(bigtest.decoded); {
diff --git a/src/pkg/bufio/bufio_test.go b/src/pkg/bufio/bufio_test.go

index 7d590df0b776b4a812b4f7247643d4ab0014ce15..81e66e0eb4eddb61f1c07256b293201f611d6a9d 100644 (file)
--- a/src/pkg/bufio/bufio_test.go
+++ b/src/pkg/bufio/bufio_test.go
@@ -61,12 +61,12 @@ func readBytes(buf *Reader) string {
  
  func TestReaderSimple(t *testing.T) {
         data := "hello world";
-       b := NewReader(strings.NewBuffer(data));
+       b := NewReader(strings.NewBufferString(data));
         if s := readBytes(b); s != "hello world" {
                 t.Errorf("simple hello world test failed: got %q", s);
         }
  
-       b = NewReader(newRot13Reader(strings.NewBuffer(data)));
+       b = NewReader(newRot13Reader(strings.NewBufferString(data)));
         if s := readBytes(b); s != "uryyb jbeyq" {
                 t.Error("rot13 hello world test failed: got %q", s);
         }
@@ -154,7 +154,7 @@ func TestReader(t *testing.T) {
                                         readmaker := readMakers[i];
                                         bufreader := bufreaders[j];
                                         bufsize := bufsizes[k];
-                                       read := readmaker.fn(strings.NewBuffer(text));
+                                       read := readmaker.fn(strings.NewBufferString(text));
                                         buf, _ := NewReaderSize(read, bufsize);
                                         s := bufreader.fn(buf);
                                         if s != text {
@@ -308,7 +308,7 @@ func TestWriteErrors(t *testing.T) {
  
  func TestNewReaderSizeIdempotent(t *testing.T) {
         const BufSize = 1000;
-       b, err := NewReaderSize(strings.NewBuffer("hello world"), BufSize);
+       b, err := NewReaderSize(strings.NewBufferString("hello world"), BufSize);
         if err != nil {
                 t.Error("NewReaderSize create fail", err);
         }
diff --git a/src/pkg/gob/encoder_test.go b/src/pkg/gob/encoder_test.go

index 94d8033f2ffb7b4f34feef4bb01fddf0049b391a..4095e0b121e5f83492b4c5ce44646609ec392202 100644 (file)
--- a/src/pkg/gob/encoder_test.go
+++ b/src/pkg/gob/encoder_test.go
@@ -228,7 +228,7 @@ func TestWrongTypeDecoder(t *testing.T) {
  }
  
  func corruptDataCheck(s string, err os.Error, t *testing.T) {
-       b := strings.NewBuffer(s);
+       b := strings.NewBufferString(s);
         dec := NewDecoder(b);
         dec.Decode(new(ET2));
         if dec.state.err != err {
diff --git a/src/pkg/strings/buffer.go b/src/pkg/strings/buffer.go

index c290b9277ec3481542f56a85521ba9ff4ac2bfeb..3c091a345f16eef3e83cc3e04ffc587ee96ae08b 100644 (file)
--- a/src/pkg/strings/buffer.go
+++ b/src/pkg/strings/buffer.go
@@ -6,20 +6,140 @@ package strings
  
  import "os"
  
-// Efficient construction of large strings.
+// Efficient construction of large strings and byte arrays.
  // Implements io.Reader and io.Writer.
  
-// A Buffer is a variable-sized buffer of strings
-// with Read and Write methods.  Appends (writes) are efficient.
+// A Buffer provides efficient construction of large strings
+// and slices of bytes.  It implements io.Reader and io.Writer.
+// Appends (writes) are efficient.
  // The zero value for Buffer is an empty buffer ready to use.
  type Buffer struct {
-       str     []string;
+       blk     []block;
         len     int;
-       byteBuf [1]byte;
+       oneByte [1]byte;
+}
+
+// There are two kinds of block: a string or a []byte.
+// When the user writes big strings, we add string blocks;
+// when the user writes big byte slices, we add []byte blocks.
+// Small writes are coalesced onto the end of the last block,
+// whatever it is.
+// This strategy is intended to reduce unnecessary allocation.
+type block interface {
+       Len()   int;
+       String()        string;
+       appendBytes(s []byte);
+       appendString(s string);
+       setSlice(m, n int);
+}
+
+// stringBlocks represent strings. We use pointer receivers
+// so append and setSlice can overwrite the receiver.
+type stringBlock string
+
+func (b *stringBlock) Len() int {
+       return len(*b)
+}
+
+func (b *stringBlock) String() string {
+       return string(*b)
+}
+
+func (b *stringBlock) appendBytes(s []byte) {
+       *b += stringBlock(s)
+}
+
+func (b *stringBlock) appendString(s string) {
+       *b = stringBlock(s)
+}
+
+func (b *stringBlock) setSlice(m, n int) {
+       *b = (*b)[m:n]
+}
+
+// byteBlock represent slices of bytes.  We use pointer receivers
+// so append and setSlice can overwrite the receiver.
+type byteBlock []byte
+
+func (b *byteBlock) Len() int {
+       return len(*b)
+}
+
+func (b *byteBlock) String() string {
+       return string(*b)
+}
+
+func (b *byteBlock) resize(max int) {
+       by := []byte(*b);
+       if cap(by) >= max {
+               by = by[0:max];
+       } else {
+               nby := make([]byte, max, 3*(max+10)/2);
+               copyBytes(nby, 0, by);
+               by = nby;
+       }
+       *b = by;
+}
+
+func (b *byteBlock) appendBytes(s []byte) {
+       curLen := b.Len();
+       b.resize(curLen + len(s));
+       copyBytes([]byte(*b), curLen, s);
+}
+
+func (b *byteBlock) appendString(s string) {
+       curLen := b.Len();
+       b.resize(curLen + len(s));
+       copyString([]byte(*b), curLen, s);
+}
+
+func (b *byteBlock) setSlice(m, n int) {
+       *b = (*b)[m:n]
+}
+
+// Because the user may overwrite the contents of byte slices, we need
+// to make a copy.  Allocation strategy: leave some space on the end so
+// small subsequent writes can avoid another allocation.  The input
+// is known to be non-empty.
+func newByteBlock(s []byte) *byteBlock {
+       l := len(s);
+       // Capacity with room to grow.  If small, allocate a mininum.  If medium,
+       // double the size.  If huge, use the size plus epsilon (room for a newline,
+       // at least).
+       c := l;
+       switch {
+       case l < 32:
+               c = 64
+       case l < 1<<18:
+               c *= 2;
+       default:
+               c += 8
+       }
+       b := make([]byte, l, c);
+       copyBytes(b, 0, s);
+       return &b;
+}
+
+// Copy from block to byte array at offset doff.  Assume there's room.
+func copy(dst []byte, doff int, src block) {
+       switch s := src.(type) {
+       case *stringBlock:
+               copyString(dst, doff, string(*s));
+       case *byteBlock:
+               copyBytes(dst, doff, []byte(*s));
+       }
  }
  
  // Copy from string to byte array at offset doff.  Assume there's room.
-func copy(dst []byte, doff int, src string) {
+func copyString(dst []byte, doff int, str string) {
+       for soff := 0; soff < len(str); soff++ {
+               dst[doff] = str[soff];
+               doff++;
+       }
+}
+
+// Copy from bytes to byte array at offset doff.  Assume there's room.
+func copyBytes(dst []byte, doff int, src []byte) {
         for soff := 0; soff < len(src); soff++ {
                 dst[doff] = src[soff];
                 doff++;
@@ -32,9 +152,9 @@ func (b *Buffer) Bytes() []byte {
         n := b.len;
         bytes := make([]byte, n);
         nbytes := 0;
-       for _, s := range b.str {
+       for _, s := range b.blk {
                 copy(bytes, nbytes, s);
-               nbytes += len(s);
+               nbytes += s.Len();
         }
         return bytes;
  }
@@ -42,33 +162,33 @@ func (b *Buffer) Bytes() []byte {
  // String returns the contents of the unread portion of the buffer
  // as a string.
  func (b *Buffer) String() string {
-       if len(b.str) == 1 {    // important special case
-               return b.str[0]
+       if len(b.blk) == 1 {    // important special case
+               return b.blk[0].String()
         }
         return string(b.Bytes())
  }
  
  // Len returns the number of bytes in the unread portion of the buffer;
  // b.Len() == len(b.Bytes()) == len(b.String()).
-func (b *Buffer) Len() (n int) {
+func (b *Buffer) Len() int {
         return b.len
  }
  
  // Truncate discards all but the first n unread bytes from the buffer.
  func (b *Buffer) Truncate(n int) {
         b.len = 0;      // recompute during scan.
-       for i, s := range b.str {
+       for i, s := range b.blk {
                 if n <= 0 {
-                       b.str = b.str[0:i];
+                       b.blk = b.blk[0:i];
                         break;
                 }
-               if n < len(s) {
-                       b.str[i] = s[0:n];
+               if l := s.Len(); n < l {
+                       b.blk[i].setSlice(0, n);
                         b.len += n;
                         n = 0;
                 } else {
-                       b.len += len(s);
-                       n -= len(s);
+                       b.len += l;
+                       n -= l;
                 }
         }
  }
@@ -76,58 +196,84 @@ func (b *Buffer) Truncate(n int) {
  // Reset resets the buffer so it has no content.
  // b.Reset() is the same as b.Truncate(0).
  func (b *Buffer) Reset() {
-       b.str = b.str[0:0];
+       b.blk = b.blk[0:0];
         b.len = 0;
  }
  
  // Can n bytes be appended efficiently to the end of the final string?
  func (b *Buffer) canCombine(n int) bool {
-       return len(b.str) > 0 && n+len(b.str[len(b.str)-1]) <= 64
+       return len(b.blk) > 0 && n+b.blk[len(b.blk)-1].Len() <= 64
  }
  
  // WriteString appends string s to the buffer.  The return
  // value n is the length of s; err is always nil.
  func (b *Buffer) WriteString(s string) (n int, err os.Error) {
         n = len(s);
+       if n == 0 {
+               return
+       }
         b.len += n;
-       numStr := len(b.str);
-       // Special case: If the last string is short and this one is short,
+       numStr := len(b.blk);
+       // Special case: If the last piece is short and this one is short,
         // combine them and avoid growing the list.
         if b.canCombine(n) {
-               b.str[numStr-1] += s;
+               b.blk[numStr-1].appendString(s);
                 return
         }
-       if cap(b.str) == numStr {
-               nstr := make([]string, numStr, 3*(numStr+10)/2);
-               for i, s := range b.str {
+       if cap(b.blk) == numStr {
+               nstr := make([]block, numStr, 3*(numStr+10)/2);
+               for i, s := range b.blk {
                         nstr[i] = s;
                 }
-               b.str = nstr;
+               b.blk = nstr;
         }
-       b.str = b.str[0:numStr+1];
-       b.str[numStr] = s;
+       b.blk = b.blk[0:numStr+1];
+       // The string is immutable; no need to make a copy.
+       b.blk[numStr] = (*stringBlock)(&s);
         return
  }
  
  // Write appends the contents of p to the buffer.  The return
  // value n is the length of p; err is always nil.
  func (b *Buffer) Write(p []byte) (n int, err os.Error) {
-       return b.WriteString(string(p))
+       n = len(p);
+       if n == 0 {
+               return
+       }
+       b.len += n;
+       numStr := len(b.blk);
+       // Special case: If the last piece is short and this one is short,
+       // combine them and avoid growing the list.
+       if b.canCombine(n) {
+               b.blk[numStr-1].appendBytes(p);
+               return
+       }
+       if cap(b.blk) == numStr {
+               nstr := make([]block, numStr, 3*(numStr+10)/2);
+               for i, s := range b.blk {
+                       nstr[i] = s;
+               }
+               b.blk = nstr;
+       }
+       b.blk = b.blk[0:numStr+1];
+       // Need to copy the data - user might overwrite the data.
+       b.blk[numStr] = newByteBlock(p);
+       return
  }
  
  // WriteByte appends the byte c to the buffer.
  // The returned error is always nil, but is included
  // to match bufio.Writer's WriteByte.
  func (b *Buffer) WriteByte(c byte) os.Error {
-       s := string(c);
+       b.oneByte[0] = c;
         // For WriteByte, canCombine is almost always true so it's worth
         // doing here.
         if b.canCombine(1) {
-               b.str[len(b.str)-1] += s;
+               b.blk[len(b.blk)-1].appendBytes(&b.oneByte);
                 b.len++;
                 return nil
         }
-       b.WriteString(s);
+       b.Write(&b.oneByte);
         return nil;
  }
  
@@ -136,22 +282,27 @@ func (b *Buffer) WriteByte(c byte) os.Error {
  // buffer has no data to return, err is os.EOF even if len(p) is zero;
  // otherwise it is nil.
  func (b *Buffer) Read(p []byte) (n int, err os.Error) {
-       if len(b.str) == 0 {
+       if len(b.blk) == 0 {
                 return 0, os.EOF
         }
-       for len(b.str) > 0 {
-               s := b.str[0];
+       for len(b.blk) > 0 {
+               blk := b.blk[0];
                 m := len(p) - n;
-               if m >= len(s) {
+               if l := blk.Len(); m >= l {
                         // consume all of this string.
-                       copy(p, n, s);
-                       n += len(s);
-                       b.str = b.str[1:len(b.str)];
+                       copy(p, n, blk);
+                       n += l;
+                       b.blk = b.blk[1:len(b.blk)];
                 } else {
-                       // consume some of this string; it's the last piece.
-                       copy(p, n, s[0:m]);
+                       // consume some of this block; it's the last piece.
+                       switch b := blk.(type) {
+                       case *stringBlock:
+                               copyString(p, n, string(*b)[0:m]);
+                       case *byteBlock:
+                               copyBytes(p, n, []byte(*b)[0:m]);
+                       }
                         n += m;
-                       b.str[0] = s[m:len(s)];
+                       b.blk[0].setSlice(m, l);
                         break;
                 }
         }
@@ -162,18 +313,28 @@ func (b *Buffer) Read(p []byte) (n int, err os.Error) {
  // ReadByte reads and returns the next byte from the buffer.
  // If no byte is available, it returns error os.EOF.
  func (b *Buffer) ReadByte() (c byte, err os.Error) {
-       if _, err := b.Read(&b.byteBuf); err != nil {
+       if _, err := b.Read(&b.oneByte); err != nil {
                 return 0, err
         }
-       return b.byteBuf[0], nil
+       return b.oneByte[0], nil
  }
  
-// NewBuffer creates and initializes a new Buffer
-// using str as its initial contents.
-func NewBuffer(str string) *Buffer {
+// NewBufferString creates and initializes a new Buffer
+// using a string as its initial contents.
+func NewBufferString(str string) *Buffer {
         b := new(Buffer);
-       b.str = make([]string, 1, 10);  // room to grow
-       b.str[0] = str;
+       b.blk = make([]block, 1, 10);   // room to grow
+       b.blk[0] = (*stringBlock)(&str);
         b.len = len(str);
         return b;
  }
+
+// NewBuffer creates and initializes a new Buffer
+// using a byte slice as its initial contents.
+func NewBuffer(by []byte) *Buffer {
+       b := new(Buffer);
+       b.blk = make([]block, 1, 10);   // room to grow
+       b.blk[0] = (*byteBlock)(&by);
+       b.len = len(by);
+       return b;
+}
diff --git a/src/pkg/strings/buffer_test.go b/src/pkg/strings/buffer_test.go

index cc1ce936bc1bc2dcfd23fb029b5bda64ba5902b1..df0f30c6251bf4b096d6f43aa6133e8915a2f912 100644 (file)
--- a/src/pkg/strings/buffer_test.go
+++ b/src/pkg/strings/buffer_test.go
@@ -13,10 +13,11 @@ import (
  
  const N = 10000  // make this bigger for a larger (and slower) test
  var data string  // test data for write tests
+var bytes []byte       // test data; same as data but as a slice.
  
  
  func init() {
-       bytes := make([]byte, N);
+       bytes = make([]byte, N);
         for i := 0; i < N; i++ {
                 bytes[i] = 'a' + byte(i % 26)
         }
@@ -45,10 +46,10 @@ func check(t *testing.T, testname string, buf *Buffer, s string) {
  }
  
  
-// Fill buf through n writes of fus.
+// Fill buf through n writes of string fus.
  // The initial contents of buf corresponds to the string s;
  // the result is the final contents of buf returned as a string.
-func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string {
+func fillString(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string {
         check(t, testname + " (fill 1)", buf, s);
         for ; n > 0; n-- {
                 m, err := buf.WriteString(fus);
@@ -65,12 +66,38 @@ func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus strin
  }
  
  
+// Fill buf through n writes of byte slice fub.
+// The initial contents of buf corresponds to the string s;
+// the result is the final contents of buf returned as a string.
+func fillBytes(t *testing.T, testname string, buf *Buffer, s string, n int, fub []byte) string {
+       check(t, testname + " (fill 1)", buf, s);
+       for ; n > 0; n-- {
+               m, err := buf.Write(fub);
+               if m != len(fub) {
+                       t.Errorf(testname + " (fill 2): m == %d, expected %d\n", m, len(fub));
+               }
+               if err != nil {
+                       t.Errorf(testname + " (fill 3): err should always be nil, found err == %s\n", err);
+               }
+               s += string(fub);
+               check(t, testname + " (fill 4)", buf, s);
+       }
+       return s;
+}
+
+
  func TestNewBuffer(t *testing.T) {
-       buf := NewBuffer(data);
+       buf := NewBuffer(bytes);
         check(t, "NewBuffer", buf, data);
  }
  
  
+func TestNewBufferString(t *testing.T) {
+       buf := NewBufferString(data);
+       check(t, "NewBufferString", buf, data);
+}
+
+
  // Empty buf through repeated reads into fub.
  // The initial contents of buf corresponds to the string s.
  func empty(t *testing.T, testname string, buf *Buffer, s string, fub []byte) {
@@ -147,23 +174,43 @@ func TestBasicOperations(t *testing.T) {
  }
  
  
-func TestLargeWrites(t *testing.T) {
+func TestLargeStringWrites(t *testing.T) {
+       var buf Buffer;
+       for i := 3; i < 30; i += 3 {
+               s := fillString(t, "TestLargeWrites (1)", &buf, "", 5, data);
+               empty(t, "TestLargeStringWrites (2)", &buf, s, make([]byte, len(data)/i));
+       }
+       check(t, "TestLargeStringWrites (3)", &buf, "");
+}
+
+
+func TestLargeByteWrites(t *testing.T) {
         var buf Buffer;
         for i := 3; i < 30; i += 3 {
-               s := fill(t, "TestLargeWrites (1)", &buf, "", 5, data);
-               empty(t, "TestLargeWrites (2)", &buf, s, make([]byte, len(data)/i));
+               s := fillBytes(t, "TestLargeWrites (1)", &buf, "", 5, bytes);
+               empty(t, "TestLargeByteWrites (2)", &buf, s, make([]byte, len(data)/i));
         }
-       check(t, "TestLargeWrites (3)", &buf, "");
+       check(t, "TestLargeByteWrites (3)", &buf, "");
  }
  
  
-func TestLargeReads(t *testing.T) {
+func TestLargeStringReads(t *testing.T) {
         var buf Buffer;
         for i := 3; i < 30; i += 3 {
-               s := fill(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]);
+               s := fillString(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]);
                 empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data)));
         }
-       check(t, "TestLargeReads (3)", &buf, "");
+       check(t, "TestLargeStringReads (3)", &buf, "");
+}
+
+
+func TestLargeByteReads(t *testing.T) {
+       var buf Buffer;
+       for i := 3; i < 30; i += 3 {
+               s := fillBytes(t, "TestLargeReads (1)", &buf, "", 5, bytes[0 : len(bytes)/i]);
+               empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data)));
+       }
+       check(t, "TestLargeByteReads (3)", &buf, "");
  }
  
  
@@ -172,7 +219,11 @@ func TestMixedReadsAndWrites(t *testing.T) {
         s := "";
         for i := 0; i < 50; i++ {
                 wlen := rand.Intn(len(data));
-               s = fill(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]);
+               if i % 2 == 0 {
+                       s = fillString(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]);
+               } else {
+                       s = fillBytes(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, bytes[0 : wlen]);
+               }
  
                 rlen := rand.Intn(len(data));
                 fub := make([]byte, rlen);
author	Rob Pike <r@golang.org>
	Mon, 21 Sep 2009 19:20:15 +0000 (12:20 -0700)
committer	Rob Pike <r@golang.org>
	Mon, 21 Sep 2009 19:20:15 +0000 (12:20 -0700)
src/pkg/base64/base64_test.go		patch \| blob \| history
src/pkg/bufio/bufio_test.go		patch \| blob \| history
src/pkg/gob/encoder_test.go		patch \| blob \| history
src/pkg/strings/buffer.go		patch \| blob \| history
src/pkg/strings/buffer_test.go		patch \| blob \| history