From 229e97616324b8623eeb2b30bfdfed6637252b94 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Wed, 16 Sep 2009 23:32:17 -0700 Subject: [PATCH] first cut at a string buffer. can be made more efficient but this is reasonable. R=rsc DELTA=363 (363 added, 0 deleted, 0 changed) OCL=34720 CL=34720 --- src/pkg/strings/Makefile | 1 + src/pkg/strings/buffer.go | 179 ++++++++++++++++++++++++++++++++ src/pkg/strings/buffer_test.go | 183 +++++++++++++++++++++++++++++++++ 3 files changed, 363 insertions(+) create mode 100644 src/pkg/strings/buffer.go create mode 100644 src/pkg/strings/buffer_test.go diff --git a/src/pkg/strings/Makefile b/src/pkg/strings/Makefile index dcfa6066cd..96be1f4913 100644 --- a/src/pkg/strings/Makefile +++ b/src/pkg/strings/Makefile @@ -6,6 +6,7 @@ include $(GOROOT)/src/Make.$(GOARCH) TARG=strings GOFILES=\ + buffer.go\ strings.go\ include $(GOROOT)/src/Make.pkg diff --git a/src/pkg/strings/buffer.go b/src/pkg/strings/buffer.go new file mode 100644 index 0000000000..c290b9277e --- /dev/null +++ b/src/pkg/strings/buffer.go @@ -0,0 +1,179 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import "os" + +// Efficient construction of large strings. +// Implements io.Reader and io.Writer. + +// A Buffer is a variable-sized buffer of strings +// with Read and Write methods. Appends (writes) are efficient. +// The zero value for Buffer is an empty buffer ready to use. +type Buffer struct { + str []string; + len int; + byteBuf [1]byte; +} + +// Copy from string to byte array at offset doff. Assume there's room. +func copy(dst []byte, doff int, src string) { + for soff := 0; soff < len(src); soff++ { + dst[doff] = src[soff]; + doff++; + } +} + +// Bytes returns the contents of the unread portion of the buffer +// as a byte array. +func (b *Buffer) Bytes() []byte { + n := b.len; + bytes := make([]byte, n); + nbytes := 0; + for _, s := range b.str { + copy(bytes, nbytes, s); + nbytes += len(s); + } + return bytes; +} + +// String returns the contents of the unread portion of the buffer +// as a string. +func (b *Buffer) String() string { + if len(b.str) == 1 { // important special case + return b.str[0] + } + return string(b.Bytes()) +} + +// Len returns the number of bytes in the unread portion of the buffer; +// b.Len() == len(b.Bytes()) == len(b.String()). +func (b *Buffer) Len() (n int) { + return b.len +} + +// Truncate discards all but the first n unread bytes from the buffer. +func (b *Buffer) Truncate(n int) { + b.len = 0; // recompute during scan. + for i, s := range b.str { + if n <= 0 { + b.str = b.str[0:i]; + break; + } + if n < len(s) { + b.str[i] = s[0:n]; + b.len += n; + n = 0; + } else { + b.len += len(s); + n -= len(s); + } + } +} + +// Reset resets the buffer so it has no content. +// b.Reset() is the same as b.Truncate(0). +func (b *Buffer) Reset() { + b.str = b.str[0:0]; + b.len = 0; +} + +// Can n bytes be appended efficiently to the end of the final string? +func (b *Buffer) canCombine(n int) bool { + return len(b.str) > 0 && n+len(b.str[len(b.str)-1]) <= 64 +} + +// WriteString appends string s to the buffer. The return +// value n is the length of s; err is always nil. +func (b *Buffer) WriteString(s string) (n int, err os.Error) { + n = len(s); + b.len += n; + numStr := len(b.str); + // Special case: If the last string is short and this one is short, + // combine them and avoid growing the list. + if b.canCombine(n) { + b.str[numStr-1] += s; + return + } + if cap(b.str) == numStr { + nstr := make([]string, numStr, 3*(numStr+10)/2); + for i, s := range b.str { + nstr[i] = s; + } + b.str = nstr; + } + b.str = b.str[0:numStr+1]; + b.str[numStr] = s; + return +} + +// Write appends the contents of p to the buffer. The return +// value n is the length of p; err is always nil. +func (b *Buffer) Write(p []byte) (n int, err os.Error) { + return b.WriteString(string(p)) +} + +// WriteByte appends the byte c to the buffer. +// The returned error is always nil, but is included +// to match bufio.Writer's WriteByte. +func (b *Buffer) WriteByte(c byte) os.Error { + s := string(c); + // For WriteByte, canCombine is almost always true so it's worth + // doing here. + if b.canCombine(1) { + b.str[len(b.str)-1] += s; + b.len++; + return nil + } + b.WriteString(s); + return nil; +} + +// Read reads the next len(p) bytes from the buffer or until the buffer +// is drained. The return value n is the number of bytes read. If the +// buffer has no data to return, err is os.EOF even if len(p) is zero; +// otherwise it is nil. +func (b *Buffer) Read(p []byte) (n int, err os.Error) { + if len(b.str) == 0 { + return 0, os.EOF + } + for len(b.str) > 0 { + s := b.str[0]; + m := len(p) - n; + if m >= len(s) { + // consume all of this string. + copy(p, n, s); + n += len(s); + b.str = b.str[1:len(b.str)]; + } else { + // consume some of this string; it's the last piece. + copy(p, n, s[0:m]); + n += m; + b.str[0] = s[m:len(s)]; + break; + } + } + b.len -= n; + return +} + +// ReadByte reads and returns the next byte from the buffer. +// If no byte is available, it returns error os.EOF. +func (b *Buffer) ReadByte() (c byte, err os.Error) { + if _, err := b.Read(&b.byteBuf); err != nil { + return 0, err + } + return b.byteBuf[0], nil +} + +// NewBuffer creates and initializes a new Buffer +// using str as its initial contents. +func NewBuffer(str string) *Buffer { + b := new(Buffer); + b.str = make([]string, 1, 10); // room to grow + b.str[0] = str; + b.len = len(str); + return b; +} diff --git a/src/pkg/strings/buffer_test.go b/src/pkg/strings/buffer_test.go new file mode 100644 index 0000000000..cc1ce936bc --- /dev/null +++ b/src/pkg/strings/buffer_test.go @@ -0,0 +1,183 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings_test + +import ( + . "strings"; + "rand"; + "testing"; +) + + +const N = 10000 // make this bigger for a larger (and slower) test +var data string // test data for write tests + + +func init() { + bytes := make([]byte, N); + for i := 0; i < N; i++ { + bytes[i] = 'a' + byte(i % 26) + } + data = string(bytes); +} + +// Verify that contents of buf match the string s. +func check(t *testing.T, testname string, buf *Buffer, s string) { + bytes := buf.Bytes(); + str := buf.String(); + if buf.Len() != len(bytes) { + t.Errorf("%s: buf.Len() == %d, len(buf.Bytes()) == %d\n", testname, buf.Len(), len(bytes)) + } + + if buf.Len() != len(str) { + t.Errorf("%s: buf.Len() == %d, len(buf.String()) == %d\n", testname, buf.Len(), len(str)) + } + + if buf.Len() != len(s) { + t.Errorf("%s: buf.Len() == %d, len(s) == %d\n", testname, buf.Len(), len(s)) + } + + if string(bytes) != s { + t.Errorf("%s: string(buf.Bytes()) == %q, s == %q\n", testname, string(bytes), s) + } +} + + +// Fill buf through n writes of fus. +// The initial contents of buf corresponds to the string s; +// the result is the final contents of buf returned as a string. +func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string { + check(t, testname + " (fill 1)", buf, s); + for ; n > 0; n-- { + m, err := buf.WriteString(fus); + if m != len(fus) { + t.Errorf(testname + " (fill 2): m == %d, expected %d\n", m, len(fus)); + } + if err != nil { + t.Errorf(testname + " (fill 3): err should always be nil, found err == %s\n", err); + } + s += fus; + check(t, testname + " (fill 4)", buf, s); + } + return s; +} + + +func TestNewBuffer(t *testing.T) { + buf := NewBuffer(data); + check(t, "NewBuffer", buf, data); +} + + +// Empty buf through repeated reads into fub. +// The initial contents of buf corresponds to the string s. +func empty(t *testing.T, testname string, buf *Buffer, s string, fub []byte) { + check(t, testname + " (empty 1)", buf, s); + + for { + n, err := buf.Read(fub); + if n == 0 { + break; + } + if err != nil { + t.Errorf(testname + " (empty 2): err should always be nil, found err == %s\n", err); + } + s = s[n : len(s)]; + check(t, testname + " (empty 3)", buf, s); + } + + check(t, testname + " (empty 4)", buf, ""); +} + + +func TestBasicOperations(t *testing.T) { + var buf Buffer; + + for i := 0; i < 5; i++ { + check(t, "TestBasicOperations (1)", &buf, ""); + + buf.Reset(); + check(t, "TestBasicOperations (2)", &buf, ""); + + buf.Truncate(0); + check(t, "TestBasicOperations (3)", &buf, ""); + + n, err := buf.Write(Bytes(data[0 : 1])); + if n != 1 { + t.Errorf("wrote 1 byte, but n == %d\n", n); + } + if err != nil { + t.Errorf("err should always be nil, but err == %s\n", err); + } + check(t, "TestBasicOperations (4)", &buf, "a"); + + buf.WriteByte(data[1]); + check(t, "TestBasicOperations (5)", &buf, "ab"); + + n, err = buf.Write(Bytes(data[2 : 26])); + if n != 24 { + t.Errorf("wrote 25 bytes, but n == %d\n", n); + } + check(t, "TestBasicOperations (6)", &buf, string(data[0 : 26])); + + buf.Truncate(26); + check(t, "TestBasicOperations (7)", &buf, string(data[0 : 26])); + + buf.Truncate(20); + check(t, "TestBasicOperations (8)", &buf, string(data[0 : 20])); + + empty(t, "TestBasicOperations (9)", &buf, string(data[0 : 20]), make([]byte, 5)); + empty(t, "TestBasicOperations (10)", &buf, "", make([]byte, 100)); + + buf.WriteByte(data[1]); + c, err := buf.ReadByte(); + if err != nil { + t.Errorf("ReadByte unexpected eof\n"); + } + if c != data[1] { + t.Errorf("ReadByte wrong value c=%v\n", c); + } + c, err = buf.ReadByte(); + if err == nil { + t.Errorf("ReadByte unexpected not eof\n"); + } + } +} + + +func TestLargeWrites(t *testing.T) { + var buf Buffer; + for i := 3; i < 30; i += 3 { + s := fill(t, "TestLargeWrites (1)", &buf, "", 5, data); + empty(t, "TestLargeWrites (2)", &buf, s, make([]byte, len(data)/i)); + } + check(t, "TestLargeWrites (3)", &buf, ""); +} + + +func TestLargeReads(t *testing.T) { + var buf Buffer; + for i := 3; i < 30; i += 3 { + s := fill(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]); + empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data))); + } + check(t, "TestLargeReads (3)", &buf, ""); +} + + +func TestMixedReadsAndWrites(t *testing.T) { + var buf Buffer; + s := ""; + for i := 0; i < 50; i++ { + wlen := rand.Intn(len(data)); + s = fill(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]); + + rlen := rand.Intn(len(data)); + fub := make([]byte, rlen); + n, _ := buf.Read(fub); + s = s[n : len(s)]; + } + empty(t, "TestMixedReadsAndWrites (2)", &buf, s, make([]byte, buf.Len())); +} -- 2.48.1