From: Rob Pike Date: Fri, 5 Mar 2010 19:34:53 +0000 (-0800) Subject: Add ReadRune and WriteRune to bytes.Buffer X-Git-Tag: weekly.2010-03-15~52 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0ac5ef70db410a219eeb46fd9c45205df6a3e2f6;p=gostls13.git Add ReadRune and WriteRune to bytes.Buffer The comments mention bufio's WriteRune, which should come next. R=rsc CC=golang-dev https://golang.org/cl/245041 --- diff --git a/src/pkg/bytes/buffer.go b/src/pkg/bytes/buffer.go index 76126959fe..acd4523ada 100644 --- a/src/pkg/bytes/buffer.go +++ b/src/pkg/bytes/buffer.go @@ -9,6 +9,7 @@ package bytes import ( "io" "os" + "utf8" ) // Copy from string to byte array at offset doff. Assume there's room. @@ -22,10 +23,10 @@ func copyString(dst []byte, doff int, str string) { // A Buffer is a variable-sized buffer of bytes with Read and Write methods. // The zero value for Buffer is an empty buffer ready to use. type Buffer struct { - buf []byte // contents are the bytes buf[off : len(buf)] - off int // read at &buf[off], write at &buf[len(buf)] - oneByte [1]byte // avoid allocation of slice on each WriteByte - bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation. + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + runeBytes [utf8.UTFMax]byte // avoid allocation of slice on each WriteByte or Rune + bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation. } // Bytes returns a slice of the contents of the unread portion of the buffer; @@ -176,11 +177,25 @@ func (b *Buffer) WriteTo(w io.Writer) (n int64, err os.Error) { // The returned error is always nil, but is included // to match bufio.Writer's WriteByte. func (b *Buffer) WriteByte(c byte) os.Error { - b.oneByte[0] = c - b.Write(&b.oneByte) + b.runeBytes[0] = c + b.Write(b.runeBytes[0:1]) return nil } +// WriteRune appends the UTF-8 encoding of Unicode +// code point r to the buffer, returning its length and +// an error, which is always nil but is included +// to match bufio.Writer's WriteRune. +func (b *Buffer) WriteRune(r int) (n int, err os.Error) { + if r < utf8.RuneSelf { + b.WriteByte(byte(r)) + return 1, nil + } + n = utf8.EncodeRune(r, &b.runeBytes) + b.Write(b.runeBytes[0:n]) + return n, nil +} + // Read reads the next len(p) bytes from the buffer or until the buffer // is drained. The return value n is the number of bytes read. If the // buffer has no data to return, err is os.EOF even if len(p) is zero; @@ -217,6 +232,27 @@ func (b *Buffer) ReadByte() (c byte, err os.Error) { return c, nil } +// ReadRune reads and returns the next UTF-8-encoded +// Unicode code point from the buffer. +// If no bytes are available, the error returned is os.EOF. +// If the bytes are an erroneous UTF-8 encoding, it +// consumes one byte and returns U+FFFD, 1. +func (b *Buffer) ReadRune() (r int, size int, err os.Error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + return 0, 0, os.EOF + } + c := b.buf[b.off] + if c < utf8.RuneSelf { + b.off++ + return int(c), 1, nil + } + r, n := utf8.DecodeRune(b.buf[b.off:]) + b.off += n + return r, n, nil +} + // NewBuffer creates and initializes a new Buffer using buf as its initial // contents. It is intended to prepare a Buffer to read existing data. It // can also be used to to size the internal buffer for writing. To do that, diff --git a/src/pkg/bytes/buffer_test.go b/src/pkg/bytes/buffer_test.go index af637cf63c..17acd72184 100644 --- a/src/pkg/bytes/buffer_test.go +++ b/src/pkg/bytes/buffer_test.go @@ -8,6 +8,7 @@ import ( . "bytes" "rand" "testing" + "utf8" ) @@ -262,3 +263,37 @@ func TestWriteTo(t *testing.T) { empty(t, "TestReadFrom (2)", &b, s, make([]byte, len(data))) } } + +func TestRuneIO(t *testing.T) { + const NRune = 1000 + // Built a test array while we write the data + b := make([]byte, utf8.UTFMax*NRune) + var buf Buffer + n := 0 + for r := 0; r < NRune; r++ { + size := utf8.EncodeRune(r, b[n:]) + nbytes, err := buf.WriteRune(r) + if err != nil { + t.Fatalf("WriteRune(0x%x) error: %s", r, err) + } + if nbytes != size { + t.Fatalf("WriteRune(0x%x) expected %d, got %d", size, nbytes) + } + n += size + } + b = b[0:n] + + // Check the resulting bytes + if !Equal(buf.Bytes(), b) { + t.Fatalf("incorrect result from WriteRune: %q not %q", buf.Bytes(), b) + } + + // Read it back with ReadRune + for r := 0; r < NRune; r++ { + size := utf8.EncodeRune(r, b) + nr, nbytes, err := buf.ReadRune() + if nr != r || nbytes != size || err != nil { + t.Fatalf("ReadRune(0x%x) got 0x%x,%d not 0x%x,%d (err=%s)", r, nr, nbytes, r, size, err) + } + } +}