encoding/binary: support for varint encoding

author Robert Griesemer <gri@golang.org>

Thu, 29 Sep 2011 05:36:52 +0000 (22:36 -0700)

committer Robert Griesemer <gri@golang.org>

Thu, 29 Sep 2011 05:36:52 +0000 (22:36 -0700)
author Robert Griesemer <gri@golang.org>
Thu, 29 Sep 2011 05:36:52 +0000 (22:36 -0700)
committer Robert Griesemer <gri@golang.org>
Thu, 29 Sep 2011 05:36:52 +0000 (22:36 -0700)
diff --git a/src/pkg/encoding/binary/Makefile b/src/pkg/encoding/binary/Makefile

index dc46abe909dc4307b662d1da9b720eaebc6f811c..3246f5a387a559abbd72efbbd497efb63d139698 100644 (file)
--- a/src/pkg/encoding/binary/Makefile
+++ b/src/pkg/encoding/binary/Makefile
@@ -7,5 +7,6 @@ include ../../../Make.inc
  TARG=encoding/binary
  GOFILES=\
         binary.go\
+        varint.go\
  
  include ../../../Make.pkg
diff --git a/src/pkg/encoding/binary/binary.go b/src/pkg/encoding/binary/binary.go

index 8e55cb23b7c4707bf2a6ce6fd5fec4e02f76d788..c58f73694b8a63937651b4dc030130fdd89336cf 100644 (file)
--- a/src/pkg/encoding/binary/binary.go
+++ b/src/pkg/encoding/binary/binary.go
@@ -17,9 +17,9 @@ import (
  // A ByteOrder specifies how to convert byte sequences into
  // 16-, 32-, or 64-bit unsigned integers.
  type ByteOrder interface {
-       Uint16(b []byte) uint16
-       Uint32(b []byte) uint32
-       Uint64(b []byte) uint64
+       Uint16([]byte) uint16
+       Uint32([]byte) uint32
+       Uint64([]byte) uint64
         PutUint16([]byte, uint16)
         PutUint32([]byte, uint32)
         PutUint64([]byte, uint64)
diff --git a/src/pkg/encoding/binary/varint.go b/src/pkg/encoding/binary/varint.go

new file mode 100644 (file)

index 0000000..1439dd3
--- /dev/null
+++ b/src/pkg/encoding/binary/varint.go
@@ -0,0 +1,163 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binary
+
+// This file implements "varint" encoding of 64-bit integers.
+// The encoding is:
+// - unsigned integers are serialized 7 bits at a time, starting with the
+//   least significant bits
+// - the most significant bit (msb) in each output byte indicates if there
+//   is a continuation byte (msb = 1)
+// - signed integers are mapped to unsigned integers using "zig-zag"
+//   encoding: Positive values x are written as 2*x + 0, negative values
+//   are written as 2*(^x) + 1; that is, negative numbers are complemented
+//   and whether to complement is encoded in bit 0.
+//
+// Design note:
+// At most 10 bytes are needed for 64-bit values. The encoding could
+// be more dense: a full 64-bit value needs an extra byte just to hold bit 63.
+// Instead, the msb of the previous byte could be used to hold bit 63 since we
+// know there can't be more than 64 bits. This is a trivial improvement and
+// would reduce the maximum encoding length to 9 bytes. However, it breaks the
+// invariant that the msb is always the "continuation bit" and thus makes the
+// format incompatible with a varint encoding for larger numbers (say 128-bit).
+
+import (
+       "io"
+       "os"
+)
+
+// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer.
+const (
+       MaxVarintLen16 = 3
+       MaxVarintLen32 = 5
+       MaxVarintLen64 = 10
+)
+
+// PutUvarint encodes a uint64 into buf and returns the number of bytes written.
+// If the buffer is too small, the result is the negated number of bytes required
+// (that is, -PutUvarint(nil, x) is the number of bytes required to encode x).
+func PutUvarint(buf []byte, x uint64) int {
+       var i int
+       for i = range buf {
+               if x < 0x80 {
+                       buf[i] = byte(x)
+                       return i + 1
+               }
+               buf[i] = byte(x) | 0x80
+               x >>= 7
+       }
+       // buffer too small; compute number of bytes required
+       for x >= 0x4000 {
+               x >>= 2 * 7
+               i += 2
+       }
+       if x >= 0x80 {
+               i++
+       }
+       return -(i + 1)
+}
+
+// Uvarint decodes a uint64 from buf and returns that value and the
+// number of bytes read (> 0). If an error occurred, the value is 0
+// and the number of bytes n is <= 0 meaning:
+//
+//     n == 0: buf too small
+//     n  < 0: value larger than 64 bits (overflow)
+//              and -n is the number of bytes read
+//
+func Uvarint(buf []byte) (uint64, int) {
+       var x uint64
+       var s uint
+       for i, b := range buf {
+               if b < 0x80 {
+                       if i > 9 || i == 9 && b > 1 {
+                               return 0, -(i + 1) // overflow
+                       }
+                       return x | uint64(b)<<s, i + 1
+               }
+               x |= uint64(b&0x7f) << s
+               s += 7
+       }
+       return 0, 0
+}
+
+// PutVarint encodes an int64 into buf and returns the number of bytes written.
+// If the buffer is too small, the result is the negated number of bytes required
+// (that is, -PutVarint(nil, x) is the number of bytes required to encode x).
+func PutVarint(buf []byte, x int64) int {
+       ux := uint64(x) << 1
+       if x < 0 {
+               ux = ^ux
+       }
+       return PutUvarint(buf, ux)
+}
+
+// Varint decodes an int64 from buf and returns that value and the
+// number of bytes read (> 0). If an error occurred, the value is 0
+// and the number of bytes n is <= 0 with the following meaning:
+//
+//     n == 0: buf too small
+//     n  < 0: value larger than 64 bits (overflow)
+//              and -n is the number of bytes read
+//
+func Varint(buf []byte) (int64, int) {
+       ux, n := Uvarint(buf) // ok to continue in presence of error
+       x := int64(ux >> 1)
+       if ux&1 != 0 {
+               x = ^x
+       }
+       return x, n
+}
+
+// WriteUvarint encodes x and writes the result to w.
+func WriteUvarint(w io.Writer, x uint64) os.Error {
+       var buf [MaxVarintLen64]byte
+       n := PutUvarint(buf[:], x) // won't fail
+       _, err := w.Write(buf[0:n])
+       return err
+}
+
+var overflow = os.NewError("binary: varint overflows a 64-bit integer")
+
+// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64.
+func ReadUvarint(r io.ByteReader) (uint64, os.Error) {
+       var x uint64
+       var s uint
+       for i := 0; ; i++ {
+               b, err := r.ReadByte()
+               if err != nil {
+                       return x, err
+               }
+               if b < 0x80 {
+                       if i > 9 || i == 9 && b > 1 {
+                               return x, overflow
+                       }
+                       return x | uint64(b)<<s, nil
+               }
+               x |= uint64(b&0x7f) << s
+               s += 7
+       }
+       panic("unreachable")
+}
+
+// WriteVarint encodes x and writes the result to w.
+func WriteVarint(w io.Writer, x int64) os.Error {
+       ux := uint64(x) << 1
+       if x < 0 {
+               ux = ^ux
+       }
+       return WriteUvarint(w, ux)
+}
+
+// ReadVarint reads an encoded unsigned integer from r and returns it as a uint64.
+func ReadVarint(r io.ByteReader) (int64, os.Error) {
+       ux, err := ReadUvarint(r) // ok to continue in presence of error
+       x := int64(ux >> 1)
+       if ux&1 != 0 {
+               x = ^x
+       }
+       return x, err
+}
diff --git a/src/pkg/encoding/binary/varint_test.go b/src/pkg/encoding/binary/varint_test.go

new file mode 100644 (file)

index 0000000..a85acee
--- /dev/null
+++ b/src/pkg/encoding/binary/varint_test.go
@@ -0,0 +1,175 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binary
+
+import (
+       "bytes"
+       "os"
+       "testing"
+)
+
+func testConstant(t *testing.T, w uint, max int) {
+       n := -PutUvarint(nil, 1<<w-1)
+       if n != max {
+               t.Errorf("MaxVarintLen%d = %d; want %d", w, max, n)
+       }
+}
+
+func TestConstants(t *testing.T) {
+       testConstant(t, 16, MaxVarintLen16)
+       testConstant(t, 32, MaxVarintLen32)
+       testConstant(t, 64, MaxVarintLen64)
+}
+
+func testVarint(t *testing.T, x int64) {
+       var buf1 [10]byte
+       n := PutVarint(buf1[:], x)
+       y, m := Varint(buf1[0:n])
+       if x != y {
+               t.Errorf("Varint(%d): got %d", x, y)
+       }
+       if n != m {
+               t.Errorf("Varint(%d): got n = %d; want %d", x, m, n)
+       }
+
+       var buf2 bytes.Buffer
+       err := WriteVarint(&buf2, x)
+       if err != nil {
+               t.Errorf("WriteVarint(%d): %s", x, err)
+       }
+       if n != buf2.Len() {
+               t.Errorf("WriteVarint(%d): got n = %d; want %d", x, buf2.Len(), n)
+       }
+       y, err = ReadVarint(&buf2)
+       if err != nil {
+               t.Errorf("ReadVarint(%d): %s", x, err)
+       }
+       if x != y {
+               t.Errorf("ReadVarint(%d): got %d", x, y)
+       }
+}
+
+func testUvarint(t *testing.T, x uint64) {
+       var buf1 [10]byte
+       n := PutUvarint(buf1[:], x)
+       y, m := Uvarint(buf1[0:n])
+       if x != y {
+               t.Errorf("Uvarint(%d): got %d", x, y)
+       }
+       if n != m {
+               t.Errorf("Uvarint(%d): got n = %d; want %d", x, m, n)
+       }
+
+       var buf2 bytes.Buffer
+       err := WriteUvarint(&buf2, x)
+       if err != nil {
+               t.Errorf("WriteUvarint(%d): %s", x, err)
+       }
+       if n != buf2.Len() {
+               t.Errorf("WriteUvarint(%d): got n = %d; want %d", x, buf2.Len(), n)
+       }
+       y, err = ReadUvarint(&buf2)
+       if err != nil {
+               t.Errorf("ReadUvarint(%d): %s", x, err)
+       }
+       if x != y {
+               t.Errorf("ReadUvarint(%d): got %d", x, y)
+       }
+}
+
+var tests = []int64{
+       -1 << 63,
+       -1<<63 + 1,
+       -1,
+       0,
+       1,
+       2,
+       10,
+       20,
+       63,
+       64,
+       65,
+       127,
+       128,
+       129,
+       255,
+       256,
+       257,
+       1<<63 - 1,
+}
+
+func TestVarint(t *testing.T) {
+       for _, x := range tests {
+               testVarint(t, x)
+               testVarint(t, -x)
+       }
+       for x := int64(0x7); x != 0; x <<= 1 {
+               testVarint(t, x)
+               testVarint(t, -x)
+       }
+}
+
+func TestUvarint(t *testing.T) {
+       for _, x := range tests {
+               testUvarint(t, uint64(x))
+       }
+       for x := uint64(0x7); x != 0; x <<= 1 {
+               testUvarint(t, x)
+       }
+}
+
+func TestBufferTooSmall(t *testing.T) {
+       for i := 0; i < 10; i++ {
+               buf := make([]byte, i)
+               x := uint64(1) << (uint(i) * 7)
+               n0 := -i
+               if i == 0 {
+                       n0 = -1 // encoding 0 takes one byte
+               }
+               if n := PutUvarint(buf, x); n != n0 {
+                       t.Errorf("PutUvarint([%d]byte, %d): got n = %d; want %d", len(buf), x, n, n0)
+               }
+       }
+
+       buf := []byte{0x80, 0x80, 0x80, 0x80}
+       for i := 0; i <= len(buf); i++ {
+               buf := buf[0:i]
+               x, n := Uvarint(buf)
+               if x != 0 || n != 0 {
+                       t.Errorf("Uvarint(%v): got x = %d, n = %d", buf, x, n)
+               }
+
+               x, err := ReadUvarint(bytes.NewBuffer(buf))
+               if x != 0 || err != os.EOF {
+                       t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err)
+               }
+       }
+}
+
+func testOverflow(t *testing.T, buf []byte, n0 int, err0 os.Error) {
+       x, n := Uvarint(buf)
+       if x != 0 || n != n0 {
+               t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0)
+       }
+
+       x, err := ReadUvarint(bytes.NewBuffer(buf))
+       if x != 0 || err != err0 {
+               t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want 0, %s", buf, x, err, err0)
+       }
+}
+
+func TestOverflow(t *testing.T) {
+       testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, -10, overflow)
+       testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, -13, overflow)
+}
+
+func TestNonCanonicalZero(t *testing.T) {
+       buf := []byte{0x80, 0x80, 0x80, 0}
+       x, n := Uvarint(buf)
+       if x != 0 || n != 4 {
+               t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n)
+
+       }
+}
author	Robert Griesemer <gri@golang.org>
	Thu, 29 Sep 2011 05:36:52 +0000 (22:36 -0700)
committer	Robert Griesemer <gri@golang.org>
	Thu, 29 Sep 2011 05:36:52 +0000 (22:36 -0700)
src/pkg/encoding/binary/Makefile		patch \| blob \| history
src/pkg/encoding/binary/binary.go		patch \| blob \| history
src/pkg/encoding/binary/varint.go	[new file with mode: 0644]	patch \| blob
src/pkg/encoding/binary/varint_test.go	[new file with mode: 0644]	patch \| blob