compress/bzip2: make decoding faster

author Alberto Donizetti <alb.donizetti@gmail.com>

Sun, 23 Aug 2015 15:15:07 +0000 (17:15 +0200)

committer Nigel Tao <nigeltao@golang.org>

Fri, 28 Aug 2015 04:20:56 +0000 (04:20 +0000)
author Alberto Donizetti <alb.donizetti@gmail.com>
Sun, 23 Aug 2015 15:15:07 +0000 (17:15 +0200)
committer Nigel Tao <nigeltao@golang.org>
Fri, 28 Aug 2015 04:20:56 +0000 (04:20 +0000)
diff --git a/src/compress/bzip2/bit_reader.go b/src/compress/bzip2/bit_reader.go

index 32d1036ae1b759df5f4094e5641867e1b8555038..ab1d60651436bd5a0b8118b46d5bb7f58c24869a 100644 (file)
--- a/src/compress/bzip2/bit_reader.go
+++ b/src/compress/bzip2/bit_reader.go
@@ -77,14 +77,6 @@ func (br *bitReader) ReadBit() bool {
         return n != 0
  }
  
-func (br *bitReader) TryReadBit() (bit byte, ok bool) {
-       if br.bits > 0 {
-               br.bits--
-               return byte(br.n>>br.bits) & 1, true
-       }
-       return 0, false
-}
-
  func (br *bitReader) Err() error {
         return br.err
  }
diff --git a/src/compress/bzip2/bzip2_test.go b/src/compress/bzip2/bzip2_test.go

index 77c50dfe948b6dc9c52b0a693ba50228431bf2d2..c315e70b9c30683f0848cb2da880a4de7001775b 100644 (file)
--- a/src/compress/bzip2/bzip2_test.go
+++ b/src/compress/bzip2/bzip2_test.go
@@ -173,6 +173,7 @@ const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c59
  const (
         digits = iota
         twain
+       random
  )
  
  var testfiles = []string{
@@ -182,6 +183,8 @@ var testfiles = []string{
         digits: "testdata/e.txt.bz2",
         // Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
         twain: "testdata/Mark.Twain-Tom.Sawyer.txt.bz2",
+       // 16KB of random data from /dev/urandom
+       random: "testdata/random.data.bz2",
  }
  
  func benchmarkDecode(b *testing.B, testfile int) {
@@ -198,6 +201,7 @@ func benchmarkDecode(b *testing.B, testfile int) {
  
  func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
  func BenchmarkDecodeTwain(b *testing.B)  { benchmarkDecode(b, twain) }
+func BenchmarkDecodeRand(b *testing.B)   { benchmarkDecode(b, random) }
  
  func TestBufferOverrun(t *testing.T) {
         // Tests https://golang.org/issue/5747.
diff --git a/src/compress/bzip2/huffman.go b/src/compress/bzip2/huffman.go

index 75a6223d8134c42fb41ba9c95050a2bc2c45271e..9d574b9bdef3f3bfe782b19ad450c8d13c109a23 100644 (file)
--- a/src/compress/bzip2/huffman.go
+++ b/src/compress/bzip2/huffman.go
@@ -38,23 +38,35 @@ func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
  
         for {
                 node := &t.nodes[nodeIndex]
-               bit, ok := br.TryReadBit()
-               if !ok && br.ReadBit() {
-                       bit = 1
-               }
-               // bzip2 encodes left as a true bit.
-               if bit != 0 {
-                       // left
-                       if node.left == invalidNodeValue {
-                               return node.leftValue
-                       }
-                       nodeIndex = node.left
+
+               var bit uint16
+               if br.bits > 0 {
+                       // Get next bit - fast path.
+                       br.bits--
+                       bit = 0 - (uint16(br.n>>br.bits) & 1)
                 } else {
-                       // right
-                       if node.right == invalidNodeValue {
-                               return node.rightValue
-                       }
-                       nodeIndex = node.right
+                       // Get next bit - slow path.
+                       // Use ReadBits to retrieve a single bit
+                       // from the underlying io.ByteReader.
+                       bit = 0 - uint16(br.ReadBits(1))
+               }
+               // now
+               // bit = 0xffff if the next bit was 1
+               // bit = 0x0000 if the next bit was 0
+
+               // 1 means left, 0 means right.
+               //
+               // if bit == 0xffff {
+               //     nodeIndex = node.left
+               // } else {
+               //     nodeIndex = node.right
+               // }
+               nodeIndex = (bit & node.left) | (^bit & node.right)
+
+               if nodeIndex == invalidNodeValue {
+                       // We found a leaf. Use the value of bit to decide
+                       // whether it is a left or a right value.
+                       return (bit & node.leftValue) | (^bit & node.rightValue)
                 }
         }
  }
diff --git a/src/compress/bzip2/testdata/random.data.bz2 b/src/compress/bzip2/testdata/random.data.bz2

new file mode 100644 (file)

index 0000000..1ef2300

Binary files /dev/null and b/src/compress/bzip2/testdata/random.data.bz2 differ
author	Alberto Donizetti <alb.donizetti@gmail.com>
	Sun, 23 Aug 2015 15:15:07 +0000 (17:15 +0200)
committer	Nigel Tao <nigeltao@golang.org>
	Fri, 28 Aug 2015 04:20:56 +0000 (04:20 +0000)
src/compress/bzip2/bit_reader.go		patch \| blob \| history
src/compress/bzip2/bzip2_test.go		patch \| blob \| history
src/compress/bzip2/huffman.go		patch \| blob \| history
src/compress/bzip2/testdata/random.data.bz2	[new file with mode: 0644]	patch \| blob