bzip2: improve performance

author Jeff R. Allen <jra@nella.org>

Mon, 18 Aug 2014 21:41:28 +0000 (14:41 -0700)

committer Adam Langley <agl@golang.org>

Mon, 18 Aug 2014 21:41:28 +0000 (14:41 -0700)
author Jeff R. Allen <jra@nella.org>
Mon, 18 Aug 2014 21:41:28 +0000 (14:41 -0700)
committer Adam Langley <agl@golang.org>
Mon, 18 Aug 2014 21:41:28 +0000 (14:41 -0700)
diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go

index 6b8711b8116e8c23de8e55b459ed9ac7e798faca..fb79d089eb3b0dbf3bdc5ea08620b15f48e8c3d0 100644 (file)
--- a/src/pkg/compress/bzip2/bzip2_test.go
+++ b/src/pkg/compress/bzip2/bzip2_test.go
@@ -216,6 +216,44 @@ func TestOutOfRangeSelector(t *testing.T) {
         ioutil.ReadAll(decompressor)
  }
  
+func TestMTF(t *testing.T) {
+       mtf := newMTFDecoderWithRange(5)
+
+       // 0 1 2 3 4
+       expect := byte(1)
+       x := mtf.Decode(1)
+       if x != expect {
+               t.Errorf("expected %v, got %v", expect, x)
+       }
+
+       // 1 0 2 3 4
+       x = mtf.Decode(0)
+       if x != expect {
+               t.Errorf("expected %v, got %v", expect, x)
+       }
+
+       // 1 0 2 3 4
+       expect = byte(0)
+       x = mtf.Decode(1)
+       if x != expect {
+               t.Errorf("expected %v, got %v", expect, x)
+       }
+
+       // 0 1 2 3 4
+       expect = byte(4)
+       x = mtf.Decode(4)
+       if x != expect {
+               t.Errorf("expected %v, got %v", expect, x)
+       }
+
+       // 4 0 1 2 3
+       expect = byte(0)
+       x = mtf.Decode(1)
+       if x != expect {
+               t.Errorf("expected %v, got %v", expect, x)
+       }
+}
+
  var bufferOverrunBase64 string = `
  QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3///
  ////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD
diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go

index b7e75a700a18815843fe3964868445677c9f372a..526dfb34cc06db6dcc2a61cc35e82b3e3b9bf0af 100644 (file)
--- a/src/pkg/compress/bzip2/move_to_front.go
+++ b/src/pkg/compress/bzip2/move_to_front.go
@@ -11,88 +11,43 @@ package bzip2
  // index into that list. When a symbol is referenced, it's moved to the front
  // of the list. Thus, a repeated symbol ends up being encoded with many zeros,
  // as the symbol will be at the front of the list after the first access.
-type moveToFrontDecoder struct {
-       // Rather than actually keep the list in memory, the symbols are stored
-       // as a circular, double linked list with the symbol indexed by head
-       // at the front of the list.
-       symbols [256]byte
-       next    [256]uint8
-       prev    [256]uint8
-       head    uint8
-       len     int
-}
+type moveToFrontDecoder []byte
  
  // newMTFDecoder creates a move-to-front decoder with an explicit initial list
  // of symbols.
-func newMTFDecoder(symbols []byte) *moveToFrontDecoder {
+func newMTFDecoder(symbols []byte) moveToFrontDecoder {
         if len(symbols) > 256 {
                 panic("too many symbols")
         }
-
-       m := new(moveToFrontDecoder)
-       copy(m.symbols[:], symbols)
-       m.len = len(symbols)
-       m.threadLinkedList()
-       return m
+       return moveToFrontDecoder(symbols)
  }
  
  // newMTFDecoderWithRange creates a move-to-front decoder with an initial
  // symbol list of 0...n-1.
-func newMTFDecoderWithRange(n int) *moveToFrontDecoder {
+func newMTFDecoderWithRange(n int) moveToFrontDecoder {
         if n > 256 {
                 panic("newMTFDecoderWithRange: cannot have > 256 symbols")
         }
  
-       m := new(moveToFrontDecoder)
+       m := make([]byte, n)
         for i := 0; i < n; i++ {
-               m.symbols[byte(i)] = byte(i)
-       }
-       m.len = n
-       m.threadLinkedList()
-       return m
-}
-
-// threadLinkedList creates the initial linked-list pointers.
-func (m *moveToFrontDecoder) threadLinkedList() {
-       if m.len == 0 {
-               return
-       }
-
-       m.prev[0] = uint8(m.len - 1)
-
-       for i := byte(0); int(i) < m.len-1; i++ {
-               m.next[i] = uint8(i + 1)
-               m.prev[i+1] = uint8(i)
+               m[i] = byte(i)
         }
-
-       m.next[m.len-1] = 0
+       return moveToFrontDecoder(m)
  }
  
-func (m *moveToFrontDecoder) Decode(n int) (b byte) {
-       // Most of the time, n will be zero so it's worth dealing with this
-       // simple case.
-       if n == 0 {
-               return m.symbols[m.head]
-       }
-
-       i := m.head
-       for j := 0; j < n; j++ {
-               i = m.next[i]
-       }
-       b = m.symbols[i]
-
-       m.next[m.prev[i]] = m.next[i]
-       m.prev[m.next[i]] = m.prev[i]
-       m.next[i] = m.head
-       m.prev[i] = m.prev[m.head]
-       m.next[m.prev[m.head]] = i
-       m.prev[m.head] = i
-       m.head = i
-
+func (m moveToFrontDecoder) Decode(n int) (b byte) {
+       // Implement move-to-front with a simple copy. This approach
+       // beats more sophisticated approaches in benchmarking, probably
+       // because it has high locality of reference inside of a
+       // single cache line (most move-to-front operations have n < 64).
+       b = m[n]
+       copy(m[1:], m[:n])
+       m[0] = b
         return
  }
  
  // First returns the symbol at the front of the list.
-func (m *moveToFrontDecoder) First() byte {
-       return m.symbols[m.head]
+func (m moveToFrontDecoder) First() byte {
+       return m[0]
  }
author	Jeff R. Allen <jra@nella.org>
	Mon, 18 Aug 2014 21:41:28 +0000 (14:41 -0700)
committer	Adam Langley <agl@golang.org>
	Mon, 18 Aug 2014 21:41:28 +0000 (14:41 -0700)
src/pkg/compress/bzip2/bzip2_test.go		patch \| blob \| history
src/pkg/compress/bzip2/move_to_front.go		patch \| blob \| history