compress/flate: add pure huffman deflater

author Klaus Post <klauspost@gmail.com>

Mon, 21 Mar 2016 13:51:28 +0000 (14:51 +0100)

committer Nigel Tao <nigeltao@golang.org>

Tue, 29 Mar 2016 09:34:52 +0000 (09:34 +0000)
author Klaus Post <klauspost@gmail.com>
Mon, 21 Mar 2016 13:51:28 +0000 (14:51 +0100)
committer Nigel Tao <nigeltao@golang.org>
Tue, 29 Mar 2016 09:34:52 +0000 (09:34 +0000)
diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go

index 428f2508d3be7cb8b111c8e310fa29cc811a3fec..3bb8b5e02ae66e169e114c5e53ff2ae587cb1d78 100644 (file)
--- a/src/compress/flate/deflate.go
+++ b/src/compress/flate/deflate.go
@@ -15,6 +15,7 @@ const (
         BestSpeed          = 1
         BestCompression    = 9
         DefaultCompression = -1
+       HuffmanOnly        = -2 // Disables match search and only does Huffman entropy reduction.
         logWindowSize      = 15
         windowSize         = 1 << logWindowSize
         windowMask         = windowSize - 1
@@ -462,6 +463,18 @@ func (d *compressor) store() {
         d.windowEnd = 0
  }
  
+// storeHuff Huffman-compresses and writes the currently buffered data
+// when d.window is full or d.sync is set; it does nothing if the buffer is empty.
+// Any error that occurred will be in d.err.
+func (d *compressor) storeHuff() {
+       if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
+               return
+       }
+       d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+       d.err = d.w.err
+       d.windowEnd = 0
+}
+
  func (d *compressor) write(b []byte) (n int, err error) {
         if d.err != nil {
                 return 0, d.err
@@ -500,6 +513,10 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
                 d.window = make([]byte, maxStoreBlockSize)
                 d.fill = (*compressor).fillStore
                 d.step = (*compressor).store
+       case level == HuffmanOnly:
+               d.window = make([]byte, maxStoreBlockSize)
+               d.fill = (*compressor).fillStore
+               d.step = (*compressor).storeHuff
         case level == DefaultCompression:
                 level = 6
                 fallthrough
@@ -509,7 +526,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
                 d.fill = (*compressor).fillDeflate
                 d.step = (*compressor).deflate
         default:
-               return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level)
+               return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
         }
         return nil
  }
@@ -565,10 +582,14 @@ func (d *compressor) close() error {
  // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
  // higher levels typically run slower but compress more. Level 0
  // (NoCompression) does not attempt any compression; it only adds the
-// necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default
-// compression level.
+// necessary DEFLATE framing.
+// Level -1 (DefaultCompression) uses the default compression level.
+// Level -2 (HuffmanOnly) disables match searching and uses Huffman
+// compression only.
+// This gives very fast compression for all types of input,
+// but sacrifices considerable compression efficiency.
  //
-// If level is in the range [-1, 9] then the error returned will be nil.
+// If level is in the range [-2, 9] then the error returned will be nil.
  // Otherwise the error returned will be non-nil.
  func NewWriter(w io.Writer, level int) (*Writer, error) {
         var dw Writer
diff --git a/src/compress/flate/deflate_test.go b/src/compress/flate/deflate_test.go

index 6201213f013f181ceb189a116ef7ce1ccbbca801..42208cba57ab3c3360e961f5c0df30381e032d58 100644 (file)
--- a/src/compress/flate/deflate_test.go
+++ b/src/compress/flate/deflate_test.go
@@ -332,15 +332,17 @@ func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name str
         testSync(t, level, input, name)
  }
  
-func testToFromWithLimit(t *testing.T, input []byte, name string, limit [10]int) {
+func testToFromWithLimit(t *testing.T, input []byte, name string, limit [11]int) {
         for i := 0; i < 10; i++ {
                 testToFromWithLevelAndLimit(t, i, input, name, limit[i])
         }
+       // Test HuffmanOnly (level -2); its size limit is stored in limit[10].
+       testToFromWithLevelAndLimit(t, -2, input, name, limit[10])
  }
  
  func TestDeflateInflate(t *testing.T) {
         for i, h := range deflateInflateTests {
-               testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [10]int{})
+               testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [11]int{})
         }
  }
  
@@ -356,19 +358,19 @@ func TestReverseBits(t *testing.T) {
  type deflateInflateStringTest struct {
         filename string
         label    string
-       limit    [10]int
+       limit    [11]int
  }
  
  var deflateInflateStringTests = []deflateInflateStringTest{
         {
                 "../testdata/e.txt",
                 "2.718281828...",
-               [...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790},
+               [...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683},
         },
         {
                 "../testdata/Mark.Twain-Tom.Sawyer.txt",
                 "Mark.Twain-Tom.Sawyer",
-               [...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295},
+               [...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295, 233460},
         },
  }
  
diff --git a/src/compress/flate/reader_test.go b/src/compress/flate/reader_test.go

index bd8873239df46bb1d3a6bc9b9c8f9ce034437a6b..b336278c07d0158efea45ea828f86b01c93b9f48 100644 (file)
--- a/src/compress/flate/reader_test.go
+++ b/src/compress/flate/reader_test.go
@@ -74,8 +74,12 @@ const (
         speed    = BestSpeed
         default_ = DefaultCompression
         compress = BestCompression
+       huffman  = HuffmanOnly
  )
  
+func BenchmarkDecodeDigitsHuffman1e4(b *testing.B)  { benchmarkDecode(b, digits, huffman, 1e4) }
+func BenchmarkDecodeDigitsHuffman1e5(b *testing.B)  { benchmarkDecode(b, digits, huffman, 1e5) }
+func BenchmarkDecodeDigitsHuffman1e6(b *testing.B)  { benchmarkDecode(b, digits, huffman, 1e6) }
  func BenchmarkDecodeDigitsSpeed1e4(b *testing.B)    { benchmarkDecode(b, digits, speed, 1e4) }
  func BenchmarkDecodeDigitsSpeed1e5(b *testing.B)    { benchmarkDecode(b, digits, speed, 1e5) }
  func BenchmarkDecodeDigitsSpeed1e6(b *testing.B)    { benchmarkDecode(b, digits, speed, 1e6) }
@@ -85,6 +89,9 @@ func BenchmarkDecodeDigitsDefault1e6(b *testing.B)  { benchmarkDecode(b, digits,
  func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) }
  func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) }
  func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) }
+func BenchmarkDecodeTwainHuffman1e4(b *testing.B)   { benchmarkDecode(b, twain, huffman, 1e4) }
+func BenchmarkDecodeTwainHuffman1e5(b *testing.B)   { benchmarkDecode(b, twain, huffman, 1e5) }
+func BenchmarkDecodeTwainHuffman1e6(b *testing.B)   { benchmarkDecode(b, twain, huffman, 1e6) }
  func BenchmarkDecodeTwainSpeed1e4(b *testing.B)     { benchmarkDecode(b, twain, speed, 1e4) }
  func BenchmarkDecodeTwainSpeed1e5(b *testing.B)     { benchmarkDecode(b, twain, speed, 1e5) }
  func BenchmarkDecodeTwainSpeed1e6(b *testing.B)     { benchmarkDecode(b, twain, speed, 1e6) }
diff --git a/src/compress/flate/writer_test.go b/src/compress/flate/writer_test.go

index 428152f3044f451f8aa7c4cf61d34573d2f01019..dd479bea82fbdd448c0f4c18a72ad713dd55386e 100644 (file)
--- a/src/compress/flate/writer_test.go
+++ b/src/compress/flate/writer_test.go
@@ -45,6 +45,9 @@ func benchmarkEncoder(b *testing.B, testfile, level, n int) {
         }
  }
  
+func BenchmarkEncodeDigitsHuffman1e4(b *testing.B)  { benchmarkEncoder(b, digits, huffman, 1e4) }
+func BenchmarkEncodeDigitsHuffman1e5(b *testing.B)  { benchmarkEncoder(b, digits, huffman, 1e5) }
+func BenchmarkEncodeDigitsHuffman1e6(b *testing.B)  { benchmarkEncoder(b, digits, huffman, 1e6) }
  func BenchmarkEncodeDigitsSpeed1e4(b *testing.B)    { benchmarkEncoder(b, digits, speed, 1e4) }
  func BenchmarkEncodeDigitsSpeed1e5(b *testing.B)    { benchmarkEncoder(b, digits, speed, 1e5) }
  func BenchmarkEncodeDigitsSpeed1e6(b *testing.B)    { benchmarkEncoder(b, digits, speed, 1e6) }
@@ -54,6 +57,9 @@ func BenchmarkEncodeDigitsDefault1e6(b *testing.B)  { benchmarkEncoder(b, digits
  func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) }
  func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) }
  func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) }
+func BenchmarkEncodeTwainHuffman1e4(b *testing.B)   { benchmarkEncoder(b, twain, huffman, 1e4) }
+func BenchmarkEncodeTwainHuffman1e5(b *testing.B)   { benchmarkEncoder(b, twain, huffman, 1e5) }
+func BenchmarkEncodeTwainHuffman1e6(b *testing.B)   { benchmarkEncoder(b, twain, huffman, 1e6) }
  func BenchmarkEncodeTwainSpeed1e4(b *testing.B)     { benchmarkEncoder(b, twain, speed, 1e4) }
  func BenchmarkEncodeTwainSpeed1e5(b *testing.B)     { benchmarkEncoder(b, twain, speed, 1e5) }
  func BenchmarkEncodeTwainSpeed1e6(b *testing.B)     { benchmarkEncoder(b, twain, speed, 1e6) }
@@ -131,16 +137,17 @@ func TestWriteError(t *testing.T) {
  
  // Test if two runs produce identical results
  // even when writing different sizes to the Writer.
-func TestDeterministicL0(t *testing.T) { testDeterministic(0, t) }
-func TestDeterministicL1(t *testing.T) { testDeterministic(1, t) }
-func TestDeterministicL2(t *testing.T) { testDeterministic(2, t) }
-func TestDeterministicL3(t *testing.T) { testDeterministic(3, t) }
-func TestDeterministicL4(t *testing.T) { testDeterministic(4, t) }
-func TestDeterministicL5(t *testing.T) { testDeterministic(5, t) }
-func TestDeterministicL6(t *testing.T) { testDeterministic(6, t) }
-func TestDeterministicL7(t *testing.T) { testDeterministic(7, t) }
-func TestDeterministicL8(t *testing.T) { testDeterministic(8, t) }
-func TestDeterministicL9(t *testing.T) { testDeterministic(9, t) }
+func TestDeterministicL0(t *testing.T)  { testDeterministic(0, t) }
+func TestDeterministicL1(t *testing.T)  { testDeterministic(1, t) }
+func TestDeterministicL2(t *testing.T)  { testDeterministic(2, t) }
+func TestDeterministicL3(t *testing.T)  { testDeterministic(3, t) }
+func TestDeterministicL4(t *testing.T)  { testDeterministic(4, t) }
+func TestDeterministicL5(t *testing.T)  { testDeterministic(5, t) }
+func TestDeterministicL6(t *testing.T)  { testDeterministic(6, t) }
+func TestDeterministicL7(t *testing.T)  { testDeterministic(7, t) }
+func TestDeterministicL8(t *testing.T)  { testDeterministic(8, t) }
+func TestDeterministicL9(t *testing.T)  { testDeterministic(9, t) }
+func TestDeterministicLM2(t *testing.T) { testDeterministic(-2, t) }
  
  func testDeterministic(i int, t *testing.T) {
         // Test so much we cross a good number of block boundaries.
author	Klaus Post <klauspost@gmail.com>
	Mon, 21 Mar 2016 13:51:28 +0000 (14:51 +0100)
committer	Nigel Tao <nigeltao@golang.org>
	Tue, 29 Mar 2016 09:34:52 +0000 (09:34 +0000)
src/compress/flate/deflate.go		patch \| blob \| history
src/compress/flate/deflate_test.go		patch \| blob \| history
src/compress/flate/reader_test.go		patch \| blob \| history
src/compress/flate/writer_test.go		patch \| blob \| history