From: Raph Levien Date: Fri, 18 Jan 2013 20:09:42 +0000 (-0500) Subject: compress/flate: Performance improvement for inflate X-Git-Tag: go1.1rc2~1363 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ebf35167eed54afb75332f879351dffdc5615e5d;p=gostls13.git compress/flate: Performance improvement for inflate Decode as much as possible of a Huffman symbol in a single table lookup (much like the zlib implementation), filling more bits (conservatively, so we don't consume past the end of the stream) when the code prefix indicates more bits are needed. This results in about a 50% performance gain in speed benchmarks. The following set is benchcmp done on a retina MacBook Pro: benchmark old MB/s new MB/s speedup BenchmarkDecodeDigitsSpeed1e4 28.41 42.79 1.51x BenchmarkDecodeDigitsSpeed1e5 30.18 47.62 1.58x BenchmarkDecodeDigitsSpeed1e6 30.81 48.14 1.56x BenchmarkDecodeDigitsDefault1e4 30.28 44.61 1.47x BenchmarkDecodeDigitsDefault1e5 32.18 51.94 1.61x BenchmarkDecodeDigitsDefault1e6 35.57 53.28 1.50x BenchmarkDecodeDigitsCompress1e4 30.39 44.83 1.48x BenchmarkDecodeDigitsCompress1e5 33.05 51.64 1.56x BenchmarkDecodeDigitsCompress1e6 35.69 53.04 1.49x BenchmarkDecodeTwainSpeed1e4 25.90 43.04 1.66x BenchmarkDecodeTwainSpeed1e5 29.97 48.19 1.61x BenchmarkDecodeTwainSpeed1e6 31.36 49.43 1.58x BenchmarkDecodeTwainDefault1e4 28.79 45.02 1.56x BenchmarkDecodeTwainDefault1e5 37.12 55.65 1.50x BenchmarkDecodeTwainDefault1e6 39.28 58.16 1.48x BenchmarkDecodeTwainCompress1e4 28.64 44.90 1.57x BenchmarkDecodeTwainCompress1e5 37.40 55.98 1.50x BenchmarkDecodeTwainCompress1e6 39.35 58.06 1.48x R=rsc, dave, minux.ma, bradfitz, nigeltao CC=golang-dev https://golang.org/cl/6872063 --- diff --git a/src/pkg/compress/flate/fixedhuff.go b/src/pkg/compress/flate/fixedhuff.go new file mode 100644 index 0000000000..41a6b25dfd --- /dev/null +++ b/src/pkg/compress/flate/fixedhuff.go @@ -0,0 +1,74 @@ +package flate + +// autogenerated by gen.go, DO NOT EDIT + +var fixedHuffmanDecoder = 
huffmanDecoder{ + 7, + [huffmanNumChunks]uint32{ + 0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c09, + 0x1087, 0x0608, 0x0208, 0x0a09, 0x0008, 0x0808, 0x0408, 0x0e09, + 0x1047, 0x0588, 0x0188, 0x0909, 0x1147, 0x0788, 0x0388, 0x0d09, + 0x10c7, 0x0688, 0x0288, 0x0b09, 0x0088, 0x0888, 0x0488, 0x0f09, + 0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c89, + 0x10a7, 0x0648, 0x0248, 0x0a89, 0x0048, 0x0848, 0x0448, 0x0e89, + 0x1067, 0x05c8, 0x01c8, 0x0989, 0x1167, 0x07c8, 0x03c8, 0x0d89, + 0x10e7, 0x06c8, 0x02c8, 0x0b89, 0x00c8, 0x08c8, 0x04c8, 0x0f89, + 0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c49, + 0x1097, 0x0628, 0x0228, 0x0a49, 0x0028, 0x0828, 0x0428, 0x0e49, + 0x1057, 0x05a8, 0x01a8, 0x0949, 0x1157, 0x07a8, 0x03a8, 0x0d49, + 0x10d7, 0x06a8, 0x02a8, 0x0b49, 0x00a8, 0x08a8, 0x04a8, 0x0f49, + 0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cc9, + 0x10b7, 0x0668, 0x0268, 0x0ac9, 0x0068, 0x0868, 0x0468, 0x0ec9, + 0x1077, 0x05e8, 0x01e8, 0x09c9, 0x1177, 0x07e8, 0x03e8, 0x0dc9, + 0x10f7, 0x06e8, 0x02e8, 0x0bc9, 0x00e8, 0x08e8, 0x04e8, 0x0fc9, + 0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c29, + 0x1087, 0x0618, 0x0218, 0x0a29, 0x0018, 0x0818, 0x0418, 0x0e29, + 0x1047, 0x0598, 0x0198, 0x0929, 0x1147, 0x0798, 0x0398, 0x0d29, + 0x10c7, 0x0698, 0x0298, 0x0b29, 0x0098, 0x0898, 0x0498, 0x0f29, + 0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0ca9, + 0x10a7, 0x0658, 0x0258, 0x0aa9, 0x0058, 0x0858, 0x0458, 0x0ea9, + 0x1067, 0x05d8, 0x01d8, 0x09a9, 0x1167, 0x07d8, 0x03d8, 0x0da9, + 0x10e7, 0x06d8, 0x02d8, 0x0ba9, 0x00d8, 0x08d8, 0x04d8, 0x0fa9, + 0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c69, + 0x1097, 0x0638, 0x0238, 0x0a69, 0x0038, 0x0838, 0x0438, 0x0e69, + 0x1057, 0x05b8, 0x01b8, 0x0969, 0x1157, 0x07b8, 0x03b8, 0x0d69, + 0x10d7, 0x06b8, 0x02b8, 0x0b69, 0x00b8, 0x08b8, 0x04b8, 0x0f69, + 0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0ce9, + 0x10b7, 0x0678, 0x0278, 0x0ae9, 
0x0078, 0x0878, 0x0478, 0x0ee9, + 0x1077, 0x05f8, 0x01f8, 0x09e9, 0x1177, 0x07f8, 0x03f8, 0x0de9, + 0x10f7, 0x06f8, 0x02f8, 0x0be9, 0x00f8, 0x08f8, 0x04f8, 0x0fe9, + 0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c19, + 0x1087, 0x0608, 0x0208, 0x0a19, 0x0008, 0x0808, 0x0408, 0x0e19, + 0x1047, 0x0588, 0x0188, 0x0919, 0x1147, 0x0788, 0x0388, 0x0d19, + 0x10c7, 0x0688, 0x0288, 0x0b19, 0x0088, 0x0888, 0x0488, 0x0f19, + 0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c99, + 0x10a7, 0x0648, 0x0248, 0x0a99, 0x0048, 0x0848, 0x0448, 0x0e99, + 0x1067, 0x05c8, 0x01c8, 0x0999, 0x1167, 0x07c8, 0x03c8, 0x0d99, + 0x10e7, 0x06c8, 0x02c8, 0x0b99, 0x00c8, 0x08c8, 0x04c8, 0x0f99, + 0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c59, + 0x1097, 0x0628, 0x0228, 0x0a59, 0x0028, 0x0828, 0x0428, 0x0e59, + 0x1057, 0x05a8, 0x01a8, 0x0959, 0x1157, 0x07a8, 0x03a8, 0x0d59, + 0x10d7, 0x06a8, 0x02a8, 0x0b59, 0x00a8, 0x08a8, 0x04a8, 0x0f59, + 0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cd9, + 0x10b7, 0x0668, 0x0268, 0x0ad9, 0x0068, 0x0868, 0x0468, 0x0ed9, + 0x1077, 0x05e8, 0x01e8, 0x09d9, 0x1177, 0x07e8, 0x03e8, 0x0dd9, + 0x10f7, 0x06e8, 0x02e8, 0x0bd9, 0x00e8, 0x08e8, 0x04e8, 0x0fd9, + 0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c39, + 0x1087, 0x0618, 0x0218, 0x0a39, 0x0018, 0x0818, 0x0418, 0x0e39, + 0x1047, 0x0598, 0x0198, 0x0939, 0x1147, 0x0798, 0x0398, 0x0d39, + 0x10c7, 0x0698, 0x0298, 0x0b39, 0x0098, 0x0898, 0x0498, 0x0f39, + 0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0cb9, + 0x10a7, 0x0658, 0x0258, 0x0ab9, 0x0058, 0x0858, 0x0458, 0x0eb9, + 0x1067, 0x05d8, 0x01d8, 0x09b9, 0x1167, 0x07d8, 0x03d8, 0x0db9, + 0x10e7, 0x06d8, 0x02d8, 0x0bb9, 0x00d8, 0x08d8, 0x04d8, 0x0fb9, + 0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c79, + 0x1097, 0x0638, 0x0238, 0x0a79, 0x0038, 0x0838, 0x0438, 0x0e79, + 0x1057, 0x05b8, 0x01b8, 0x0979, 0x1157, 0x07b8, 0x03b8, 0x0d79, + 0x10d7, 0x06b8, 0x02b8, 0x0b79, 0x00b8, 0x08b8, 
0x04b8, 0x0f79, + 0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0cf9, + 0x10b7, 0x0678, 0x0278, 0x0af9, 0x0078, 0x0878, 0x0478, 0x0ef9, + 0x1077, 0x05f8, 0x01f8, 0x09f9, 0x1177, 0x07f8, 0x03f8, 0x0df9, + 0x10f7, 0x06f8, 0x02f8, 0x0bf9, 0x00f8, 0x08f8, 0x04f8, 0x0ff9, + }, + nil, 0, +} diff --git a/src/pkg/compress/flate/flate_test.go b/src/pkg/compress/flate/flate_test.go index 94efc90acf..aba820a1f9 100644 --- a/src/pkg/compress/flate/flate_test.go +++ b/src/pkg/compress/flate/flate_test.go @@ -10,122 +10,9 @@ package flate import ( "bytes" - "reflect" "testing" ) -// The Huffman code lengths used by the fixed-format Huffman blocks. -var fixedHuffmanBits = [...]int{ - // 0-143 length 8 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - - // 144-255 length 9 - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - - // 256-279 length 7 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - - // 280-287 length 8 - 8, 8, 8, 8, 8, 8, 8, 8, -} - -type InitDecoderTest struct { - in []int - out huffmanDecoder - ok bool -} - -var initDecoderTests = []*InitDecoderTest{ - // Example from Connell 1973, - { - []int{3, 5, 2, 4, 3, 5, 5, 4, 4, 3, 4, 5}, - huffmanDecoder{ - 2, 5, - [maxCodeLen + 1]int{2: 0, 4, 13, 31}, - [maxCodeLen + 1]int{2: 0, 1, 6, 20}, - // 
Paper used different code assignment: - // 2, 9, 4, 0, 10, 8, 3, 7, 1, 5, 11, 6 - // Reordered here so that codes of same length - // are assigned to increasing numbers. - []int{2, 0, 4, 9, 3, 7, 8, 10, 1, 5, 6, 11}, - }, - true, - }, - - // Example from RFC 1951 section 3.2.2 - { - []int{2, 1, 3, 3}, - huffmanDecoder{ - 1, 3, - [maxCodeLen + 1]int{1: 0, 2, 7}, - [maxCodeLen + 1]int{1: 0, 1, 4}, - []int{1, 0, 2, 3}, - }, - true, - }, - - // Second example from RFC 1951 section 3.2.2 - { - []int{3, 3, 3, 3, 3, 2, 4, 4}, - huffmanDecoder{ - 2, 4, - [maxCodeLen + 1]int{2: 0, 6, 15}, - [maxCodeLen + 1]int{2: 0, 1, 8}, - []int{5, 0, 1, 2, 3, 4, 6, 7}, - }, - true, - }, - - // Static Huffman codes (RFC 1951 section 3.2.6) - { - fixedHuffmanBits[0:], - fixedHuffmanDecoder, - true, - }, - - // Illegal input. - { - []int{}, - huffmanDecoder{}, - false, - }, - - // Illegal input. - { - []int{0, 0, 0, 0, 0, 0, 0}, - huffmanDecoder{}, - false, - }, -} - -func TestInitDecoder(t *testing.T) { - for i, tt := range initDecoderTests { - var h huffmanDecoder - if h.init(tt.in) != tt.ok { - t.Errorf("test %d: init = %v", i, !tt.ok) - continue - } - if !reflect.DeepEqual(&h, &tt.out) { - t.Errorf("test %d:\nhave %v\nwant %v", i, h, tt.out) - } - } -} - func TestUncompressedSource(t *testing.T) { decoder := NewReader(bytes.NewBuffer([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11})) output := make([]byte, 1) diff --git a/src/pkg/compress/flate/gen.go b/src/pkg/compress/flate/gen.go new file mode 100644 index 0000000000..1427557f80 --- /dev/null +++ b/src/pkg/compress/flate/gen.go @@ -0,0 +1,165 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build ignore + +// This program generates fixedhuff.go +// Invoke as +// +// go run gen.go |gofmt >fixedhuff.go + +package main + +import ( + "fmt" +) + +const maxCodeLen = 16 + +// Note: the definition of the huffmanDecoder struct is copied from +// inflate.go, as it is private to the implementation. + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks [huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +func (h *huffmanDecoder) init(bits []int) bool { + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range bits { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + if max == 0 { + return false + } + + h.min = min + var linkBits uint + var numLinks int + if max > huffmanChunkBits { + linkBits = uint(max) - huffmanChunkBits + numLinks = 1 << linkBits + h.linkMask = uint32(numLinks - 1) + } + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + if i == huffmanChunkBits+1 { + // create link tables + link := code >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + h.chunks[reverse] = uint32(off<>8]) | int(reverseByte[code&0xff])<<8 + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { + h.chunks[off] = chunk + } + } else { + linktab := 
h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift] + reverse >>= huffmanChunkBits + for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { + linktab[off] = chunk + } + } + } + return true +} + +func main() { + var h huffmanDecoder + var bits [288]int + initReverseByte() + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + h.init(bits[:]) + fmt.Println("package flate") + fmt.Println() + fmt.Println("// autogenerated by gen.go, DO NOT EDIT") + fmt.Println() + fmt.Println("var fixedHuffmanDecoder = huffmanDecoder{") + fmt.Printf("\t%d,\n", h.min) + fmt.Println("\t[huffmanNumChunks]uint32{") + for i := 0; i < huffmanNumChunks; i++ { + if i&7 == 0 { + fmt.Printf("\t\t") + } else { + fmt.Printf(" ") + } + fmt.Printf("0x%04x,", h.chunks[i]) + if i&7 == 7 { + fmt.Println() + } + } + fmt.Println("\t},") + fmt.Println("\tnil, 0,") + fmt.Println("}") +} + +var reverseByte [256]byte + +func initReverseByte() { + for x := 0; x < 256; x++ { + var result byte + for i := uint(0); i < 8; i++ { + result |= byte(((x >> i) & 1) << (7 - i)) + } + reverseByte[x] = result + } +} diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go index c5a54b998f..a8d6460192 100644 --- a/src/pkg/compress/flate/inflate.go +++ b/src/pkg/compress/flate/inflate.go @@ -54,32 +54,46 @@ func (e *WriteError) Error() string { return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() } -// Huffman decoder is based on -// J. Brian Connell, ``A Huffman-Shannon-Fano Code,'' -// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047. -type huffmanDecoder struct { - // min, max code length - min, max int - - // limit[i] = largest code word of length i - // Given code v of length n, - // need more bits if v > limit[n]. 
- limit [maxCodeLen + 1]int +// Note that much of the implementation of huffmanDecoder is also copied +// into gen.go (in package main) for the purpose of precomputing the +// fixed huffman tables so they can be included statically. + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits). +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. + +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link - // base[i] = smallest code word of length i - seq number - base [maxCodeLen + 1]int +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) - // codes[seq number] = output code. - // Given code v of length n, value is - // codes[v - base[n]]. - codes []int +type huffmanDecoder struct { + min int // the minimum code length + chunks [huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table } // Initialize Huffman decoding tables from array of code lengths. func (h *huffmanDecoder) init(bits []int) bool { // Count number of codes of each length, // compute min and max length. 
- var count [maxCodeLen + 1]int + var count [maxCodeLen]int var min, max int for _, n := range bits { if n == 0 { @@ -98,93 +112,58 @@ func (h *huffmanDecoder) init(bits []int) bool { } h.min = min - h.max = max - - // For each code range, compute - // nextcode (first code of that length), - // limit (last code of that length), and - // base (offset from first code to sequence number). + var linkBits uint + var numLinks int + if max > huffmanChunkBits { + linkBits = uint(max) - huffmanChunkBits + numLinks = 1 << linkBits + h.linkMask = uint32(numLinks - 1) + } code := 0 - seq := 0 var nextcode [maxCodeLen]int for i := min; i <= max; i++ { + if i == huffmanChunkBits+1 { + // create link tables + link := code >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + h.chunks[reverse] = uint32(off<>8]) | int(reverseByte[code&0xff])<<8 + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { + h.chunks[off] = chunk + } + } else { + linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift] + reverse >>= huffmanChunkBits + for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { + linktab[off] = chunk + } + } } return true } -// Hard-coded Huffman tables for DEFLATE algorithm. -// See RFC 1951, section 3.2.6. 
-var fixedHuffmanDecoder = huffmanDecoder{ - 7, 9, - [maxCodeLen + 1]int{7: 23, 199, 511}, - [maxCodeLen + 1]int{7: 0, 24, 224}, - []int{ - // length 7: 256-279 - 256, 257, 258, 259, 260, 261, 262, - 263, 264, 265, 266, 267, 268, 269, - 270, 271, 272, 273, 274, 275, 276, - 277, 278, 279, - - // length 8: 0-143 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, - 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, - 92, 93, 94, 95, 96, 97, 98, 99, 100, - 101, 102, 103, 104, 105, 106, 107, 108, - 109, 110, 111, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, 123, 124, - 125, 126, 127, 128, 129, 130, 131, 132, - 133, 134, 135, 136, 137, 138, 139, 140, - 141, 142, 143, - - // length 8: 280-287 - 280, 281, 282, 283, 284, 285, 286, 287, - - // length 9: 144-255 - 144, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, - 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, - 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, - 200, 201, 202, 203, 204, 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, - 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, - 248, 249, 250, 251, 252, 253, 254, 255, - }, -} - // The actual read interface needed by NewReader. // If the passed in io.Reader does not also have ReadByte, // the NewReader will introduce its own buffering. @@ -644,23 +623,23 @@ func (f *decompressor) moreBits() error { // Read the next Huffman-encoded symbol from f according to h. 
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { - for n := uint(h.min); n <= uint(h.max); n++ { - lim := h.limit[n] - if lim == -1 { - continue - } + n := uint(h.min) + for { for f.nb < n { if err := f.moreBits(); err != nil { return 0, err } } - v := int(f.b & uint32(1<>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits - if v <= lim { + chunk := h.chunks[f.b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= f.nb { f.b >>= n f.nb -= n - return h.codes[v-h.base[n]], nil + return int(chunk >> huffmanValueShift), nil } } return 0, CorruptInputError(f.roffset)