From 5c78589b69c87f25a4792d9c8a1446294c07600e Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 1 Sep 2015 23:42:43 -0700 Subject: [PATCH] compress/flate: simplify inflate logic The flate library contains generator code, which is used to generate the fixed huffman table. This is done so that fixed blocks can be processed quicker since there is no need generate the decoder table for fixed codes. Instead, delete the precomputed table, and use sync.Once to generate it at runtime when used. Advantages: * Reduces duplicated logic in flate package * Reduces binary size by approximately 2KiB Disadvantages: * For the simplest possible program that simply decodes the fixed block "\x03\x00" once, the modified code takes 4.7% longer for the first decode. Compression performance for subsequent blocks afterwards has no noticeable slow down. Change-Id: I8f351218debf7d732118808859eda481b01011f6 Reviewed-on: https://go-review.googlesource.com/14181 Reviewed-by: Nigel Tao --- src/compress/flate/fixedhuff.go | 78 ---------- src/compress/flate/gen.go | 265 -------------------------------- src/compress/flate/inflate.go | 44 ++++-- 3 files changed, 35 insertions(+), 352 deletions(-) delete mode 100644 src/compress/flate/fixedhuff.go delete mode 100644 src/compress/flate/gen.go diff --git a/src/compress/flate/fixedhuff.go b/src/compress/flate/fixedhuff.go deleted file mode 100644 index 7df8b9a293..0000000000 --- a/src/compress/flate/fixedhuff.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// autogenerated by go run gen.go -output fixedhuff.go, DO NOT EDIT - -var fixedHuffmanDecoder = huffmanDecoder{ - 7, - [huffmanNumChunks]uint32{ - 0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c09, - 0x1087, 0x0608, 0x0208, 0x0a09, 0x0008, 0x0808, 0x0408, 0x0e09, - 0x1047, 0x0588, 0x0188, 0x0909, 0x1147, 0x0788, 0x0388, 0x0d09, - 0x10c7, 0x0688, 0x0288, 0x0b09, 0x0088, 0x0888, 0x0488, 0x0f09, - 0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c89, - 0x10a7, 0x0648, 0x0248, 0x0a89, 0x0048, 0x0848, 0x0448, 0x0e89, - 0x1067, 0x05c8, 0x01c8, 0x0989, 0x1167, 0x07c8, 0x03c8, 0x0d89, - 0x10e7, 0x06c8, 0x02c8, 0x0b89, 0x00c8, 0x08c8, 0x04c8, 0x0f89, - 0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c49, - 0x1097, 0x0628, 0x0228, 0x0a49, 0x0028, 0x0828, 0x0428, 0x0e49, - 0x1057, 0x05a8, 0x01a8, 0x0949, 0x1157, 0x07a8, 0x03a8, 0x0d49, - 0x10d7, 0x06a8, 0x02a8, 0x0b49, 0x00a8, 0x08a8, 0x04a8, 0x0f49, - 0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cc9, - 0x10b7, 0x0668, 0x0268, 0x0ac9, 0x0068, 0x0868, 0x0468, 0x0ec9, - 0x1077, 0x05e8, 0x01e8, 0x09c9, 0x1177, 0x07e8, 0x03e8, 0x0dc9, - 0x10f7, 0x06e8, 0x02e8, 0x0bc9, 0x00e8, 0x08e8, 0x04e8, 0x0fc9, - 0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c29, - 0x1087, 0x0618, 0x0218, 0x0a29, 0x0018, 0x0818, 0x0418, 0x0e29, - 0x1047, 0x0598, 0x0198, 0x0929, 0x1147, 0x0798, 0x0398, 0x0d29, - 0x10c7, 0x0698, 0x0298, 0x0b29, 0x0098, 0x0898, 0x0498, 0x0f29, - 0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0ca9, - 0x10a7, 0x0658, 0x0258, 0x0aa9, 0x0058, 0x0858, 0x0458, 0x0ea9, - 0x1067, 0x05d8, 0x01d8, 0x09a9, 0x1167, 0x07d8, 0x03d8, 0x0da9, - 0x10e7, 0x06d8, 0x02d8, 0x0ba9, 0x00d8, 0x08d8, 0x04d8, 0x0fa9, - 0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c69, - 0x1097, 0x0638, 0x0238, 0x0a69, 0x0038, 0x0838, 0x0438, 0x0e69, - 0x1057, 0x05b8, 0x01b8, 0x0969, 0x1157, 0x07b8, 0x03b8, 0x0d69, - 0x10d7, 0x06b8, 0x02b8, 0x0b69, 0x00b8, 0x08b8, 0x04b8, 0x0f69, - 0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0ce9, - 0x10b7, 0x0678, 0x0278, 0x0ae9, 0x0078, 0x0878, 0x0478, 0x0ee9, - 0x1077, 0x05f8, 0x01f8, 0x09e9, 0x1177, 0x07f8, 0x03f8, 0x0de9, - 0x10f7, 0x06f8, 0x02f8, 0x0be9, 0x00f8, 0x08f8, 0x04f8, 0x0fe9, - 0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c19, - 0x1087, 0x0608, 0x0208, 0x0a19, 0x0008, 0x0808, 0x0408, 0x0e19, - 0x1047, 0x0588, 0x0188, 0x0919, 0x1147, 0x0788, 0x0388, 0x0d19, - 0x10c7, 0x0688, 0x0288, 0x0b19, 0x0088, 0x0888, 0x0488, 0x0f19, - 0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c99, - 0x10a7, 0x0648, 0x0248, 0x0a99, 0x0048, 0x0848, 0x0448, 0x0e99, - 0x1067, 0x05c8, 0x01c8, 0x0999, 0x1167, 0x07c8, 0x03c8, 0x0d99, - 0x10e7, 0x06c8, 0x02c8, 0x0b99, 0x00c8, 0x08c8, 0x04c8, 0x0f99, - 0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c59, - 0x1097, 0x0628, 0x0228, 0x0a59, 0x0028, 0x0828, 0x0428, 0x0e59, - 0x1057, 0x05a8, 0x01a8, 0x0959, 0x1157, 0x07a8, 0x03a8, 0x0d59, - 0x10d7, 0x06a8, 0x02a8, 0x0b59, 0x00a8, 0x08a8, 0x04a8, 0x0f59, - 0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cd9, - 0x10b7, 0x0668, 0x0268, 0x0ad9, 0x0068, 0x0868, 0x0468, 0x0ed9, - 0x1077, 0x05e8, 0x01e8, 0x09d9, 0x1177, 0x07e8, 0x03e8, 0x0dd9, - 0x10f7, 0x06e8, 0x02e8, 0x0bd9, 0x00e8, 0x08e8, 0x04e8, 0x0fd9, - 0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c39, - 0x1087, 0x0618, 0x0218, 0x0a39, 0x0018, 0x0818, 0x0418, 0x0e39, - 0x1047, 0x0598, 0x0198, 0x0939, 0x1147, 0x0798, 0x0398, 0x0d39, - 0x10c7, 0x0698, 0x0298, 0x0b39, 0x0098, 0x0898, 0x0498, 0x0f39, - 0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0cb9, - 0x10a7, 0x0658, 0x0258, 0x0ab9, 0x0058, 0x0858, 0x0458, 0x0eb9, - 0x1067, 0x05d8, 0x01d8, 0x09b9, 0x1167, 0x07d8, 0x03d8, 0x0db9, - 0x10e7, 0x06d8, 0x02d8, 0x0bb9, 0x00d8, 0x08d8, 0x04d8, 0x0fb9, - 0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c79, - 0x1097, 0x0638, 0x0238, 0x0a79, 0x0038, 0x0838, 0x0438, 0x0e79, - 0x1057, 0x05b8, 0x01b8, 0x0979, 0x1157, 0x07b8, 0x03b8, 0x0d79, - 0x10d7, 0x06b8, 0x02b8, 0x0b79, 0x00b8, 0x08b8, 0x04b8, 0x0f79, - 0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0cf9, - 0x10b7, 0x0678, 0x0278, 0x0af9, 0x0078, 0x0878, 0x0478, 0x0ef9, - 0x1077, 0x05f8, 0x01f8, 0x09f9, 0x1177, 0x07f8, 0x03f8, 0x0df9, - 0x10f7, 0x06f8, 0x02f8, 0x0bf9, 0x00f8, 0x08f8, 0x04f8, 0x0ff9, - }, - nil, 0, -} diff --git a/src/compress/flate/gen.go b/src/compress/flate/gen.go deleted file mode 100644 index 154c89a488..0000000000 --- a/src/compress/flate/gen.go +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// This program generates fixedhuff.go -// Invoke as -// -// go run gen.go -output fixedhuff.go - -package main - -import ( - "bytes" - "flag" - "fmt" - "go/format" - "io/ioutil" - "log" -) - -var filename = flag.String("output", "fixedhuff.go", "output file name") - -const maxCodeLen = 16 - -// Note: the definition of the huffmanDecoder struct is copied from -// inflate.go, as it is private to the implementation. - -// chunk & 15 is number of bits -// chunk >> 4 is value, including table link - -const ( - huffmanChunkBits = 9 - huffmanNumChunks = 1 << huffmanChunkBits - huffmanCountMask = 15 - huffmanValueShift = 4 -) - -type huffmanDecoder struct { - min int // the minimum code length - chunks [huffmanNumChunks]uint32 // chunks as described above - links [][]uint32 // overflow links - linkMask uint32 // mask the width of the link table -} - -// Initialize Huffman decoding tables from array of code lengths. -// Following this function, h is guaranteed to be initialized into a complete -// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a -// degenerate case where the tree has only a single symbol with length 1. Empty -// trees are permitted. -func (h *huffmanDecoder) init(bits []int) bool { - // Sanity enables additional runtime tests during Huffman - // table construction. It's intended to be used during - // development to supplement the currently ad-hoc unit tests. - const sanity = false - - if h.min != 0 { - *h = huffmanDecoder{} - } - - // Count number of codes of each length, - // compute min and max length. - var count [maxCodeLen]int - var min, max int - for _, n := range bits { - if n == 0 { - continue - } - if min == 0 || n < min { - min = n - } - if n > max { - max = n - } - count[n]++ - } - - // Empty tree. The decompressor.huffSym function will fail later if the tree - // is used. Technically, an empty tree is only valid for the HDIST tree and - // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree - // is guaranteed to fail since it will attempt to use the tree to decode the - // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is - // guaranteed to fail later since the compressed data section must be - // composed of at least one symbol (the end-of-block marker). - if max == 0 { - return true - } - - code := 0 - var nextcode [maxCodeLen]int - for i := min; i <= max; i++ { - code <<= 1 - nextcode[i] = code - code += count[i] - } - - // Check that the coding is complete (i.e., that we've - // assigned all 2-to-the-max possible bit sequences). - // Exception: To be compatible with zlib, we also need to - // accept degenerate single-code codings. See also - // TestDegenerateHuffmanCoding. - if code != 1< huffmanChunkBits { - numLinks := 1 << (uint(max) - huffmanChunkBits) - h.linkMask = uint32(numLinks - 1) - - // create link tables - link := nextcode[huffmanChunkBits+1] >> 1 - h.links = make([][]uint32, huffmanNumChunks-link) - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint32(off<>8]) | int(reverseByte[code&0xff])<<8 - reverse >>= uint(16 - n) - if n <= huffmanChunkBits { - for off := reverse; off < len(h.chunks); off += 1 << uint(n) { - // We should never need to overwrite - // an existing chunk. Also, 0 is - // never a valid chunk, because the - // lower 4 "count" bits should be - // between 1 and 15. - if sanity && h.chunks[off] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[off] = chunk - } - } else { - j := reverse & (huffmanNumChunks - 1) - if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { - // Longer codes should have been - // associated with a link table above. - panic("impossible: not an indirect chunk") - } - value := h.chunks[j] >> huffmanValueShift - linktab := h.links[value] - reverse >>= huffmanChunkBits - for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { - if sanity && linktab[off] != 0 { - panic("impossible: overwriting existing chunk") - } - linktab[off] = chunk - } - } - } - - if sanity { - // Above we've sanity checked that we never overwrote - // an existing entry. Here we additionally check that - // we filled the tables completely. - for i, chunk := range h.chunks { - if chunk == 0 { - // As an exception, in the degenerate - // single-code case, we allow odd - // chunks to be missing. - if code == 1 && i%2 == 1 { - continue - } - panic("impossible: missing chunk") - } - } - for _, linktab := range h.links { - for _, chunk := range linktab { - if chunk == 0 { - panic("impossible: missing chunk") - } - } - } - } - - return true -} - -func main() { - flag.Parse() - - var h huffmanDecoder - var bits [288]int - initReverseByte() - for i := 0; i < 144; i++ { - bits[i] = 8 - } - for i := 144; i < 256; i++ { - bits[i] = 9 - } - for i := 256; i < 280; i++ { - bits[i] = 7 - } - for i := 280; i < 288; i++ { - bits[i] = 8 - } - h.init(bits[:]) - if h.links != nil { - log.Fatal("Unexpected links table in fixed Huffman decoder") - } - - var buf bytes.Buffer - - fmt.Fprintf(&buf, `// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file.`+"\n\n") - - fmt.Fprintln(&buf, "package flate") - fmt.Fprintln(&buf) - fmt.Fprintln(&buf, "// autogenerated by go run gen.go -output fixedhuff.go, DO NOT EDIT") - fmt.Fprintln(&buf) - fmt.Fprintln(&buf, "var fixedHuffmanDecoder = huffmanDecoder{") - fmt.Fprintf(&buf, "\t%d,\n", h.min) - fmt.Fprintln(&buf, "\t[huffmanNumChunks]uint32{") - for i := 0; i < huffmanNumChunks; i++ { - if i&7 == 0 { - fmt.Fprintf(&buf, "\t\t") - } else { - fmt.Fprintf(&buf, " ") - } - fmt.Fprintf(&buf, "0x%04x,", h.chunks[i]) - if i&7 == 7 { - fmt.Fprintln(&buf) - } - } - fmt.Fprintln(&buf, "\t},") - fmt.Fprintln(&buf, "\tnil, 0,") - fmt.Fprintln(&buf, "}") - - data, err := format.Source(buf.Bytes()) - if err != nil { - log.Fatal(err) - } - err = ioutil.WriteFile(*filename, data, 0644) - if err != nil { - log.Fatal(err) - } -} - -var reverseByte [256]byte - -func initReverseByte() { - for x := 0; x < 256; x++ { - var result byte - for i := uint(0); i < 8; i++ { - result |= byte(((x >> i) & 1) << (7 - i)) - } - reverseByte[x] = result - } -} diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go index 04372dec24..09f6115804 100644 --- a/src/compress/flate/inflate.go +++ b/src/compress/flate/inflate.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:generate go run gen.go -output fixedhuff.go - // Package flate implements the DEFLATE compressed data format, described in // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file // formats. @@ -13,6 +11,7 @@ import ( "bufio" "io" "strconv" + "sync" ) const ( @@ -26,6 +25,10 @@ const ( numCodes = 19 // number of codes in Huffman meta-code ) +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + // A CorruptInputError reports the presence of corrupt input at a given offset. type CorruptInputError int64 @@ -67,10 +70,6 @@ type Resetter interface { Reset(r io.Reader, dict []byte) error } -// Note that much of the implementation of huffmanDecoder is also copied -// into gen.go (in package main) for the purpose of precomputing the -// fixed huffman tables so they can be included statically. - // The data structure for decoding Huffman tables is based on that of // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), // For codes smaller than the table width, there are multiple entries @@ -78,12 +77,15 @@ type Resetter interface { // larger than the table width, the table contains a link to an overflow // table. The width of each entry in the link table is the maximum code // size minus the chunk width. - +// // Note that you can do a lookup in the table even without all bits // filled. Since the extra bits are zero, and the DEFLATE Huffman codes // have the property that shorter codes come before longer ones, the // bit length estimate in the result is a lower bound on the actual // number of bits. +// +// See the following: +// http://www.gzip.org/algorithm.txt // chunk & 15 is number of bits // chunk >> 4 is value, including table link @@ -760,6 +762,26 @@ func makeReader(r io.Reader) Reader { return bufio.NewReader(r) } +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + func (f *decompressor) Reset(r io.Reader, dict []byte) error { *f = decompressor{ r: makeReader(r), @@ -783,11 +805,13 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { // // The ReadCloser returned by NewReader also implements Resetter. func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + var f decompressor - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) f.r = makeReader(r) f.hist = new([maxHist]byte) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock return &f } @@ -800,6 +824,8 @@ func NewReader(r io.Reader) io.ReadCloser { // // The ReadCloser returned by NewReader also implements Resetter. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + var f decompressor f.r = makeReader(r) f.hist = new([maxHist]byte) -- 2.48.1