import (
"io"
- "math"
)
const (
codegen[outIndex] = badCode
}
+// dynamicSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
+ numCodegens = len(w.codegenFreq)
+ for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
+ numCodegens--
+ }
+ header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
+ w.codegenEncoding.bitLength(w.codegenFreq[:]) +
+ int(w.codegenFreq[16])*2 +
+ int(w.codegenFreq[17])*3 +
+ int(w.codegenFreq[18])*7
+ size = header +
+ litEnc.bitLength(w.literalFreq) +
+ offEnc.bitLength(w.offsetFreq) +
+ extraBits
+
+ return size, numCodegens
+}
+
+// fixedSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) fixedSize(extraBits int) int {
+ return 3 +
+ fixedLiteralEncoding.bitLength(w.literalFreq) +
+ fixedOffsetEncoding.bitLength(w.offsetFreq) +
+ extraBits
+}
+
+// storedSize calculates the stored size, including header.
+// The function returns the size in bits and whether the block
+// fits inside a single block.
+func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
+ if in == nil {
+ return 0, false
+ }
+ if len(in) <= maxStoreBlockSize {
+ return (len(in) + 5) * 8, true
+ }
+ return 0, false
+}
+
func (w *huffmanBitWriter) writeCode(c hcode) {
if w.err != nil {
return
w.writeBits(value, 3)
}
+// writeBlock will write a block of tokens with the smallest encoding.
+// The original input can be supplied, and if the huffman encoded data
+// is larger than the original bytes, the data will be written as a
+// stored block.
+// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
if w.err != nil {
return
tokens = append(tokens, endBlockMarker)
numLiterals, numOffsets := w.indexTokens(tokens)
- storedBytes := 0
- if input != nil {
- storedBytes = len(input)
- }
- var extraBits int64
- var storedSize int64 = math.MaxInt64
- if storedBytes <= maxStoreBlockSize && input != nil {
- storedSize = int64((storedBytes + 5) * 8)
+ var extraBits int
+ storedSize, storable := w.storedSize(input)
+ if storable {
// We only bother calculating the costs of the extra bits required by
// the length of offset fields (which will be the same for both fixed
// and dynamic encoding), if we need to compare those two encodings
// against stored encoding.
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
// First eight length codes have extra size = 0.
- extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart])
+ extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
}
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
// First four offset codes have extra size = 0.
- extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode])
+ extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
}
}
// Figure out smallest code.
// Fixed Huffman baseline.
- var size = int64(3) +
- fixedLiteralEncoding.bitLength(w.literalFreq) +
- fixedOffsetEncoding.bitLength(w.offsetFreq) +
- extraBits
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
+ var size = w.fixedSize(extraBits)
// Dynamic Huffman?
var numCodegens int
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
- numCodegens = len(w.codegenFreq)
- for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
- numCodegens--
- }
- dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
- w.codegenEncoding.bitLength(w.codegenFreq[:]) +
- extraBits +
- int64(w.codegenFreq[16]*2) +
- int64(w.codegenFreq[17]*3) +
- int64(w.codegenFreq[18]*7)
- dynamicSize := dynamicHeader +
- w.literalEncoding.bitLength(w.literalFreq) +
- w.offsetEncoding.bitLength(w.offsetFreq)
+ dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
if dynamicSize < size {
size = dynamicSize
}
// Stored bytes?
- if storedSize < size {
- w.writeStoredHeader(storedBytes, eof)
- w.writeBytes(input[:storedBytes])
+ if storable && storedSize < size {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
return
}
// Write the tokens.
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
-
}
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
+// If input is supplied and the compression savings are below 1/16th of the
+// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
if w.err != nil {
return
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
- numCodegens := len(w.codegenFreq)
- for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
- numCodegens--
+ size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
+
+ // Store bytes, if we don't get a reasonable improvement.
+ if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
}
// Write Huffman table.
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
- numCodegens = len(w.codegenFreq)
- for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
- numCodegens--
- }
- headerSize := int64(3+5+5+4+(3*numCodegens)) +
- w.codegenEncoding.bitLength(w.codegenFreq[:]) +
- int64(w.codegenFreq[16]*2) +
- int64(w.codegenFreq[17]*3) +
- int64(w.codegenFreq[18]*7)
-
- // Includes EOB marker
- size := headerSize + w.literalEncoding.bitLength(w.literalFreq)
-
- // Calculate stored size
- var storedSize int64 = math.MaxInt64
- var storedBytes = len(input)
- if storedBytes <= maxStoreBlockSize {
- storedSize = int64(storedBytes+5) * 8
- }
+ size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
// Store bytes, if we don't get a reasonable improvement.
- if storedSize < (size + size>>4) {
- w.writeStoredHeader(storedBytes, eof)
+ if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+ w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}