// The number of offset codes.
offsetCodeCount = 30
- // The largest offset code in the extensions.
- extendedOffsetCodeCount = 42
-
// The special code used to mark the end of a block.
endBlockMarker = 256
return &huffmanBitWriter{
w: w,
literalFreq: make([]int32, maxLit),
- offsetFreq: make([]int32, extendedOffsetCodeCount),
- codegen: make([]uint8, maxLit+extendedOffsetCodeCount+1),
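+ // RFC 1951 defines exactly 30 distance codes, so tables sized by
+ // offsetCodeCount are sufficient once the extension is removed.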
+ offsetFreq: make([]int32, offsetCodeCount),
+ codegen: make([]uint8, maxLit+offsetCodeCount+1),
codegenFreq: make([]int32, codegenCodeCount),
literalEncoding: newHuffmanEncoder(maxLit),
- offsetEncoding: newHuffmanEncoder(extendedOffsetCodeCount),
+ offsetEncoding: newHuffmanEncoder(offsetCodeCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
}
}
}
w.writeBits(firstBits, 3)
w.writeBits(int32(numLiterals-257), 5)
- if numOffsets > offsetCodeCount {
- // Extended version of decompressor
- w.writeBits(int32(offsetCodeCount+((numOffsets-(1+offsetCodeCount))>>3)), 5)
- w.writeBits(int32((numOffsets-(1+offsetCodeCount))&0x7), 3)
- } else {
- w.writeBits(int32(numOffsets-1), 5)
- }
+ w.writeBits(int32(numOffsets-1), 5)
w.writeBits(int32(numCodegens-4), 4)
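+ // The three counts just written are the RFC 1951 dynamic-header fields:
+ // HLIT = numLiterals-257 (5 bits), HDIST = numOffsets-1 (5 bits), and
+ // HCLEN = numCodegens-4 (4 bits).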
for i := 0; i < numCodegens; i++ {
tokens = tokens[0 : n+1]
tokens[n] = endBlockMarker
- totalLength := -1 // Subtract 1 for endBlock.
for _, t := range tokens {
switch t.typ() {
case literalType:
w.literalFreq[t.literal()]++
- totalLength++
- break
case matchType:
length := t.length()
offset := t.offset()
- totalLength += int(length + 3)
w.literalFreq[lengthCodesStart+lengthCode(length)]++
w.offsetFreq[offsetCode(offset)]++
- break
}
}
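+ // The loop above only collects frequencies: literals and length codes share
+ // one alphabet (length codes start at lengthCodesStart), while offsets use a
+ // separate alphabet, so two histograms are kept.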
- w.literalEncoding.generate(w.literalFreq, 15)
- w.offsetEncoding.generate(w.offsetFreq, 15)
// get the number of literals
numLiterals := len(w.literalFreq)
for w.literalFreq[numLiterals-1] == 0 {
numLiterals--
}
// get the number of offsets
numOffsets := len(w.offsetFreq)
- for numOffsets > 1 && w.offsetFreq[numOffsets-1] == 0 {
+ for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
numOffsets--
}
+ if numOffsets == 0 {
+ // We haven't found a single match. If we want to go with the dynamic encoding,
+ // we must count at least one offset so that the offset Huffman tree can be encoded.
+ w.offsetFreq[0] = 1
+ numOffsets = 1
+ }
+
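+ // RFC 1951 caps literal/length and distance code lengths at 15 bits, hence
+ // the limit passed to generate.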
+ w.literalEncoding.generate(w.literalFreq, 15)
+ w.offsetEncoding.generate(w.offsetFreq, 15)
+
storedBytes := 0
if input != nil {
storedBytes = len(input)
}
var extraBits int64
- var storedSize int64
+ var storedSize int64 = math.MaxInt64
if storedBytes <= maxStoreBlockSize && input != nil {
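+ // storedSize counts 5 bytes of framing per stored block: the 3-bit header
+ // rounded up to a byte boundary plus the 2-byte LEN and 2-byte NLEN fields.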
storedSize = int64((storedBytes + 5) * 8)
// We only bother calculating the costs of the extra bits required by
// the length and offset fields, which are the same for both the fixed
// and dynamic encodings.
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
// First four offset codes have extra size = 0.
extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode])
}
- } else {
- storedSize = math.MaxInt32
}
- // Figure out which generates smaller code, fixed Huffman, dynamic
- // Huffman, or just storing the data.
- var fixedSize int64 = math.MaxInt64
- if numOffsets <= offsetCodeCount {
- fixedSize = int64(3) +
- fixedLiteralEncoding.bitLength(w.literalFreq) +
- fixedOffsetEncoding.bitLength(w.offsetFreq) +
- extraBits
- }
+ // Figure out smallest code.
+ // Fixed Huffman baseline.
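+ // The leading int64(3) is the 3-bit block header (a BFINAL bit plus a 2-bit
+ // BTYPE) that precedes every DEFLATE block.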
+ var size = int64(3) +
+ fixedLiteralEncoding.bitLength(w.literalFreq) +
+ fixedOffsetEncoding.bitLength(w.offsetFreq) +
+ extraBits
+ var literalEncoding = fixedLiteralEncoding
+ var offsetEncoding = fixedOffsetEncoding
+
+ // Dynamic Huffman?
+ var numCodegens int
+
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets)
w.codegenEncoding.generate(w.codegenFreq, 7)
- numCodegens := len(w.codegenFreq)
+ numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
- extensionSummand := 0
- if numOffsets > offsetCodeCount {
- extensionSummand = 3
- }
dynamicSize := int64(3+5+5+4+(3*numCodegens)) +
- // Following line is an extension.
- int64(extensionSummand) +
w.codegenEncoding.bitLength(w.codegenFreq) +
int64(extraBits) +
int64(w.codegenFreq[16]*2) +
w.literalEncoding.bitLength(w.literalFreq) +
w.offsetEncoding.bitLength(w.offsetFreq)
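+ // Codegen symbol 16 ("copy the previous code length") carries two extra bits
+ // in the output (RFC 1951), so its frequency is weighted by 2 above.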
- if storedSize < fixedSize && storedSize < dynamicSize {
+ if dynamicSize < size {
+ size = dynamicSize
+ literalEncoding = w.literalEncoding
+ offsetEncoding = w.offsetEncoding
+ }
+
+ // Stored bytes?
+ if storedSize < size {
w.writeStoredHeader(storedBytes, eof)
w.writeBytes(input[0:storedBytes])
return
}
- var literalEncoding *huffmanEncoder
- var offsetEncoding *huffmanEncoder
- if fixedSize <= dynamicSize {
+ // Huffman.
+ if literalEncoding == fixedLiteralEncoding {
w.writeFixedHeader(eof)
- literalEncoding = fixedLiteralEncoding
- offsetEncoding = fixedOffsetEncoding
} else {
- // Write the header.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
- literalEncoding = w.literalEncoding
- offsetEncoding = w.offsetEncoding
}
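+ // Either header call emits only the block header; the tokens themselves are
+ // written below with whichever encoding was chosen.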
-
- // Write the tokens.
for _, t := range tokens {
switch t.typ() {
case literalType:
import (
"bytes"
+ "fmt"
"io"
"io/ioutil"
"os"
"../testdata/pi.txt",
}
+var data = []string{
+ "test a reasonable sized string that can be compressed",
+}
+
// Tests that compressing and then decompressing the given file at the given compression level
// and with the given dictionary yields bytes identical to the original file.
func testFileLevelDict(t *testing.T, fn string, level int, d string) {
- // Read dictionary, if given.
- var dict []byte
- if d != "" {
- dict = []byte(d)
- }
-
// Read the file, as golden output.
golden, err := os.Open(fn)
if err != nil {
t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return
}
defer golden.Close()
-
- // Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end.
- raw, err := os.Open(fn)
- if err != nil {
- t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
+ b0, err0 := ioutil.ReadAll(golden)
+ if err0 != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0)
return
}
+ testLevelDict(t, fn, b0, level, d)
+}
+
+func testLevelDict(t *testing.T, fn string, b0 []byte, level int, d string) {
+ // Make dictionary, if given.
+ var dict []byte
+ if d != "" {
+ dict = []byte(d)
+ }
+
+ // Push data through a pipe that compresses at the write end, and decompresses at the read end.
piper, pipew := io.Pipe()
defer piper.Close()
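+ // io.Pipe is unbuffered: each Write blocks until the read side consumes the
+ // data, so compression must run in its own goroutine.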
go func() {
- defer raw.Close()
defer pipew.Close()
zlibw, err := NewWriterDict(pipew, level, dict)
if err != nil {
t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return
}
defer zlibw.Close()
- var b [1024]byte
- for {
- n, err0 := raw.Read(b[0:])
- if err0 != nil && err0 != os.EOF {
- t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0)
- return
- }
- _, err1 := zlibw.Write(b[0:n])
- if err1 == os.EPIPE {
- // Fail, but do not report the error, as some other (presumably reportable) error broke the pipe.
- return
- }
- if err1 != nil {
- t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1)
- return
- }
- if err0 == os.EOF {
- break
- }
+ _, err = zlibw.Write(b0)
+ if err == os.EPIPE {
+ // The write failed; do not report the error, as some other (presumably reported) error broke the pipe.
+ return
+ }
+ if err != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
+ return
}
}()
zlibr, err := NewReaderDict(piper, dict)
if err != nil {
t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return
}
defer zlibr.Close()
- // Compare the two.
- b0, err0 := ioutil.ReadAll(golden)
+ // Compare the decompressed data against the original.
b1, err1 := ioutil.ReadAll(zlibr)
- if err0 != nil {
- t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0)
- return
- }
if err1 != nil {
t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1)
return
}
func TestWriter(t *testing.T) {
+ for i, s := range data {
+ b := []byte(s)
+ tag := fmt.Sprintf("#%d", i)
+ testLevelDict(t, tag, b, DefaultCompression, "")
+ testLevelDict(t, tag, b, NoCompression, "")
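+ // BestSpeed through BestCompression covers every explicit level (1-9).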
+ for level := BestSpeed; level <= BestCompression; level++ {
+ testLevelDict(t, tag, b, level, "")
+ }
+ }
+}
+
+func TestWriterBig(t *testing.T) {
for _, fn := range filenames {
testFileLevelDict(t, fn, DefaultCompression, "")
testFileLevelDict(t, fn, NoCompression, "")