import (
"bytes"
+ "encoding/hex"
+ "io/ioutil"
"testing"
)
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8}
- h := new(huffmanDecoder)
- ok := h.init(bits)
- if ok == true {
+ var h huffmanDecoder
+ if h.init(bits) {
t.Fatalf("Given sequence of bits is bad, and should not succeed.")
}
}
func TestIssue5962(t *testing.T) {
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0,
5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11}
- h := new(huffmanDecoder)
- ok := h.init(bits)
- if ok == true {
+ var h huffmanDecoder
+ if h.init(bits) {
t.Fatalf("Given sequence of bits is bad, and should not succeed.")
}
}
func TestIssue6255(t *testing.T) {
bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11}
bits2 := []int{11, 13}
- h := new(huffmanDecoder)
+ var h huffmanDecoder
if !h.init(bits1) {
t.Fatalf("Given sequence of bits is good and should succeed.")
}
t.Fatal("Should have rejected invalid bit sequence")
}
}
+
+func TestInvalidBits(t *testing.T) {
+ oversubscribed := []int{1, 2, 3, 4, 4, 5}
+ incomplete := []int{1, 2, 4, 4}
+ var h huffmanDecoder
+ if h.init(oversubscribed) {
+ t.Fatal("Should reject oversubscribed bit-length set")
+ }
+ if h.init(incomplete) {
+ t.Fatal("Should reject incomplete bit-length set")
+ }
+}
+
+func TestDegenerateHuffmanCoding(t *testing.T) {
+ // This test case is notable because:
+ // 1. It's decompressable by zlib.
+ // 2. It was generated by Go 1.4's compress/flate package.
+ // 3. It uses a degenerate dynamic Huffman coding block.
+ //
+ // The input is somewhat contrived though. It's a sequence of
+ // 258 bytes with no 3+ byte sequence occuring more than once,
+ // except that the whole sequence is repeated twice. This
+ // results in package flate emitting a single match token,
+ // which consequently means a single symbol in the distance
+ // coding table.
+ //
+ // Additionally, it uses very few unique byte values so that
+ // the overhead from storing the dynamic Huffman coding still
+ // results in a smaller encoding than using the fixed Huffman
+ // coding.
+ const (
+ originalHalf = "00013534215002452243512505010034133042401113" +
+ "400415101454022532410254513251155411055331124453555" +
+ "023120320201523334303524252551414033503012344230210" +
+ "310431305153005314321221315440455204052144332205422" +
+ "235434504441211420062622646656236416326065565261624" +
+ "6256136546"
+ compressedHex = "ecd081000030104251a5fad5f9a34d640a4f92b3144" +
+ "fa28366669a2ca54e542adba954cf7257c1422dd639ccde6a6b" +
+ "4b6cda659b885110f248d228a38ccd75954c91494b8415ab713" +
+ "42fd2e20683e3b5ea86aae13601ad40d6746a6bec221d07d7bb" +
+ "1db9fac2e9b61be7a3c7ceb9f5bec00b0000ffffecd08100003" +
+ "0104251a5fad5f9a34d640a4f92b3144fa28366669a2ca54e54" +
+ "2adba954cf7257c1422dd639ccde6a6b4b6cda659b885110f24" +
+ "8d228a38ccd75954c91494b8415ab71342fd2e20683e3b5ea86" +
+ "aae13601ad40d6746a6bec221d07d7bb1db9fac2e9b61be7a3c" +
+ "7ceb9f5bec00b0000ffff"
+ )
+
+ compressed, err := hex.DecodeString(compressedHex)
+ if err != nil {
+ t.Fatal(err)
+ }
+ data, err := ioutil.ReadAll(NewReader(bytes.NewReader(compressed)))
+ if err != nil {
+ t.Fatal(err)
+ }
+ if string(data) != originalHalf+originalHalf {
+ t.Fatal("Decompressed data does not match original")
+ }
+}
// Initialize Huffman decoding tables from array of code lengths.
func (h *huffmanDecoder) init(bits []int) bool {
+ // Sanity enables additional runtime tests during Huffman
+ // table construction. It's intended to be used during
+ // development to supplement the currently ad-hoc unit tests.
+ const sanity = false
+
+ if h.min != 0 {
+ *h = huffmanDecoder{}
+ }
+
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
return false
}
- h.min = min
- var linkBits uint
- var numLinks int
- if max > huffmanChunkBits {
- linkBits = uint(max) - huffmanChunkBits
- numLinks = 1 << linkBits
- h.linkMask = uint32(numLinks - 1)
- }
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
- if i == huffmanChunkBits+1 {
- // create link tables
- link := code >> 1
- h.links = make([][]uint32, huffmanNumChunks-link)
- for j := uint(link); j < huffmanNumChunks; j++ {
- reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
- reverse >>= uint(16 - huffmanChunkBits)
- off := j - uint(link)
- h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
- h.links[off] = make([]uint32, 1<<linkBits)
+ code <<= 1
+ nextcode[i] = code
+ code += count[i]
+ }
+
+ // Check that the coding is complete (i.e., that we've
+ // assigned all 2-to-the-max possible bit sequences).
+ // Exception: To be compatible with zlib, we also need to
+ // accept degenerate single-code codings. See also
+ // TestDegenerateHuffmanCoding.
+ if code != 1<<uint(max) && !(code == 1 && max == 1) {
+ return false
+ }
+
+ h.min = min
+ if max > huffmanChunkBits {
+ numLinks := 1 << (uint(max) - huffmanChunkBits)
+ h.linkMask = uint32(numLinks - 1)
+
+ // create link tables
+ link := nextcode[huffmanChunkBits+1] >> 1
+ h.links = make([][]uint32, huffmanNumChunks-link)
+ for j := uint(link); j < huffmanNumChunks; j++ {
+ reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+ reverse >>= uint(16 - huffmanChunkBits)
+ off := j - uint(link)
+ if sanity && h.chunks[reverse] != 0 {
+ panic("impossible: overwriting existing chunk")
}
+ h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+ h.links[off] = make([]uint32, numLinks)
}
- n := count[i]
- nextcode[i] = code
- code += n
- code <<= 1
}
for i, n := range bits {
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
- for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
+ for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+ // We should never need to overwrite
+ // an existing chunk. Also, 0 is
+ // never a valid chunk, because the
+ // lower 4 "count" bits should be
+ // between 1 and 15.
+ if sanity && h.chunks[off] != 0 {
+ panic("impossible: overwriting existing chunk")
+ }
h.chunks[off] = chunk
}
} else {
- linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
+ j := reverse & (huffmanNumChunks - 1)
+ if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+ // Longer codes should have been
+ // associated with a link table above.
+ panic("impossible: not an indirect chunk")
+ }
+ value := h.chunks[j] >> huffmanValueShift
+ linktab := h.links[value]
reverse >>= huffmanChunkBits
- for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
+ for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+ if sanity && linktab[off] != 0 {
+ panic("impossible: overwriting existing chunk")
+ }
linktab[off] = chunk
}
}
}
+
+ if sanity {
+ // Above we've sanity checked that we never overwrote
+ // an existing entry. Here we additionally check that
+ // we filled the tables completely.
+ for i, chunk := range h.chunks {
+ if chunk == 0 {
+ // As an exception, in the degenerate
+ // single-code case, we allow odd
+ // chunks to be missing.
+ if code == 1 && i%2 == 1 {
+ continue
+ }
+ panic("impossible: missing chunk")
+ }
+ }
+ for _, linktab := range h.links {
+ for _, chunk := range linktab {
+ if chunk == 0 {
+ panic("impossible: missing chunk")
+ }
+ }
+ }
+ }
+
return true
}
bits[i] = 8
}
h.init(bits[:])
+ if h.links != nil {
+ log.Fatal("Unexpected links table in fixed Huffman decoder")
+ }
var buf bytes.Buffer
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
- //
- // TODO(mdempsky): TestIssue5962 and TestIssue6255 currently
- // fail with these enabled.
const sanity = false
if h.min != 0 {
return false
}
- h.min = min
- var linkBits uint
- var numLinks int
- if max > huffmanChunkBits {
- linkBits = uint(max) - huffmanChunkBits
- numLinks = 1 << linkBits
- h.linkMask = uint32(numLinks - 1)
- }
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
- if i == huffmanChunkBits+1 {
- // create link tables
- link := code >> 1
- if huffmanNumChunks < link {
- return false
- }
- h.links = make([][]uint32, huffmanNumChunks-link)
- for j := uint(link); j < huffmanNumChunks; j++ {
- reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
- reverse >>= uint(16 - huffmanChunkBits)
- off := j - uint(link)
- if sanity && h.chunks[reverse] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
- h.links[off] = make([]uint32, 1<<linkBits)
+ code <<= 1
+ nextcode[i] = code
+ code += count[i]
+ }
+
+ // Check that the coding is complete (i.e., that we've
+ // assigned all 2-to-the-max possible bit sequences).
+ // Exception: To be compatible with zlib, we also need to
+ // accept degenerate single-code codings. See also
+ // TestDegenerateHuffmanCoding.
+ if code != 1<<uint(max) && !(code == 1 && max == 1) {
+ return false
+ }
+
+ h.min = min
+ if max > huffmanChunkBits {
+ numLinks := 1 << (uint(max) - huffmanChunkBits)
+ h.linkMask = uint32(numLinks - 1)
+
+ // create link tables
+ link := nextcode[huffmanChunkBits+1] >> 1
+ h.links = make([][]uint32, huffmanNumChunks-link)
+ for j := uint(link); j < huffmanNumChunks; j++ {
+ reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+ reverse >>= uint(16 - huffmanChunkBits)
+ off := j - uint(link)
+ if sanity && h.chunks[reverse] != 0 {
+ panic("impossible: overwriting existing chunk")
}
+ h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+ h.links[off] = make([]uint32, numLinks)
}
- n := count[i]
- nextcode[i] = code
- code += n
- code <<= 1
}
for i, n := range bits {
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
- for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
+ for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
- if value >= uint32(len(h.links)) {
- return false
- }
linktab := h.links[value]
reverse >>= huffmanChunkBits
- for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
+ for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}