From 5cc8561333313c4318d980fea9b5c2dab596b503 Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 13 Apr 2015 15:31:28 -0700 Subject: [PATCH] compress/flate: reject invalid Huffman bit sizes If the requested coding bit sizes don't result in a full binary tree, then reject the input as invalid. Exception: We still need to allow degenerate Huffman codings with a single 1-bit code to be compatible with zlib and files compressed with Go's compress/flate package. Update #10426. Change-Id: I171b98d12e65b4deb9f4031cd802407ebb5e266c Reviewed-on: https://go-review.googlesource.com/8922 Reviewed-by: Nigel Tao --- src/compress/flate/flate_test.go | 75 ++++++++++++++++++-- src/compress/flate/gen.go | 113 ++++++++++++++++++++++++------- src/compress/flate/inflate.go | 68 +++++++++---------- 3 files changed, 188 insertions(+), 68 deletions(-) diff --git a/src/compress/flate/flate_test.go b/src/compress/flate/flate_test.go index 5483641510..abf4784932 100644 --- a/src/compress/flate/flate_test.go +++ b/src/compress/flate/flate_test.go @@ -10,6 +10,8 @@ package flate import ( "bytes" + "encoding/hex" + "io/ioutil" "testing" ) @@ -30,9 +32,8 @@ func TestIssue5915(t *testing.T) { bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8} - h := new(huffmanDecoder) - ok := h.init(bits) - if ok == true { + var h huffmanDecoder + if h.init(bits) { t.Fatalf("Given sequence of bits is bad, and should not succeed.") } } @@ -41,9 +42,8 @@ func TestIssue5915(t *testing.T) { func TestIssue5962(t *testing.T) { bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11} - h := new(huffmanDecoder) - ok := h.init(bits) - if ok == true { + var h huffmanDecoder + if h.init(bits) { t.Fatalf("Given sequence of bits is bad, and should not succeed.") } } @@ -52,7 +52,7 @@ func TestIssue5962(t *testing.T) { func TestIssue6255(t *testing.T) { bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11} bits2 := []int{11, 13} - h := new(huffmanDecoder) + var h huffmanDecoder if !h.init(bits1) { t.Fatalf("Given sequence of bits is good and should succeed.") } @@ -77,3 +77,64 @@ func TestInvalidEncoding(t *testing.T) { t.Fatal("Should have rejected invalid bit sequence") } } + +func TestInvalidBits(t *testing.T) { + oversubscribed := []int{1, 2, 3, 4, 4, 5} + incomplete := []int{1, 2, 4, 4} + var h huffmanDecoder + if h.init(oversubscribed) { + t.Fatal("Should reject oversubscribed bit-length set") + } + if h.init(incomplete) { + t.Fatal("Should reject incomplete bit-length set") + } +} + +func TestDegenerateHuffmanCoding(t *testing.T) { + // This test case is notable because: + // 1. It's decompressable by zlib. + // 2. It was generated by Go 1.4's compress/flate package. + // 3. It uses a degenerate dynamic Huffman coding block. + // + // The input is somewhat contrived though. It's a sequence of + // 258 bytes with no 3+ byte sequence occuring more than once, + // except that the whole sequence is repeated twice. This + // results in package flate emitting a single match token, + // which consequently means a single symbol in the distance + // coding table. + // + // Additionally, it uses very few unique byte values so that + // the overhead from storing the dynamic Huffman coding still + // results in a smaller encoding than using the fixed Huffman + // coding. + const ( + originalHalf = "00013534215002452243512505010034133042401113" + + "400415101454022532410254513251155411055331124453555" + + "023120320201523334303524252551414033503012344230210" + + "310431305153005314321221315440455204052144332205422" + + "235434504441211420062622646656236416326065565261624" + + "6256136546" + compressedHex = "ecd081000030104251a5fad5f9a34d640a4f92b3144" + + "fa28366669a2ca54e542adba954cf7257c1422dd639ccde6a6b" + + "4b6cda659b885110f248d228a38ccd75954c91494b8415ab713" + + "42fd2e20683e3b5ea86aae13601ad40d6746a6bec221d07d7bb" + + "1db9fac2e9b61be7a3c7ceb9f5bec00b0000ffffecd08100003" + + "0104251a5fad5f9a34d640a4f92b3144fa28366669a2ca54e54" + + "2adba954cf7257c1422dd639ccde6a6b4b6cda659b885110f24" + + "8d228a38ccd75954c91494b8415ab71342fd2e20683e3b5ea86" + + "aae13601ad40d6746a6bec221d07d7bb1db9fac2e9b61be7a3c" + + "7ceb9f5bec00b0000ffff" + ) + + compressed, err := hex.DecodeString(compressedHex) + if err != nil { + t.Fatal(err) + } + data, err := ioutil.ReadAll(NewReader(bytes.NewReader(compressed))) + if err != nil { + t.Fatal(err) + } + if string(data) != originalHalf+originalHalf { + t.Fatal("Decompressed data does not match original") + } +} diff --git a/src/compress/flate/gen.go b/src/compress/flate/gen.go index 6288ecddd0..eeafa84a5d 100644 --- a/src/compress/flate/gen.go +++ b/src/compress/flate/gen.go @@ -46,6 +46,15 @@ type huffmanDecoder struct { // Initialize Huffman decoding tables from array of code lengths. func (h *huffmanDecoder) init(bits []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.min != 0 { + *h = huffmanDecoder{} + } + // Count number of codes of each length, // compute min and max length. var count [maxCodeLen]int @@ -66,33 +75,41 @@ func (h *huffmanDecoder) init(bits []int) bool { return false } - h.min = min - var linkBits uint - var numLinks int - if max > huffmanChunkBits { - linkBits = uint(max) - huffmanChunkBits - numLinks = 1 << linkBits - h.linkMask = uint32(numLinks - 1) - } code := 0 var nextcode [maxCodeLen]int for i := min; i <= max; i++ { - if i == huffmanChunkBits+1 { - // create link tables - link := code >> 1 - h.links = make([][]uint32, huffmanNumChunks-link) - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - h.chunks[reverse] = uint32(off< huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") } + h.chunks[reverse] = uint32(off<>8]) | int(reverseByte[code&0xff])<<8 reverse >>= uint(16 - n) if n <= huffmanChunkBits { - for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } h.chunks[off] = chunk } } else { - linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift] + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] reverse >>= huffmanChunkBits - for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } linktab[off] = chunk } } } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + return true } @@ -138,6 +198,9 @@ func main() { bits[i] = 8 } h.init(bits[:]) + if h.links != nil { + log.Fatal("Unexpected links table in fixed Huffman decoder") + } var buf bytes.Buffer diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go index 291e62343e..6f88159dfa 100644 --- a/src/compress/flate/inflate.go +++ b/src/compress/flate/inflate.go @@ -105,9 +105,6 @@ func (h *huffmanDecoder) init(bits []int) bool { // Sanity enables additional runtime tests during Huffman // table construction. It's intended to be used during // development to supplement the currently ad-hoc unit tests. - // - // TODO(mdempsky): TestIssue5962 and TestIssue6255 currently - // fail with these enabled. const sanity = false if h.min != 0 { @@ -134,39 +131,41 @@ func (h *huffmanDecoder) init(bits []int) bool { return false } - h.min = min - var linkBits uint - var numLinks int - if max > huffmanChunkBits { - linkBits = uint(max) - huffmanChunkBits - numLinks = 1 << linkBits - h.linkMask = uint32(numLinks - 1) - } code := 0 var nextcode [maxCodeLen]int for i := min; i <= max; i++ { - if i == huffmanChunkBits+1 { - // create link tables - link := code >> 1 - if huffmanNumChunks < link { - return false - } - h.links = make([][]uint32, huffmanNumChunks-link) - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint32(off< huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") } + h.chunks[reverse] = uint32(off<>8]) | int(reverseByte[code&0xff])<<8 reverse >>= uint(16 - n) if n <= huffmanChunkBits { - for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { // We should never need to overwrite // an existing chunk. Also, 0 is // never a valid chunk, because the @@ -198,12 +197,9 @@ func (h *huffmanDecoder) init(bits []int) bool { panic("impossible: not an indirect chunk") } value := h.chunks[j] >> huffmanValueShift - if value >= uint32(len(h.links)) { - return false - } linktab := h.links[value] reverse >>= huffmanChunkBits - for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { if sanity && linktab[off] != 0 { panic("impossible: overwriting existing chunk") } -- 2.48.1