compress/flate: reject invalid Huffman bit sizes

author Matthew Dempsky <mdempsky@google.com>

Mon, 13 Apr 2015 22:31:28 +0000 (15:31 -0700)

committer Matthew Dempsky <mdempsky@google.com>

Thu, 16 Apr 2015 07:34:04 +0000 (07:34 +0000)
author Matthew Dempsky <mdempsky@google.com>
Mon, 13 Apr 2015 22:31:28 +0000 (15:31 -0700)
committer Matthew Dempsky <mdempsky@google.com>
Thu, 16 Apr 2015 07:34:04 +0000 (07:34 +0000)
diff --git a/src/compress/flate/flate_test.go b/src/compress/flate/flate_test.go

index 5483641510b4866810358d8efe796bd456fb2664..abf4784932b67fcc8377f817b3078c84d4c55593 100644 (file)
--- a/src/compress/flate/flate_test.go
+++ b/src/compress/flate/flate_test.go
@@ -10,6 +10,8 @@ package flate
  
  import (
         "bytes"
+       "encoding/hex"
+       "io/ioutil"
         "testing"
  )
  
@@ -30,9 +32,8 @@ func TestIssue5915(t *testing.T) {
         bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8}
-       h := new(huffmanDecoder)
-       ok := h.init(bits)
-       if ok == true {
+       var h huffmanDecoder
+       if h.init(bits) {
                 t.Fatalf("Given sequence of bits is bad, and should not succeed.")
         }
  }
@@ -41,9 +42,8 @@ func TestIssue5915(t *testing.T) {
  func TestIssue5962(t *testing.T) {
         bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0,
                 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11}
-       h := new(huffmanDecoder)
-       ok := h.init(bits)
-       if ok == true {
+       var h huffmanDecoder
+       if h.init(bits) {
                 t.Fatalf("Given sequence of bits is bad, and should not succeed.")
         }
  }
@@ -52,7 +52,7 @@ func TestIssue5962(t *testing.T) {
  func TestIssue6255(t *testing.T) {
         bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11}
         bits2 := []int{11, 13}
-       h := new(huffmanDecoder)
+       var h huffmanDecoder
         if !h.init(bits1) {
                 t.Fatalf("Given sequence of bits is good and should succeed.")
         }
@@ -77,3 +77,64 @@ func TestInvalidEncoding(t *testing.T) {
                 t.Fatal("Should have rejected invalid bit sequence")
         }
  }
+
+func TestInvalidBits(t *testing.T) {
+       oversubscribed := []int{1, 2, 3, 4, 4, 5}
+       incomplete := []int{1, 2, 4, 4}
+       var h huffmanDecoder
+       if h.init(oversubscribed) {
+               t.Fatal("Should reject oversubscribed bit-length set")
+       }
+       if h.init(incomplete) {
+               t.Fatal("Should reject incomplete bit-length set")
+       }
+}
+
+func TestDegenerateHuffmanCoding(t *testing.T) {
+       // This test case is notable because:
+       //   1. It's decompressable by zlib.
+       //   2. It was generated by Go 1.4's compress/flate package.
+       //   3. It uses a degenerate dynamic Huffman coding block.
+       //
+       // The input is somewhat contrived though.  It's a sequence of
+       // 258 bytes with no 3+ byte sequence occuring more than once,
+       // except that the whole sequence is repeated twice.  This
+       // results in package flate emitting a single match token,
+       // which consequently means a single symbol in the distance
+       // coding table.
+       //
+       // Additionally, it uses very few unique byte values so that
+       // the overhead from storing the dynamic Huffman coding still
+       // results in a smaller encoding than using the fixed Huffman
+       // coding.
+       const (
+               originalHalf = "00013534215002452243512505010034133042401113" +
+                       "400415101454022532410254513251155411055331124453555" +
+                       "023120320201523334303524252551414033503012344230210" +
+                       "310431305153005314321221315440455204052144332205422" +
+                       "235434504441211420062622646656236416326065565261624" +
+                       "6256136546"
+               compressedHex = "ecd081000030104251a5fad5f9a34d640a4f92b3144" +
+                       "fa28366669a2ca54e542adba954cf7257c1422dd639ccde6a6b" +
+                       "4b6cda659b885110f248d228a38ccd75954c91494b8415ab713" +
+                       "42fd2e20683e3b5ea86aae13601ad40d6746a6bec221d07d7bb" +
+                       "1db9fac2e9b61be7a3c7ceb9f5bec00b0000ffffecd08100003" +
+                       "0104251a5fad5f9a34d640a4f92b3144fa28366669a2ca54e54" +
+                       "2adba954cf7257c1422dd639ccde6a6b4b6cda659b885110f24" +
+                       "8d228a38ccd75954c91494b8415ab71342fd2e20683e3b5ea86" +
+                       "aae13601ad40d6746a6bec221d07d7bb1db9fac2e9b61be7a3c" +
+                       "7ceb9f5bec00b0000ffff"
+       )
+
+       compressed, err := hex.DecodeString(compressedHex)
+       if err != nil {
+               t.Fatal(err)
+       }
+       data, err := ioutil.ReadAll(NewReader(bytes.NewReader(compressed)))
+       if err != nil {
+               t.Fatal(err)
+       }
+       if string(data) != originalHalf+originalHalf {
+               t.Fatal("Decompressed data does not match original")
+       }
+}
diff --git a/src/compress/flate/gen.go b/src/compress/flate/gen.go

index 6288ecddd0e601948b2f9ea6f8622c7a04ee7351..eeafa84a5d0c0b07a5067a6651cbfe0f76d5f025 100644 (file)
--- a/src/compress/flate/gen.go
+++ b/src/compress/flate/gen.go
@@ -46,6 +46,15 @@ type huffmanDecoder struct {
  
  // Initialize Huffman decoding tables from array of code lengths.
  func (h *huffmanDecoder) init(bits []int) bool {
+       // Sanity enables additional runtime tests during Huffman
+       // table construction.  It's intended to be used during
+       // development to supplement the currently ad-hoc unit tests.
+       const sanity = false
+
+       if h.min != 0 {
+               *h = huffmanDecoder{}
+       }
+
         // Count number of codes of each length,
         // compute min and max length.
         var count [maxCodeLen]int
@@ -66,33 +75,41 @@ func (h *huffmanDecoder) init(bits []int) bool {
                 return false
         }
  
-       h.min = min
-       var linkBits uint
-       var numLinks int
-       if max > huffmanChunkBits {
-               linkBits = uint(max) - huffmanChunkBits
-               numLinks = 1 << linkBits
-               h.linkMask = uint32(numLinks - 1)
-       }
         code := 0
         var nextcode [maxCodeLen]int
         for i := min; i <= max; i++ {
-               if i == huffmanChunkBits+1 {
-                       // create link tables
-                       link := code >> 1
-                       h.links = make([][]uint32, huffmanNumChunks-link)
-                       for j := uint(link); j < huffmanNumChunks; j++ {
-                               reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
-                               reverse >>= uint(16 - huffmanChunkBits)
-                               off := j - uint(link)
-                               h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
-                               h.links[off] = make([]uint32, 1<<linkBits)
+               code <<= 1
+               nextcode[i] = code
+               code += count[i]
+       }
+
+       // Check that the coding is complete (i.e., that we've
+       // assigned all 2-to-the-max possible bit sequences).
+       // Exception: To be compatible with zlib, we also need to
+       // accept degenerate single-code codings.  See also
+       // TestDegenerateHuffmanCoding.
+       if code != 1<<uint(max) && !(code == 1 && max == 1) {
+               return false
+       }
+
+       h.min = min
+       if max > huffmanChunkBits {
+               numLinks := 1 << (uint(max) - huffmanChunkBits)
+               h.linkMask = uint32(numLinks - 1)
+
+               // create link tables
+               link := nextcode[huffmanChunkBits+1] >> 1
+               h.links = make([][]uint32, huffmanNumChunks-link)
+               for j := uint(link); j < huffmanNumChunks; j++ {
+                       reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+                       reverse >>= uint(16 - huffmanChunkBits)
+                       off := j - uint(link)
+                       if sanity && h.chunks[reverse] != 0 {
+                               panic("impossible: overwriting existing chunk")
                         }
+                       h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+                       h.links[off] = make([]uint32, numLinks)
                 }
-               n := count[i]
-               nextcode[i] = code
-               code += n
-               code <<= 1
         }
  
         for i, n := range bits {
@@ -105,17 +122,60 @@ func (h *huffmanDecoder) init(bits []int) bool {
                 reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
                 reverse >>= uint(16 - n)
                 if n <= huffmanChunkBits {
-                       for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
+                       for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+                               // We should never need to overwrite
+                               // an existing chunk.  Also, 0 is
+                               // never a valid chunk, because the
+                               // lower 4 "count" bits should be
+                               // between 1 and 15.
+                               if sanity && h.chunks[off] != 0 {
+                                       panic("impossible: overwriting existing chunk")
+                               }
                                 h.chunks[off] = chunk
                         }
                 } else {
-                       linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
+                       j := reverse & (huffmanNumChunks - 1)
+                       if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+                               // Longer codes should have been
+                               // associated with a link table above.
+                               panic("impossible: not an indirect chunk")
+                       }
+                       value := h.chunks[j] >> huffmanValueShift
+                       linktab := h.links[value]
                         reverse >>= huffmanChunkBits
-                       for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
+                       for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+                               if sanity && linktab[off] != 0 {
+                                       panic("impossible: overwriting existing chunk")
+                               }
                                 linktab[off] = chunk
                         }
                 }
         }
+
+       if sanity {
+               // Above we've sanity checked that we never overwrote
+               // an existing entry.  Here we additionally check that
+               // we filled the tables completely.
+               for i, chunk := range h.chunks {
+                       if chunk == 0 {
+                               // As an exception, in the degenerate
+                               // single-code case, we allow odd
+                               // chunks to be missing.
+                               if code == 1 && i%2 == 1 {
+                                       continue
+                               }
+                               panic("impossible: missing chunk")
+                       }
+               }
+               for _, linktab := range h.links {
+                       for _, chunk := range linktab {
+                               if chunk == 0 {
+                                       panic("impossible: missing chunk")
+                               }
+                       }
+               }
+       }
+
         return true
  }
  
@@ -138,6 +198,9 @@ func main() {
                 bits[i] = 8
         }
         h.init(bits[:])
+       if h.links != nil {
+               log.Fatal("Unexpected links table in fixed Huffman decoder")
+       }
  
         var buf bytes.Buffer
  
diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go

index 291e62343ec9720842accc17de53ef7071937747..6f88159dfac78362a88cbb3179e5f999f0b379d0 100644 (file)
--- a/src/compress/flate/inflate.go
+++ b/src/compress/flate/inflate.go
@@ -105,9 +105,6 @@ func (h *huffmanDecoder) init(bits []int) bool {
         // Sanity enables additional runtime tests during Huffman
         // table construction.  It's intended to be used during
         // development to supplement the currently ad-hoc unit tests.
-       //
-       // TODO(mdempsky): TestIssue5962 and TestIssue6255 currently
-       // fail with these enabled.
         const sanity = false
  
         if h.min != 0 {
@@ -134,39 +131,41 @@ func (h *huffmanDecoder) init(bits []int) bool {
                 return false
         }
  
-       h.min = min
-       var linkBits uint
-       var numLinks int
-       if max > huffmanChunkBits {
-               linkBits = uint(max) - huffmanChunkBits
-               numLinks = 1 << linkBits
-               h.linkMask = uint32(numLinks - 1)
-       }
         code := 0
         var nextcode [maxCodeLen]int
         for i := min; i <= max; i++ {
-               if i == huffmanChunkBits+1 {
-                       // create link tables
-                       link := code >> 1
-                       if huffmanNumChunks < link {
-                               return false
-                       }
-                       h.links = make([][]uint32, huffmanNumChunks-link)
-                       for j := uint(link); j < huffmanNumChunks; j++ {
-                               reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
-                               reverse >>= uint(16 - huffmanChunkBits)
-                               off := j - uint(link)
-                               if sanity && h.chunks[reverse] != 0 {
-                                       panic("impossible: overwriting existing chunk")
-                               }
-                               h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
-                               h.links[off] = make([]uint32, 1<<linkBits)
+               code <<= 1
+               nextcode[i] = code
+               code += count[i]
+       }
+
+       // Check that the coding is complete (i.e., that we've
+       // assigned all 2-to-the-max possible bit sequences).
+       // Exception: To be compatible with zlib, we also need to
+       // accept degenerate single-code codings.  See also
+       // TestDegenerateHuffmanCoding.
+       if code != 1<<uint(max) && !(code == 1 && max == 1) {
+               return false
+       }
+
+       h.min = min
+       if max > huffmanChunkBits {
+               numLinks := 1 << (uint(max) - huffmanChunkBits)
+               h.linkMask = uint32(numLinks - 1)
+
+               // create link tables
+               link := nextcode[huffmanChunkBits+1] >> 1
+               h.links = make([][]uint32, huffmanNumChunks-link)
+               for j := uint(link); j < huffmanNumChunks; j++ {
+                       reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+                       reverse >>= uint(16 - huffmanChunkBits)
+                       off := j - uint(link)
+                       if sanity && h.chunks[reverse] != 0 {
+                               panic("impossible: overwriting existing chunk")
                         }
+                       h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+                       h.links[off] = make([]uint32, numLinks)
                 }
-               n := count[i]
-               nextcode[i] = code
-               code += n
-               code <<= 1
         }
  
         for i, n := range bits {
@@ -179,7 +178,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
                 reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
                 reverse >>= uint(16 - n)
                 if n <= huffmanChunkBits {
-                       for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
+                       for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
                                 // We should never need to overwrite
                                 // an existing chunk.  Also, 0 is
                                 // never a valid chunk, because the
@@ -198,12 +197,9 @@ func (h *huffmanDecoder) init(bits []int) bool {
                                 panic("impossible: not an indirect chunk")
                         }
                         value := h.chunks[j] >> huffmanValueShift
-                       if value >= uint32(len(h.links)) {
-                               return false
-                       }
                         linktab := h.links[value]
                         reverse >>= huffmanChunkBits
-                       for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
+                       for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
                                 if sanity && linktab[off] != 0 {
                                         panic("impossible: overwriting existing chunk")
                                 }
author	Matthew Dempsky <mdempsky@google.com>
	Mon, 13 Apr 2015 22:31:28 +0000 (15:31 -0700)
committer	Matthew Dempsky <mdempsky@google.com>
	Thu, 16 Apr 2015 07:34:04 +0000 (07:34 +0000)
src/compress/flate/flate_test.go		patch \| blob \| history
src/compress/flate/gen.go		patch \| blob \| history
src/compress/flate/inflate.go		patch \| blob \| history