import (
"bytes"
+ "encoding/hex"
"io/ioutil"
- "strings"
"testing"
)
-func TestUncompressedSource(t *testing.T) {
- decoder := NewReader(bytes.NewReader([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11}))
- output := make([]byte, 1)
- n, error := decoder.Read(output)
- if n != 1 || error != nil {
- t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error)
- }
- if output[0] != 0x11 {
- t.Errorf("output[0] = %x, want 0x11", output[0])
- }
-}
-
// The following test should not panic.
func TestIssue5915(t *testing.T) {
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6,
}
}
-func TestDegenerateHuffmanCoding(t *testing.T) {
- const (
- want = "abcabc"
- // This compressed form has a dynamic Huffman block, even though a
- // sensible encoder would use a literal data block, as the latter is
- // shorter. Still, it is a valid flate compression of "abcabc". It has
- // a degenerate Huffman table with only one coded value: the one
- // non-literal back-ref copy of the first "abc" to the second "abc".
- //
- // To verify that this is decompressible with zlib (the C library),
- // it's easy to use the Python wrapper library:
- // >>> import zlib
- // >>> compressed = "\x0c\xc2...etc...\xff\xff"
- // >>> zlib.decompress(compressed, -15) # negative means no GZIP header.
- // 'abcabc'
- compressed = "\x0c\xc2\x01\x0d\x00\x00\x00\x82\xb0\xac\x4a\xff\x0e\xb0\x7d\x27" +
- "\x06\x00\x00\xff\xff"
- )
- b, err := ioutil.ReadAll(NewReader(strings.NewReader(compressed)))
- if err != nil {
- t.Fatal(err)
- }
- if got := string(b); got != want {
- t.Fatalf("got %q, want %q", got, want)
+func TestStreams(t *testing.T) {
+ // To verify any of these hexstrings as valid or invalid flate streams
+ // according to the C zlib library, you can use the Python wrapper library:
+ // >>> hex_string = "010100feff11"
+ // >>> import zlib
+ // >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE
+ // '\x11'
+
+ testCases := []struct {
+ desc string // Description of the stream
+ stream string // Hexstring of the input DEFLATE stream
+ want string // Expected result. Use "fail" to expect failure
+ }{{
+ "degenerate HCLenTree",
+ "05e0010000000000100000000000000000000000000000000000000000000000" +
+ "00000000000000000004",
+ "fail",
+ }, {
+ "complete HCLenTree, empty HLitTree, empty HDistTree",
+ "05e0010400000000000000000000000000000000000000000000000000000000" +
+ "00000000000000000010",
+ "fail",
+ }, {
+ "empty HCLenTree",
+ "05e0010000000000000000000000000000000000000000000000000000000000" +
+ "00000000000000000010",
+ "fail",
+ }, {
+ "complete HCLenTree, complete HLitTree, empty HDistTree, use missing HDist symbol",
+ "000100feff000de0010400000000100000000000000000000000000000000000" +
+ "0000000000000000000000000000002c",
+ "fail",
+ }, {
+ "complete HCLenTree, complete HLitTree, degenerate HDistTree, use missing HDist symbol",
+ "000100feff000de0010000000000000000000000000000000000000000000000" +
+ "00000000000000000610000000004070",
+ "fail",
+ }, {
+ "complete HCLenTree, empty HLitTree, empty HDistTree",
+ "05e0010400000000100400000000000000000000000000000000000000000000" +
+ "0000000000000000000000000008",
+ "fail",
+ }, {
+ "complete HCLenTree, empty HLitTree, degenerate HDistTree",
+ "05e0010400000000100400000000000000000000000000000000000000000000" +
+ "0000000000000000000800000008",
+ "fail",
+ }, {
+ "complete HCLenTree, degenerate HLitTree, degenerate HDistTree, use missing HLit symbol",
+ "05e0010400000000100000000000000000000000000000000000000000000000" +
+ "0000000000000000001c",
+ "fail",
+ }, {
+ "complete HCLenTree, complete HLitTree, too large HDistTree",
+ "edff870500000000200400000000000000000000000000000000000000000000" +
+ "000000000000000000080000000000000004",
+ "fail",
+ }, {
+ "complete HCLenTree, complete HLitTree, empty HDistTree, excessive repeater code",
+ "edfd870500000000200400000000000000000000000000000000000000000000" +
+ "000000000000000000e8b100",
+ "fail",
+ }, {
+ "complete HCLenTree, complete HLitTree, empty HDistTree of normal length 30",
+ "05fd01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
+ "ffffffffffffffffff07000000fe01",
+ "",
+ }, {
+ "complete HCLenTree, complete HLitTree, empty HDistTree of excessive length 31",
+ "05fe01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
+ "ffffffffffffffffff07000000fc03",
+ "fail",
+ }, {
+ "complete HCLenTree, over-subscribed HLitTree, empty HDistTree",
+ "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" +
+ "ffffffffffffffffff07f00f",
+ "fail",
+ }, {
+ "complete HCLenTree, under-subscribed HLitTree, empty HDistTree",
+ "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" +
+ "fffffffffcffffffff07f00f",
+ "fail",
+ }, {
+ "complete HCLenTree, complete HLitTree with single code, empty HDistTree",
+ "05e001240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
+ "ffffffffffffffffff07f00f",
+ "01",
+ }, {
+ "complete HCLenTree, complete HLitTree with multiple codes, empty HDistTree",
+ "05e301240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
+ "ffffffffffffffffff07807f",
+ "01",
+ }, {
+ "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HDist symbol",
+ "000100feff000de0010400000000100000000000000000000000000000000000" +
+ "0000000000000000000000000000003c",
+ "00000000",
+ }, {
+ "complete HCLenTree, degenerate HLitTree, degenerate HDistTree",
+ "05e0010400000000100000000000000000000000000000000000000000000000" +
+ "0000000000000000000c",
+ "",
+ }, {
+ "complete HCLenTree, degenerate HLitTree, empty HDistTree",
+ "05e0010400000000100000000000000000000000000000000000000000000000" +
+ "00000000000000000004",
+ "",
+ }, {
+ "complete HCLenTree, complete HLitTree, empty HDistTree, spanning repeater code",
+ "edfd870500000000200400000000000000000000000000000000000000000000" +
+ "000000000000000000e8b000",
+ "",
+ }, {
+ "complete HCLenTree with length codes, complete HLitTree, empty HDistTree",
+ "ede0010400000000100000000000000000000000000000000000000000000000" +
+ "0000000000000000000400004000",
+ "",
+ }, {
+ "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit symbol 284 with count 31",
+ "000100feff00ede0010400000000100000000000000000000000000000000000" +
+ "000000000000000000000000000000040000407f00",
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "0000000000000000000000000000000000000000000000000000000000000000" +
+ "000000",
+ }, {
+ "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit and HDist symbols",
+ "0cc2010d00000082b0ac4aff0eb07d27060000ffff",
+ "616263616263",
+ }, {
+ "fixed block, use reserved symbol 287",
+ "33180700",
+ "fail",
+ }, {
+ "raw block",
+ "010100feff11",
+ "11",
+ }, {
+ "issue 10426 - over-subscribed HCLenTree causes a hang",
+ "344c4a4e494d4b070000ff2e2eff2e2e2e2e2eff",
+ "fail",
+ }, {
+ "issue 11030 - empty HDistTree unexpectedly leads to error",
+ "05c0070600000080400fff37a0ca",
+ "",
+ }, {
+ "issue 11033 - empty HDistTree unexpectedly leads to error",
+ "050fb109c020cca5d017dcbca044881ee1034ec149c8980bbc413c2ab35be9dc" +
+ "b1473449922449922411202306ee97b0383a521b4ffdcf3217f9f7d3adb701",
+ "3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" +
+ "c6d7",
+ }}
+
+ for i, tc := range testCases {
+ data, err := hex.DecodeString(tc.stream)
+ if err != nil {
+ t.Fatal(err)
+ }
+ data, err = ioutil.ReadAll(NewReader(bytes.NewReader(data)))
+ if tc.want == "fail" {
+ if err == nil {
+ t.Errorf("#%d (%s): got nil error, want non-nil", i, tc.desc)
+ }
+ } else {
+ if err != nil {
+ t.Errorf("#%d (%s): %v", i, tc.desc, err)
+ continue
+ }
+ if got := hex.EncodeToString(data); got != tc.want {
+ t.Errorf("#%d (%s):\ngot %q\nwant %q", i, tc.desc, got, tc.want)
+ }
+
+ }
}
}
const (
maxCodeLen = 16 // max length of Huffman code
maxHist = 32768 // max history required
- // The next three numbers come from the RFC, section 3.2.7.
- maxLit = 286
- maxDist = 32
- numCodes = 19 // number of codes in Huffman meta-code
+ // The next three numbers come from the RFC section 3.2.7, with the
+ // additional proviso in section 3.2.5 which implies that distance codes
+ // 30 and 31 should never occur in compressed data.
+ maxNumLit = 286
+ maxNumDist = 30
+ numCodes = 19 // number of codes in Huffman meta-code
)
// A CorruptInputError reports the presence of corrupt input at a given offset.
}
// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
func (h *huffmanDecoder) init(bits []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
}
count[n]++
}
+
+ // Empty tree. The decompressor.huffSym function will fail later if the tree
+ // is used. Technically, an empty tree is only valid for the HDIST tree and
+ // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+ // is guaranteed to fail since it will attempt to use the tree to decode the
+ // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+ // guaranteed to fail later since the compressed data section must be
+ // composed of at least one symbol (the end-of-block marker).
if max == 0 {
- return false
+ return true
}
code := 0
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
- bits *[maxLit + maxDist]int
+ bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
}
}
nlit := int(f.b&0x1F) + 257
- if nlit > maxLit {
+ if nlit > maxNumLit {
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
- // maxDist is 32, so ndist is always valid.
+ if ndist > maxNumDist {
+ return CorruptInputError(f.roffset)
+ }
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
- default:
+ case v < maxNumLit:
length = 258
n = 0
+ default:
+ f.err = CorruptInputError(f.roffset)
+ return
}
if n > 0 {
for f.nb < n {
switch {
case dist < 4:
dist++
- case dist >= 30:
- f.err = CorruptInputError(f.roffset)
- return
- default:
+ case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
+ default:
+ f.err = CorruptInputError(f.roffset)
+ return
}
// Copy history[-dist:-dist+length] into output.
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
n := uint(h.min)
for {
for f.nb < n {
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
var f decompressor
- f.bits = new([maxLit + maxDist]int)
+ f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.r = makeReader(r)
f.hist = new([maxHist]byte)
var f decompressor
f.r = makeReader(r)
f.hist = new([maxHist]byte)
- f.bits = new([maxLit + maxDist]int)
+ f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.setDict(dict)