Unlike RFC 1951 (DEFLATE), bzip2 does not use zero-length Huffman codes
to indicate that the symbol is missing. Instead, bzip2 uses a sparse
bitmap to indicate which symbols are present. It is therefore undefined what
happens when a length of zero is used. Fix the parsing logic so that the
length can never go below 1 bit, matching what the C implementation does.
To confirm that the C bzip2 utility chokes on this data:
$ echo "
425a6836314159265359b1f7404b000000400040002000217d184682ee48
a70a12163ee80960" | xxd -r -p | bzip2 -d
bzip2: Data integrity error when decompressing
For reference see:
bzip2-1.0.6/decompress.c:320
Change-Id: Ic1568f8e7f80cdea51d887b4d712cc239c2fe85e
Reviewed-on: https://go-review.googlesource.com/20119
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
length := br.ReadBits(5)
for j := range lengths {
for {
+ if length < 1 || length > 20 {
+ return StructuralError("Huffman length out of range")
+ }
if !br.ReadBit() {
break
}
length++
}
}
- if length < 0 || length > 20 {
- return StructuralError("Huffman length out of range")
- }
lengths[j] = uint8(length)
}
huffmanTrees[i], err = newHuffmanTree(lengths)
const badBlockSize = "425a683131415926535936dc55330063ffc0006000200020a40830008b0008b8bb9229c28481b6e2a998"
+const badHuffmanDelta = "425a6836314159265359b1f7404b000000400040002000217d184682ee48a70a12163ee80960"
+
const (
digits = iota
twain
}
}
+func TestBadHuffmanDelta(t *testing.T) { // TestBadHuffmanDelta checks that decompressing the badHuffmanDelta input reports an error.
+ _, err := decompressHex(badHuffmanDelta) // the decompressed output is discarded; only the error matters here
+ if err == nil { // a nil error means decompression unexpectedly succeeded
+ t.Errorf("unexpected success")
+ }
+}
+
var bufferOverrunBase64 string = `
QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3///
////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD