From 9f0008bb93694410ccde7f44a86648ef1117e069 Mon Sep 17 00:00:00 2001 From: Adam Langley Date: Fri, 14 Feb 2014 17:17:19 -0500 Subject: [PATCH] compress/bzip2: support superfluous Huffman levels. These should never be found in a bzip2 file but it does appear that there's a buggy encoder that is producing them. Since the official bzip2 handles this case, this change makes the Go code do likewise. With this change, the code produces the same output as the official bzip2 code on the invalid example given in the bug. Fixes #7279. LGTM=r R=golang-codereviews, r CC=golang-codereviews https://golang.org/cl/64010043 --- src/pkg/compress/bzip2/huffman.go | 32 ++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go index 8f6b0c9cad..75a6223d81 100644 --- a/src/pkg/compress/bzip2/huffman.go +++ b/src/pkg/compress/bzip2/huffman.go @@ -190,7 +190,37 @@ func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIn right := codes[firstRightIndex:] if len(left) == 0 || len(right) == 0 { - return 0, StructuralError("superfluous level in Huffman tree") + // There is a superfluous level in the Huffman tree indicating + // a bug in the encoder. However, this bug has been observed in + // the wild so we handle it. + + // If this function was called recursively then we know that + // len(codes) >= 2 because, otherwise, we would have hit the + // "leaf node" case, below, and not recursed. + // + // However, for the initial call it's possible that len(codes) + // is zero or one. Both cases are invalid because a zero length + // tree cannot encode anything and a length-1 tree can only + // encode EOF and so is superfluous. We reject both. + if len(codes) < 2 { + return 0, StructuralError("empty Huffman tree") + } + + // In this case the recursion doesn't always reduce the length + // of codes so we need to ensure termination via another + // mechanism. + if level == 31 { + // Since len(codes) >= 2 the only way that the values + // can match at all 32 bits is if they are equal, which + // is invalid. This ensures that we never enter + // infinite recursion. + return 0, StructuralError("equal symbols in Huffman tree") + } + + if len(left) == 0 { + return buildHuffmanNode(t, right, level+1) + } + return buildHuffmanNode(t, left, level+1) } nodeIndex = uint16(t.nextNode) -- 2.48.1