import (
"bytes"
- "encoding/hex"
"io/ioutil"
+ "strings"
"testing"
)
}
func TestDegenerateHuffmanCoding(t *testing.T) {
- // This test case is notable because:
- // 1. It's decompressable by zlib.
- // 2. It was generated by Go 1.4's compress/flate package.
- // 3. It uses a degenerate dynamic Huffman coding block.
- //
- // The input is somewhat contrived though. It's a sequence of
- // 258 bytes with no 3+ byte sequence occuring more than once,
- // except that the whole sequence is repeated twice. This
- // results in package flate emitting a single match token,
- // which consequently means a single symbol in the distance
- // coding table.
- //
- // Additionally, it uses very few unique byte values so that
- // the overhead from storing the dynamic Huffman coding still
- // results in a smaller encoding than using the fixed Huffman
- // coding.
const (
- originalHalf = "00013534215002452243512505010034133042401113" +
- "400415101454022532410254513251155411055331124453555" +
- "023120320201523334303524252551414033503012344230210" +
- "310431305153005314321221315440455204052144332205422" +
- "235434504441211420062622646656236416326065565261624" +
- "6256136546"
- compressedHex = "ecd081000030104251a5fad5f9a34d640a4f92b3144" +
- "fa28366669a2ca54e542adba954cf7257c1422dd639ccde6a6b" +
- "4b6cda659b885110f248d228a38ccd75954c91494b8415ab713" +
- "42fd2e20683e3b5ea86aae13601ad40d6746a6bec221d07d7bb" +
- "1db9fac2e9b61be7a3c7ceb9f5bec00b0000ffffecd08100003" +
- "0104251a5fad5f9a34d640a4f92b3144fa28366669a2ca54e54" +
- "2adba954cf7257c1422dd639ccde6a6b4b6cda659b885110f24" +
- "8d228a38ccd75954c91494b8415ab71342fd2e20683e3b5ea86" +
- "aae13601ad40d6746a6bec221d07d7bb1db9fac2e9b61be7a3c" +
- "7ceb9f5bec00b0000ffff"
+ want = "abcabc"
+ // This compressed form has a dynamic Huffman block, even though a
+ // sensible encoder would use a literal data block, as the latter is
+ // shorter. Still, it is a valid flate compression of "abcabc". It has
+ // a degenerate Huffman table with only one coded value: the one
+ // non-literal back-ref copy of the first "abc" to the second "abc".
+ //
+ // To verify that this is decompressible with zlib (the C library),
+ // it's easy to use the Python wrapper library:
+ // >>> import zlib
+ // >>> compressed = b"\x0c\xc2...etc...\xff\xff"
+ // >>> zlib.decompress(compressed, -15) # negative wbits: raw deflate, no zlib/gzip header.
+ // b'abcabc'
+ compressed = "\x0c\xc2\x01\x0d\x00\x00\x00\x82\xb0\xac\x4a\xff\x0e\xb0\x7d\x27" +
+ "\x06\x00\x00\xff\xff"
)
-
- compressed, err := hex.DecodeString(compressedHex)
- if err != nil {
- t.Fatal(err)
- }
- data, err := ioutil.ReadAll(NewReader(bytes.NewReader(compressed)))
+ b, err := ioutil.ReadAll(NewReader(strings.NewReader(compressed)))
if err != nil {
t.Fatal(err)
}
- if string(data) != originalHalf+originalHalf {
- t.Fatal("Decompressed data does not match original")
+ if got := string(b); got != want {
+ t.Fatalf("got %q, want %q", got, want)
}
}