From: Nigel Tao Date: Thu, 8 Jun 2017 00:58:20 +0000 (+1000) Subject: compress/lzw: don't follow code == hi if last is invalid. X-Git-Tag: go1.9beta1~80 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=32d42fb6ec5421f0c64fe7f7ffec0b9e7956e1ea;p=gostls13.git compress/lzw: don't follow code == hi if last is invalid. Fixes #20597. Change-Id: Iaa059277c02e80a2fbef59cf8bb3c58d2b90fda0 Reviewed-on: https://go-review.googlesource.com/45111 Reviewed-by: David Symonds --- diff --git a/src/compress/lzw/reader.go b/src/compress/lzw/reader.go index 557955bc3f..1be52d55e4 100644 --- a/src/compress/lzw/reader.go +++ b/src/compress/lzw/reader.go @@ -169,7 +169,7 @@ loop: break loop case code <= d.hi: c, i := code, len(d.output)-1 - if code == d.hi { + if code == d.hi && d.last != decoderInvalidCode { // code == hi is a special case which expands to the last expansion // followed by the head of the last expansion. To find the head, we walk // the prefix chain until we find a literal code. diff --git a/src/compress/lzw/reader_test.go b/src/compress/lzw/reader_test.go index 53c9cdd865..f8974de28f 100644 --- a/src/compress/lzw/reader_test.go +++ b/src/compress/lzw/reader_test.go @@ -146,6 +146,77 @@ func TestHiCodeDoesNotOverflow(t *testing.T) { } } +// TestNoLongerSavingPriorExpansions tests the decoder state when codes other +// than clear codes continue to be seen after decoder.hi and decoder.width +// reach their maximum values (4095 and 12), i.e. after we no longer save prior +// expansions. In particular, it tests seeing the highest possible code, 4095. +func TestNoLongerSavingPriorExpansions(t *testing.T) { + // Iterations is used to calculate how many input bits are needed to get + // the decoder.hi and decoder.width values up to their maximum. + iterations := []struct { + width, n int + }{ + // The final term is 257, not 256, as NewReader initializes d.hi to + // d.clear+1 and the clear code is 256. + {9, 512 - 257}, + {10, 1024 - 512}, + {11, 2048 - 1024}, + {12, 4096 - 2048}, + } + nCodes, nBits := 0, 0 + for _, e := range iterations { + nCodes += e.n + nBits += e.n * e.width + } + if nCodes != 3839 { + t.Fatalf("nCodes: got %v, want %v", nCodes, 3839) + } + if nBits != 43255 { + t.Fatalf("nBits: got %v, want %v", nBits, 43255) + } + + // Construct our input of 43255 zero bits (which gets d.hi and d.width up + // to 4095 and 12), followed by 0xfff (4095) as 12 bits, followed by 0x101 + // (EOF) as 12 bits. + // + // 43255 = 5406*8 + 7, and codes are read in LSB order. The final bytes are + // therefore: + // + // xwwwwwww xxxxxxxx yyyyyxxx zyyyyyyy + // 10000000 11111111 00001111 00001000 + // + // or split out: + // + // .0000000 ........ ........ ........ w = 0x000 + // 1....... 11111111 .....111 ........ x = 0xfff + // ........ ........ 00001... .0001000 y = 0x101 + // + // The 12 'w' bits (not all are shown) form the 3839'th code, with value + // 0x000. Just after decoder.read returns that code, d.hi == 4095 and + // d.last == 0. + // + // The 12 'x' bits form the 3840'th code, with value 0xfff or 4095. Just + // after decoder.read returns that code, d.hi == 4095 and d.last == + // decoderInvalidCode. + // + // The 12 'y' bits form the 3841'st code, with value 0x101, the EOF code. + // + // The 'z' bit is unused. + in := make([]byte, 5406) + in = append(in, 0x80, 0xff, 0x0f, 0x08) + + r := NewReader(bytes.NewReader(in), LSB, 8) + nDecoded, err := io.Copy(ioutil.Discard, r) + if err != nil { + t.Fatalf("Copy: %v", err) + } + // nDecoded should be 3841: 3839 literal codes and then 2 decoded bytes + // from 1 non-literal code. The EOF code contributes 0 decoded bytes. + if nDecoded != int64(nCodes+2) { + t.Fatalf("nDecoded: got %v, want %v", nDecoded, nCodes+2) + } +} + func BenchmarkDecoder(b *testing.B) { buf, err := ioutil.ReadFile("../testdata/e.txt") if err != nil {