From a3d1c1bdce6101212465a59ef24107402d920dda Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Wed, 13 Mar 2013 10:44:45 +1100 Subject: [PATCH] image/jpeg: ignore extraneous data, the same as what libjpeg does. Fixes #4705. Note that libjpeg will print a warning to stderr if there are many extraneous bytes, but can be silent if the extraneous bytes can fit into its int32 bit-buffer for Huffman decoding. I'm guessing that this is why whatever encoder that produced the image filed for issue 4705 did not realize that they are, strictly speaking, generating an invalid JPEG. That issue's attached image has two extraneous bytes. For example, piping the program below into libjpeg's djpeg program will print an "18 extraneous bytes" warning, even though N == 20. $ cat main.go package main import ( "bytes" "image" "image/color" "image/jpeg" "os" ) const N = 20 func main() { // Encode a 1x1 red image. m := image.NewRGBA(image.Rect(0, 0, 1, 1)) m.Set(0, 0, color.RGBA{255, 0, 0, 255}) buf := new(bytes.Buffer) jpeg.Encode(buf, m, nil) b := buf.Bytes() // Strip the final "\xff\xd9" EOI marker. b = b[:len(b)-2] // Append N dummy 0x80 bytes to the SOS data. for i := 0; i < N; i++ { b = append(b, 0x80) } // Put back the "\xff\xd9" EOI marker. b = append(b, 0xff, 0xd9) os.Stdout.Write(b) } $ go run main.go | djpeg /dev/stdin > /tmp/foo.pnm Corrupt JPEG data: 18 extraneous bytes before marker 0xd9 The resultant /tmp/foo.pnm is a perfectly good 1x1 red image. R=r CC=golang-dev https://golang.org/cl/7750043 --- src/pkg/image/jpeg/reader.go | 32 +++++++++++++++- src/pkg/image/jpeg/reader_test.go | 63 +++++++++++++++++++++++++++++++ src/pkg/image/jpeg/writer_test.go | 39 +++++++++++-------- 3 files changed, 117 insertions(+), 17 deletions(-) diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go index 1ee6bbcd1a..862d8dc1b2 100644 --- a/src/pkg/image/jpeg/reader.go +++ b/src/pkg/image/jpeg/reader.go @@ -245,10 +245,38 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) { if err != nil { return nil, err } - if d.tmp[0] != 0xff { - return nil, FormatError("missing 0xff marker start") + for d.tmp[0] != 0xff { + // Strictly speaking, this is a format error. However, libjpeg is + // liberal in what it accepts. As of version 9, next_marker in + // jdmarker.c treats this as a warning (JWRN_EXTRANEOUS_DATA) and + // continues to decode the stream. Even before next_marker sees + // extraneous data, jpeg_fill_bit_buffer in jdhuff.c reads as many + // bytes as it can, possibly past the end of a scan's data. It + // effectively puts back any markers that it overscanned (e.g. an + // "\xff\xd9" EOI marker), but it does not put back non-marker data, + // and thus it can silently ignore a small number of extraneous + // non-marker bytes before next_marker has a chance to see them (and + // print a warning). + // + // We are therefore also liberal in what we accept. Extraneous data + // is silently ignored. + // + // This is similar to, but not exactly the same as, the restart + // mechanism within a scan (the RST[0-7] markers). + // + // Note that extraneous 0xff bytes in e.g. SOS data are escaped as + // "\xff\x00", and so are detected a little further down below. + d.tmp[0] = d.tmp[1] + d.tmp[1], err = d.r.ReadByte() + if err != nil { + return nil, err + } } marker := d.tmp[1] + if marker == 0 { + // Treat "\xff\x00" as extraneous data. + continue + } for marker == 0xff { // Section B.1.1.2 says, "Any marker may optionally be preceded by any // number of fill bytes, which are bytes assigned code X'FF'". diff --git a/src/pkg/image/jpeg/reader_test.go b/src/pkg/image/jpeg/reader_test.go index b520a8ab18..e951e038c0 100644 --- a/src/pkg/image/jpeg/reader_test.go +++ b/src/pkg/image/jpeg/reader_test.go @@ -8,8 +8,11 @@ import ( "bytes" "fmt" "image" + "image/color" "io/ioutil" + "math/rand" "os" + "strings" "testing" ) @@ -131,6 +134,66 @@ func pixString(pix []byte, stride, x, y int) string { return s.String() } +func TestExtraneousData(t *testing.T) { + // Encode a 1x1 red image. + src := image.NewRGBA(image.Rect(0, 0, 1, 1)) + src.Set(0, 0, color.RGBA{0xff, 0x00, 0x00, 0xff}) + buf := new(bytes.Buffer) + if err := Encode(buf, src, nil); err != nil { + t.Fatalf("encode: %v", err) + } + enc := buf.String() + // Sanity check that the encoded JPEG is long enough, that it ends in a + // "\xff\xd9" EOI marker, and that it contains a "\xff\xda" SOS marker + // somewhere in the final 64 bytes. + if len(enc) < 64 { + t.Fatalf("encoded JPEG is too short: %d bytes", len(enc)) + } + if got, want := enc[len(enc)-2:], "\xff\xd9"; got != want { + t.Fatalf("encoded JPEG ends with %q, want %q", got, want) + } + if s := enc[len(enc)-64:]; !strings.Contains(s, "\xff\xda") { + t.Fatalf("encoded JPEG does not contain a SOS marker (ff da) near the end: % x", s) + } + // Test that adding some random junk between the SOS marker and the + // EOI marker does not affect the decoding. + rnd := rand.New(rand.NewSource(1)) + for i, nerr := 0, 0; i < 1000 && nerr < 10; i++ { + buf.Reset() + // Write all but the trailing "\xff\xd9" EOI marker. + buf.WriteString(enc[:len(enc)-2]) + // Write some random extraneous data. + for n := rnd.Intn(10); n > 0; n-- { + if x := byte(rnd.Intn(256)); x != 0xff { + buf.WriteByte(x) + } else { + // The JPEG format escapes a SOS 0xff data byte as "\xff\x00". + buf.WriteString("\xff\x00") + } + } + // Write the "\xff\xd9" EOI marker. + buf.WriteString("\xff\xd9") + + // Check that we can still decode the resultant image. + got, err := Decode(buf) + if err != nil { + t.Errorf("could not decode image #%d: %v", i, err) + nerr++ + continue + } + if got.Bounds() != src.Bounds() { + t.Errorf("image #%d, bounds differ: %v and %v", i, got.Bounds(), src.Bounds()) + nerr++ + continue + } + if averageDelta(got, src) > 2<<8 { + t.Errorf("image #%d changed too much after a round trip", i) + nerr++ + continue + } + } +} + func benchmarkDecode(b *testing.B, filename string) { b.StopTimer() data, err := ioutil.ReadFile(filename) diff --git a/src/pkg/image/jpeg/writer_test.go b/src/pkg/image/jpeg/writer_test.go index 0b2143f5b8..514b455dce 100644 --- a/src/pkg/image/jpeg/writer_test.go +++ b/src/pkg/image/jpeg/writer_test.go @@ -148,29 +148,38 @@ func TestWriter(t *testing.T) { t.Error(tc.filename, err) continue } - // Compute the average delta in RGB space. - b := m0.Bounds() - var sum, n int64 - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - c0 := m0.At(x, y) - c1 := m1.At(x, y) - r0, g0, b0, _ := c0.RGBA() - r1, g1, b1, _ := c1.RGBA() - sum += delta(r0, r1) - sum += delta(g0, g1) - sum += delta(b0, b1) - n += 3 - } + if m0.Bounds() != m1.Bounds() { + t.Errorf("%s, bounds differ: %v and %v", tc.filename, m0.Bounds(), m1.Bounds()) + continue } // Compare the average delta to the tolerance level. - if sum/n > tc.tolerance { + if averageDelta(m0, m1) > tc.tolerance { t.Errorf("%s, quality=%d: average delta is too high", tc.filename, tc.quality) continue } } } +// averageDelta returns the average delta in RGB space. The two images must +// have the same bounds. +func averageDelta(m0, m1 image.Image) int64 { + b := m0.Bounds() + var sum, n int64 + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + c0 := m0.At(x, y) + c1 := m1.At(x, y) + r0, g0, b0, _ := c0.RGBA() + r1, g1, b1, _ := c1.RGBA() + sum += delta(r0, r1) + sum += delta(g0, g1) + sum += delta(b0, b1) + n += 3 + } + } + return sum / n +} + func BenchmarkEncode(b *testing.B) { b.StopTimer() img := image.NewRGBA(image.Rect(0, 0, 640, 480)) -- 2.48.1