From e7aeeae0c89f5bae76ba263b1ab2b82c56ad32a3 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Mon, 22 Apr 2024 23:36:32 +1000 Subject: [PATCH] image/jpeg: ignore garbage bytes before a RST marker Well-formed JPEG images will not have garbage bytes. However, for corrupted JPEG images, the RST (restart) mechanism is specifically designed so that a decoder can re-synchronize to an upcoming restartable MCU (Minimum Coded Unit, e.g. 16x16 block of pixels) boundary and resume decoding. Even if the resultant image isn't perfect, a 98%-good image is better than a fatal error. Every JPEG marker is encoded in two bytes, the first of which is 0xFF. There are 8 possible RST markers, cycling as "0xFF 0xD0", "0xFF 0xD1", ..., "0xFF 0xD7". Suppose that, our decoder is expecting "0xFF 0xD1". Before this commit, Go's image/jpeg package would accept only two possible inputs: a well-formed "0xFF 0xD1" or one very specific pattern of spec non-compliance, "0xFF 0x00 0xFF 0xD1". After this commit, it is more lenient, similar to libjpeg's jdmarker.c's next_marker function. https://github.com/libjpeg-turbo/libjpeg-turbo/blob/2dfe6c0fe9e18671105e94f7cbf044d4a1d157e6/jdmarker.c#L892-L935 The new testdata file was created by: $ convert video-001.png a.ppm $ cjpeg -restart 2 a.ppm > video-001.restart2.jpeg $ rm a.ppm Fixes #40130 Change-Id: Ic598a5f489f110d6bd63e0735200fb6acac3aca3 Reviewed-on: https://go-review.googlesource.com/c/go/+/580755 Reviewed-by: Ian Lance Taylor LUCI-TryBot-Result: Go LUCI Reviewed-by: Joedian Reid --- src/image/jpeg/reader_test.go | 42 ++++++++++++ src/image/jpeg/scan.go | 71 ++++++++++++++------- src/image/testdata/video-001.restart2.jpeg | Bin 0 -> 4855 bytes 3 files changed, 91 insertions(+), 22 deletions(-) create mode 100644 src/image/testdata/video-001.restart2.jpeg diff --git a/src/image/jpeg/reader_test.go b/src/image/jpeg/reader_test.go index cdac2dd756..0872f5e91d 100644 --- a/src/image/jpeg/reader_test.go +++ b/src/image/jpeg/reader_test.go @@ -504,6 +504,48 @@ func TestIssue56724(t *testing.T) { } } +func TestBadRestartMarker(t *testing.T) { + b, err := os.ReadFile("../testdata/video-001.restart2.jpeg") + if err != nil { + t.Fatal(err) + } else if len(b) != 4855 { + t.Fatal("test image had unexpected length") + } else if (b[2816] != 0xff) || (b[2817] != 0xd1) { + t.Fatal("test image did not have FF D1 restart marker at expected offset") + } + prefix, suffix := b[:2816], b[2816:] + + testCases := []string{ + "PASS:", + "PASS:\x00", + "PASS:\x61", + "PASS:\x61\x62\x63\xff\x00\x64", + "PASS:\xff", + "PASS:\xff\x00", + "PASS:\xff\xff\xff\x00\xff\x00\x00\xff\xff\xff", + + "FAIL:\xff\x03", + "FAIL:\xff\xd5", + "FAIL:\xff\xff\xd5", + } + + for _, tc := range testCases { + want := tc[:5] == "PASS:" + infix := tc[5:] + + data := []byte(nil) + data = append(data, prefix...) + data = append(data, infix...) + data = append(data, suffix...) + _, err := Decode(bytes.NewReader(data)) + got := err == nil + + if got != want { + t.Errorf("%q: got %v, want %v", tc, got, want) + } + } +} + func benchmarkDecode(b *testing.B, filename string) { data, err := os.ReadFile(filename) if err != nil { diff --git a/src/image/jpeg/scan.go b/src/image/jpeg/scan.go index 94f3d3a326..de82a29bff 100644 --- a/src/image/jpeg/scan.go +++ b/src/image/jpeg/scan.go @@ -305,33 +305,16 @@ func (d *decoder) processSOS(n int) error { } // for i mcu++ if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy { - // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input, - // but this one assumes well-formed input, and hence the restart marker follows immediately. + // For well-formed input, the RST[0-7] restart marker follows + // immediately. For corrupt input, call findRST to try to + // resynchronize. if err := d.readFull(d.tmp[:2]); err != nil { return err - } - - // Section F.1.2.3 says that "Byte alignment of markers is - // achieved by padding incomplete bytes with 1-bits. If padding - // with 1-bits creates a X’FF’ value, a zero byte is stuffed - // before adding the marker." - // - // Seeing "\xff\x00" here is not spec compliant, as we are not - // expecting an *incomplete* byte (that needed padding). Still, - // some real world encoders (see golang.org/issue/28717) insert - // it, so we accept it and re-try the 2 byte read. - // - // libjpeg issues a warning (but not an error) for this: - // https://github.com/LuaDist/libjpeg/blob/6c0fcb8ddee365e7abc4d332662b06900612e923/jdmarker.c#L1041-L1046 - if d.tmp[0] == 0xff && d.tmp[1] == 0x00 { - if err := d.readFull(d.tmp[:2]); err != nil { + } else if d.tmp[0] != 0xff || d.tmp[1] != expectedRST { + if err := d.findRST(expectedRST); err != nil { return err } } - - if d.tmp[0] != 0xff || d.tmp[1] != expectedRST { - return FormatError("bad RST marker") - } expectedRST++ if expectedRST == rst7Marker+1 { expectedRST = rst0Marker @@ -521,3 +504,47 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error { } return nil } + +// findRST advances past the next RST restart marker that matches expectedRST. +// Other than I/O errors, it is also an error if we encounter an {0xFF, M} +// two-byte marker sequence where M is not 0x00, 0xFF or the expectedRST. +// +// This is similar to libjpeg's jdmarker.c's next_marker function. +// https://github.com/libjpeg-turbo/libjpeg-turbo/blob/2dfe6c0fe9e18671105e94f7cbf044d4a1d157e6/jdmarker.c#L892-L935 +// +// Precondition: d.tmp[:2] holds the next two bytes of JPEG-encoded input +// (input in the d.readFull sense). +func (d *decoder) findRST(expectedRST uint8) error { + for { + // i is the index such that, at the bottom of the loop, we read 2-i + // bytes into d.tmp[i:2], maintaining the invariant that d.tmp[:2] + // holds the next two bytes of JPEG-encoded input. It is either 0 or 1, + // so that each iteration advances by 1 or 2 bytes (or returns). + i := 0 + + if d.tmp[0] == 0xff { + if d.tmp[1] == expectedRST { + return nil + } else if d.tmp[1] == 0xff { + i = 1 + } else if d.tmp[1] != 0x00 { + // libjpeg's jdmarker.c's jpeg_resync_to_restart does something + // fancy here, treating RST markers within two (modulo 8) of + // expectedRST differently from RST markers that are 'more + // distant'. Until we see evidence that recovering from such + // cases is frequent enough to be worth the complexity, we take + // a simpler approach for now. Any marker that's not 0x00, 0xff + // or expectedRST is a fatal FormatError. + return FormatError("bad RST marker") + } + + } else if d.tmp[1] == 0xff { + d.tmp[0] = 0xff + i = 1 + } + + if err := d.readFull(d.tmp[i:2]); err != nil { + return err + } + } +} diff --git a/src/image/testdata/video-001.restart2.jpeg b/src/image/testdata/video-001.restart2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..707639eba191eda0e7cb4a81cfab68d720db7ea3 GIT binary patch literal 4855 zcmbW(cTm&Mn+Na@1PCBS5R?)KO zP*PBUDaa|oU@#RGB{dBbEzOlHG^~u*=$Sa!pqw0R5C|8ar~ntwEnWyjP*&)cxP-K{ zG*m!dSx!<(R7zU%pC%+!R8%xqXjo`zStPk3+>-z25ZeF-O27@s0Fm$jqzoh=1`^^I z0Q&cwgMj@85A568WtXbNO=4t@#(YYN$8ia(lau%US}7Vl$Moa-c(f9 zzi+^PXl%kYcXWRF+ST3D+c!EkJ~25pJu|!VeHFj9zOlKry?=0cbbNApMmYb+MFN2S z&HB6k8}>h341ZmuWMm*R@INjR(y+f1#6U*QEkVJkW&*w+aE(VYj*>|oRrIc%idV{X zkJ&kJg!($4^vccse`x=b{qJD$|1a5p!T#H|0FVJp#94p_MDq6vK@5N@aBytaqnc@J zT6hF8>k2Ax9A6vO=z?{5|AwXg;M$)nDr2}dq%G_ci{m5b7$(hDuZmuYmn2!`NuROe zJs6TB0@Mg02&YOq|9q_io4@_(`Doy^2bv7y#x7*$bB0En9`Z`2P4>y62%0}a@k+AF z%EAv#@*hWgtQk4XC&|aJxhA_oK)czAyXtM2QO(Pc@VMO5qsp-9-*SI@XH*QYS{XVX zNY)a85S}ryfrULoAyx{h#oT4EcW`fc{mX>x1g*CdWNz6GDId$V6(*i7`@GSS3!TMq z7?t_jhP&;eyBQrNGwn0oADNuFN#~*`k}Q~l<*PY^=0{@XST0~1=A*t1k(1bu7K`$x z7~!35`j~IXEM~YLIBwTm`e*I4mbXTF!p8KF&|;)>r^ilGhGGPNhTM0DVcy3UN)OP$ zdg5MH6$zADLfZhAl#()KIOtc zg10(TUiu>j=ditRhS1h_Nox%&`&%^u6~dVB0; zfED*N!pV5#JZxLI?b$IH+*EpyirKI^yb*{?4qi<%5>pj@lvvF&OayFSaj{?q6w~8F zIJ7Lrt*%G3|K7UT(RUxBf0V9o0o{bZUCg@J-qK&vwpjW(qsTqKkZ2(5$Fg3n5Zf() zE0;Vsw;vVL42z+4di$~I(~S=&2RBnFKaM z81R;b7YcLZK>S8n=_RGt`j0o~Ru8M}@4lGXru7`{6`t!M^}|28pQZ}}@o_36!OYzO z`Kr{%pPx6EB`cV2388`nGIkg-)JfgzB4-T9mfu#dyf3ue6#0Yi+npfNlH;{+>=zfT zr6PoVktM`$D;)=tIR4xofAE%(_3*g)e*5VaIW6zl!6i^Gf(Qr`$|4kbx9!I*=9Jyq zO;fDePmDy#2Xkba`x~y41SAYcH!+t)22O@{4j4zQnCSAqQ{nW==0o^@b2O@7A|&r@ z+$JCTxruNCXj{X7+1ciFdY$Ct>1lA}6=z1z)6nm4!bb4vnG&6bl`qpj!0A^-^g0d= z^~3QoA8u`~GnP8}a2;=rM|(Ymy&PW2#!%FNdh4xIniV0c0-g4JRBIJJjdLWK~vdxo+=#($H3|!BWq~G$VVal1Ac)VO}xp`@`bVV zJmm~Wau-6Oj9?HC(g}CZeJ6FO!m};L6%F3UtQ=BL9N>1chbK$s%0B}0f*NK1^EtMYcmSRVW*%Iq4o z{-T@5-qGDpFdZj{#ps609>i6Oh>JU{r>Auq2_|Nh%E_Tt&MV?G>3|l8oN9+kK~1=8 zLfz7t%a`l31Ke(Ck-l%Zj-F)k{`|#OI!fj(g@P`2!05#7X&qmaaG?yo2bD0TDyrK} z_=+&@80s51^mJL}pZiv|uX;Co&anZ9`W_JE&CDb_{7N9Kf9sG2TI2u3|8rQvBIeVC z7`D~mnAQ+K7$$l9dkOV3IqH(Ptd|(Rm*%^oy7i4HU8MBwOINjrV6oCyEuNM$i!&w?&j(>RH3e%eC56KYgutjqUt zpDKb%h_>(!{T=<>KPtOyL!2WcO|v-5t~*XagRa&~hABc_4^TsD0tvD5L_le(;&E}2 zfl4*u!jOB&Cs?M==WCzz15!#ZJSerJUR1Q!u?u#kReRlf(qXUjW~Y+NJMQT4)v*HG ziNupd_MMP%MqQt%uZOG8PvjlXlv4>kJB|dOz15D;553+>s(v9%v6<+=hd$kqylxCA zEs_p9UJ}jvRMlnuSO<^P3h7I42j?aQ%hH01yL8*RKw|<0{Y0S7fe3s}rS-_ap&;dP zZqb??Cec%m#c@ZqH^e1ORXJU9Jc;l=B$&aux2TH}*nZ8W5pITzF(j86s5UKiH*Gc)g0CPF-hB9r`V0g0y#HhH$YSzqDD>|W3nrS&; zwA#Ddv9+SeP|Es{tz6^1%H>2(?UxPh^a+l1zSJP{9{WaC<@bZ$%!-Bx#Hw8FoZ}vJ z?n2R{bnDd14Kz>1pRr}QWgYO56!1n>g|vtC8+RiBB^m z86L%Dt$BqA`36a&eO^rG2Ax@bw2Km|pA3fSzdrVwt`!v-wY2W=R_~N+C(Cu?h!NI@ z#w|kMz4jFgNl_0~*J;=3is6Z|T~E=O;aT8c7{2cQdGa+&y&pAe4tW!AU4_ii`qRoC zL`q7bhNn}w+HdQ)W^!nUojf0&&Yxt6j-+oac!}cdA*YY5Q6nvNMr40NOWh>A_o6aC zZeKx+>8#m@-ROxy>Ge^2Q8N9hn)rHs^I&`M*Yz_$gZ)GlGqNH= z;9MzgG@QBw+-`q}8bVkN=kx7}wqmwVZ{ME0wgmS)ld{F+bkd~s;b3V}@v4Si+h!iL zKj@jBOaz)&-k!fSpHpV+Y8&vGG#=A=fJ(0ZItK*3oCd&!Od7K?ktxj{a_`r6McL;R zUj!+q4{$YWc3Ef3kUuj)!yzb#_HQ?Pq2BNL809?ZFMA!wqKl_JL(#U59j@^PAJcH{ zT=5E#v7YENSv7l<2R9!&D1e&?%v7;830h`KPkxs2w61%Ab9sjsDM??ct8V8k*B}D* zW~%iikPl^U-GymH;0JccZ`p`eC-B_Eu&)4< z{V@p9riG*n^Vc?WsubM779)M{BiAioHKiHwsa<w&X#3?DEIxouJ4Q6XBC}J0pH8~U@o65emV?>Go#Wn@q$;l8qWN_Ags{a^KeKb{nw14pBG8eIM`RT6@h@xdr>E<~ zU^G|n@a}Z`3+pG#bic|i25^-kJbx;BFF7YR14WQjhNfX|W2#ig{kt*k1zzJewkx8g zkL%-Q$*;sm@8EtpKHL*l6N|;%|K`aUM-V|+bn0@n3{!*CO_I3f=BJk*1@HP?{kd2j zQCzQzb=Ft(N=JK?qU4R~z`OzXFBF5urjwwS&6c=i+xWdh){Pq=)|LAYQEOhCbV;Hh~0HVOAzLB!bg( zMYm5+dExmhg7ez+R$B>0i&STknSDBXtyX2gCES$|JDe4pd+jG&zI=`(Kore6BBotQ z)+kF;?eH4!w8E-G^){&65R-KDF#VvnJI7B9b{vfqbQv!h>^~<2D^Kr#4f>?y^NDoI zZgfcF-29S(&>~b$uZ$t(D7KUrCE~TC*F3F&8C#zfL-!y)!R*$%xHerV}h~NzPe8EoJR8)<;$9@9CZ&+WdXvwv2M|NcZB_#E4$cXofw?zdxDJ zO+?^r^3plARmFa`V3D^m4!<0vhQA6YGtm-<+1YfuSFI)9ysF8H)G-ccEgkn32{BOd zVxMlAi_o63N9Bj(gQl2Fv}VKX&?n+QF34AQRer*%B5fwEvlI%jI@)SIJtFavU@%hX zAe>h{Z2`)^b$x4QL#K{kGtkh`TpWtQGFg7Msa^|{jj`19Xom^bHMu=Mz7j(b&bS@P zB{Dflo6#oZUz3_Ck3YP}yXasP87Zx*+HHs5q&k#8)=jpyKF$kcX1- zT>mXC-dtDyTXZW&(4OyAyLU~rg0McwoP`DSto8_$gke2dS)9!visG wj~Yzu%zgw}Oyp&}=86^tB@3q{0D<6`4J{_CK4+2LGK^*9_^)0L1e7@QA77UwX8-^I literal 0 HcmV?d00001 -- 2.48.1