From df57592276bc26e2eb4e4ca5e77e4e2e422c7c6b Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 24 Feb 2022 19:23:17 -0800 Subject: [PATCH] archive/zip: permit zip files to have prefixes A Java jar file is a zip file, but it can have a prefix that is a bash script that unpacks the zip file. Most zip programs ignore such prefixes. This CL changes the archive/zip package to do the same. Fixes #10464 Fixes #51337 Change-Id: I976e9c64684644317bd21077bc5b4a2baf626ee6 Reviewed-on: https://go-review.googlesource.com/c/go/+/387976 Run-TryBot: Ian Lance Taylor Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot Run-TryBot: Ian Lance Taylor Reviewed-by: David Chase Reviewed-by: Joseph Tsai Auto-Submit: Ian Lance Taylor --- src/archive/zip/reader.go | 32 +++++++++++++++-------- src/archive/zip/reader_test.go | 22 ++++++++++++++-- src/archive/zip/testdata/readme.notzip | Bin 1906 -> 1906 bytes src/archive/zip/testdata/test-prefix.zip | Bin 0 -> 1227 bytes 4 files changed, 41 insertions(+), 13 deletions(-) create mode 100644 src/archive/zip/testdata/test-prefix.zip diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go index 19a9c3b2db..d875c7be25 100644 --- a/src/archive/zip/reader.go +++ b/src/archive/zip/reader.go @@ -33,6 +33,10 @@ type Reader struct { Comment string decompressors map[uint16]Decompressor + // Some JAR files are zip files with a prefix that is a bash script. + // The baseOffset field is the start of the zip file proper. + baseOffset int64 + // fileList is a list of files sorted by ename, // for use by the Open method. fileListOnce sync.Once @@ -52,7 +56,7 @@ type File struct { FileHeader zip *Reader zipr io.ReaderAt - headerOffset int64 + headerOffset int64 // includes overall ZIP archive baseOffset zip64 bool // zip64 extended information extra field presence } @@ -90,11 +94,12 @@ func NewReader(r io.ReaderAt, size int64) (*Reader, error) { } func (z *Reader) init(r io.ReaderAt, size int64) error { - end, err := readDirectoryEnd(r, size) + end, baseOffset, err := readDirectoryEnd(r, size) if err != nil { return err } z.r = r + z.baseOffset = baseOffset // Since the number of directory records is not validated, it is not // safe to preallocate z.File without first checking that the specified // number of files is reasonable, since a malformed archive may @@ -106,7 +111,7 @@ func (z *Reader) init(r io.ReaderAt, size int64) error { } z.Comment = end.comment rs := io.NewSectionReader(r, 0, size) - if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { + if _, err = rs.Seek(z.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil { return err } buf := bufio.NewReader(rs) @@ -124,6 +129,7 @@ func (z *Reader) init(r io.ReaderAt, size int64) error { if err != nil { return err } + f.headerOffset += z.baseOffset z.File = append(z.File, f) } if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here @@ -494,7 +500,7 @@ func readDataDescriptor(r io.Reader, f *File) error { return nil } -func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) { // look for directoryEndSignature in the last 1k, then in the last 65k var buf []byte var directoryEndOffset int64 @@ -504,7 +510,7 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) } buf = make([]byte, int(bLen)) if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { - return nil, err + return nil, 0, err } if p := findSignatureInBlock(buf); p >= 0 { buf = buf[p:] @@ -512,7 +518,7 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) break } if i == 1 || bLen == size { - return nil, ErrFormat + return nil, 0, ErrFormat } } @@ -529,7 +535,7 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) } l := int(d.commentLen) if l > len(b) { - return nil, errors.New("zip: invalid comment length") + return nil, 0, errors.New("zip: invalid comment length") } d.comment = string(b[:l]) @@ -537,17 +543,21 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { p, err := findDirectory64End(r, directoryEndOffset) if err == nil && p >= 0 { + directoryEndOffset = p err = readDirectory64End(r, p, d) } if err != nil { - return nil, err + return nil, 0, err } } + + baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset) + // Make sure directoryOffset points to somewhere in our file. - if o := int64(d.directoryOffset); o < 0 || o >= size { - return nil, ErrFormat + if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size { + return nil, 0, ErrFormat } - return d, nil + return d, baseOffset, nil } // findDirectory64End tries to read the zip64 locator just before the diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go index fd0a171304..4c1e82b9d4 100644 --- a/src/archive/zip/reader_test.go +++ b/src/archive/zip/reader_test.go @@ -90,6 +90,24 @@ var tests = []ZipTest{ }, }, }, + { + Name: "test-prefix.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Modified: time.Date(2010, 9, 5, 12, 12, 1, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Modified: time.Date(2010, 9, 5, 15, 52, 58, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + }, + }, { Name: "r.zip", Source: returnRecursiveZip, @@ -1011,7 +1029,7 @@ func TestIssue10957(t *testing.T) { "\x00\x00\x00\x00\x0000000000\x00\x00\x00\x00000" + "00000000PK\x01\x0200000000" + "0000000000000000\v\x00\x00\x00" + - "\x00\x0000PK\x05\x06000000\x05\x000000" + + "\x00\x0000PK\x05\x06000000\x05\x00\xfd\x00\x00\x00" + "\v\x00\x00\x00\x00\x00") z, err := NewReader(bytes.NewReader(data), int64(len(data))) if err != nil { @@ -1056,7 +1074,7 @@ func TestIssue11146(t *testing.T) { "0000000000000000PK\x01\x02" + "0000\b0\b\x00000000000000" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000000PK\x05\x06\x00\x00" + - "\x00\x0000\x01\x0000008\x00\x00\x00\x00\x00") + "\x00\x0000\x01\x00\x26\x00\x00\x008\x00\x00\x00\x00\x00") z, err := NewReader(bytes.NewReader(data), int64(len(data))) if err != nil { t.Fatal(err) diff --git a/src/archive/zip/testdata/readme.notzip b/src/archive/zip/testdata/readme.notzip index 81737275c6ebf5ea69b992753ab4050f031f31b8..79b1cb6de33c6ae86451acedbd50df4207a5710e 100644 GIT binary patch delta 15 Wcmeyw_la-AHzp?UfXzRcs#pLyiw1E3 delta 15 Wcmeyw_la-AHzuY4@6A7$s#pLykOpx8 diff --git a/src/archive/zip/testdata/test-prefix.zip b/src/archive/zip/testdata/test-prefix.zip new file mode 100644 index 0000000000000000000000000000000000000000..1eabb4861ece50d8464a5020e6f626ae22289ee2 GIT binary patch literal 1227 zcmXRYN=?hGP$>m%u`6MNKGy+NleN~Rme&#Qb^0pNe%F3W)Wdv zVBlbAOjNZ!aQ*FJNgz)OEF!~Dl3HA%S5i?D8p6rIT==KSrwoWoE4UdLS-vtdFo1P( zpFE)x#<0kxb<&kbLJEO_izZB)IeFD_RtAvKTtI^u80OZg+Ws@V8Y0NdzyQJ`3^EMq z`2`uNMalU&`9+3i6^3Sd1$pUUYpe{KeXM~P&6>^tKX+a(DUgA@o*phiKw1EZ`8n8t z?&#~tz;Nxx75@k$AYZb?HKHUqKdq!Zu_%?nF(n@u44%1(>8U}fi7AzZ zCsS=07?>7&x;TbZ+$xzI?-3j*abSP-`FF=kqr+}#spjUe1%`czSKh9qw4tiQk>8t# zYjG2ke3F!_{=()ePpvjl)BhF4KKRVlB(5Fs+eDs?A+gLCFYu_Y5&KIi;X6Pm`FkilwdYAO)d zb>PDThxE_f%a6Vmb@*e&^lWqJf=2>J>ie3T4i@|l48I$8c-yfE9WS$NPeF}_)S1bw zyGmut^TQG&xf0L6P_KD%A+Baoj(XkYkbd=;^BeL{bm~X>alQU*wjph^h*EuR7UGY$!;PJQp4|g~Rysb#rkMlhmyT40zn{ zd9RPNxLp6bOSL|+kW)hJ;Ffr{ySHQlR$3%@GbXF7szKjIE9Rac*G`nk;2LE~xHu}2&7s|{zAiK-nAfAH!mgFyH3 z*moB{szy~+1?YIoXDaYUi#oW7J#XUH`MutIp~Hv6@(Wn%ulbrt@MubT{(3i?#UMrVVjiBX%MnHZWbXO!z#mt3Nnp0|2+cr{yf!(&nV%=U0;XvO zPgg&ebxsLQ0p5&Ea?H5$t^`Oa12B&=ENKL>5ILC@l9SQ07sNDN*%e|M$ebmO(LfS= z<^`Gz%DfmRvx2fH0}~Lg0MfgF1%MPoNJeI{0uUuCRAm-`3L;=Zm7ANISE5&(pPQ