]> Cypherpunks repositories - gostls13.git/commitdiff
archive/tar: detect truncated files
authorJoe Tsai <joetsai@digital-static.net>
Thu, 1 Oct 2015 09:30:29 +0000 (02:30 -0700)
committerIan Lance Taylor <iant@golang.org>
Fri, 6 Nov 2015 04:31:26 +0000 (04:31 +0000)
Motivation:
* Reader.skipUnread never reports io.ErrUnexpectedEOF. This is strange
given that io.ErrUnexpectedEOF is given through Reader.Read if the
user manually reads the file.
* Reader.skipUnread fails to detect truncated files since io.Seeker
is lazy about reporting errors. Thus, the behavior of Reader differs
whether the input io.Reader also satisfies io.Seeker or not.

To solve this, we seek to one before the end of the data section and
always rely on at least one call to io.CopyN. If the tr.r satisfies
io.Seeker, this is guarunteed to never read more than blockSize.

Fixes #12557

Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279
Reviewed-on: https://go-review.googlesource.com/15175
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/archive/tar/reader.go
src/archive/tar/reader_test.go
src/archive/tar/testdata/pax-path-hdr.tar [new file with mode: 0644]
src/archive/tar/testdata/ustar-file-reg.tar [new file with mode: 0644]

index b2c45fd38841d026e325291e8903d615cbee1ac7..4af5807b72a6cb6c0009e2b37c8ece46501819d6 100644 (file)
@@ -446,16 +446,45 @@ func (tr *Reader) octal(b []byte) int64 {
        return int64(x)
 }
 
-// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
-func (tr *Reader) skipUnread() {
-       nr := tr.numBytes() + tr.pad // number of bytes to skip
+// skipUnread skips any unread bytes in the existing file entry, as well as any
+// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
+// encountered in the data portion; it is okay to hit io.EOF in the padding.
+//
+// Note that this function still works properly even when sparse files are being
+// used since numBytes returns the bytes remaining in the underlying io.Reader.
+func (tr *Reader) skipUnread() error {
+       dataSkip := tr.numBytes()      // Number of data bytes to skip
+       totalSkip := dataSkip + tr.pad // Total number of bytes to skip
        tr.curr, tr.pad = nil, 0
-       if sr, ok := tr.r.(io.Seeker); ok {
-               if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
-                       return
+
+       // If possible, Seek to the last byte before the end of the data section.
+       // Do this because Seek is often lazy about reporting errors; this will mask
+       // the fact that the tar stream may be truncated. We can rely on the
+       // io.CopyN done shortly afterwards to trigger any IO errors.
+       var seekSkipped int64 // Number of bytes skipped via Seek
+       if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
+               // Not all io.Seeker can actually Seek. For example, os.Stdin implements
+               // io.Seeker, but calling Seek always returns an error and performs
+               // no action. Thus, we try an innocent seek to the current position
+               // to see if Seek is really supported.
+               pos1, err := sr.Seek(0, os.SEEK_CUR)
+               if err == nil {
+                       // Seek seems supported, so perform the real Seek.
+                       pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
+                       if err != nil {
+                               tr.err = err
+                               return tr.err
+                       }
+                       seekSkipped = pos2 - pos1
                }
        }
-       _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
+
+       var copySkipped int64 // Number of bytes skipped via CopyN
+       copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
+       if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
+               tr.err = io.ErrUnexpectedEOF
+       }
+       return tr.err
 }
 
 func (tr *Reader) verifyChecksum(header []byte) bool {
@@ -468,18 +497,25 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
        return given == unsigned || given == signed
 }
 
+// readHeader reads the next block header and assumes that the underlying reader
+// is already aligned to a block boundary.
+//
+// The err will be set to io.EOF only when one of the following occurs:
+//     * Exactly 0 bytes are read and EOF is hit.
+//     * Exactly 1 block of zeros is read and EOF is hit.
+//     * At least 2 blocks of zeros are read.
 func (tr *Reader) readHeader() *Header {
        header := tr.hdrBuff[:]
        copy(header, zeroBlock)
 
        if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
-               return nil
+               return nil // io.EOF is okay here
        }
 
        // Two blocks of zero bytes marks the end of the archive.
        if bytes.Equal(header, zeroBlock[0:blockSize]) {
                if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
-                       return nil
+                       return nil // io.EOF is okay here
                }
                if bytes.Equal(header, zeroBlock[0:blockSize]) {
                        tr.err = io.EOF
index 4a6d1a9e9f275b247304af63976859c9145541e9..f8b344da6e13c64c46330f8417904b5056e48d3e 100644 (file)
@@ -422,35 +422,6 @@ func TestPartialRead(t *testing.T) {
        }
 }
 
-func TestNonSeekable(t *testing.T) {
-       test := gnuTarTest
-       f, err := os.Open(test.file)
-       if err != nil {
-               t.Fatalf("Unexpected error: %v", err)
-       }
-       defer f.Close()
-
-       type readerOnly struct {
-               io.Reader
-       }
-       tr := NewReader(readerOnly{f})
-       nread := 0
-
-       for ; ; nread++ {
-               _, err := tr.Next()
-               if err == io.EOF {
-                       break
-               }
-               if err != nil {
-                       t.Fatalf("Unexpected error: %v", err)
-               }
-       }
-
-       if nread != len(test.headers) {
-               t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
-       }
-}
-
 func TestParsePAXHeader(t *testing.T) {
        paxTests := [][3]string{
                {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
@@ -803,3 +774,130 @@ func TestUninitializedRead(t *testing.T) {
        }
 
 }
+
+type reader struct{ io.Reader }
+type readSeeker struct{ io.ReadSeeker }
+type readBadSeeker struct{ io.ReadSeeker }
+
+func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") }
+
+// TestReadTruncation test the ending condition on various truncated files and
+// that truncated files are still detected even if the underlying io.Reader
+// satisfies io.Seeker.
+func TestReadTruncation(t *testing.T) {
+       var ss []string
+       for _, p := range []string{
+               "testdata/gnu.tar",
+               "testdata/ustar-file-reg.tar",
+               "testdata/pax-path-hdr.tar",
+               "testdata/sparse-formats.tar",
+       } {
+               buf, err := ioutil.ReadFile(p)
+               if err != nil {
+                       t.Fatalf("unexpected error: %v", err)
+               }
+               ss = append(ss, string(buf))
+       }
+
+       data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3]
+       data2 += strings.Repeat("\x00", 10*512)
+       trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes
+
+       var vectors = []struct {
+               input string // Input stream
+               cnt   int    // Expected number of headers read
+               err   error  // Expected error outcome
+       }{
+               {"", 0, io.EOF}, // Empty file is a "valid" tar file
+               {data1[:511], 0, io.ErrUnexpectedEOF},
+               {data1[:512], 1, io.ErrUnexpectedEOF},
+               {data1[:1024], 1, io.EOF},
+               {data1[:1536], 2, io.ErrUnexpectedEOF},
+               {data1[:2048], 2, io.EOF},
+               {data1, 2, io.EOF},
+               {data1[:2048] + data2[:1536], 3, io.EOF},
+               {data2[:511], 0, io.ErrUnexpectedEOF},
+               {data2[:512], 1, io.ErrUnexpectedEOF},
+               {data2[:1195], 1, io.ErrUnexpectedEOF},
+               {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding
+               {data2[:1200], 1, io.EOF},
+               {data2[:1535], 1, io.EOF},
+               {data2[:1536], 1, io.EOF}, // Exact end of padding
+               {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF},
+               {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF},
+               {data2[:1536] + trash, 1, ErrHeader},
+               {data2[:2048], 1, io.EOF}, // Exactly 1 empty block
+               {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF},
+               {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF},
+               {data2[:2048] + trash, 1, ErrHeader},
+               {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream)
+               {data2[:2560] + trash[:1], 1, io.EOF},
+               {data2[:2560] + trash[:511], 1, io.EOF},
+               {data2[:2560] + trash, 1, io.EOF},
+               {data2[:3072], 1, io.EOF},
+               {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file
+               {pax + trash[:1], 0, io.ErrUnexpectedEOF},
+               {pax + trash[:511], 0, io.ErrUnexpectedEOF},
+               {sparse[:511], 0, io.ErrUnexpectedEOF},
+               // TODO(dsnet): This should pass, but currently fails.
+               // {sparse[:512], 0, io.ErrUnexpectedEOF},
+               {sparse[:3584], 1, io.EOF},
+               {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
+               {sparse[:9216], 1, io.EOF},
+               {sparse[:9728], 2, io.ErrUnexpectedEOF},
+               {sparse[:10240], 2, io.EOF},
+               {sparse[:11264], 2, io.ErrUnexpectedEOF},
+               {sparse, 5, io.EOF},
+               {sparse + trash, 5, io.EOF},
+       }
+
+       for i, v := range vectors {
+               for j := 0; j < 6; j++ {
+                       var tr *Reader
+                       var s1, s2 string
+
+                       switch j {
+                       case 0:
+                               tr = NewReader(&reader{strings.NewReader(v.input)})
+                               s1, s2 = "io.Reader", "auto"
+                       case 1:
+                               tr = NewReader(&reader{strings.NewReader(v.input)})
+                               s1, s2 = "io.Reader", "manual"
+                       case 2:
+                               tr = NewReader(&readSeeker{strings.NewReader(v.input)})
+                               s1, s2 = "io.ReadSeeker", "auto"
+                       case 3:
+                               tr = NewReader(&readSeeker{strings.NewReader(v.input)})
+                               s1, s2 = "io.ReadSeeker", "manual"
+                       case 4:
+                               tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
+                               s1, s2 = "ReadBadSeeker", "auto"
+                       case 5:
+                               tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
+                               s1, s2 = "ReadBadSeeker", "manual"
+                       }
+
+                       var cnt int
+                       var err error
+                       for {
+                               if _, err = tr.Next(); err != nil {
+                                       break
+                               }
+                               cnt++
+                               if s2 == "manual" {
+                                       if _, err = io.Copy(ioutil.Discard, tr); err != nil {
+                                               break
+                                       }
+                               }
+                       }
+                       if err != v.err {
+                               t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v",
+                                       i, s1, s2, err, v.err)
+                       }
+                       if cnt != v.cnt {
+                               t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers",
+                                       i, s1, s2, cnt, v.cnt)
+                       }
+               }
+       }
+}
diff --git a/src/archive/tar/testdata/pax-path-hdr.tar b/src/archive/tar/testdata/pax-path-hdr.tar
new file mode 100644 (file)
index 0000000..ab8fc32
Binary files /dev/null and b/src/archive/tar/testdata/pax-path-hdr.tar differ
diff --git a/src/archive/tar/testdata/ustar-file-reg.tar b/src/archive/tar/testdata/ustar-file-reg.tar
new file mode 100644 (file)
index 0000000..c84fa27
Binary files /dev/null and b/src/archive/tar/testdata/ustar-file-reg.tar differ