]> Cypherpunks repositories - gostls13.git/commitdiff
archive/tar: implement Writer support for sparse files
authorJoe Tsai <joetsai@digital-static.net>
Sat, 19 Aug 2017 01:18:38 +0000 (18:18 -0700)
committerJoe Tsai <thebrokentoaster@gmail.com>
Wed, 23 Aug 2017 22:38:45 +0000 (22:38 +0000)
This CL is the second step (of two; part1 is CL/56771) for adding
sparse file support to the Writer.

There are no new identifiers exported in this CL, but this does make
use of Header.SparseHoles added in part1. If the Typeflag is set to
TypeGNUSparse or len(SparseHoles) > 0, then the Writer will emit a
sparse file, where the holes must be written by the user as zeros.

If TypeGNUSparse is set, then the output file must use the GNU format.
Otherwise, it must use the PAX format (with GNU-defined PAX keys).

A future CL may export Reader.Discard and Writer.FillZeros,
but those methods are currently unexported, and only used by the
tests for efficiency reasons.
Calling Discard or FillZeros on a hole 10GiB in size does take
time, even if it is essentially a memcopy.

Updates #13548

Change-Id: Id586d9178c227c0577f796f731ae2cbb72355601
Reviewed-on: https://go-review.googlesource.com/57212
Reviewed-by: Ian Lance Taylor <iant@golang.org>
13 files changed:
src/archive/tar/common.go
src/archive/tar/example_test.go
src/archive/tar/reader_test.go
src/archive/tar/testdata/gnu-nil-sparse-data.tar [new file with mode: 0644]
src/archive/tar/testdata/gnu-nil-sparse-hole.tar [new file with mode: 0644]
src/archive/tar/testdata/gnu-sparse-big.tar [new file with mode: 0644]
src/archive/tar/testdata/pax-nil-sparse-data.tar [new file with mode: 0644]
src/archive/tar/testdata/pax-nil-sparse-hole.tar [new file with mode: 0644]
src/archive/tar/testdata/pax-sparse-big.tar [new file with mode: 0644]
src/archive/tar/testdata/writer-big-long.tar
src/archive/tar/testdata/writer-big.tar
src/archive/tar/writer.go
src/archive/tar/writer_test.go

index 22f32062433024b27a34041261d1bf5fc977a674..b5921fef23c3980cadc47d5699cf37e2fb45944c 100644 (file)
@@ -33,6 +33,7 @@ var (
        ErrWriteAfterClose = errors.New("tar: write after close")
        errMissData        = errors.New("tar: sparse file references non-existent data")
        errUnrefData       = errors.New("tar: sparse file contains unreferenced data")
+       errWriteHole       = errors.New("tar: write non-NUL byte in sparse hole")
 )
 
 // Header type flags.
@@ -74,10 +75,13 @@ type Header struct {
 
        // SparseHoles represents a sequence of holes in a sparse file.
        //
-       // The regions must be sorted in ascending order, not overlap with
-       // each other, and not extend past the specified Size.
-       // The file is sparse if either len(SparseHoles) > 0 or
-       // the Typeflag is set to TypeGNUSparse.
+       // A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
+       // A sparse file consists of fragments of data, intermixed with holes
+       // (described by this field). A hole is semantically a block of NUL-bytes,
+       // but does not actually exist within the TAR file.
+       // The logical size of the file is stored in the Size field, while
+       // the holes must be sorted in ascending order,
+       // not overlap with each other, and not extend past the specified Size.
        SparseHoles []SparseEntry
 }
 
@@ -300,6 +304,20 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
                        return formatUnknown, nil // Invalid PAX key
                }
        }
+       if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
+               if isHeaderOnlyType(h.Typeflag) {
+                       return formatUnknown, nil // Cannot have sparse data on header-only file
+               }
+               if !validateSparseEntries(h.SparseHoles, h.Size) {
+                       return formatUnknown, nil
+               }
+               if h.Typeflag == TypeGNUSparse {
+                       format &= formatGNU // GNU only
+               } else {
+                       format &^= formatGNU // No GNU
+               }
+               format &^= formatUSTAR // No USTAR
+       }
        return format, paxHdrs
 }
 
index 5f0ce2f4029b34c9b5e6cf13f3023d2df951b749..b84950c7976c36a5b760da6c793c280743d4df7a 100644 (file)
@@ -7,20 +7,20 @@ package tar_test
 import (
        "archive/tar"
        "bytes"
+       "crypto/md5"
        "fmt"
        "io"
+       "io/ioutil"
        "log"
        "os"
+       "strings"
 )
 
 func Example() {
-       // Create a buffer to write our archive to.
        buf := new(bytes.Buffer)
 
-       // Create a new tar archive.
+       // Create and add some files to the archive.
        tw := tar.NewWriter(buf)
-
-       // Add some files to the archive.
        var files = []struct {
                Name, Body string
        }{
@@ -35,34 +35,29 @@ func Example() {
                        Size: int64(len(file.Body)),
                }
                if err := tw.WriteHeader(hdr); err != nil {
-                       log.Fatalln(err)
+                       log.Fatal(err)
                }
                if _, err := tw.Write([]byte(file.Body)); err != nil {
-                       log.Fatalln(err)
+                       log.Fatal(err)
                }
        }
-       // Make sure to check the error on Close.
        if err := tw.Close(); err != nil {
-               log.Fatalln(err)
+               log.Fatal(err)
        }
 
-       // Open the tar archive for reading.
-       r := bytes.NewReader(buf.Bytes())
-       tr := tar.NewReader(r)
-
-       // Iterate through the files in the archive.
+       // Open and iterate through the files in the archive.
+       tr := tar.NewReader(buf)
        for {
                hdr, err := tr.Next()
                if err == io.EOF {
-                       // end of tar archive
-                       break
+                       break // End of archive
                }
                if err != nil {
-                       log.Fatalln(err)
+                       log.Fatal(err)
                }
                fmt.Printf("Contents of %s:\n", hdr.Name)
                if _, err := io.Copy(os.Stdout, tr); err != nil {
-                       log.Fatalln(err)
+                       log.Fatal(err)
                }
                fmt.Println()
        }
@@ -78,3 +73,86 @@ func Example() {
        // Contents of todo.txt:
        // Get animal handling license.
 }
+
+// A sparse file can efficiently represent a large file that is mostly empty.
+func Example_sparse() {
+       buf := new(bytes.Buffer)
+
+       // Define a sparse file to add to the archive.
+       // This sparse file contains 5 data fragments, and 4 hole fragments.
+       // The logical size of the file is 16 KiB, while the physical size of the
+       // file is only 3 KiB (not counting the header data).
+       hdr := &tar.Header{
+               Name: "sparse.db",
+               Size: 16384,
+               SparseHoles: []tar.SparseEntry{
+                       // Data fragment at 0..1023
+                       {Offset: 1024, Length: 1024 - 512}, // Hole fragment at 1024..1535
+                       // Data fragment at 1536..2047
+                       {Offset: 2048, Length: 2048 - 512}, // Hole fragment at 2048..3583
+                       // Data fragment at 3584..4095
+                       {Offset: 4096, Length: 4096 - 512}, // Hole fragment at 4096..7679
+                       // Data fragment at 7680..8191
+                       {Offset: 8192, Length: 8192 - 512}, // Hole fragment at 8192..15871
+                       // Data fragment at 15872..16383
+               },
+       }
+
+       // The regions marked as a sparse hole are filled with NUL-bytes.
+       // The total length of the body content must match the specified Size field.
+       body := "" +
+               strings.Repeat("A", 1024) +
+               strings.Repeat("\x00", 1024-512) +
+               strings.Repeat("B", 512) +
+               strings.Repeat("\x00", 2048-512) +
+               strings.Repeat("C", 512) +
+               strings.Repeat("\x00", 4096-512) +
+               strings.Repeat("D", 512) +
+               strings.Repeat("\x00", 8192-512) +
+               strings.Repeat("E", 512)
+
+       h := md5.Sum([]byte(body))
+       fmt.Printf("Write content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
+       fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)
+
+       // Create a new archive and write the sparse file.
+       tw := tar.NewWriter(buf)
+       if err := tw.WriteHeader(hdr); err != nil {
+               log.Fatal(err)
+       }
+       if _, err := tw.Write([]byte(body)); err != nil {
+               log.Fatal(err)
+       }
+       if err := tw.Close(); err != nil {
+               log.Fatal(err)
+       }
+
+       // Open and iterate through the files in the archive.
+       tr := tar.NewReader(buf)
+       for {
+               hdr, err := tr.Next()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       log.Fatal(err)
+               }
+               body, err := ioutil.ReadAll(tr)
+               if err != nil {
+                       log.Fatal(err)
+               }
+
+               h := md5.Sum([]byte(body))
+               fmt.Printf("Read content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
+               fmt.Printf("Read SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)
+       }
+
+       // Output:
+       // Write content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
+       // Write SparseHoles of sparse.db:
+       //      [{1024 512} {2048 1536} {4096 3584} {8192 7680}]
+       //
+       // Read content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
+       // Read SparseHoles of sparse.db:
+       //      [{1024 512} {2048 1536} {4096 3584} {8192 7680} {16384 0}]
+}
index 9b7896132ac5734dbbf517aed4decf64a7461fba..fb7dcfeece0ea957160ba61f8747cbe42f0ebff1 100644 (file)
@@ -500,6 +500,46 @@ func TestReader(t *testing.T) {
                        Devmajor: 1,
                        Devminor: 1,
                }},
+       }, {
+               // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
+               file: "testdata/gnu-nil-sparse-data.tar",
+               headers: []*Header{{
+                       Name:        "sparse.db",
+                       Typeflag:    TypeGNUSparse,
+                       Size:        1000,
+                       ModTime:     time.Unix(0, 0),
+                       SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}},
+               }},
+       }, {
+               // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
+               file: "testdata/gnu-nil-sparse-hole.tar",
+               headers: []*Header{{
+                       Name:        "sparse.db",
+                       Typeflag:    TypeGNUSparse,
+                       Size:        1000,
+                       ModTime:     time.Unix(0, 0),
+                       SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}},
+               }},
+       }, {
+               // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
+               file: "testdata/pax-nil-sparse-data.tar",
+               headers: []*Header{{
+                       Name:        "sparse.db",
+                       Typeflag:    TypeReg,
+                       Size:        1000,
+                       ModTime:     time.Unix(0, 0),
+                       SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}},
+               }},
+       }, {
+               // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
+               file: "testdata/pax-nil-sparse-hole.tar",
+               headers: []*Header{{
+                       Name:        "sparse.db",
+                       Typeflag:    TypeReg,
+                       Size:        1000,
+                       ModTime:     time.Unix(0, 0),
+                       SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}},
+               }},
        }}
 
        for _, v := range vectors {
@@ -1212,7 +1252,7 @@ func TestReadGNUSparsePAXHeaders(t *testing.T) {
 
 func TestFileReader(t *testing.T) {
        type (
-               testRead struct { // ReadN(cnt) == (wantStr, wantErr)
+               testRead struct { // Read(cnt) == (wantStr, wantErr)
                        cnt     int
                        wantStr string
                        wantErr error
@@ -1228,22 +1268,24 @@ func TestFileReader(t *testing.T) {
                testFnc interface{} // testRead | testDiscard | testRemaining
        )
 
-       makeReg := func(s string, n int) fileReader {
-               return &regFileReader{strings.NewReader(s), int64(n)}
-       }
-       makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader {
-               if !validateSparseEntries(spd, size) {
-                       t.Fatalf("invalid sparse map: %v", spd)
+       type (
+               makeReg struct {
+                       str  string
+                       size int64
                }
-               sph := invertSparseEntries(append([]SparseEntry{}, spd...), size)
-               return &sparseFileReader{fr, sph, 0}
-       }
+               makeSparse struct {
+                       makeReg makeReg
+                       spd     sparseDatas
+                       size    int64
+               }
+               fileMaker interface{} // makeReg | makeSparse
+       )
 
        vectors := []struct {
-               fr    fileReader
+               maker fileMaker
                tests []testFnc
        }{{
-               fr: makeReg("", 0),
+               maker: makeReg{"", 0},
                tests: []testFnc{
                        testRemaining{0},
                        testRead{0, "", io.EOF},
@@ -1253,7 +1295,7 @@ func TestFileReader(t *testing.T) {
                        testRemaining{0},
                },
        }, {
-               fr: makeReg("", 1),
+               maker: makeReg{"", 1},
                tests: []testFnc{
                        testRemaining{1},
                        testRead{0, "", io.ErrUnexpectedEOF},
@@ -1263,14 +1305,14 @@ func TestFileReader(t *testing.T) {
                        testRemaining{1},
                },
        }, {
-               fr: makeReg("hello", 5),
+               maker: makeReg{"hello", 5},
                tests: []testFnc{
                        testRemaining{5},
                        testRead{5, "hello", io.EOF},
                        testRemaining{0},
                },
        }, {
-               fr: makeReg("hello, world", 50),
+               maker: makeReg{"hello, world", 50},
                tests: []testFnc{
                        testRemaining{50},
                        testDiscard{7, 7, nil},
@@ -1282,7 +1324,7 @@ func TestFileReader(t *testing.T) {
                        testRemaining{38},
                },
        }, {
-               fr: makeReg("hello, world", 5),
+               maker: makeReg{"hello, world", 5},
                tests: []testFnc{
                        testRemaining{5},
                        testRead{0, "", nil},
@@ -1294,7 +1336,7 @@ func TestFileReader(t *testing.T) {
                        testRead{0, "", io.EOF},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8},
                tests: []testFnc{
                        testRemaining{8},
                        testRead{3, "ab\x00", nil},
@@ -1302,92 +1344,92 @@ func TestFileReader(t *testing.T) {
                        testRemaining{0},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8},
                tests: []testFnc{
                        testRemaining{8},
                        testDiscard{100, 8, io.EOF},
                        testRemaining{0},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10},
                tests: []testFnc{
                        testRemaining{10},
                        testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF},
                        testRemaining{0},
                },
        }, {
-               fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
+               maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10},
                tests: []testFnc{
                        testRemaining{10},
                        testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF},
                        testRemaining{4},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8},
                tests: []testFnc{
                        testRemaining{8},
                        testRead{8, "\x00abc\x00\x00de", io.EOF},
                        testRemaining{0},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8},
                tests: []testFnc{
                        testRemaining{8},
                        testRead{8, "\x00abc\x00\x00de", io.EOF},
                        testRemaining{0},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10},
                tests: []testFnc{
                        testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10},
                tests: []testFnc{
                        testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
                },
        }, {
-               fr: makeSparse(makeReg("", 0), sparseDatas{}, 2),
+               maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2},
                tests: []testFnc{
                        testRead{100, "\x00\x00", io.EOF},
                },
        }, {
-               fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00", io.ErrUnexpectedEOF},
                },
        }, {
-               fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00ab", errMissData},
                },
        }, {
-               fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00ab", io.ErrUnexpectedEOF},
                },
        }, {
-               fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00abc\x00\x00", errMissData},
                },
        }, {
-               fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00abc\x00\x00de", errMissData},
                },
        }, {
-               fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF},
                },
        }, {
-               fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRemaining{15},
                        testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData},
@@ -1395,7 +1437,7 @@ func TestFileReader(t *testing.T) {
                        testRemaining{0},
                },
        }, {
-               fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
+               maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15},
                tests: []testFnc{
                        testRemaining{15},
                        testDiscard{100, 15, errUnrefData},
@@ -1405,21 +1447,38 @@ func TestFileReader(t *testing.T) {
        }}
 
        for i, v := range vectors {
+               var fr fileReader
+               switch maker := v.maker.(type) {
+               case makeReg:
+                       r := strings.NewReader(maker.str)
+                       fr = &regFileReader{r, maker.size}
+               case makeSparse:
+                       if !validateSparseEntries(maker.spd, maker.size) {
+                               t.Fatalf("invalid sparse map: %v", maker.spd)
+                       }
+                       sph := invertSparseEntries(maker.spd, maker.size)
+                       r := strings.NewReader(maker.makeReg.str)
+                       fr = &regFileReader{r, maker.makeReg.size}
+                       fr = &sparseFileReader{fr, sph, 0}
+               default:
+                       t.Fatalf("test %d, unknown make operation: %T", i, maker)
+               }
+
                for j, tf := range v.tests {
                        switch tf := tf.(type) {
                        case testRead:
                                b := make([]byte, tf.cnt)
-                               n, err := v.fr.Read(b)
+                               n, err := fr.Read(b)
                                if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr {
                                        t.Errorf("test %d.%d, Read(%d):\ngot  (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr)
                                }
                        case testDiscard:
-                               got, err := v.fr.Discard(tf.cnt)
+                               got, err := fr.Discard(tf.cnt)
                                if got != tf.wantCnt || err != tf.wantErr {
                                        t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr)
                                }
                        case testRemaining:
-                               got := v.fr.Remaining()
+                               got := fr.Remaining()
                                if got != tf.wantCnt {
                                        t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt)
                                }
diff --git a/src/archive/tar/testdata/gnu-nil-sparse-data.tar b/src/archive/tar/testdata/gnu-nil-sparse-data.tar
new file mode 100644 (file)
index 0000000..df1aa83
Binary files /dev/null and b/src/archive/tar/testdata/gnu-nil-sparse-data.tar differ
diff --git a/src/archive/tar/testdata/gnu-nil-sparse-hole.tar b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar
new file mode 100644 (file)
index 0000000..496abfe
Binary files /dev/null and b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar differ
diff --git a/src/archive/tar/testdata/gnu-sparse-big.tar b/src/archive/tar/testdata/gnu-sparse-big.tar
new file mode 100644 (file)
index 0000000..1a5cfc9
Binary files /dev/null and b/src/archive/tar/testdata/gnu-sparse-big.tar differ
diff --git a/src/archive/tar/testdata/pax-nil-sparse-data.tar b/src/archive/tar/testdata/pax-nil-sparse-data.tar
new file mode 100644 (file)
index 0000000..e59bd94
Binary files /dev/null and b/src/archive/tar/testdata/pax-nil-sparse-data.tar differ
diff --git a/src/archive/tar/testdata/pax-nil-sparse-hole.tar b/src/archive/tar/testdata/pax-nil-sparse-hole.tar
new file mode 100644 (file)
index 0000000..b44327b
Binary files /dev/null and b/src/archive/tar/testdata/pax-nil-sparse-hole.tar differ
diff --git a/src/archive/tar/testdata/pax-sparse-big.tar b/src/archive/tar/testdata/pax-sparse-big.tar
new file mode 100644 (file)
index 0000000..65d1f8e
Binary files /dev/null and b/src/archive/tar/testdata/pax-sparse-big.tar differ
index 4bfd519603321100ff1aabd07a0f07aede19c62b..09fc5dd3dd7fc5de3b6d22461fa23152fd499a41 100644 (file)
Binary files a/src/archive/tar/testdata/writer-big-long.tar and b/src/archive/tar/testdata/writer-big-long.tar differ
index f838ada81b100f1daf84937e84579a4971cfd158..0dadee70c1a53de229cb771ce30e60d88f547351 100644 (file)
Binary files a/src/archive/tar/testdata/writer-big.tar and b/src/archive/tar/testdata/writer-big.tar differ
index c9237c832902b20723ee351c82dfd4a839a46fe0..cc4701c627ff2133407084f20e8dbbdff2c43720 100644 (file)
@@ -10,6 +10,7 @@ import (
        "io"
        "path"
        "sort"
+       "strconv"
        "strings"
        "time"
 )
@@ -19,11 +20,11 @@ import (
 // Call WriteHeader to begin a new file, and then call Write to supply that file's data,
 // writing at most hdr.Size bytes in total.
 type Writer struct {
-       w   io.Writer
-       nb  int64  // number of unwritten bytes for current file entry
-       pad int64  // amount of padding to write after current file entry
-       hdr Header // Shallow copy of Header that is safe for mutations
-       blk block  // Buffer to use as temporary local storage
+       w    io.Writer
+       pad  int64      // Amount of padding to write after current file entry
+       curr fileWriter // Writer for current file entry
+       hdr  Header     // Shallow copy of Header that is safe for mutations
+       blk  block      // Buffer to use as temporary local storage
 
        // err is a persistent error.
        // It is only the responsibility of every exported method of Writer to
@@ -32,7 +33,16 @@ type Writer struct {
 }
 
 // NewWriter creates a new Writer writing to w.
-func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
+func NewWriter(w io.Writer) *Writer {
+       return &Writer{w: w, curr: &regFileWriter{w, 0}}
+}
+
+type fileWriter interface {
+       io.Writer
+       fileState
+
+       FillZeros(n int64) (int64, error)
+}
 
 // Flush finishes writing the current file's block padding.
 // The current file must be fully written before Flush can be called.
@@ -43,8 +53,8 @@ func (tw *Writer) Flush() error {
        if tw.err != nil {
                return tw.err
        }
-       if tw.nb > 0 {
-               return fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
+       if nb := tw.curr.Remaining(); nb > 0 {
+               return fmt.Errorf("archive/tar: missed writing %d bytes", nb)
        }
        if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
                return tw.err
@@ -96,6 +106,39 @@ func (tw *Writer) writeUSTARHeader(hdr *Header) error {
 }
 
 func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
+       realName, realSize := hdr.Name, hdr.Size
+
+       // Handle sparse files.
+       var spd sparseDatas
+       var spb []byte
+       if len(hdr.SparseHoles) > 0 {
+               sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map
+               sph = alignSparseEntries(sph, hdr.Size)
+               spd = invertSparseEntries(sph, hdr.Size)
+
+               // Format the sparse map.
+               hdr.Size = 0 // Replace with encoded size
+               spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n')
+               for _, s := range spd {
+                       hdr.Size += s.Length
+                       spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n')
+                       spb = append(strconv.AppendInt(spb, s.Length, 10), '\n')
+               }
+               pad := blockPadding(int64(len(spb)))
+               spb = append(spb, zeroBlock[:pad]...)
+               hdr.Size += int64(len(spb)) // Accounts for encoded sparse map
+
+               // Add and modify appropriate PAX records.
+               dir, file := path.Split(realName)
+               hdr.Name = path.Join(dir, "GNUSparseFile.0", file)
+               paxHdrs[paxGNUSparseMajor] = "1"
+               paxHdrs[paxGNUSparseMinor] = "0"
+               paxHdrs[paxGNUSparseName] = realName
+               paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10)
+               paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10)
+               delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName
+       }
+
        // Write PAX records to the output.
        if len(paxHdrs) > 0 {
                // Sort keys for deterministic ordering.
@@ -116,7 +159,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
                }
 
                // Write the extended header file.
-               dir, file := path.Split(hdr.Name)
+               dir, file := path.Split(realName)
                name := path.Join(dir, "PaxHeaders.0", file)
                data := buf.String()
                if err := tw.writeRawFile(name, data, TypeXHeader, formatPAX); err != nil {
@@ -129,13 +172,22 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
        fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) }
        blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal)
        blk.SetFormat(formatPAX)
-       return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag)
+       if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
+               return err
+       }
+
+       // Write the sparse map and setup the sparse writer if necessary.
+       if len(spd) > 0 {
+               // Use tw.curr since the sparse map is accounted for in hdr.Size.
+               if _, err := tw.curr.Write(spb); err != nil {
+                       return err
+               }
+               tw.curr = &sparseFileWriter{tw.curr, spd, 0}
+       }
+       return nil
 }
 
 func (tw *Writer) writeGNUHeader(hdr *Header) error {
-       // TODO(dsnet): Support writing sparse files.
-       // See https://golang.org/issue/13548
-
        // Use long-link files if Name or Linkname exceeds the field size.
        const longName = "././@LongLink"
        if len(hdr.Name) > nameSize {
@@ -153,6 +205,8 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error {
 
        // Pack the main header.
        var f formatter // Ignore errors since they are expected
+       var spd sparseDatas
+       var spb []byte
        blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric)
        if !hdr.AccessTime.IsZero() {
                f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix())
@@ -160,8 +214,54 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error {
        if !hdr.ChangeTime.IsZero() {
                f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix())
        }
+       if hdr.Typeflag == TypeGNUSparse {
+               sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map
+               sph = alignSparseEntries(sph, hdr.Size)
+               spd = invertSparseEntries(sph, hdr.Size)
+
+               // Format the sparse map.
+               formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas {
+                       for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ {
+                               f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset)
+                               f.formatNumeric(sa.Entry(i).Length(), sp[0].Length)
+                               sp = sp[1:]
+                       }
+                       if len(sp) > 0 {
+                               sa.IsExtended()[0] = 1
+                       }
+                       return sp
+               }
+               sp2 := formatSPD(spd, blk.GNU().Sparse())
+               for len(sp2) > 0 {
+                       var spHdr block
+                       sp2 = formatSPD(sp2, spHdr.Sparse())
+                       spb = append(spb, spHdr[:]...)
+               }
+
+               // Update size fields in the header block.
+               realSize := hdr.Size
+               hdr.Size = 0 // Encoded size; does not account for encoded sparse map
+               for _, s := range spd {
+                       hdr.Size += s.Length
+               }
+               copy(blk.V7().Size(), zeroBlock[:]) // Reset field
+               f.formatNumeric(blk.V7().Size(), hdr.Size)
+               f.formatNumeric(blk.GNU().RealSize(), realSize)
+       }
        blk.SetFormat(formatGNU)
-       return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag)
+       if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
+               return err
+       }
+
+       // Write the extended sparse map and set up the sparse writer if necessary.
+       if len(spd) > 0 {
+               // Use tw.w since the sparse map is not accounted for in hdr.Size.
+               if _, err := tw.w.Write(spb); err != nil {
+                       return err
+               }
+               tw.curr = &sparseFileWriter{tw.curr, spd, 0}
+       }
+       return nil
 }
 
 type (
@@ -249,7 +349,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
        if isHeaderOnlyType(flag) {
                size = 0
        }
-       tw.nb = size
+       tw.curr = &regFileWriter{tw.w, size}
        tw.pad = blockPadding(size)
        return nil
 }
@@ -279,6 +379,9 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
 // Write returns the error ErrWriteTooLong if more than
 // Header.Size bytes are written after WriteHeader.
 //
+// If the current file is sparse, then the regions marked as a sparse hole
+// must be written as NUL-bytes.
+//
 // Calling Write on special types like TypeLink, TypeSymLink, TypeChar,
 // TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless
 // of what the Header.Size claims.
@@ -286,17 +389,29 @@ func (tw *Writer) Write(b []byte) (int, error) {
        if tw.err != nil {
                return 0, tw.err
        }
+       n, err := tw.curr.Write(b)
+       if err != nil && err != ErrWriteTooLong {
+               tw.err = err
+       }
+       return n, err
+}
 
-       overwrite := int64(len(b)) > tw.nb
-       if overwrite {
-               b = b[:tw.nb]
+// TODO(dsnet): Export the Writer.FillZeros method to assist in quickly zeroing
+// out sections of a file. This is especially useful for efficiently
+// skipping over large holes in a sparse file.
+
+// fillZeros writes n bytes of zeros to the current file,
+// returning the number of bytes written.
+// If fewer than n bytes are written, it returns a non-nil error,
+// which may be ErrWriteTooLong if the current file is complete.
+func (tw *Writer) fillZeros(n int64) (int64, error) {
+       if tw.err != nil {
+               return 0, tw.err
        }
-       n, err := tw.w.Write(b)
-       tw.nb -= int64(n)
-       if err == nil && overwrite {
-               return n, ErrWriteTooLong // Non-fatal error
+       n, err := tw.curr.FillZeros(n)
+       if err != nil && err != ErrWriteTooLong {
+               tw.err = err
        }
-       tw.err = err
        return n, err
 }
 
@@ -320,3 +435,135 @@ func (tw *Writer) Close() error {
        tw.err = ErrWriteAfterClose
        return err // Report IO errors
 }
+
+// regFileWriter is a fileWriter for writing data to a regular file entry.
+type regFileWriter struct {
+       w  io.Writer // Underlying Writer
+       nb int64     // Number of remaining bytes to write
+}
+
+func (fw *regFileWriter) Write(b []byte) (int, error) {
+       overwrite := int64(len(b)) > fw.nb
+       if overwrite {
+               b = b[:fw.nb]
+       }
+       n, err := fw.w.Write(b)
+       fw.nb -= int64(n)
+       switch {
+       case err != nil:
+               return n, err
+       case overwrite:
+               return n, ErrWriteTooLong
+       default:
+               return n, nil
+       }
+}
+
+func (fw *regFileWriter) FillZeros(n int64) (int64, error) {
+       return io.CopyN(fw, zeroReader{}, n)
+}
+
+func (fw regFileWriter) Remaining() int64 {
+       return fw.nb
+}
+
+// sparseFileWriter is a fileWriter for writing data to a sparse file entry.
+type sparseFileWriter struct {
+       fw  fileWriter  // Underlying fileWriter
+       sp  sparseDatas // Normalized list of data fragments
+       pos int64       // Current position in sparse file
+}
+
+func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
+       overwrite := int64(len(b)) > sw.Remaining()
+       if overwrite {
+               b = b[:sw.Remaining()]
+       }
+
+       b0 := b
+       endPos := sw.pos + int64(len(b))
+       for endPos > sw.pos && err == nil {
+               var nf int // Bytes written in fragment
+               dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
+               if sw.pos < dataStart { // In a hole fragment
+                       bf := b[:min(int64(len(b)), dataStart-sw.pos)]
+                       nf, err = zeroWriter{}.Write(bf)
+               } else { // In a data fragment
+                       bf := b[:min(int64(len(b)), dataEnd-sw.pos)]
+                       nf, err = sw.fw.Write(bf)
+               }
+               b = b[nf:]
+               sw.pos += int64(nf)
+               if sw.pos >= dataEnd && len(sw.sp) > 1 {
+                       sw.sp = sw.sp[1:] // Ensure last fragment always remains
+               }
+       }
+
+       n = len(b0) - len(b)
+       switch {
+       case err == ErrWriteTooLong:
+               return n, errMissData // Not possible; implies bug in validation logic
+       case err != nil:
+               return n, err
+       case sw.Remaining() == 0 && sw.fw.Remaining() > 0:
+               return n, errUnrefData // Not possible; implies bug in validation logic
+       case overwrite:
+               return n, ErrWriteTooLong
+       default:
+               return n, nil
+       }
+}
+
+func (sw *sparseFileWriter) FillZeros(n int64) (int64, error) {
+       overwrite := n > sw.Remaining()
+       if overwrite {
+               n = sw.Remaining()
+       }
+
+       var realFill int64 // Number of real data bytes to fill
+       endPos := sw.pos + n
+       for endPos > sw.pos {
+               var nf int64 // Size of fragment
+               dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
+               if sw.pos < dataStart { // In a hole fragment
+                       nf = min(endPos-sw.pos, dataStart-sw.pos)
+               } else { // In a data fragment
+                       nf = min(endPos-sw.pos, dataEnd-sw.pos)
+                       realFill += nf
+               }
+               sw.pos += nf
+               if sw.pos >= dataEnd && len(sw.sp) > 1 {
+                       sw.sp = sw.sp[1:] // Ensure last fragment always remains
+               }
+       }
+
+       _, err := sw.fw.FillZeros(realFill)
+       switch {
+       case err == ErrWriteTooLong:
+               return n, errMissData // Not possible; implies bug in validation logic
+       case err != nil:
+               return n, err
+       case sw.Remaining() == 0 && sw.fw.Remaining() > 0:
+               return n, errUnrefData // Not possible; implies bug in validation logic
+       case overwrite:
+               return n, ErrWriteTooLong
+       default:
+               return n, nil
+       }
+}
+
+func (sw sparseFileWriter) Remaining() int64 {
+       return sw.sp[len(sw.sp)-1].endOffset() - sw.pos
+}
+
+// zeroWriter may only be written with NULs, otherwise it returns errWriteHole.
+type zeroWriter struct{}
+
+func (zeroWriter) Write(b []byte) (int, error) {
+       for i, c := range b {
+               if c != 0 {
+                       return i, errWriteHole
+               }
+       }
+       return len(b), nil
+}
index 9cfc225611e4ad0d8acee483459eaa7b092552d8..def9c0110dfcb8afd525175d8496f5d9c65722ae 100644 (file)
@@ -49,83 +49,97 @@ func bytediff(a, b []byte) string {
 }
 
 func TestWriter(t *testing.T) {
-       type entry struct {
-               header   *Header
-               contents string
-       }
+       type (
+               testHeader struct { // WriteHeader(&hdr) == wantErr
+                       hdr     Header
+                       wantErr error
+               }
+               testWrite struct { // Write([]byte(str)) == (wantCnt, wantErr)
+                       str     string
+                       wantCnt int
+                       wantErr error
+               }
+               testFill struct { // fillZeros(cnt) == (wantCnt, wantErr)
+                       cnt     int64
+                       wantCnt int64
+                       wantErr error
+               }
+               testClose struct { // Close() == wantErr
+                       wantErr error
+               }
+               testFnc interface{} // testHeader | testWrite | testFill | testClose
+       )
 
        vectors := []struct {
-               file    string // filename of expected output
-               entries []*entry
-               err     error // expected error on WriteHeader
+               file  string // Optional filename of expected output
+               tests []testFnc
        }{{
                // The writer test file was produced with this command:
                // tar (GNU tar) 1.26
                //   ln -s small.txt link.txt
                //   tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt
                file: "testdata/writer.tar",
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "small.txt",
+                               Size:     5,
                                Mode:     0640,
                                Uid:      73025,
                                Gid:      5000,
-                               Size:     5,
-                               ModTime:  time.Unix(1246508266, 0),
-                               Typeflag: '0',
                                Uname:    "dsymonds",
                                Gname:    "eng",
-                       },
-                       contents: "Kilts",
-               }, {
-                       header: &Header{
+                               ModTime:  time.Unix(1246508266, 0),
+                       }, nil},
+                       testWrite{"Kilts", 5, nil},
+
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "small2.txt",
+                               Size:     11,
                                Mode:     0640,
                                Uid:      73025,
-                               Gid:      5000,
-                               Size:     11,
-                               ModTime:  time.Unix(1245217492, 0),
-                               Typeflag: '0',
                                Uname:    "dsymonds",
                                Gname:    "eng",
-                       },
-                       contents: "Google.com\n",
-               }, {
-                       header: &Header{
+                               Gid:      5000,
+                               ModTime:  time.Unix(1245217492, 0),
+                       }, nil},
+                       testWrite{"Google.com\n", 11, nil},
+
+                       testHeader{Header{
+                               Typeflag: TypeSymlink,
                                Name:     "link.txt",
+                               Linkname: "small.txt",
                                Mode:     0777,
                                Uid:      1000,
                                Gid:      1000,
-                               Size:     0,
-                               ModTime:  time.Unix(1314603082, 0),
-                               Typeflag: '2',
-                               Linkname: "small.txt",
                                Uname:    "strings",
                                Gname:    "strings",
-                       },
-                       // no contents
-               }},
+                               ModTime:  time.Unix(1314603082, 0),
+                       }, nil},
+                       testWrite{"", 0, nil},
+
+                       testClose{nil},
+               },
        }, {
                // The truncated test file was produced using these commands:
                //   dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt
                //   tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar
                file: "testdata/writer-big.tar",
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "tmp/16gig.txt",
+                               Size:     16 << 30,
                                Mode:     0640,
                                Uid:      73025,
                                Gid:      5000,
-                               Size:     16 << 30,
-                               ModTime:  time.Unix(1254699560, 0),
-                               Typeflag: '0',
                                Uname:    "dsymonds",
                                Gname:    "eng",
+                               ModTime:  time.Unix(1254699560, 0),
                                Devminor: -1, // Force use of GNU format
-                       },
-                       // fake contents
-                       contents: strings.Repeat("\x00", 4<<10),
-               }},
+                       }, nil},
+               },
        }, {
                // This truncated file was produced using this library.
                // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2.
@@ -135,141 +149,265 @@ func TestWriter(t *testing.T) {
                //
                // This file is in PAX format.
                file: "testdata/writer-big-long.tar",
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     strings.Repeat("longname/", 15) + "16gig.txt",
+                               Size:     16 << 30,
                                Mode:     0644,
                                Uid:      1000,
                                Gid:      1000,
-                               Size:     16 << 30,
-                               ModTime:  time.Unix(1399583047, 0),
-                               Typeflag: '0',
                                Uname:    "guillaume",
                                Gname:    "guillaume",
-                       },
-                       // fake contents
-                       contents: strings.Repeat("\x00", 4<<10),
-               }},
+                               ModTime:  time.Unix(1399583047, 0),
+                       }, nil},
+               },
        }, {
                // This file was produced using GNU tar v1.17.
                //      gnutar -b 4 --format=ustar (longname/)*15 + file.txt
                file: "testdata/ustar.tar",
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     strings.Repeat("longname/", 15) + "file.txt",
+                               Size:     6,
                                Mode:     0644,
-                               Uid:      0765,
-                               Gid:      024,
-                               Size:     06,
-                               ModTime:  time.Unix(1360135598, 0),
-                               Typeflag: '0',
+                               Uid:      501,
+                               Gid:      20,
                                Uname:    "shane",
                                Gname:    "staff",
-                       },
-                       contents: "hello\n",
-               }},
-       }, {
-               // This file was produced using gnu tar 1.26
-               // echo "Slartibartfast" > file.txt
-               // ln file.txt hard.txt
-               // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt
+                               ModTime:  time.Unix(1360135598, 0),
+                       }, nil},
+                       testWrite{"hello\n", 6, nil},
+                       testClose{nil},
+               },
+       }, {
+               // This file was produced using GNU tar v1.26:
+               //      echo "Slartibartfast" > file.txt
+               //      ln file.txt hard.txt
+               //      tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt
                file: "testdata/hardlink.tar",
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "file.txt",
+                               Size:     15,
                                Mode:     0644,
                                Uid:      1000,
                                Gid:      100,
-                               Size:     15,
-                               ModTime:  time.Unix(1425484303, 0),
-                               Typeflag: '0',
                                Uname:    "vbatts",
                                Gname:    "users",
-                       },
-                       contents: "Slartibartfast\n",
-               }, {
-                       header: &Header{
+                               ModTime:  time.Unix(1425484303, 0),
+                       }, nil},
+                       testWrite{"Slartibartfast\n", 15, nil},
+
+                       testHeader{Header{
+                               Typeflag: TypeLink,
                                Name:     "hard.txt",
+                               Linkname: "file.txt",
                                Mode:     0644,
                                Uid:      1000,
                                Gid:      100,
-                               Size:     0,
-                               ModTime:  time.Unix(1425484303, 0),
-                               Typeflag: '1',
-                               Linkname: "file.txt",
                                Uname:    "vbatts",
                                Gname:    "users",
-                       },
-                       // no contents
-               }},
+                               ModTime:  time.Unix(1425484303, 0),
+                       }, nil},
+                       testWrite{"", 0, nil},
+
+                       testClose{nil},
+               },
        }, {
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "bad-null.txt",
-                               Typeflag: '0',
                                Xattrs:   map[string]string{"null\x00null\x00": "fizzbuzz"},
-                       },
-               }},
-               err: ErrHeader,
+                       }, ErrHeader},
+               },
        }, {
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "null\x00.txt",
-                               Typeflag: '0',
-                       },
-               }},
-               err: ErrHeader,
+                       }, ErrHeader},
+               },
        }, {
                file: "testdata/gnu-utf8.tar",
-               entries: []*entry{{
-                       header: &Header{
-                               Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
-                               Mode: 0644,
-                               Uid:  1000, Gid: 1000,
-                               ModTime:  time.Unix(0, 0),
-                               Typeflag: '0',
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
+                               Name:     "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
+                               Mode:     0644,
+                               Uid:      1000, Gid: 1000,
                                Uname:    "☺",
                                Gname:    "⚹",
+                               ModTime:  time.Unix(0, 0),
                                Devminor: -1, // Force use of GNU format
-                       },
-               }},
+                       }, nil},
+                       testClose{nil},
+               },
        }, {
                file: "testdata/gnu-not-utf8.tar",
-               entries: []*entry{{
-                       header: &Header{
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
                                Name:     "hi\x80\x81\x82\x83bye",
                                Mode:     0644,
                                Uid:      1000,
                                Gid:      1000,
-                               ModTime:  time.Unix(0, 0),
-                               Typeflag: '0',
                                Uname:    "rawr",
                                Gname:    "dsnet",
+                               ModTime:  time.Unix(0, 0),
                                Devminor: -1, // Force use of GNU format
-                       },
-               }},
+                       }, nil},
+                       testClose{nil},
+               },
+       }, {
+               file: "testdata/gnu-nil-sparse-data.tar",
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag:    TypeGNUSparse,
+                               Name:        "sparse.db",
+                               Size:        1000,
+                               SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}},
+                       }, nil},
+                       testWrite{strings.Repeat("0123456789", 100), 1000, nil},
+                       testClose{},
+               },
+       }, {
+               file: "testdata/gnu-nil-sparse-hole.tar",
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag:    TypeGNUSparse,
+                               Name:        "sparse.db",
+                               Size:        1000,
+                               SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}},
+                       }, nil},
+                       testWrite{strings.Repeat("\x00", 1000), 1000, nil},
+                       testClose{},
+               },
+       }, {
+               file: "testdata/pax-nil-sparse-data.tar",
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag:    TypeReg,
+                               Name:        "sparse.db",
+                               Size:        1000,
+                               SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}},
+                       }, nil},
+                       testWrite{strings.Repeat("0123456789", 100), 1000, nil},
+                       testClose{},
+               },
+       }, {
+               file: "testdata/pax-nil-sparse-hole.tar",
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag:    TypeReg,
+                               Name:        "sparse.db",
+                               Size:        1000,
+                               SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}},
+                       }, nil},
+                       testWrite{strings.Repeat("\x00", 1000), 1000, nil},
+                       testClose{},
+               },
+       }, {
+               file: "testdata/gnu-sparse-big.tar",
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeGNUSparse,
+                               Name:     "gnu-sparse",
+                               Size:     6e10,
+                               SparseHoles: []SparseEntry{
+                                       {Offset: 0e10, Length: 1e10 - 100},
+                                       {Offset: 1e10, Length: 1e10 - 100},
+                                       {Offset: 2e10, Length: 1e10 - 100},
+                                       {Offset: 3e10, Length: 1e10 - 100},
+                                       {Offset: 4e10, Length: 1e10 - 100},
+                                       {Offset: 5e10, Length: 1e10 - 100},
+                               },
+                       }, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 0, ErrWriteTooLong},
+                       testWrite{strings.Repeat("0123456789", 10), 0, ErrWriteTooLong},
+                       testClose{nil},
+               },
+       }, {
+               file: "testdata/pax-sparse-big.tar",
+               tests: []testFnc{
+                       testHeader{Header{
+                               Typeflag: TypeReg,
+                               Name:     "pax-sparse",
+                               Size:     6e10,
+                               SparseHoles: []SparseEntry{
+                                       {Offset: 0e10, Length: 1e10 - 100},
+                                       {Offset: 1e10, Length: 1e10 - 100},
+                                       {Offset: 2e10, Length: 1e10 - 100},
+                                       {Offset: 3e10, Length: 1e10 - 100},
+                                       {Offset: 4e10, Length: 1e10 - 100},
+                                       {Offset: 5e10, Length: 1e10 - 100},
+                               },
+                       }, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 1e10 - 100, nil},
+                       testWrite{strings.Repeat("0123456789", 10), 100, nil},
+                       testFill{1e10 - 100, 0, ErrWriteTooLong},
+                       testWrite{strings.Repeat("0123456789", 10), 0, ErrWriteTooLong},
+                       testClose{nil},
+               },
        }}
 
        for _, v := range vectors {
                t.Run(path.Base(v.file), func(t *testing.T) {
+                       const maxSize = 10 << 10 // 10KiB
                        buf := new(bytes.Buffer)
-                       tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB
-                       canFail := false
-                       for i, entry := range v.entries {
-                               canFail = canFail || entry.header.Size > 1<<10 || v.err != nil
-
-                               err := tw.WriteHeader(entry.header)
-                               if err != v.err {
-                                       t.Fatalf("entry %d: WriteHeader() = %v, want %v", i, err, v.err)
-                               }
-                               if _, err := io.WriteString(tw, entry.contents); err != nil {
-                                       t.Fatalf("entry %d: WriteString() = %v, want nil", i, err)
+                       tw := NewWriter(iotest.TruncateWriter(buf, maxSize))
+
+                       for i, tf := range v.tests {
+                               switch tf := tf.(type) {
+                               case testHeader:
+                                       err := tw.WriteHeader(&tf.hdr)
+                                       if err != tf.wantErr {
+                                               t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr)
+                                       }
+                               case testWrite:
+                                       got, err := tw.Write([]byte(tf.str))
+                                       if got != tf.wantCnt || err != tf.wantErr {
+                                               t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr)
+                                       }
+                               case testFill:
+                                       got, err := tw.fillZeros(tf.cnt)
+                                       if got != tf.wantCnt || err != tf.wantErr {
+                                               t.Fatalf("test %d, fillZeros() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr)
+                                       }
+                               case testClose:
+                                       err := tw.Close()
+                                       if err != tf.wantErr {
+                                               t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr)
+                                       }
+                               default:
+                                       t.Fatalf("test %d, unknown test operation: %T", i, tf)
                                }
                        }
-                       // Only interested in Close failures for the small tests.
-                       if err := tw.Close(); err != nil && !canFail {
-                               t.Fatalf("Close() = %v, want nil", err)
-                       }
 
                        if v.file != "" {
                                want, err := ioutil.ReadFile(v.file)
@@ -758,3 +896,286 @@ func TestIssue12594(t *testing.T) {
                }
        }
 }
+
+func TestFileWriter(t *testing.T) { // table-driven test of the fileWriter implementations (regFileWriter, sparseFileWriter)
+       type ( // operations a vector may apply, in order, to the writer under test
+               testWrite struct { // Write(str) == (wantCnt, wantErr)
+                       str     string
+                       wantCnt int
+                       wantErr error
+               }
+               testFill struct { // FillZeros(cnt) == (wantCnt, wantErr)
+                       cnt     int64
+                       wantCnt int64
+                       wantErr error
+               }
+               testRemaining struct { // Remaining() == wantCnt
+                       wantCnt int64
+               }
+               testFnc interface{} // testWrite | testFill | testRemaining
+       )
+
+       type ( // descriptions of how to construct the writer under test
+               makeReg struct {
+                       size    int64  // size handed to the regFileWriter
+                       wantStr string // expected contents of the backing buffer after all operations
+               }
+               makeSparse struct {
+                       makeReg makeReg     // inner regFileWriter wrapped by the sparseFileWriter
+                       sph     sparseHoles // holes; inverted into data entries via invertSparseEntries
+                       size    int64       // size passed with sph to validateSparseEntries/invertSparseEntries
+               }
+               fileMaker interface{} // makeReg | makeSparse
+       )
+
+       vectors := []struct {
+               maker fileMaker
+               tests []testFnc
+       }{{
+               maker: makeReg{0, ""},
+               tests: []testFnc{
+                       testRemaining{0},
+                       testWrite{"", 0, nil},
+                       testWrite{"a", 0, ErrWriteTooLong},
+                       testFill{0, 0, nil},
+                       testFill{1, 0, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeReg{1, "a"},
+               tests: []testFnc{
+                       testRemaining{1},
+                       testWrite{"", 0, nil},
+                       testWrite{"a", 1, nil},
+                       testWrite{"bcde", 0, ErrWriteTooLong},
+                       testWrite{"", 0, nil},
+                       testFill{0, 0, nil},
+                       testFill{1, 0, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeReg{5, "hello"},
+               tests: []testFnc{
+                       testRemaining{5},
+                       testWrite{"hello", 5, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeReg{5, "\x00\x00\x00\x00\x00"},
+               tests: []testFnc{
+                       testRemaining{5},
+                       testFill{5, 5, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeReg{5, "\x00\x00\x00\x00\x00"},
+               tests: []testFnc{
+                       testRemaining{5},
+                       testFill{10, 5, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeReg{5, "abc\x00\x00"},
+               tests: []testFnc{
+                       testRemaining{5},
+                       testWrite{"abc", 3, nil},
+                       testRemaining{2},
+                       testFill{2, 2, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeReg{5, "\x00\x00abc"},
+               tests: []testFnc{
+                       testRemaining{5},
+                       testFill{2, 2, nil},
+                       testRemaining{3},
+                       testWrite{"abc", 3, nil},
+                       testFill{1, 0, ErrWriteTooLong},
+                       testWrite{"z", 0, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testRemaining{8},
+                       testWrite{"ab\x00\x00\x00cde", 8, nil},
+                       testWrite{"a", 0, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testWrite{"ab\x00", 3, nil},
+                       testRemaining{5},
+                       testWrite{"\x00\x00cde", 5, nil},
+                       testWrite{"a", 0, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testWrite{"ab", 2, nil},
+                       testRemaining{6},
+                       testFill{3, 3, nil},
+                       testRemaining{3},
+                       testWrite{"cde", 3, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{5, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testFill{8, 8, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{5, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testFill{9, 8, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{4, "\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testFill{9, 8, errMissData},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{6, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testFill{9, 8, errUnrefData},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testWrite{"ab", 2, nil},
+                       testRemaining{6},
+                       testFill{3, 3, nil},
+                       testRemaining{3},
+                       testWrite{"cde", 2, errMissData},
+                       testRemaining{1},
+               },
+       }, {
+               maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8},
+               tests: []testFnc{
+                       testWrite{"ab", 2, nil},
+                       testRemaining{6},
+                       testFill{3, 3, nil},
+                       testRemaining{3},
+                       testWrite{"cde", 3, errUnrefData},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testRemaining{7},
+                       testWrite{"\x00\x00abc\x00\x00", 7, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testWrite{"abcdefg", 0, errWriteHole},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testWrite{"\x00\x00abcde", 5, errWriteHole},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, "\x00\x00\x00"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testFill{7, 7, nil},
+                       testFill{1, 0, ErrWriteTooLong},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, "\x00\x00\x00"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testFill{4, 4, nil},
+                       testFill{8, 3, ErrWriteTooLong},
+               },
+       }, {
+               maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testFill{2, 2, nil},
+                       testRemaining{5},
+                       testWrite{"abc", 3, nil},
+                       testRemaining{2},
+                       testFill{2, 2, nil},
+                       testRemaining{0},
+               },
+       }, {
+               maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testFill{2, 2, nil},
+                       testWrite{"abc", 2, errMissData},
+                       testFill{2, 2, errMissData},
+               },
+       }, {
+               maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7},
+               tests: []testFnc{
+                       testFill{2, 2, nil},
+                       testWrite{"abc", 3, nil},
+                       testFill{2, 2, errUnrefData},
+               },
+       }}
+
+       for i, v := range vectors {
+               var wantStr string
+               bb := new(bytes.Buffer) // backing buffer; its final contents are compared against wantStr
+               var fw fileWriter
+               switch maker := v.maker.(type) {
+               case makeReg:
+                       fw = &regFileWriter{bb, maker.size}
+                       wantStr = maker.wantStr
+               case makeSparse:
+                       if !validateSparseEntries(maker.sph, maker.size) { // guard against a malformed vector
+                               t.Fatalf("test %d, invalid sparse map: %v", i, maker.sph)
+                       }
+                       spd := invertSparseEntries(maker.sph, maker.size)
+                       fw = &regFileWriter{bb, maker.makeReg.size}
+                       fw = &sparseFileWriter{fw, spd, 0} // wraps the regFileWriter built on the previous line
+                       wantStr = maker.makeReg.wantStr
+               default:
+                       t.Fatalf("test %d, unknown make operation: %T", i, maker)
+               }
+
+               for j, tf := range v.tests {
+                       switch tf := tf.(type) {
+                       case testWrite: // str may contain NUL bytes, so it is reported with %q
+                               got, err := fw.Write([]byte(tf.str))
+                               if got != tf.wantCnt || err != tf.wantErr {
+                                       t.Errorf("test %d.%d, Write(%q):\ngot  (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr)
+                               }
+                       case testFill:
+                               got, err := fw.FillZeros(tf.cnt)
+                               if got != tf.wantCnt || err != tf.wantErr {
+                                       t.Errorf("test %d.%d, FillZeros(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr)
+                               }
+                       case testRemaining:
+                               got := fw.Remaining()
+                               if got != tf.wantCnt {
+                                       t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt)
+                               }
+                       default:
+                               t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf)
+                       }
+               }
+
+               if got := bb.String(); got != wantStr { // checked once per vector, after every operation has run
+                       t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr)
+               }
+       }
+}