From: Joe Tsai Date: Sat, 19 Aug 2017 01:18:38 +0000 (-0700) Subject: archive/tar: implement Writer support for sparse files X-Git-Tag: go1.10beta1~1400 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e0ab505a97eed8773ea16842f2748b6d518fedd9;p=gostls13.git archive/tar: implement Writer support for sparse files This CL is the second step (of two; part1 is CL/56771) for adding sparse file support to the Writer. There are no new identifiers exported in this CL, but this does make use of Header.SparseHoles added in part1. If the Typeflag is set to TypeGNUSparse or len(SparseHoles) > 0, then the Writer will emit a sparse file, where the holes must be written by the user as zeros. If TypeGNUSparse is set, then the output file must use the GNU format. Otherwise, it must use the PAX format (with GNU-defined PAX keys). A future CL may export Reader.Discard and Writer.FillZeros, but those methods are currently unexported, and only used by the tests for efficiency reasons. Calling Discard or FillZeros on a hole 10GiB in size does take time, even if it is essentially a memcopy. Updates #13548 Change-Id: Id586d9178c227c0577f796f731ae2cbb72355601 Reviewed-on: https://go-review.googlesource.com/57212 Reviewed-by: Ian Lance Taylor --- diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go index 22f3206243..b5921fef23 100644 --- a/src/archive/tar/common.go +++ b/src/archive/tar/common.go @@ -33,6 +33,7 @@ var ( ErrWriteAfterClose = errors.New("tar: write after close") errMissData = errors.New("tar: sparse file references non-existent data") errUnrefData = errors.New("tar: sparse file contains unreferenced data") + errWriteHole = errors.New("tar: write non-NUL byte in sparse hole") ) // Header type flags. @@ -74,10 +75,13 @@ type Header struct { // SparseHoles represents a sequence of holes in a sparse file. // - // The regions must be sorted in ascending order, not overlap with - // each other, and not extend past the specified Size. 
- // The file is sparse if either len(SparseHoles) > 0 or - // the Typeflag is set to TypeGNUSparse. + // A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse. + // A sparse file consists of fragments of data, intermixed with holes + // (described by this field). A hole is semantically a block of NUL-bytes, + // but does not actually exist within the TAR file. + // The logical size of the file is stored in the Size field, while + // the holes must be sorted in ascending order, + // not overlap with each other, and not extend past the specified Size. SparseHoles []SparseEntry } @@ -300,6 +304,20 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) { return formatUnknown, nil // Invalid PAX key } } + if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return formatUnknown, nil // Cannot have sparse data on header-only file + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return formatUnknown, nil + } + if h.Typeflag == TypeGNUSparse { + format &= formatGNU // GNU only + } else { + format &^= formatGNU // No GNU + } + format &^= formatUSTAR // No USTAR + } return format, paxHdrs } diff --git a/src/archive/tar/example_test.go b/src/archive/tar/example_test.go index 5f0ce2f402..b84950c797 100644 --- a/src/archive/tar/example_test.go +++ b/src/archive/tar/example_test.go @@ -7,20 +7,20 @@ package tar_test import ( "archive/tar" "bytes" + "crypto/md5" "fmt" "io" + "io/ioutil" "log" "os" + "strings" ) func Example() { - // Create a buffer to write our archive to. buf := new(bytes.Buffer) - // Create a new tar archive. + // Create and add some files to the archive. tw := tar.NewWriter(buf) - - // Add some files to the archive. 
var files = []struct { Name, Body string }{ @@ -35,34 +35,29 @@ func Example() { Size: int64(len(file.Body)), } if err := tw.WriteHeader(hdr); err != nil { - log.Fatalln(err) + log.Fatal(err) } if _, err := tw.Write([]byte(file.Body)); err != nil { - log.Fatalln(err) + log.Fatal(err) } } - // Make sure to check the error on Close. if err := tw.Close(); err != nil { - log.Fatalln(err) + log.Fatal(err) } - // Open the tar archive for reading. - r := bytes.NewReader(buf.Bytes()) - tr := tar.NewReader(r) - - // Iterate through the files in the archive. + // Open and iterate through the files in the archive. + tr := tar.NewReader(buf) for { hdr, err := tr.Next() if err == io.EOF { - // end of tar archive - break + break // End of archive } if err != nil { - log.Fatalln(err) + log.Fatal(err) } fmt.Printf("Contents of %s:\n", hdr.Name) if _, err := io.Copy(os.Stdout, tr); err != nil { - log.Fatalln(err) + log.Fatal(err) } fmt.Println() } @@ -78,3 +73,86 @@ func Example() { // Contents of todo.txt: // Get animal handling license. } + +// A sparse file can efficiently represent a large file that is mostly empty. +func Example_sparse() { + buf := new(bytes.Buffer) + + // Define a sparse file to add to the archive. + // This sparse file contains 5 data fragments, and 4 hole fragments. + // The logical size of the file is 16 KiB, while the physical size of the + // file is only 3 KiB (not counting the header data). 
+ hdr := &tar.Header{ + Name: "sparse.db", + Size: 16384, + SparseHoles: []tar.SparseEntry{ + // Data fragment at 0..1023 + {Offset: 1024, Length: 1024 - 512}, // Hole fragment at 1024..1535 + // Data fragment at 1536..2047 + {Offset: 2048, Length: 2048 - 512}, // Hole fragment at 2048..3583 + // Data fragment at 3584..4095 + {Offset: 4096, Length: 4096 - 512}, // Hole fragment at 4096..7679 + // Data fragment at 7680..8191 + {Offset: 8192, Length: 8192 - 512}, // Hole fragment at 8192..15871 + // Data fragment at 15872..16383 + }, + } + + // The regions marked as a sparse hole are filled with NUL-bytes. + // The total length of the body content must match the specified Size field. + body := "" + + strings.Repeat("A", 1024) + + strings.Repeat("\x00", 1024-512) + + strings.Repeat("B", 512) + + strings.Repeat("\x00", 2048-512) + + strings.Repeat("C", 512) + + strings.Repeat("\x00", 4096-512) + + strings.Repeat("D", 512) + + strings.Repeat("\x00", 8192-512) + + strings.Repeat("E", 512) + + h := md5.Sum([]byte(body)) + fmt.Printf("Write content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h) + fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles) + + // Create a new archive and write the sparse file. + tw := tar.NewWriter(buf) + if err := tw.WriteHeader(hdr); err != nil { + log.Fatal(err) + } + if _, err := tw.Write([]byte(body)); err != nil { + log.Fatal(err) + } + if err := tw.Close(); err != nil { + log.Fatal(err) + } + + // Open and iterate through the files in the archive. 
+ tr := tar.NewReader(buf) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + body, err := ioutil.ReadAll(tr) + if err != nil { + log.Fatal(err) + } + + h := md5.Sum([]byte(body)) + fmt.Printf("Read content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h) + fmt.Printf("Read SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles) + } + + // Output: + // Write content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9 + // Write SparseHoles of sparse.db: + // [{1024 512} {2048 1536} {4096 3584} {8192 7680}] + // + // Read content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9 + // Read SparseHoles of sparse.db: + // [{1024 512} {2048 1536} {4096 3584} {8192 7680} {16384 0}] +} diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index 9b7896132a..fb7dcfeece 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -500,6 +500,46 @@ func TestReader(t *testing.T) { Devmajor: 1, Devminor: 1, }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. 
+ file: "testdata/pax-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }}, }} for _, v := range vectors { @@ -1212,7 +1252,7 @@ func TestReadGNUSparsePAXHeaders(t *testing.T) { func TestFileReader(t *testing.T) { type ( - testRead struct { // ReadN(cnt) == (wantStr, wantErr) + testRead struct { // Read(cnt) == (wantStr, wantErr) cnt int wantStr string wantErr error @@ -1228,22 +1268,24 @@ func TestFileReader(t *testing.T) { testFnc interface{} // testRead | testDiscard | testRemaining ) - makeReg := func(s string, n int) fileReader { - return &regFileReader{strings.NewReader(s), int64(n)} - } - makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader { - if !validateSparseEntries(spd, size) { - t.Fatalf("invalid sparse map: %v", spd) + type ( + makeReg struct { + str string + size int64 } - sph := invertSparseEntries(append([]SparseEntry{}, spd...), size) - return &sparseFileReader{fr, sph, 0} - } + makeSparse struct { + makeReg makeReg + spd sparseDatas + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) vectors := []struct { - fr fileReader + maker fileMaker tests []testFnc }{{ - fr: makeReg("", 0), + maker: makeReg{"", 0}, tests: []testFnc{ testRemaining{0}, testRead{0, "", io.EOF}, @@ -1253,7 +1295,7 @@ func TestFileReader(t *testing.T) { testRemaining{0}, }, }, { - fr: makeReg("", 1), + maker: makeReg{"", 1}, tests: []testFnc{ testRemaining{1}, testRead{0, "", io.ErrUnexpectedEOF}, @@ -1263,14 +1305,14 @@ func TestFileReader(t *testing.T) { testRemaining{1}, }, }, { - fr: makeReg("hello", 5), + maker: 
makeReg{"hello", 5}, tests: []testFnc{ testRemaining{5}, testRead{5, "hello", io.EOF}, testRemaining{0}, }, }, { - fr: makeReg("hello, world", 50), + maker: makeReg{"hello, world", 50}, tests: []testFnc{ testRemaining{50}, testDiscard{7, 7, nil}, @@ -1282,7 +1324,7 @@ func TestFileReader(t *testing.T) { testRemaining{38}, }, }, { - fr: makeReg("hello, world", 5), + maker: makeReg{"hello, world", 5}, tests: []testFnc{ testRemaining{5}, testRead{0, "", nil}, @@ -1294,7 +1336,7 @@ func TestFileReader(t *testing.T) { testRead{0, "", io.EOF}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, tests: []testFnc{ testRemaining{8}, testRead{3, "ab\x00", nil}, @@ -1302,92 +1344,92 @@ func TestFileReader(t *testing.T) { testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, tests: []testFnc{ testRemaining{8}, testDiscard{100, 8, io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, tests: []testFnc{ testRemaining{10}, testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 3}}, 10), + maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, tests: []testFnc{ testRemaining{10}, testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, testRemaining{4}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8}, testRead{8, "\x00abc\x00\x00de", io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 
3}, {6, 0}, {6, 0}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8}, testRead{8, "\x00abc\x00\x00de", io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, }, }, { - fr: makeSparse(makeReg("", 0), sparseDatas{}, 2), + maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2}, tests: []testFnc{ testRead{100, "\x00\x00", io.EOF}, }, }, { - fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00ab", errMissData}, }, }, { - fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00ab", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00", errMissData}, }, }, { - fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcde", 5}, 
sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de", errMissData}, }, }, { - fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRemaining{15}, testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, @@ -1395,7 +1437,7 @@ func TestFileReader(t *testing.T) { testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRemaining{15}, testDiscard{100, 15, errUnrefData}, @@ -1405,21 +1447,38 @@ func TestFileReader(t *testing.T) { }} for i, v := range vectors { + var fr fileReader + switch maker := v.maker.(type) { + case makeReg: + r := strings.NewReader(maker.str) + fr = &regFileReader{r, maker.size} + case makeSparse: + if !validateSparseEntries(maker.spd, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.spd) + } + sph := invertSparseEntries(maker.spd, maker.size) + r := strings.NewReader(maker.makeReg.str) + fr = &regFileReader{r, maker.makeReg.size} + fr = &sparseFileReader{fr, sph, 0} + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + for j, tf := range v.tests { switch tf := tf.(type) { case testRead: b := make([]byte, tf.cnt) - n, err := v.fr.Read(b) + n, err := fr.Read(b) if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) } case testDiscard: - got, err := v.fr.Discard(tf.cnt) + got, err := fr.Discard(tf.cnt) if got != tf.wantCnt || err != tf.wantErr { 
t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr) } case testRemaining: - got := v.fr.Remaining() + got := fr.Remaining() if got != tf.wantCnt { t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt) } diff --git a/src/archive/tar/testdata/gnu-nil-sparse-data.tar b/src/archive/tar/testdata/gnu-nil-sparse-data.tar new file mode 100644 index 0000000000..df1aa83453 Binary files /dev/null and b/src/archive/tar/testdata/gnu-nil-sparse-data.tar differ diff --git a/src/archive/tar/testdata/gnu-nil-sparse-hole.tar b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar new file mode 100644 index 0000000000..496abfeb78 Binary files /dev/null and b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar differ diff --git a/src/archive/tar/testdata/gnu-sparse-big.tar b/src/archive/tar/testdata/gnu-sparse-big.tar new file mode 100644 index 0000000000..1a5cfc96d9 Binary files /dev/null and b/src/archive/tar/testdata/gnu-sparse-big.tar differ diff --git a/src/archive/tar/testdata/pax-nil-sparse-data.tar b/src/archive/tar/testdata/pax-nil-sparse-data.tar new file mode 100644 index 0000000000..e59bd94117 Binary files /dev/null and b/src/archive/tar/testdata/pax-nil-sparse-data.tar differ diff --git a/src/archive/tar/testdata/pax-nil-sparse-hole.tar b/src/archive/tar/testdata/pax-nil-sparse-hole.tar new file mode 100644 index 0000000000..b44327bdbf Binary files /dev/null and b/src/archive/tar/testdata/pax-nil-sparse-hole.tar differ diff --git a/src/archive/tar/testdata/pax-sparse-big.tar b/src/archive/tar/testdata/pax-sparse-big.tar new file mode 100644 index 0000000000..65d1f8eceb Binary files /dev/null and b/src/archive/tar/testdata/pax-sparse-big.tar differ diff --git a/src/archive/tar/testdata/writer-big-long.tar b/src/archive/tar/testdata/writer-big-long.tar index 4bfd519603..09fc5dd3dd 100644 Binary files a/src/archive/tar/testdata/writer-big-long.tar and b/src/archive/tar/testdata/writer-big-long.tar differ 
diff --git a/src/archive/tar/testdata/writer-big.tar b/src/archive/tar/testdata/writer-big.tar index f838ada81b..0dadee70c1 100644 Binary files a/src/archive/tar/testdata/writer-big.tar and b/src/archive/tar/testdata/writer-big.tar differ diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go index c9237c8329..cc4701c627 100644 --- a/src/archive/tar/writer.go +++ b/src/archive/tar/writer.go @@ -10,6 +10,7 @@ import ( "io" "path" "sort" + "strconv" "strings" "time" ) @@ -19,11 +20,11 @@ import ( // Call WriteHeader to begin a new file, and then call Write to supply that file's data, // writing at most hdr.Size bytes in total. type Writer struct { - w io.Writer - nb int64 // number of unwritten bytes for current file entry - pad int64 // amount of padding to write after current file entry - hdr Header // Shallow copy of Header that is safe for mutations - blk block // Buffer to use as temporary local storage + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Writer to @@ -32,7 +33,16 @@ type Writer struct { } // NewWriter creates a new Writer writing to w. -func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: &regFileWriter{w, 0}} +} + +type fileWriter interface { + io.Writer + fileState + + FillZeros(n int64) (int64, error) +} // Flush finishes writing the current file's block padding. // The current file must be fully written before Flush can be called. 
@@ -43,8 +53,8 @@ func (tw *Writer) Flush() error { if tw.err != nil { return tw.err } - if tw.nb > 0 { - return fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) + if nb := tw.curr.Remaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) } if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { return tw.err @@ -96,6 +106,39 @@ func (tw *Writer) writeUSTARHeader(hdr *Header) error { } func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName + } + // Write PAX records to the output. if len(paxHdrs) > 0 { // Sort keys for deterministic ordering. @@ -116,7 +159,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { } // Write the extended header file. 
- dir, file := path.Split(hdr.Name) + dir, file := path.Split(realName) name := path.Join(dir, "PaxHeaders.0", file) data := buf.String() if err := tw.writeRawFile(name, data, TypeXHeader, formatPAX); err != nil { @@ -129,13 +172,22 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) blk.SetFormat(formatPAX) - return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil } func (tw *Writer) writeGNUHeader(hdr *Header) error { - // TODO(dsnet): Support writing sparse files. - // See https://golang.org/issue/13548 - // Use long-link files if Name or Linkname exceeds the field size. const longName = "././@LongLink" if len(hdr.Name) > nameSize { @@ -153,6 +205,8 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error { // Pack the main header. var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) if !hdr.AccessTime.IsZero() { f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) @@ -160,8 +214,54 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error { if !hdr.ChangeTime.IsZero() { f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) } + if hdr.Typeflag == TypeGNUSparse { + sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. 
+ formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } blk.SetFormat(formatGNU) - return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil } type ( @@ -249,7 +349,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { if isHeaderOnlyType(flag) { size = 0 } - tw.nb = size + tw.curr = &regFileWriter{tw.w, size} tw.pad = blockPadding(size) return nil } @@ -279,6 +379,9 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { // Write returns the error ErrWriteTooLong if more than // Header.Size bytes are written after WriteHeader. // +// If the current file is sparse, then the regions marked as a sparse hole +// must be written as NUL-bytes. 
+// // Calling Write on special types like TypeLink, TypeSymLink, TypeChar, // TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless // of what the Header.Size claims. @@ -286,17 +389,29 @@ func (tw *Writer) Write(b []byte) (int, error) { if tw.err != nil { return 0, tw.err } + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} - overwrite := int64(len(b)) > tw.nb - if overwrite { - b = b[:tw.nb] +// TODO(dsnet): Export the Writer.FillZeros method to assist in quickly zeroing +// out sections of a file. This is especially useful for efficiently +// skipping over large holes in a sparse file. + +// fillZeros writes n bytes of zeros to the current file, +// returning the number of bytes written. +// If fewer than n bytes are written, it returns a non-nil error, +// which may be ErrWriteTooLong if the current file is complete. +func (tw *Writer) fillZeros(n int64) (int64, error) { + if tw.err != nil { + return 0, tw.err } - n, err := tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - return n, ErrWriteTooLong // Non-fatal error + n, err := tw.curr.FillZeros(n) + if err != nil && err != ErrWriteTooLong { + tw.err = err } - tw.err = err return n, err } @@ -320,3 +435,135 @@ func (tw *Writer) Close() error { tw.err = ErrWriteAfterClose return err // Report IO errors } + +// regFileWriter is a fileWriter for writing data to a regular file entry. 
+type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (int, error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + n, err := fw.w.Write(b) + fw.nb -= int64(n) + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) FillZeros(n int64) (int64, error) { + return io.CopyN(fw, zeroReader{}, n) +} + +func (fw regFileWriter) Remaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.Remaining() + if overwrite { + b = b[:sw.Remaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.Remaining() == 0 && sw.fw.Remaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) 
FillZeros(n int64) (int64, error) { + overwrite := n > sw.Remaining() + if overwrite { + n = sw.Remaining() + } + + var realFill int64 // Number of real data bytes to fill + endPos := sw.pos + n + for endPos > sw.pos { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = min(endPos-sw.pos, dataStart-sw.pos) + } else { // In a data fragment + nf = min(endPos-sw.pos, dataEnd-sw.pos) + realFill += nf + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + _, err := sw.fw.FillZeros(realFill) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.Remaining() == 0 && sw.fw.Remaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw sparseFileWriter) Remaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. 
+type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go index 9cfc225611..def9c0110d 100644 --- a/src/archive/tar/writer_test.go +++ b/src/archive/tar/writer_test.go @@ -49,83 +49,97 @@ func bytediff(a, b []byte) string { } func TestWriter(t *testing.T) { - type entry struct { - header *Header - contents string - } + type ( + testHeader struct { // WriteHeader(&hdr) == wantErr + hdr Header + wantErr error + } + testWrite struct { // Write([]byte(str)) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testFill struct { // fillZeros(cnt) == (wantCnt, wantErr) + cnt int64 + wantCnt int64 + wantErr error + } + testClose struct { // Close() == wantErr + wantErr error + } + testFnc interface{} // testHeader | testWrite | testFill | testClose + ) vectors := []struct { - file string // filename of expected output - entries []*entry - err error // expected error on WriteHeader + file string // Optional filename of expected output + tests []testFnc }{{ // The writer test file was produced with this command: // tar (GNU tar) 1.26 // ln -s small.txt link.txt // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt file: "testdata/writer.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "small.txt", + Size: 5, Mode: 0640, Uid: 73025, Gid: 5000, - Size: 5, - ModTime: time.Unix(1246508266, 0), - Typeflag: '0', Uname: "dsymonds", Gname: "eng", - }, - contents: "Kilts", - }, { - header: &Header{ + ModTime: time.Unix(1246508266, 0), + }, nil}, + testWrite{"Kilts", 5, nil}, + + testHeader{Header{ + Typeflag: TypeReg, Name: "small2.txt", + Size: 11, Mode: 0640, Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1245217492, 0), - Typeflag: '0', Uname: "dsymonds", Gname: "eng", - }, - 
contents: "Google.com\n", - }, { - header: &Header{ + Gid: 5000, + ModTime: time.Unix(1245217492, 0), + }, nil}, + testWrite{"Google.com\n", 11, nil}, + + testHeader{Header{ + Typeflag: TypeSymlink, Name: "link.txt", + Linkname: "small.txt", Mode: 0777, Uid: 1000, Gid: 1000, - Size: 0, - ModTime: time.Unix(1314603082, 0), - Typeflag: '2', - Linkname: "small.txt", Uname: "strings", Gname: "strings", - }, - // no contents - }}, + ModTime: time.Unix(1314603082, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, }, { // The truncated test file was produced using these commands: // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar file: "testdata/writer-big.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "tmp/16gig.txt", + Size: 16 << 30, Mode: 0640, Uid: 73025, Gid: 5000, - Size: 16 << 30, - ModTime: time.Unix(1254699560, 0), - Typeflag: '0', Uname: "dsymonds", Gname: "eng", + ModTime: time.Unix(1254699560, 0), Devminor: -1, // Force use of GNU format - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }}, + }, nil}, + }, }, { // This truncated file was produced using this library. // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. @@ -135,141 +149,265 @@ func TestWriter(t *testing.T) { // // This file is in PAX format. file: "testdata/writer-big-long.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: strings.Repeat("longname/", 15) + "16gig.txt", + Size: 16 << 30, Mode: 0644, Uid: 1000, Gid: 1000, - Size: 16 << 30, - ModTime: time.Unix(1399583047, 0), - Typeflag: '0', Uname: "guillaume", Gname: "guillaume", - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }}, + ModTime: time.Unix(1399583047, 0), + }, nil}, + }, }, { // This file was produced using GNU tar v1.17. 
// gnutar -b 4 --format=ustar (longname/)*15 + file.txt file: "testdata/ustar.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: strings.Repeat("longname/", 15) + "file.txt", + Size: 6, Mode: 0644, - Uid: 0765, - Gid: 024, - Size: 06, - ModTime: time.Unix(1360135598, 0), - Typeflag: '0', + Uid: 501, + Gid: 20, Uname: "shane", Gname: "staff", - }, - contents: "hello\n", - }}, - }, { - // This file was produced using gnu tar 1.26 - // echo "Slartibartfast" > file.txt - // ln file.txt hard.txt - // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + ModTime: time.Unix(1360135598, 0), + }, nil}, + testWrite{"hello\n", 6, nil}, + testClose{nil}, + }, + }, { + // This file was produced using GNU tar v1.26: + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt file: "testdata/hardlink.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "file.txt", + Size: 15, Mode: 0644, Uid: 1000, Gid: 100, - Size: 15, - ModTime: time.Unix(1425484303, 0), - Typeflag: '0', Uname: "vbatts", Gname: "users", - }, - contents: "Slartibartfast\n", - }, { - header: &Header{ + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"Slartibartfast\n", 15, nil}, + + testHeader{Header{ + Typeflag: TypeLink, Name: "hard.txt", + Linkname: "file.txt", Mode: 0644, Uid: 1000, Gid: 100, - Size: 0, - ModTime: time.Unix(1425484303, 0), - Typeflag: '1', - Linkname: "file.txt", Uname: "vbatts", Gname: "users", - }, - // no contents - }}, + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, }, { - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "bad-null.txt", - Typeflag: '0', Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, - }, - }}, - err: ErrHeader, + }, ErrHeader}, + }, }, { - 
entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "null\x00.txt", - Typeflag: '0', - }, - }}, - err: ErrHeader, + }, ErrHeader}, + }, }, { file: "testdata/gnu-utf8.tar", - entries: []*entry{{ - header: &Header{ - Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", - Mode: 0644, - Uid: 1000, Gid: 1000, - ModTime: time.Unix(0, 0), - Typeflag: '0', + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, Uname: "☺", Gname: "⚹", + ModTime: time.Unix(0, 0), Devminor: -1, // Force use of GNU format - }, - }}, + }, nil}, + testClose{nil}, + }, }, { file: "testdata/gnu-not-utf8.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "hi\x80\x81\x82\x83bye", Mode: 0644, Uid: 1000, Gid: 1000, - ModTime: time.Unix(0, 0), - Typeflag: '0', Uname: "rawr", Gname: "dsnet", + ModTime: time.Unix(0, 0), Devminor: -1, // Force use of GNU format - }, - }}, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + 
testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "gnu-sparse", + Size: 6e10, + SparseHoles: []SparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 0, ErrWriteTooLong}, + testWrite{strings.Repeat("0123456789", 10), 0, ErrWriteTooLong}, + testClose{nil}, + }, + }, { + file: "testdata/pax-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "pax-sparse", + Size: 6e10, + SparseHoles: []SparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 
100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 0, ErrWriteTooLong}, + testWrite{strings.Repeat("0123456789", 10), 0, ErrWriteTooLong}, + testClose{nil}, + }, }} for _, v := range vectors { t.Run(path.Base(v.file), func(t *testing.T) { + const maxSize = 10 << 10 // 10KiB buf := new(bytes.Buffer) - tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB - canFail := false - for i, entry := range v.entries { - canFail = canFail || entry.header.Size > 1<<10 || v.err != nil - - err := tw.WriteHeader(entry.header) - if err != v.err { - t.Fatalf("entry %d: WriteHeader() = %v, want %v", i, err, v.err) - } - if _, err := io.WriteString(tw, entry.contents); err != nil { - t.Fatalf("entry %d: WriteString() = %v, want nil", i, err) + tw := NewWriter(iotest.TruncateWriter(buf, maxSize)) + + for i, tf := range v.tests { + switch tf := tf.(type) { + case testHeader: + err := tw.WriteHeader(&tf.hdr) + if err != tf.wantErr { + t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) + } + case testWrite: + got, err := tw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testFill: + got, err := tw.fillZeros(tf.cnt) + if got != tf.wantCnt || err != tf.wantErr { + t.Fatalf("test %d, fillZeros() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testClose: + err := tw.Close() + if err != tf.wantErr { + t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) + } + 
default: + t.Fatalf("test %d, unknown test operation: %T", i, tf) } } - // Only interested in Close failures for the small tests. - if err := tw.Close(); err != nil && !canFail { - t.Fatalf("Close() = %v, want nil", err) - } if v.file != "" { want, err := ioutil.ReadFile(v.file) @@ -758,3 +896,286 @@ func TestIssue12594(t *testing.T) { } } } + +func TestFileWriter(t *testing.T) { + type ( + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testFill struct { // FillZeros(cnt) == (wantCnt, wantErr) + cnt int64 + wantCnt int64 + wantErr error + } + testRemaining struct { // Remaining() == wantCnt + wantCnt int64 + } + testFnc interface{} // testWrite | testFill | testRemaining + ) + + type ( + makeReg struct { + size int64 + wantStr string + } + makeSparse struct { + makeReg makeReg + sph sparseHoles + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{0, ""}, + tests: []testFnc{ + testRemaining{0}, + testWrite{"", 0, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testFill{0, 0, nil}, + testFill{1, 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeReg{1, "a"}, + tests: []testFnc{ + testRemaining{1}, + testWrite{"", 0, nil}, + testWrite{"a", 1, nil}, + testWrite{"bcde", 0, ErrWriteTooLong}, + testWrite{"", 0, nil}, + testFill{0, 0, nil}, + testFill{1, 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "hello"}, + tests: []testFnc{ + testRemaining{5}, + testWrite{"hello", 5, nil}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5}, + testFill{5, 5, nil}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5}, + testFill{10, 5, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "abc\x00\x00"}, + tests: []testFnc{ + testRemaining{5}, + 
testWrite{"abc", 3, nil}, + testRemaining{2}, + testFill{2, 2, nil}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "\x00\x00abc"}, + tests: []testFnc{ + testRemaining{5}, + testFill{2, 2, nil}, + testRemaining{3}, + testWrite{"abc", 3, nil}, + testFill{1, 0, ErrWriteTooLong}, + testWrite{"z", 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testRemaining{8}, + testWrite{"ab\x00\x00\x00cde", 8, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00", 3, nil}, + testRemaining{5}, + testWrite{"\x00\x00cde", 5, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6}, + testFill{3, 3, nil}, + testRemaining{3}, + testWrite{"cde", 3, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{8, 8, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{9, 8, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{4, "\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{9, 8, errMissData}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{6, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{9, 8, errUnrefData}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + 
testRemaining{6}, + testFill{3, 3, nil}, + testRemaining{3}, + testWrite{"cde", 2, errMissData}, + testRemaining{1}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6}, + testFill{3, 3, nil}, + testRemaining{3}, + testWrite{"cde", 3, errUnrefData}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7}, + testWrite{"\x00\x00abc\x00\x00", 7, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"abcdefg", 0, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abcde", 5, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{3, "\x00\x00\x00"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{7, 7, nil}, + testFill{1, 0, ErrWriteTooLong}, + }, + }, { + maker: makeSparse{makeReg{3, "\x00\x00\x00"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{4, 4, nil}, + testFill{8, 3, ErrWriteTooLong}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{2, 2, nil}, + testRemaining{5}, + testWrite{"abc", 3, nil}, + testRemaining{2}, + testFill{2, 2, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{2, 2, nil}, + testWrite{"abc", 2, errMissData}, + testFill{2, 2, errMissData}, + }, + }, { + maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{2, 2, nil}, + testWrite{"abc", 3, nil}, + testFill{2, 2, errUnrefData}, + }, + }} + + 
for i, v := range vectors { + var wantStr string + bb := new(bytes.Buffer) + var fw fileWriter + switch maker := v.maker.(type) { + case makeReg: + fw = &regFileWriter{bb, maker.size} + wantStr = maker.wantStr + case makeSparse: + if !validateSparseEntries(maker.sph, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.sph) + } + spd := invertSparseEntries(maker.sph, maker.size) + fw = &regFileWriter{bb, maker.makeReg.size} + fw = &sparseFileWriter{fw, spd, 0} + wantStr = maker.makeReg.wantStr + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testWrite: + got, err := fw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, Write(%s):\ngot (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr) + } + case testFill: + got, err := fw.FillZeros(tf.cnt) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, FillZeros(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr) + } + case testRemaining: + got := fw.Remaining() + if got != tf.wantCnt { + t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + + if got := bb.String(); got != wantStr { + t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr) + } + } +}