From e0ab505a97eed8773ea16842f2748b6d518fedd9 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Fri, 18 Aug 2017 18:18:38 -0700 Subject: [PATCH] archive/tar: implement Writer support for sparse files This CL is the second step (of two; part1 is CL/56771) for adding sparse file support to the Writer. There are no new identifiers exported in this CL, but this does make use of Header.SparseHoles added in part1. If the Typeflag is set to TypeGNUSparse or len(SparseHoles) > 0, then the Writer will emit a sparse file, where the holes must be written by the user as zeros. If TypeGNUSparse is set, then the output file must use the GNU format. Otherwise, it must use the PAX format (with GNU-defined PAX keys). A future CL may export Reader.Discard and Writer.FillZeros, but those methods are currently unexported, and only used by the tests for efficiency reasons. Calling Discard or FillZeros on a hole 10GiB in size does take time, even if it is essentially a memcopy. Updates #13548 Change-Id: Id586d9178c227c0577f796f731ae2cbb72355601 Reviewed-on: https://go-review.googlesource.com/57212 Reviewed-by: Ian Lance Taylor --- src/archive/tar/common.go | 26 +- src/archive/tar/example_test.go | 112 ++- src/archive/tar/reader_test.go | 133 +++- .../tar/testdata/gnu-nil-sparse-data.tar | Bin 0 -> 2560 bytes .../tar/testdata/gnu-nil-sparse-hole.tar | Bin 0 -> 1536 bytes src/archive/tar/testdata/gnu-sparse-big.tar | Bin 0 -> 5120 bytes .../tar/testdata/pax-nil-sparse-data.tar | Bin 0 -> 4096 bytes .../tar/testdata/pax-nil-sparse-hole.tar | Bin 0 -> 3072 bytes src/archive/tar/testdata/pax-sparse-big.tar | Bin 0 -> 6144 bytes src/archive/tar/testdata/writer-big-long.tar | Bin 4096 -> 1536 bytes src/archive/tar/testdata/writer-big.tar | Bin 4096 -> 512 bytes src/archive/tar/writer.go | 293 +++++++- src/archive/tar/writer_test.go | 665 ++++++++++++++---- 13 files changed, 1026 insertions(+), 203 deletions(-) create mode 100644 src/archive/tar/testdata/gnu-nil-sparse-data.tar create mode 100644 
src/archive/tar/testdata/gnu-nil-sparse-hole.tar create mode 100644 src/archive/tar/testdata/gnu-sparse-big.tar create mode 100644 src/archive/tar/testdata/pax-nil-sparse-data.tar create mode 100644 src/archive/tar/testdata/pax-nil-sparse-hole.tar create mode 100644 src/archive/tar/testdata/pax-sparse-big.tar diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go index 22f3206243..b5921fef23 100644 --- a/src/archive/tar/common.go +++ b/src/archive/tar/common.go @@ -33,6 +33,7 @@ var ( ErrWriteAfterClose = errors.New("tar: write after close") errMissData = errors.New("tar: sparse file references non-existent data") errUnrefData = errors.New("tar: sparse file contains unreferenced data") + errWriteHole = errors.New("tar: write non-NUL byte in sparse hole") ) // Header type flags. @@ -74,10 +75,13 @@ type Header struct { // SparseHoles represents a sequence of holes in a sparse file. // - // The regions must be sorted in ascending order, not overlap with - // each other, and not extend past the specified Size. - // The file is sparse if either len(SparseHoles) > 0 or - // the Typeflag is set to TypeGNUSparse. + // A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse. + // A sparse file consists of fragments of data, intermixed with holes + // (described by this field). A hole is semantically a block of NUL-bytes, + // but does not actually exist within the TAR file. + // The logical size of the file is stored in the Size field, while + // the holes must be sorted in ascending order, + // not overlap with each other, and not extend past the specified Size. 
SparseHoles []SparseEntry } @@ -300,6 +304,20 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) { return formatUnknown, nil // Invalid PAX key } } + if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return formatUnknown, nil // Cannot have sparse data on header-only file + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return formatUnknown, nil + } + if h.Typeflag == TypeGNUSparse { + format &= formatGNU // GNU only + } else { + format &^= formatGNU // No GNU + } + format &^= formatUSTAR // No USTAR + } return format, paxHdrs } diff --git a/src/archive/tar/example_test.go b/src/archive/tar/example_test.go index 5f0ce2f402..b84950c797 100644 --- a/src/archive/tar/example_test.go +++ b/src/archive/tar/example_test.go @@ -7,20 +7,20 @@ package tar_test import ( "archive/tar" "bytes" + "crypto/md5" "fmt" "io" + "io/ioutil" "log" "os" + "strings" ) func Example() { - // Create a buffer to write our archive to. buf := new(bytes.Buffer) - // Create a new tar archive. + // Create and add some files to the archive. tw := tar.NewWriter(buf) - - // Add some files to the archive. var files = []struct { Name, Body string }{ @@ -35,34 +35,29 @@ func Example() { Size: int64(len(file.Body)), } if err := tw.WriteHeader(hdr); err != nil { - log.Fatalln(err) + log.Fatal(err) } if _, err := tw.Write([]byte(file.Body)); err != nil { - log.Fatalln(err) + log.Fatal(err) } } - // Make sure to check the error on Close. if err := tw.Close(); err != nil { - log.Fatalln(err) + log.Fatal(err) } - // Open the tar archive for reading. - r := bytes.NewReader(buf.Bytes()) - tr := tar.NewReader(r) - - // Iterate through the files in the archive. + // Open and iterate through the files in the archive. 
+ tr := tar.NewReader(buf) for { hdr, err := tr.Next() if err == io.EOF { - // end of tar archive - break + break // End of archive } if err != nil { - log.Fatalln(err) + log.Fatal(err) } fmt.Printf("Contents of %s:\n", hdr.Name) if _, err := io.Copy(os.Stdout, tr); err != nil { - log.Fatalln(err) + log.Fatal(err) } fmt.Println() } @@ -78,3 +73,86 @@ func Example() { // Contents of todo.txt: // Get animal handling license. } + +// A sparse file can efficiently represent a large file that is mostly empty. +func Example_sparse() { + buf := new(bytes.Buffer) + + // Define a sparse file to add to the archive. + // This sparse file contains 5 data fragments, and 4 hole fragments. + // The logical size of the file is 16 KiB, while the physical size of the + // file is only 3 KiB (not counting the header data). + hdr := &tar.Header{ + Name: "sparse.db", + Size: 16384, + SparseHoles: []tar.SparseEntry{ + // Data fragment at 0..1023 + {Offset: 1024, Length: 1024 - 512}, // Hole fragment at 1024..1535 + // Data fragment at 1536..2047 + {Offset: 2048, Length: 2048 - 512}, // Hole fragment at 2048..3583 + // Data fragment at 3584..4095 + {Offset: 4096, Length: 4096 - 512}, // Hole fragment at 4096..7679 + // Data fragment at 7680..8191 + {Offset: 8192, Length: 8192 - 512}, // Hole fragment at 8192..15871 + // Data fragment at 15872..16383 + }, + } + + // The regions marked as a sparse hole are filled with NUL-bytes. + // The total length of the body content must match the specified Size field. 
+ body := "" + + strings.Repeat("A", 1024) + + strings.Repeat("\x00", 1024-512) + + strings.Repeat("B", 512) + + strings.Repeat("\x00", 2048-512) + + strings.Repeat("C", 512) + + strings.Repeat("\x00", 4096-512) + + strings.Repeat("D", 512) + + strings.Repeat("\x00", 8192-512) + + strings.Repeat("E", 512) + + h := md5.Sum([]byte(body)) + fmt.Printf("Write content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h) + fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles) + + // Create a new archive and write the sparse file. + tw := tar.NewWriter(buf) + if err := tw.WriteHeader(hdr); err != nil { + log.Fatal(err) + } + if _, err := tw.Write([]byte(body)); err != nil { + log.Fatal(err) + } + if err := tw.Close(); err != nil { + log.Fatal(err) + } + + // Open and iterate through the files in the archive. + tr := tar.NewReader(buf) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + body, err := ioutil.ReadAll(tr) + if err != nil { + log.Fatal(err) + } + + h := md5.Sum([]byte(body)) + fmt.Printf("Read content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h) + fmt.Printf("Read SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles) + } + + // Output: + // Write content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9 + // Write SparseHoles of sparse.db: + // [{1024 512} {2048 1536} {4096 3584} {8192 7680}] + // + // Read content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9 + // Read SparseHoles of sparse.db: + // [{1024 512} {2048 1536} {4096 3584} {8192 7680} {16384 0}] +} diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index 9b7896132a..fb7dcfeece 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -500,6 +500,46 @@ func TestReader(t *testing.T) { Devmajor: 1, Devminor: 1, }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. 
+ file: "testdata/gnu-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }}, }} for _, v := range vectors { @@ -1212,7 +1252,7 @@ func TestReadGNUSparsePAXHeaders(t *testing.T) { func TestFileReader(t *testing.T) { type ( - testRead struct { // ReadN(cnt) == (wantStr, wantErr) + testRead struct { // Read(cnt) == (wantStr, wantErr) cnt int wantStr string wantErr error @@ -1228,22 +1268,24 @@ func TestFileReader(t *testing.T) { testFnc interface{} // testRead | testDiscard | testRemaining ) - makeReg := func(s string, n int) fileReader { - return &regFileReader{strings.NewReader(s), int64(n)} - } - makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader { - if !validateSparseEntries(spd, size) { - t.Fatalf("invalid sparse map: %v", spd) + type ( + makeReg struct { + str string + size int64 } - sph := invertSparseEntries(append([]SparseEntry{}, spd...), size) - return &sparseFileReader{fr, sph, 0} - } + makeSparse struct { + makeReg makeReg + spd 
sparseDatas + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) vectors := []struct { - fr fileReader + maker fileMaker tests []testFnc }{{ - fr: makeReg("", 0), + maker: makeReg{"", 0}, tests: []testFnc{ testRemaining{0}, testRead{0, "", io.EOF}, @@ -1253,7 +1295,7 @@ func TestFileReader(t *testing.T) { testRemaining{0}, }, }, { - fr: makeReg("", 1), + maker: makeReg{"", 1}, tests: []testFnc{ testRemaining{1}, testRead{0, "", io.ErrUnexpectedEOF}, @@ -1263,14 +1305,14 @@ func TestFileReader(t *testing.T) { testRemaining{1}, }, }, { - fr: makeReg("hello", 5), + maker: makeReg{"hello", 5}, tests: []testFnc{ testRemaining{5}, testRead{5, "hello", io.EOF}, testRemaining{0}, }, }, { - fr: makeReg("hello, world", 50), + maker: makeReg{"hello, world", 50}, tests: []testFnc{ testRemaining{50}, testDiscard{7, 7, nil}, @@ -1282,7 +1324,7 @@ func TestFileReader(t *testing.T) { testRemaining{38}, }, }, { - fr: makeReg("hello, world", 5), + maker: makeReg{"hello, world", 5}, tests: []testFnc{ testRemaining{5}, testRead{0, "", nil}, @@ -1294,7 +1336,7 @@ func TestFileReader(t *testing.T) { testRead{0, "", io.EOF}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, tests: []testFnc{ testRemaining{8}, testRead{3, "ab\x00", nil}, @@ -1302,92 +1344,92 @@ func TestFileReader(t *testing.T) { testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, tests: []testFnc{ testRemaining{8}, testDiscard{100, 8, io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, tests: []testFnc{ testRemaining{10}, testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 
3}}, 10), + maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, tests: []testFnc{ testRemaining{10}, testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, testRemaining{4}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8}, testRead{8, "\x00abc\x00\x00de", io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8}, testRead{8, "\x00abc\x00\x00de", io.EOF}, testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, }, }, { - fr: makeSparse(makeReg("", 0), sparseDatas{}, 2), + maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2}, tests: []testFnc{ testRead{100, "\x00\x00", io.EOF}, }, }, { - fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00ab", errMissData}, }, }, { - fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, 
"\x00ab", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00", errMissData}, }, }, { - fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de", errMissData}, }, }, { - fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, }, }, { - fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRemaining{15}, testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, @@ -1395,7 +1437,7 @@ func TestFileReader(t *testing.T) { testRemaining{0}, }, }, { - fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15), + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRemaining{15}, testDiscard{100, 15, errUnrefData}, @@ -1405,21 +1447,38 @@ func TestFileReader(t *testing.T) { }} for i, v := range vectors { + var fr fileReader + switch maker := v.maker.(type) { + case makeReg: + r := strings.NewReader(maker.str) + fr = &regFileReader{r, maker.size} + case makeSparse: + if !validateSparseEntries(maker.spd, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.spd) + } + sph := invertSparseEntries(maker.spd, maker.size) + r := strings.NewReader(maker.makeReg.str) + fr = &regFileReader{r, 
maker.makeReg.size} + fr = &sparseFileReader{fr, sph, 0} + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + for j, tf := range v.tests { switch tf := tf.(type) { case testRead: b := make([]byte, tf.cnt) - n, err := v.fr.Read(b) + n, err := fr.Read(b) if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) } case testDiscard: - got, err := v.fr.Discard(tf.cnt) + got, err := fr.Discard(tf.cnt) if got != tf.wantCnt || err != tf.wantErr { t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr) } case testRemaining: - got := v.fr.Remaining() + got := fr.Remaining() if got != tf.wantCnt { t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt) } diff --git a/src/archive/tar/testdata/gnu-nil-sparse-data.tar b/src/archive/tar/testdata/gnu-nil-sparse-data.tar new file mode 100644 index 0000000000000000000000000000000000000000..df1aa834538e74f20f303e5d9c7fb4ba8fea1e82 GIT binary patch literal 2560 zcmXReNGvK&)k{fYpdB!P00tbip}DC6Ob(=k!NAbO(9oDcA((bfLYQ1yT#{I%pnwn{ zlZEa%qK$!Sg!ml70fz|D1`tvRwE(-J28Kq)CZ=ZQ7M7!|(J&ZI2g5Zl5Go`_8wwCI?c&U|<9U<_rqKG;!KK9|iA4$uNFwAh z(LG1BDGd;nOd;HlV4+|L3I(V*%L?&h*u>fXD6hgM&KaQJha%1hHHi0zdj!NPWD+XL zy~Mf*MHfO-7>YQu-cj~w2#kgR?Lxr7(8$=t)Xdz%l6EZGxqj4SN<)Blv5qjAQa6t3 zr(FmjVu^Mv+PQwzWJ*JTcCn5ynNl~7>Ze@@AYzGjEZVt#)MQFSfOfHtFqu*}j_Mx` Lfzc2cx*-4no2;l4 literal 0 HcmV?d00001 diff --git a/src/archive/tar/testdata/pax-nil-sparse-data.tar b/src/archive/tar/testdata/pax-nil-sparse-data.tar new file mode 100644 index 0000000000000000000000000000000000000000..e59bd94117d9e467143592dcfe8e69767ba935ee GIT binary patch literal 4096 zcmeH|%?`pK497Y16nlaR-KU3L)srzXJ^*GMX0~ZSlf|dEI;YDLZfG(>FR+%d@^62z z6WvCNQYsy?g}w--Wmq1GRb#yo!^n6FL+rVPFb)i25_DFxFl=_ySE2CthOp~yfMC5n zgzw5X;&fI8rxS10qL)!vj@P!Lk;SKDDCFaylA_ktT^1Y*N_iCY9|;HQx}x(s9~OWA 
zqJCA?@<&&If%}1Z$P#4!Ysi0;#K`|&n6$PSPx2 Y`kB|j_kpV{SiUy=c3uImfCCCV00+Y!FaQ7m literal 0 HcmV?d00001 diff --git a/src/archive/tar/testdata/pax-nil-sparse-hole.tar b/src/archive/tar/testdata/pax-nil-sparse-hole.tar new file mode 100644 index 0000000000000000000000000000000000000000..b44327bdbfb343658e5142f2129d7688f3044021 GIT binary patch literal 3072 zcmeHH%?iRW4DPv4(I=Qq#{4<#s-8p;AE2@vCi{_AVfgfR)5#e0R)vk`(l!Zg@_qSw zRL!1LM%qMjVQxwrin0syYVvK&u}WA%A8S*X1e6#v(Ri)GaoNM%mA212!gF^cirwKH zeV8xRwbm){kvFfbS9078b1PL+`kfe#>tRrnDop;Q6fB_}P;`d`kk1{^iS=j6KXt0# z<^}!e3vlSZ*L*u7{k~1L`i~P_G4U5o`qKZn$%)JX@J^vJ=yD>2g)##I0fB&kCj?&3 CI8e<1 literal 0 HcmV?d00001 diff --git a/src/archive/tar/testdata/pax-sparse-big.tar b/src/archive/tar/testdata/pax-sparse-big.tar new file mode 100644 index 0000000000000000000000000000000000000000..65d1f8eceb084955d03ef41f6efbbed0ab3ab07d GIT binary patch literal 6144 zcmeHJOKQU~5cOK8@CD-NV_D9^+a{}0C_R9nS~%o~)PTw9quL1*1m0OePDUHejKqB2 z(=-0k-Nxu&V_iX)C*3XFNn2;KDHk19HnLNu`2FnvN=6yyTxEGTWfNn$&hAfJUw-WT z#p9gk>(8HJV7=Ju?a?-j1kv{3qm&5xc=w?9rhykjgnT@;M()2-EV}6ypIW4 zm*mBPoymBf{O?}zAMMUW`UKowz+L0vu6oVGGx7QtN_hQ0MUzwMKOAx<{8wZ(tg^0& zWSp?T=YcN*zYKgC_%p!)+e8FXL%{zBiBeXoUd6Lgn;H871mdv{nV}k5sUZL?#j{eI p8T$bQ;;{~yp&DALApk7Jvr?NG`vC;vu@0G`8d`_|BJgeqTmi{^;4uII literal 0 HcmV?d00001 diff --git a/src/archive/tar/testdata/writer-big-long.tar b/src/archive/tar/testdata/writer-big-long.tar index 4bfd519603321100ff1aabd07a0f07aede19c62b..09fc5dd3dd7fc5de3b6d22461fa23152fd499a41 100644 GIT binary patch delta 6 NcmZorXy94E1^@>T0n-2g delta 46 UcmZqRX;4_ew)p^)10xXx0C%Skg8%>k diff --git a/src/archive/tar/testdata/writer-big.tar b/src/archive/tar/testdata/writer-big.tar index f838ada81b100f1daf84937e84579a4971cfd158..0dadee70c1a53de229cb771ce30e60d88f547351 100644 GIT binary patch delta 6 NcmZorXkb~u1ONu~0mlFU delta 106 UcmZo*X;4_eG`WCr0*!G40EZqaD*ylh diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go index c9237c8329..cc4701c627 100644 --- 
a/src/archive/tar/writer.go +++ b/src/archive/tar/writer.go @@ -10,6 +10,7 @@ import ( "io" "path" "sort" + "strconv" "strings" "time" ) @@ -19,11 +20,11 @@ import ( // Call WriteHeader to begin a new file, and then call Write to supply that file's data, // writing at most hdr.Size bytes in total. type Writer struct { - w io.Writer - nb int64 // number of unwritten bytes for current file entry - pad int64 // amount of padding to write after current file entry - hdr Header // Shallow copy of Header that is safe for mutations - blk block // Buffer to use as temporary local storage + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Writer to @@ -32,7 +33,16 @@ type Writer struct { } // NewWriter creates a new Writer writing to w. -func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: &regFileWriter{w, 0}} +} + +type fileWriter interface { + io.Writer + fileState + + FillZeros(n int64) (int64, error) +} // Flush finishes writing the current file's block padding. // The current file must be fully written before Flush can be called. @@ -43,8 +53,8 @@ func (tw *Writer) Flush() error { if tw.err != nil { return tw.err } - if tw.nb > 0 { - return fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) + if nb := tw.curr.Remaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) } if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { return tw.err @@ -96,6 +106,39 @@ func (tw *Writer) writeUSTARHeader(hdr *Header) error { } func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // Handle sparse files. 
+ var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName + } + // Write PAX records to the output. if len(paxHdrs) > 0 { // Sort keys for deterministic ordering. @@ -116,7 +159,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { } // Write the extended header file. 
- dir, file := path.Split(hdr.Name) + dir, file := path.Split(realName) name := path.Join(dir, "PaxHeaders.0", file) data := buf.String() if err := tw.writeRawFile(name, data, TypeXHeader, formatPAX); err != nil { @@ -129,13 +172,22 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) blk.SetFormat(formatPAX) - return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil } func (tw *Writer) writeGNUHeader(hdr *Header) error { - // TODO(dsnet): Support writing sparse files. - // See https://golang.org/issue/13548 - // Use long-link files if Name or Linkname exceeds the field size. const longName = "././@LongLink" if len(hdr.Name) > nameSize { @@ -153,6 +205,8 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error { // Pack the main header. var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) if !hdr.AccessTime.IsZero() { f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) @@ -160,8 +214,54 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error { if !hdr.ChangeTime.IsZero() { f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) } + if hdr.Typeflag == TypeGNUSparse { + sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. 
+ formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } blk.SetFormat(formatGNU) - return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil } type ( @@ -249,7 +349,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { if isHeaderOnlyType(flag) { size = 0 } - tw.nb = size + tw.curr = &regFileWriter{tw.w, size} tw.pad = blockPadding(size) return nil } @@ -279,6 +379,9 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { // Write returns the error ErrWriteTooLong if more than // Header.Size bytes are written after WriteHeader. // +// If the current file is sparse, then the regions marked as a sparse hole +// must be written as NUL-bytes. 
+// +// Calling Write on special types like TypeLink, TypeSymLink, TypeChar, // TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless // of what the Header.Size claims. @@ -286,17 +389,29 @@ func (tw *Writer) Write(b []byte) (int, error) { if tw.err != nil { return 0, tw.err } + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} - overwrite := int64(len(b)) > tw.nb - if overwrite { - b = b[:tw.nb] +// TODO(dsnet): Export the Writer.FillZeros method to assist in quickly zeroing +// out sections of a file. This is especially useful for efficiently +// skipping over large holes in a sparse file. + +// fillZeros writes n bytes of zeros to the current file, +// returning the number of bytes written. +// If fewer than n bytes are written, it returns a non-nil error, +// which may be ErrWriteTooLong if the current file is complete. +func (tw *Writer) fillZeros(n int64) (int64, error) { + if tw.err != nil { + return 0, tw.err } - n, err := tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - return n, ErrWriteTooLong // Non-fatal error + n, err := tw.curr.FillZeros(n) + if err != nil && err != ErrWriteTooLong { + tw.err = err } - tw.err = err return n, err } @@ -320,3 +435,135 @@ func (tw *Writer) Close() error { tw.err = ErrWriteAfterClose return err // Report IO errors } + +// regFileWriter is a fileWriter for writing data to a regular file entry. 
+type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (int, error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + n, err := fw.w.Write(b) + fw.nb -= int64(n) + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) FillZeros(n int64) (int64, error) { + return io.CopyN(fw, zeroReader{}, n) +} + +func (fw regFileWriter) Remaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.Remaining() + if overwrite { + b = b[:sw.Remaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.Remaining() == 0 && sw.fw.Remaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) 
FillZeros(n int64) (int64, error) { + overwrite := n > sw.Remaining() + if overwrite { + n = sw.Remaining() + } + + var realFill int64 // Number of real data bytes to fill + endPos := sw.pos + n + for endPos > sw.pos { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = min(endPos-sw.pos, dataStart-sw.pos) + } else { // In a data fragment + nf = min(endPos-sw.pos, dataEnd-sw.pos) + realFill += nf + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + _, err := sw.fw.FillZeros(realFill) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.Remaining() == 0 && sw.fw.Remaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw sparseFileWriter) Remaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. 
+type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go index 9cfc225611..def9c0110d 100644 --- a/src/archive/tar/writer_test.go +++ b/src/archive/tar/writer_test.go @@ -49,83 +49,97 @@ func bytediff(a, b []byte) string { } func TestWriter(t *testing.T) { - type entry struct { - header *Header - contents string - } + type ( + testHeader struct { // WriteHeader(&hdr) == wantErr + hdr Header + wantErr error + } + testWrite struct { // Write([]byte(str)) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testFill struct { // fillZeros(cnt) == (wantCnt, wantErr) + cnt int64 + wantCnt int64 + wantErr error + } + testClose struct { // Close() == wantErr + wantErr error + } + testFnc interface{} // testHeader | testWrite | testFill | testClose + ) vectors := []struct { - file string // filename of expected output - entries []*entry - err error // expected error on WriteHeader + file string // Optional filename of expected output + tests []testFnc }{{ // The writer test file was produced with this command: // tar (GNU tar) 1.26 // ln -s small.txt link.txt // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt file: "testdata/writer.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "small.txt", + Size: 5, Mode: 0640, Uid: 73025, Gid: 5000, - Size: 5, - ModTime: time.Unix(1246508266, 0), - Typeflag: '0', Uname: "dsymonds", Gname: "eng", - }, - contents: "Kilts", - }, { - header: &Header{ + ModTime: time.Unix(1246508266, 0), + }, nil}, + testWrite{"Kilts", 5, nil}, + + testHeader{Header{ + Typeflag: TypeReg, Name: "small2.txt", + Size: 11, Mode: 0640, Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1245217492, 0), - Typeflag: '0', Uname: "dsymonds", Gname: "eng", - }, - 
contents: "Google.com\n", - }, { - header: &Header{ + Gid: 5000, + ModTime: time.Unix(1245217492, 0), + }, nil}, + testWrite{"Google.com\n", 11, nil}, + + testHeader{Header{ + Typeflag: TypeSymlink, Name: "link.txt", + Linkname: "small.txt", Mode: 0777, Uid: 1000, Gid: 1000, - Size: 0, - ModTime: time.Unix(1314603082, 0), - Typeflag: '2', - Linkname: "small.txt", Uname: "strings", Gname: "strings", - }, - // no contents - }}, + ModTime: time.Unix(1314603082, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, }, { // The truncated test file was produced using these commands: // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar file: "testdata/writer-big.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "tmp/16gig.txt", + Size: 16 << 30, Mode: 0640, Uid: 73025, Gid: 5000, - Size: 16 << 30, - ModTime: time.Unix(1254699560, 0), - Typeflag: '0', Uname: "dsymonds", Gname: "eng", + ModTime: time.Unix(1254699560, 0), Devminor: -1, // Force use of GNU format - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }}, + }, nil}, + }, }, { // This truncated file was produced using this library. // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. @@ -135,141 +149,265 @@ func TestWriter(t *testing.T) { // // This file is in PAX format. file: "testdata/writer-big-long.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: strings.Repeat("longname/", 15) + "16gig.txt", + Size: 16 << 30, Mode: 0644, Uid: 1000, Gid: 1000, - Size: 16 << 30, - ModTime: time.Unix(1399583047, 0), - Typeflag: '0', Uname: "guillaume", Gname: "guillaume", - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }}, + ModTime: time.Unix(1399583047, 0), + }, nil}, + }, }, { // This file was produced using GNU tar v1.17. 
// gnutar -b 4 --format=ustar (longname/)*15 + file.txt file: "testdata/ustar.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: strings.Repeat("longname/", 15) + "file.txt", + Size: 6, Mode: 0644, - Uid: 0765, - Gid: 024, - Size: 06, - ModTime: time.Unix(1360135598, 0), - Typeflag: '0', + Uid: 501, + Gid: 20, Uname: "shane", Gname: "staff", - }, - contents: "hello\n", - }}, - }, { - // This file was produced using gnu tar 1.26 - // echo "Slartibartfast" > file.txt - // ln file.txt hard.txt - // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + ModTime: time.Unix(1360135598, 0), + }, nil}, + testWrite{"hello\n", 6, nil}, + testClose{nil}, + }, + }, { + // This file was produced using GNU tar v1.26: + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt file: "testdata/hardlink.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "file.txt", + Size: 15, Mode: 0644, Uid: 1000, Gid: 100, - Size: 15, - ModTime: time.Unix(1425484303, 0), - Typeflag: '0', Uname: "vbatts", Gname: "users", - }, - contents: "Slartibartfast\n", - }, { - header: &Header{ + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"Slartibartfast\n", 15, nil}, + + testHeader{Header{ + Typeflag: TypeLink, Name: "hard.txt", + Linkname: "file.txt", Mode: 0644, Uid: 1000, Gid: 100, - Size: 0, - ModTime: time.Unix(1425484303, 0), - Typeflag: '1', - Linkname: "file.txt", Uname: "vbatts", Gname: "users", - }, - // no contents - }}, + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, }, { - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "bad-null.txt", - Typeflag: '0', Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, - }, - }}, - err: ErrHeader, + }, ErrHeader}, + }, }, { - 
entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "null\x00.txt", - Typeflag: '0', - }, - }}, - err: ErrHeader, + }, ErrHeader}, + }, }, { file: "testdata/gnu-utf8.tar", - entries: []*entry{{ - header: &Header{ - Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", - Mode: 0644, - Uid: 1000, Gid: 1000, - ModTime: time.Unix(0, 0), - Typeflag: '0', + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, Uname: "☺", Gname: "⚹", + ModTime: time.Unix(0, 0), Devminor: -1, // Force use of GNU format - }, - }}, + }, nil}, + testClose{nil}, + }, }, { file: "testdata/gnu-not-utf8.tar", - entries: []*entry{{ - header: &Header{ + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, Name: "hi\x80\x81\x82\x83bye", Mode: 0644, Uid: 1000, Gid: 1000, - ModTime: time.Unix(0, 0), - Typeflag: '0', Uname: "rawr", Gname: "dsnet", + ModTime: time.Unix(0, 0), Devminor: -1, // Force use of GNU format - }, - }}, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + 
testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "gnu-sparse", + Size: 6e10, + SparseHoles: []SparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 0, ErrWriteTooLong}, + testWrite{strings.Repeat("0123456789", 10), 0, ErrWriteTooLong}, + testClose{nil}, + }, + }, { + file: "testdata/pax-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "pax-sparse", + Size: 6e10, + SparseHoles: []SparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 
100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 1e10 - 100, nil}, + testWrite{strings.Repeat("0123456789", 10), 100, nil}, + testFill{1e10 - 100, 0, ErrWriteTooLong}, + testWrite{strings.Repeat("0123456789", 10), 0, ErrWriteTooLong}, + testClose{nil}, + }, }} for _, v := range vectors { t.Run(path.Base(v.file), func(t *testing.T) { + const maxSize = 10 << 10 // 10KiB buf := new(bytes.Buffer) - tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB - canFail := false - for i, entry := range v.entries { - canFail = canFail || entry.header.Size > 1<<10 || v.err != nil - - err := tw.WriteHeader(entry.header) - if err != v.err { - t.Fatalf("entry %d: WriteHeader() = %v, want %v", i, err, v.err) - } - if _, err := io.WriteString(tw, entry.contents); err != nil { - t.Fatalf("entry %d: WriteString() = %v, want nil", i, err) + tw := NewWriter(iotest.TruncateWriter(buf, maxSize)) + + for i, tf := range v.tests { + switch tf := tf.(type) { + case testHeader: + err := tw.WriteHeader(&tf.hdr) + if err != tf.wantErr { + t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) + } + case testWrite: + got, err := tw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testFill: + got, err := tw.fillZeros(tf.cnt) + if got != tf.wantCnt || err != tf.wantErr { + t.Fatalf("test %d, fillZeros() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testClose: + err := tw.Close() + if err != tf.wantErr { + t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) + } + 
default: + t.Fatalf("test %d, unknown test operation: %T", i, tf) } } - // Only interested in Close failures for the small tests. - if err := tw.Close(); err != nil && !canFail { - t.Fatalf("Close() = %v, want nil", err) - } if v.file != "" { want, err := ioutil.ReadFile(v.file) @@ -758,3 +896,286 @@ func TestIssue12594(t *testing.T) { } } } + +func TestFileWriter(t *testing.T) { + type ( + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testFill struct { // FillZeros(cnt) == (wantCnt, wantErr) + cnt int64 + wantCnt int64 + wantErr error + } + testRemaining struct { // Remaining() == wantCnt + wantCnt int64 + } + testFnc interface{} // testWrite | testFill | testRemaining + ) + + type ( + makeReg struct { + size int64 + wantStr string + } + makeSparse struct { + makeReg makeReg + sph sparseHoles + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{0, ""}, + tests: []testFnc{ + testRemaining{0}, + testWrite{"", 0, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testFill{0, 0, nil}, + testFill{1, 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeReg{1, "a"}, + tests: []testFnc{ + testRemaining{1}, + testWrite{"", 0, nil}, + testWrite{"a", 1, nil}, + testWrite{"bcde", 0, ErrWriteTooLong}, + testWrite{"", 0, nil}, + testFill{0, 0, nil}, + testFill{1, 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "hello"}, + tests: []testFnc{ + testRemaining{5}, + testWrite{"hello", 5, nil}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5}, + testFill{5, 5, nil}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5}, + testFill{10, 5, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "abc\x00\x00"}, + tests: []testFnc{ + testRemaining{5}, + 
testWrite{"abc", 3, nil}, + testRemaining{2}, + testFill{2, 2, nil}, + testRemaining{0}, + }, + }, { + maker: makeReg{5, "\x00\x00abc"}, + tests: []testFnc{ + testRemaining{5}, + testFill{2, 2, nil}, + testRemaining{3}, + testWrite{"abc", 3, nil}, + testFill{1, 0, ErrWriteTooLong}, + testWrite{"z", 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testRemaining{8}, + testWrite{"ab\x00\x00\x00cde", 8, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00", 3, nil}, + testRemaining{5}, + testWrite{"\x00\x00cde", 5, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6}, + testFill{3, 3, nil}, + testRemaining{3}, + testWrite{"cde", 3, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{8, 8, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{5, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{9, 8, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{4, "\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{9, 8, errMissData}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{6, "\x00\x00\x00\x00\x00"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testFill{9, 8, errUnrefData}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + 
testRemaining{6}, + testFill{3, 3, nil}, + testRemaining{3}, + testWrite{"cde", 2, errMissData}, + testRemaining{1}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6}, + testFill{3, 3, nil}, + testRemaining{3}, + testWrite{"cde", 3, errUnrefData}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7}, + testWrite{"\x00\x00abc\x00\x00", 7, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"abcdefg", 0, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abcde", 5, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{3, "\x00\x00\x00"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{7, 7, nil}, + testFill{1, 0, ErrWriteTooLong}, + }, + }, { + maker: makeSparse{makeReg{3, "\x00\x00\x00"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{4, 4, nil}, + testFill{8, 3, ErrWriteTooLong}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{2, 2, nil}, + testRemaining{5}, + testWrite{"abc", 3, nil}, + testRemaining{2}, + testFill{2, 2, nil}, + testRemaining{0}, + }, + }, { + maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{2, 2, nil}, + testWrite{"abc", 2, errMissData}, + testFill{2, 2, errMissData}, + }, + }, { + maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testFill{2, 2, nil}, + testWrite{"abc", 3, nil}, + testFill{2, 2, errUnrefData}, + }, + }} + + 
for i, v := range vectors { + var wantStr string + bb := new(bytes.Buffer) + var fw fileWriter + switch maker := v.maker.(type) { + case makeReg: + fw = &regFileWriter{bb, maker.size} + wantStr = maker.wantStr + case makeSparse: + if !validateSparseEntries(maker.sph, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.sph) + } + spd := invertSparseEntries(maker.sph, maker.size) + fw = &regFileWriter{bb, maker.makeReg.size} + fw = &sparseFileWriter{fw, spd, 0} + wantStr = maker.makeReg.wantStr + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testWrite: + got, err := fw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, Write(%s):\ngot (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr) + } + case testFill: + got, err := fw.FillZeros(tf.cnt) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, FillZeros(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr) + } + case testRemaining: + got := fw.Remaining() + if got != tf.wantCnt { + t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + + if got := bb.String(); got != wantStr { + t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr) + } + } +} -- 2.50.0