// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
+// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
package tar
import (
TypeCont = '7' // reserved
TypeXHeader = 'x' // extended header
TypeXGlobalHeader = 'g' // global extended header
+ TypeGNULongName = 'L' // Next file has a long name
+ TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name
)
// A Header represents a single header in a tar archive.
ChangeTime time.Time // status change time
}
+// File name constants from the tar spec.
+const (
+ fileNameSize = 100 // Maximum number of bytes in a standard tar name.
+ fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
+)
+
// sysStat, if non-nil, populates h from system-dependent fields of fi.
var sysStat func(fi os.FileInfo, h *Header) error
"io/ioutil"
"os"
"strconv"
+ "strings"
"time"
)
ErrHeader = errors.New("archive/tar: invalid tar header")
)
+const maxNanoSecondIntSize = 9
+
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
if tr.err == nil {
tr.skipUnread()
}
- if tr.err == nil {
+ if tr.err != nil {
+ return hdr, tr.err
+ }
+ hdr = tr.readHeader()
+ if hdr == nil {
+ return hdr, tr.err
+ }
+ // Check for PAX/GNU header.
+ switch hdr.Typeflag {
+ case TypeXHeader:
+ // PAX extended header
+ headers, err := parsePAX(tr)
+ if err != nil {
+ return nil, err
+ }
+ // We actually read the whole file,
+ // but this skips alignment padding
+ tr.skipUnread()
hdr = tr.readHeader()
+ mergePAX(hdr, headers)
+ return hdr, nil
+ case TypeGNULongName:
+ // We have a GNU long name header. Its contents are the real file name.
+ realname, err := ioutil.ReadAll(tr)
+ if err != nil {
+ return nil, err
+ }
+ hdr, err := tr.Next()
+ hdr.Name = cString(realname)
+ return hdr, err
+ case TypeGNULongLink:
+ // We have a GNU long link header.
+ realname, err := ioutil.ReadAll(tr)
+ if err != nil {
+ return nil, err
+ }
+ hdr, err := tr.Next()
+ hdr.Linkname = cString(realname)
+ return hdr, err
}
return hdr, tr.err
}
-// Parse bytes as a NUL-terminated C-style string.
+// mergePAX merges well known headers according to PAX standard.
+// In general headers with the same name as those found
+// in the header struct overwrite those found in the header
+// struct with higher precision or longer values. Esp. useful
+// for name and linkname fields.
+func mergePAX(hdr *Header, headers map[string]string) error {
+ for k, v := range headers {
+ switch k {
+ case "path":
+ hdr.Name = v
+ case "linkpath":
+ hdr.Linkname = v
+ case "gname":
+ hdr.Gname = v
+ case "uname":
+ hdr.Uname = v
+ case "uid":
+ uid, err := strconv.ParseInt(v, 10, 0)
+ if err != nil {
+ return err
+ }
+ hdr.Uid = int(uid)
+ case "gid":
+ gid, err := strconv.ParseInt(v, 10, 0)
+ if err != nil {
+ return err
+ }
+ hdr.Gid = int(gid)
+ case "atime":
+ t, err := parsePAXTime(v)
+ if err != nil {
+ return err
+ }
+ hdr.AccessTime = t
+ case "mtime":
+ t, err := parsePAXTime(v)
+ if err != nil {
+ return err
+ }
+ hdr.ModTime = t
+ case "ctime":
+ t, err := parsePAXTime(v)
+ if err != nil {
+ return err
+ }
+ hdr.ChangeTime = t
+ case "size":
+ size, err := strconv.ParseInt(v, 10, 0)
+ if err != nil {
+ return err
+ }
+ hdr.Size = int64(size)
+ }
+
+ }
+ return nil
+}
+
+// parsePAXTime takes a string of the form %d.%d as described in
+// the PAX specification.
+func parsePAXTime(t string) (time.Time, error) {
+ buf := []byte(t)
+ pos := bytes.IndexByte(buf, '.')
+ var seconds, nanoseconds int64
+ var err error
+ if pos == -1 {
+ seconds, err = strconv.ParseInt(t, 10, 0)
+ if err != nil {
+ return time.Time{}, err
+ }
+ } else {
+ seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
+ if err != nil {
+ return time.Time{}, err
+ }
+ nano_buf := string(buf[pos+1:])
+ // Pad as needed before converting to a decimal.
+ // For example .030 -> .030000000 -> 30000000 nanoseconds
+ if len(nano_buf) < maxNanoSecondIntSize {
+ // Right pad
+ nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
+ } else if len(nano_buf) > maxNanoSecondIntSize {
+ // Right truncate
+ nano_buf = nano_buf[:maxNanoSecondIntSize]
+ }
+ nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
+ if err != nil {
+ return time.Time{}, err
+ }
+ }
+ ts := time.Unix(seconds, nanoseconds)
+ return ts, nil
+}
+
+// parsePAX parses PAX headers.
+// If an extended header (type 'x') is invalid, ErrHeader is returned
+func parsePAX(r io.Reader) (map[string]string, error) {
+ buf, err := ioutil.ReadAll(r)
+ if err != nil {
+ return nil, err
+ }
+ headers := make(map[string]string)
+ // Each record is constructed as
+ // "%d %s=%s\n", length, keyword, value
+ for len(buf) > 0 {
+ // or the header was empty to start with.
+ var sp int
+ // The size field ends at the first space.
+ sp = bytes.IndexByte(buf, ' ')
+ if sp == -1 {
+ return nil, ErrHeader
+ }
+ // Parse the first token as a decimal integer.
+ n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
+ if err != nil {
+ return nil, ErrHeader
+ }
+ // Extract everything between the decimal and the n -1 on the
+ // beginning to to eat the ' ', -1 on the end to skip the newline.
+ var record []byte
+ record, buf = buf[sp+1:n-1], buf[n:]
+ // The first equals is guaranteed to mark the end of the key.
+ // Everything else is value.
+ eq := bytes.IndexByte(record, '=')
+ if eq == -1 {
+ return nil, ErrHeader
+ }
+ key, value := record[:eq], record[eq+1:]
+ headers[string(key)] = string(value)
+ }
+ return headers, nil
+}
+
+// cString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
n := 0
return int64(x)
}
-// Skip any unread bytes in the existing file entry, as well as any alignment padding.
+// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.nb + tr.pad // number of bytes to skip
tr.nb, tr.pad = 0, 0
"fmt"
"io"
"os"
+ "reflect"
+ "strings"
"testing"
"time"
)
},
},
},
+ {
+ file: "testdata/pax.tar",
+ headers: []*Header{
+ {
+ Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
+ Mode: 0664,
+ Uid: 1000,
+ Gid: 1000,
+ Uname: "shane",
+ Gname: "shane",
+ Size: 7,
+ ModTime: time.Unix(1350244992, 23960108),
+ ChangeTime: time.Unix(1350244992, 23960108),
+ AccessTime: time.Unix(1350244992, 23960108),
+ Typeflag: TypeReg,
+ },
+ {
+ Name: "a/b",
+ Mode: 0777,
+ Uid: 1000,
+ Gid: 1000,
+ Uname: "shane",
+ Gname: "shane",
+ Size: 0,
+ ModTime: time.Unix(1350266320, 910238425),
+ ChangeTime: time.Unix(1350266320, 910238425),
+ AccessTime: time.Unix(1350266320, 910238425),
+ Typeflag: TypeSymlink,
+ Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
+ },
+ },
+ },
}
func TestReader(t *testing.T) {
}
hdr, err := tr.Next()
if err == io.EOF {
- break
+ continue testLoop
}
if hdr != nil || err != nil {
t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err)
t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
}
}
+
+func TestParsePAXHeader(t *testing.T) {
+ paxTests := [][3]string{
+ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
+ {"a", "a=name", "9 a=name\n"}, // Test case involving multiple acceptable length
+ {"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}}
+ for _, test := range paxTests {
+ key, expected, raw := test[0], test[1], test[2]
+ reader := bytes.NewBuffer([]byte(raw))
+ headers, err := parsePAX(reader)
+ if err != nil {
+ t.Errorf("Couldn't parse correctly formatted headers: %v", err)
+ continue
+ }
+ if strings.EqualFold(headers[key], expected) {
+ t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected)
+ continue
+ }
+ trailer := make([]byte, 100)
+ n, err := reader.Read(trailer)
+ if err != io.EOF || n != 0 {
+ t.Error("Buffer wasn't consumed")
+ }
+ }
+ badHeader := bytes.NewBuffer([]byte("3 somelongkey="))
+ if _, err := parsePAX(badHeader); err != ErrHeader {
+ t.Fatal("Unexpected success when parsing bad header")
+ }
+}
+
+func TestParsePAXTime(t *testing.T) {
+ // Some valid PAX time values
+ timestamps := map[string]time.Time{
+ "1350244992.023960108": time.Unix(1350244992, 23960108), // The commoon case
+ "1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value
+ "1350244992.0239601089": time.Unix(1350244992, 23960108), // Higher precision value
+ "1350244992": time.Unix(1350244992, 0), // Low precision value
+ }
+ for input, expected := range timestamps {
+ ts, err := parsePAXTime(input)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !ts.Equal(expected) {
+ t.Fatal("Time parsing failure %s %s", ts, expected)
+ }
+ }
+}
+
+func TestMergePAX(t *testing.T) {
+ hdr := new(Header)
+ // Test a string, integer, and time based value.
+ headers := map[string]string{
+ "path": "a/b/c",
+ "uid": "1000",
+ "mtime": "1350244992.023960108",
+ }
+ err := mergePAX(hdr, headers)
+ if err != nil {
+ t.Fatal(err)
+ }
+ want := &Header{
+ Name: "a/b/c",
+ Uid: 1000,
+ ModTime: time.Unix(1350244992, 23960108),
+ }
+ if !reflect.DeepEqual(hdr, want) {
+ t.Errorf("incorrect merge: got %+v, want %+v", hdr, want)
+ }
+}
// - catch more errors (no first header, etc.)
import (
+ "bytes"
"errors"
"fmt"
"io"
+ "os"
+ "path"
"strconv"
+ "strings"
"time"
)
ErrWriteTooLong = errors.New("archive/tar: write too long")
ErrFieldTooLong = errors.New("archive/tar: header field too long")
ErrWriteAfterClose = errors.New("archive/tar: write after close")
+ errNameTooLong = errors.New("archive/tar: name too long")
)
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
if tw.err != nil {
return tw.err
}
-
+ // Decide whether or not to use PAX extensions
+ // TODO(shanemhansen): we might want to use PAX headers for
+ // subsecond time resolution, but for now let's just capture
+ // the long name/long symlink use case.
+ suffix := hdr.Name
+ prefix := ""
+ if len(hdr.Name) > fileNameSize || len(hdr.Linkname) > fileNameSize {
+ var err error
+ prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
+ // Either we were unable to pack the long name into ustar format
+ // or the link name is too long; use PAX headers.
+ if err == errNameTooLong || len(hdr.Linkname) > fileNameSize {
+ if err := tw.writePAXHeader(hdr); err != nil {
+ return err
+ }
+ } else if err != nil {
+ return err
+ }
+ }
tw.nb = int64(hdr.Size)
tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two
header := make([]byte, blockSize)
s := slicer(header)
-
- // TODO(dsymonds): handle names longer than 100 chars
- copy(s.next(100), []byte(hdr.Name))
+ tw.cString(s.next(fileNameSize), suffix)
// Handle out of range ModTime carefully.
var modTime int64
tw.cString(s.next(32), hdr.Gname) // 297:329
tw.numeric(s.next(8), hdr.Devmajor) // 329:337
tw.numeric(s.next(8), hdr.Devminor) // 337:345
-
+ tw.cString(s.next(155), prefix) // 345:500
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary {
copy(header[257:265], []byte("ustar \x00"))
}
+ // Use the ustar magic if we used ustar long names.
+ if len(prefix) > 0 {
+ copy(header[257:265], []byte("ustar\000"))
+ }
// The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields.
return tw.err
}
+// writeUSTARLongName splits a USTAR long name hdr.Name.
+// name must be < 256 characters. errNameTooLong is returned
+// if hdr.Name can't be split. The splitting heuristic
+// is compatible with gnu tar.
+func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
+ length := len(name)
+ if length > fileNamePrefixSize+1 {
+ length = fileNamePrefixSize + 1
+ } else if name[length-1] == '/' {
+ length--
+ }
+ i := strings.LastIndex(name[:length], "/")
+ nlen := length - i - 1
+ if i <= 0 || nlen > fileNameSize || nlen == 0 {
+ err = errNameTooLong
+ return
+ }
+ prefix, suffix = name[:i], name[i+1:]
+ return
+}
+
+// writePaxHeader writes an extended pax header to the
+// archive.
+func (tw *Writer) writePAXHeader(hdr *Header) error {
+ // Prepare extended header
+ ext := new(Header)
+ ext.Typeflag = TypeXHeader
+ // Setting ModTime is required for reader parsing to
+ // succeed, and seems harmless enough.
+ ext.ModTime = hdr.ModTime
+ // The spec asks that we namespace our pseudo files
+ // with the current pid.
+ pid := os.Getpid()
+ dir, file := path.Split(hdr.Name)
+ ext.Name = path.Join(dir,
+ fmt.Sprintf("PaxHeaders.%d", pid), file)[0:100]
+ // Construct the body
+ var buf bytes.Buffer
+ if len(hdr.Name) > fileNameSize {
+ fmt.Fprint(&buf, paxHeader("path="+hdr.Name))
+ }
+ if len(hdr.Linkname) > fileNameSize {
+ fmt.Fprint(&buf, paxHeader("linkpath="+hdr.Linkname))
+ }
+ ext.Size = int64(len(buf.Bytes()))
+ if err := tw.WriteHeader(ext); err != nil {
+ return err
+ }
+ if _, err := tw.Write(buf.Bytes()); err != nil {
+ return err
+ }
+ if err := tw.Flush(); err != nil {
+ return err
+ }
+ return nil
+}
+
+// paxHeader formats a single pax record, prefixing it with the appropriate length
+func paxHeader(msg string) string {
+ const padding = 2 // Extra padding for space and newline
+ size := len(msg) + padding
+ size += len(strconv.Itoa(size))
+ record := fmt.Sprintf("%d %s\n", size, msg)
+ if len(record) != size {
+ // Final adjustment if adding size increased
+ // the number of digits in size
+ size = len(record)
+ record = fmt.Sprintf("%d %s\n", size, msg)
+ }
+ return record
+}
+
// Write writes to the current entry in the tar archive.
// Write returns the error ErrWriteTooLong if more than
// hdr.Size bytes are written after WriteHeader.
"fmt"
"io"
"io/ioutil"
+ "os"
"strings"
"testing"
"testing/iotest"
},
},
},
+ // This file was produced using gnu tar 1.17
+ // gnutar -b 4 --format=ustar (longname/)*15 + file.txt
+ {
+ file: "testdata/ustar.tar",
+ entries: []*writerTestEntry{
+ {
+ header: &Header{
+ Name: strings.Repeat("longname/", 15) + "file.txt",
+ Mode: 0644,
+ Uid: 0765,
+ Gid: 024,
+ Size: 06,
+ ModTime: time.Unix(1360135598, 0),
+ Typeflag: '0',
+ Uname: "shane",
+ Gname: "staff",
+ },
+ contents: "hello\n",
+ },
+ },
+ },
}
// Render byte array in a two-character hexadecimal string, spaced for easy visual inspection.
}
}
}
+
+func TestPax(t *testing.T) {
+ // Create an archive with a large name
+ fileinfo, err := os.Stat("testdata/small.txt")
+ if err != nil {
+ t.Fatal(err)
+ }
+ hdr, err := FileInfoHeader(fileinfo, "")
+ if err != nil {
+ t.Fatalf("os.Stat: %v", err)
+ }
+ // Force a PAX long name to be written
+ longName := strings.Repeat("ab", 100)
+ contents := strings.Repeat(" ", int(hdr.Size))
+ hdr.Name = longName
+ var buf bytes.Buffer
+ writer := NewWriter(&buf)
+ if err := writer.WriteHeader(hdr); err != nil {
+ t.Fatal(err)
+ }
+ if _, err = writer.Write([]byte(contents)); err != nil {
+ t.Fatal(err)
+ }
+ if err := writer.Close(); err != nil {
+ t.Fatal(err)
+ }
+ // Simple test to make sure PAX extensions are in effect
+ if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
+ t.Fatal("Expected at least one PAX header to be written.")
+ }
+ // Test that we can get a long name back out of the archive.
+ reader := NewReader(&buf)
+ hdr, err = reader.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if hdr.Name != longName {
+ t.Fatal("Couldn't recover long file name")
+ }
+}
+
+func TestPAXHeader(t *testing.T) {
+ medName := strings.Repeat("CD", 50)
+ longName := strings.Repeat("AB", 100)
+ paxTests := [][2]string{
+ {"name=/etc/hosts", "19 name=/etc/hosts\n"},
+ {"a=b", "6 a=b\n"}, // Single digit length
+ {"a=names", "11 a=names\n"}, // Test case involving carries
+ {"name=" + longName, fmt.Sprintf("210 name=%s\n", longName)},
+ {"name=" + medName, fmt.Sprintf("110 name=%s\n", medName)}}
+
+ for _, test := range paxTests {
+ key, expected := test[0], test[1]
+ if result := paxHeader(key); result != expected {
+ t.Fatalf("paxHeader: got %s, expected %s", result, expected)
+ }
+ }
+}