]> Cypherpunks repositories - gostls13.git/commitdiff
archive/zip: new package for reading ZIP files
authorAndrew Gerrand <adg@golang.org>
Thu, 30 Sep 2010 01:59:46 +0000 (11:59 +1000)
committerAndrew Gerrand <adg@golang.org>
Thu, 30 Sep 2010 01:59:46 +0000 (11:59 +1000)
R=rsc
CC=golang-dev
https://golang.org/cl/2125042

src/pkg/Makefile
src/pkg/archive/zip/Makefile [new file with mode: 0644]
src/pkg/archive/zip/reader.go [new file with mode: 0644]
src/pkg/archive/zip/reader_test.go [new file with mode: 0644]
src/pkg/archive/zip/struct.go [new file with mode: 0644]
src/pkg/archive/zip/testdata/gophercolor16x16.png [new file with mode: 0644]
src/pkg/archive/zip/testdata/r.zip [new file with mode: 0644]
src/pkg/archive/zip/testdata/readme.notzip [new file with mode: 0644]
src/pkg/archive/zip/testdata/readme.zip [new file with mode: 0644]
src/pkg/archive/zip/testdata/test.zip [new file with mode: 0644]

index 33194918b8d0d04dff6b3ba001b1088f44f89c8f..d7351c599376453fded2fb7f94b8fe30b8e993c0 100644 (file)
@@ -15,6 +15,7 @@ all: install
 
 DIRS=\
        archive/tar\
+       archive/zip\
        asn1\
        big\
        bufio\
diff --git a/src/pkg/archive/zip/Makefile b/src/pkg/archive/zip/Makefile
new file mode 100644 (file)
index 0000000..32a5431
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright 2010 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=archive/zip
+GOFILES=\
+       reader.go\
+       struct.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/archive/zip/reader.go b/src/pkg/archive/zip/reader.go
new file mode 100644 (file)
index 0000000..579ba16
--- /dev/null
@@ -0,0 +1,278 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+The zip package provides support for reading ZIP archives.
+
+See: http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+
+This package does not support ZIP64 or disk spanning.
+*/
+package zip
+
+import (
+       "bufio"
+       "bytes"
+       "compress/flate"
+       "hash"
+       "hash/crc32"
+       "encoding/binary"
+       "io"
+       "os"
+)
+
+var (
+       FormatError       = os.NewError("not a valid zip file")
+       UnsupportedMethod = os.NewError("unsupported compression algorithm")
+       ChecksumError     = os.NewError("checksum error")
+)
+
+type Reader struct {
+       r       io.ReaderAt
+       File    []*File
+       Comment string
+}
+
+type File struct {
+       FileHeader
+       zipr         io.ReaderAt
+       zipsize      int64
+       headerOffset uint32
+       bodyOffset   int64
+}
+
+// OpenReader will open the Zip file specified by name and return a Reader.
+func OpenReader(name string) (*Reader, os.Error) {
+       f, err := os.Open(name, os.O_RDONLY, 0644)
+       if err != nil {
+               return nil, err
+       }
+       fi, err := f.Stat()
+       if err != nil {
+               return nil, err
+       }
+       return NewReader(f, fi.Size)
+}
+
+// NewReader returns a new Reader reading from r, which is assumed to
+// have the given size in bytes.
+func NewReader(r io.ReaderAt, size int64) (*Reader, os.Error) {
+       end, err := readDirectoryEnd(r, size)
+       if err != nil {
+               return nil, err
+       }
+       z := &Reader{
+               r:       r,
+               File:    make([]*File, end.directoryRecords),
+               Comment: end.comment,
+       }
+       rs := io.NewSectionReader(r, 0, size)
+       if _, err = rs.Seek(int64(end.directoryOffset), 0); err != nil {
+               return nil, err
+       }
+       buf := bufio.NewReader(rs)
+       for i := range z.File {
+               z.File[i] = &File{zipr: r, zipsize: size}
+               if err := readDirectoryHeader(z.File[i], buf); err != nil {
+                       return nil, err
+               }
+       }
+       return z, nil
+}
+
+// Open returns a ReadCloser that provides access to the File's contents.
+func (f *File) Open() (rc io.ReadCloser, err os.Error) {
+       off := int64(f.headerOffset)
+       if f.bodyOffset == 0 {
+               r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
+               if err = readFileHeader(f, r); err != nil {
+                       return
+               }
+               if f.bodyOffset, err = r.Seek(0, 1); err != nil {
+                       return
+               }
+       }
+       r := io.NewSectionReader(f.zipr, off+f.bodyOffset, int64(f.CompressedSize))
+       switch f.Method {
+       case 0: // store (no compression)
+               rc = nopCloser{r}
+       case 8: // DEFLATE
+               rc = flate.NewReader(r)
+       default:
+               err = UnsupportedMethod
+       }
+       if rc != nil {
+               rc = &checksumReader{rc, crc32.NewIEEE(), f.CRC32}
+       }
+       return
+}
+
+type checksumReader struct {
+       rc   io.ReadCloser
+       hash hash.Hash32
+       sum  uint32
+}
+
+func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
+       n, err = r.rc.Read(b)
+       r.hash.Write(b[:n])
+       if err != os.EOF {
+               return
+       }
+       if r.hash.Sum32() != r.sum {
+               err = ChecksumError
+       }
+       return
+}
+
+func (r *checksumReader) Close() os.Error { return r.rc.Close() }
+
+type nopCloser struct {
+       io.Reader
+}
+
+func (f nopCloser) Close() os.Error { return nil }
+
+func readFileHeader(f *File, r io.Reader) (err os.Error) {
+       defer func() {
+               if rerr, ok := recover().(os.Error); ok {
+                       err = rerr
+               }
+       }()
+       var (
+               signature      uint32
+               filenameLength uint16
+               extraLength    uint16
+       )
+       read(r, &signature)
+       if signature != fileHeaderSignature {
+               return FormatError
+       }
+       read(r, &f.ReaderVersion)
+       read(r, &f.Flags)
+       read(r, &f.Method)
+       read(r, &f.ModifiedTime)
+       read(r, &f.ModifiedDate)
+       read(r, &f.CRC32)
+       read(r, &f.CompressedSize)
+       read(r, &f.UncompressedSize)
+       read(r, &filenameLength)
+       read(r, &extraLength)
+       f.Name = string(readByteSlice(r, filenameLength))
+       f.Extra = readByteSlice(r, extraLength)
+       return
+}
+
+func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
+       defer func() {
+               if rerr, ok := recover().(os.Error); ok {
+                       err = rerr
+               }
+       }()
+       var (
+               signature          uint32
+               filenameLength     uint16
+               extraLength        uint16
+               commentLength      uint16
+               startDiskNumber    uint16 // unused
+               internalAttributes uint16 // unused
+               externalAttributes uint32 // unused
+       )
+       read(r, &signature)
+       if signature != directoryHeaderSignature {
+               return FormatError
+       }
+       read(r, &f.CreatorVersion)
+       read(r, &f.ReaderVersion)
+       read(r, &f.Flags)
+       read(r, &f.Method)
+       read(r, &f.ModifiedTime)
+       read(r, &f.ModifiedDate)
+       read(r, &f.CRC32)
+       read(r, &f.CompressedSize)
+       read(r, &f.UncompressedSize)
+       read(r, &filenameLength)
+       read(r, &extraLength)
+       read(r, &commentLength)
+       read(r, &startDiskNumber)
+       read(r, &internalAttributes)
+       read(r, &externalAttributes)
+       read(r, &f.headerOffset)
+       f.Name = string(readByteSlice(r, filenameLength))
+       f.Extra = readByteSlice(r, extraLength)
+       f.Comment = string(readByteSlice(r, commentLength))
+       return
+}
+
+func readDirectoryEnd(r io.ReaderAt, size int64) (d *directoryEnd, err os.Error) {
+       // look for directoryEndSignature in the last 1k, then in the last 65k
+       var b []byte
+       for i, bLen := range []int64{1024, 65 * 1024} {
+               if bLen > size {
+                       bLen = size
+               }
+               b = make([]byte, int(bLen))
+               if _, err := r.ReadAt(b, size-bLen); err != nil && err != os.EOF {
+                       return nil, err
+               }
+               if p := findSignatureInBlock(b); p >= 0 {
+                       b = b[p:]
+                       break
+               }
+               if i == 1 || bLen == size {
+                       return nil, FormatError
+               }
+       }
+
+       // read header into struct
+       defer func() {
+               if rerr, ok := recover().(os.Error); ok {
+                       err = rerr
+                       d = nil
+               }
+       }()
+       br := bytes.NewBuffer(b[4:]) // skip over signature
+       d = new(directoryEnd)
+       read(br, &d.diskNbr)
+       read(br, &d.dirDiskNbr)
+       read(br, &d.dirRecordsThisDisk)
+       read(br, &d.directoryRecords)
+       read(br, &d.directorySize)
+       read(br, &d.directoryOffset)
+       read(br, &d.commentLen)
+       d.comment = string(readByteSlice(br, d.commentLen))
+       return d, nil
+}
+
+func findSignatureInBlock(b []byte) int {
+       const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
+       for i := len(b) - minSize; i >= 0; i-- {
+               // defined from directoryEndSignature in struct.go
+               if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
+                       // n is length of comment
+                       n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
+                       if n+minSize+i == len(b) {
+                               return i
+                       }
+               }
+       }
+       return -1
+}
+
+func read(r io.Reader, data interface{}) {
+       if err := binary.Read(r, binary.LittleEndian, data); err != nil {
+               panic(err)
+       }
+}
+
+func readByteSlice(r io.Reader, l uint16) []byte {
+       b := make([]byte, l)
+       if l == 0 {
+               return b
+       }
+       if _, err := io.ReadFull(r, b); err != nil {
+               panic(err)
+       }
+       return b
+}
diff --git a/src/pkg/archive/zip/reader_test.go b/src/pkg/archive/zip/reader_test.go
new file mode 100644 (file)
index 0000000..36b925c
--- /dev/null
@@ -0,0 +1,180 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zip
+
+import (
+       "bytes"
+       "encoding/binary"
+       "io"
+       "io/ioutil"
+       "os"
+       "testing"
+)
+
+type ZipTest struct {
+       Name    string
+       Comment string
+       File    []ZipTestFile
+       Error   os.Error // the error that Opening this file should return
+}
+
+type ZipTestFile struct {
+       Name    string
+       Content []byte // if blank, will attempt to compare against File
+       File    string // name of file to compare to (relative to testdata/)
+}
+
+var tests = []ZipTest{
+       ZipTest{
+               Name:    "test.zip",
+               Comment: "This is a zipfile comment.",
+               File: []ZipTestFile{
+                       ZipTestFile{
+                               Name:    "test.txt",
+                               Content: []byte("This is a test text file.\n"),
+                       },
+                       ZipTestFile{
+                               Name: "gophercolor16x16.png",
+                               File: "gophercolor16x16.png",
+                       },
+               },
+       },
+       ZipTest{
+               Name: "r.zip",
+               File: []ZipTestFile{
+                       ZipTestFile{
+                               Name: "r/r.zip",
+                               File: "r.zip",
+                       },
+               },
+       },
+       ZipTest{Name: "readme.zip"},
+       ZipTest{Name: "readme.notzip", Error: FormatError},
+}
+
+func TestReader(t *testing.T) {
+       for _, zt := range tests {
+               readTestZip(t, zt)
+       }
+}
+
+func readTestZip(t *testing.T, zt ZipTest) {
+       z, err := OpenReader("testdata/" + zt.Name)
+       if err != zt.Error {
+               t.Errorf("error=%v, want %v", err, zt.Error)
+               return
+       }
+
+       // bail here if no Files expected to be tested
+       // (there may actually be files in the zip, but we don't care)
+       if zt.File == nil {
+               return
+       }
+
+       if z.Comment != zt.Comment {
+               t.Errorf("%s: comment=%q, want %q", zt.Name, z.Comment, zt.Comment)
+       }
+       if len(z.File) != len(zt.File) {
+               t.Errorf("%s: file count=%d, want %d", zt.Name, len(z.File), len(zt.File))
+       }
+
+       // test read of each file
+       for i, ft := range zt.File {
+               readTestFile(t, ft, z.File[i])
+       }
+
+       // test simultaneous reads
+       n := 0
+       done := make(chan bool)
+       for i := 0; i < 5; i++ {
+               for j, ft := range zt.File {
+                       go func() {
+                               readTestFile(t, ft, z.File[j])
+                               done <- true
+                       }()
+                       n++
+               }
+       }
+       for ; n > 0; n-- {
+               <-done
+       }
+
+       // test invalid checksum
+       z.File[0].CRC32++ // invalidate
+       r, err := z.File[0].Open()
+       if err != nil {
+               t.Error(err)
+               return
+       }
+       var b bytes.Buffer
+       _, err = io.Copy(&b, r)
+       if err != ChecksumError {
+               t.Errorf("%s: copy error=%v, want %v", err, ChecksumError)
+       }
+}
+
+func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
+       if f.Name != ft.Name {
+               t.Errorf("name=%q, want %q", f.Name, ft.Name)
+       }
+       var b bytes.Buffer
+       r, err := f.Open()
+       if err != nil {
+               t.Error(err)
+               return
+       }
+       _, err = io.Copy(&b, r)
+       if err != nil {
+               t.Error(err)
+               return
+       }
+       r.Close()
+       var c []byte
+       if len(ft.Content) != 0 {
+               c = ft.Content
+       } else if c, err = ioutil.ReadFile("testdata/" + ft.File); err != nil {
+               t.Error(err)
+               return
+       }
+       if b.Len() != len(c) {
+               t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c))
+               return
+       }
+       for i, b := range b.Bytes() {
+               if b != c[i] {
+                       t.Errorf("%s: content[%d]=%q want %q", i, b, c[i])
+                       return
+               }
+       }
+}
+
+func TestInvalidFiles(t *testing.T) {
+       const size = 1024 * 70 // 70kb
+       b := make([]byte, size)
+
+       // zeroes
+       _, err := NewReader(sliceReaderAt(b), size)
+       if err != FormatError {
+               t.Errorf("zeroes: error=%v, want %v", err, FormatError)
+       }
+
+       // repeated directoryEndSignatures
+       sig := make([]byte, 4)
+       binary.LittleEndian.PutUint32(sig, directoryEndSignature)
+       for i := 0; i < size-4; i += 4 {
+               copy(b[i:i+4], sig)
+       }
+       _, err = NewReader(sliceReaderAt(b), size)
+       if err != FormatError {
+               t.Errorf("sigs: error=%v, want %v", err, FormatError)
+       }
+}
+
+type sliceReaderAt []byte
+
+func (r sliceReaderAt) ReadAt(b []byte, off int64) (int, os.Error) {
+       copy(b, r[int(off):int(off)+len(b)])
+       return len(b), nil
+}
diff --git a/src/pkg/archive/zip/struct.go b/src/pkg/archive/zip/struct.go
new file mode 100644 (file)
index 0000000..8a8c727
--- /dev/null
@@ -0,0 +1,33 @@
+package zip
+
+const (
+       fileHeaderSignature      = 0x04034b50
+       directoryHeaderSignature = 0x02014b50
+       directoryEndSignature    = 0x06054b50
+)
+
+type FileHeader struct {
+       Name             string
+       CreatorVersion   uint16
+       ReaderVersion    uint16
+       Flags            uint16
+       Method           uint16
+       ModifiedTime     uint16
+       ModifiedDate     uint16
+       CRC32            uint32
+       CompressedSize   uint32
+       UncompressedSize uint32
+       Extra            []byte
+       Comment          string
+}
+
+type directoryEnd struct {
+       diskNbr            uint16 // unused
+       dirDiskNbr         uint16 // unused
+       dirRecordsThisDisk uint16 // unused
+       directoryRecords   uint16
+       directorySize      uint32
+       directoryOffset    uint32 // relative to file
+       commentLen         uint16
+       comment            string
+}
diff --git a/src/pkg/archive/zip/testdata/gophercolor16x16.png b/src/pkg/archive/zip/testdata/gophercolor16x16.png
new file mode 100644 (file)
index 0000000..48854ff
Binary files /dev/null and b/src/pkg/archive/zip/testdata/gophercolor16x16.png differ
diff --git a/src/pkg/archive/zip/testdata/r.zip b/src/pkg/archive/zip/testdata/r.zip
new file mode 100644 (file)
index 0000000..ea0fa2f
Binary files /dev/null and b/src/pkg/archive/zip/testdata/r.zip differ
diff --git a/src/pkg/archive/zip/testdata/readme.notzip b/src/pkg/archive/zip/testdata/readme.notzip
new file mode 100644 (file)
index 0000000..06668c4
Binary files /dev/null and b/src/pkg/archive/zip/testdata/readme.notzip differ
diff --git a/src/pkg/archive/zip/testdata/readme.zip b/src/pkg/archive/zip/testdata/readme.zip
new file mode 100644 (file)
index 0000000..db3bb90
Binary files /dev/null and b/src/pkg/archive/zip/testdata/readme.zip differ
diff --git a/src/pkg/archive/zip/testdata/test.zip b/src/pkg/archive/zip/testdata/test.zip
new file mode 100644 (file)
index 0000000..03890c0
Binary files /dev/null and b/src/pkg/archive/zip/testdata/test.zip differ