import (
"bytes"
"fmt"
+ "hash"
"io"
"io/ioutil"
+ "sort"
"strings"
"testing"
"time"
testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t)
}
+type repeatedByte struct {
+ off int64
+ b byte
+ n int64
+}
+
+// rleBuffer is a run-length-encoded byte buffer.
+// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt,
+// allowing random-access reads.
+type rleBuffer struct {
+ buf []repeatedByte
+}
+
+func (r *rleBuffer) Size() int64 {
+ if len(r.buf) == 0 {
+ return 0
+ }
+ last := &r.buf[len(r.buf)-1]
+ return last.off + last.n
+}
+
+func (r *rleBuffer) Write(p []byte) (n int, err error) {
+ var rp *repeatedByte
+ if len(r.buf) > 0 {
+ rp = &r.buf[len(r.buf)-1]
+ // Fast path, if p is entirely the same byte repeated.
+ if lastByte := rp.b; len(p) > 0 && p[0] == lastByte {
+ all := true
+ for _, b := range p {
+ if b != lastByte {
+ all = false
+ break
+ }
+ }
+ if all {
+ rp.n += int64(len(p))
+ return len(p), nil
+ }
+ }
+ }
+
+ for _, b := range p {
+ if rp == nil || rp.b != b {
+ r.buf = append(r.buf, repeatedByte{r.Size(), b, 1})
+ rp = &r.buf[len(r.buf)-1]
+ } else {
+ rp.n++
+ }
+ }
+ return len(p), nil
+}
+
+func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) {
+ if len(p) == 0 {
+ return
+ }
+ skipParts := sort.Search(len(r.buf), func(i int) bool {
+ part := &r.buf[i]
+ return part.off+part.n > off
+ })
+ parts := r.buf[skipParts:]
+ if len(parts) > 0 {
+ skipBytes := off - parts[0].off
+ for len(parts) > 0 {
+ part := parts[0]
+ for i := skipBytes; i < part.n; i++ {
+ if n == len(p) {
+ return
+ }
+ p[n] = part.b
+ n++
+ }
+ parts = parts[1:]
+ skipBytes = 0
+ }
+ }
+ if n != len(p) {
+ err = io.ErrUnexpectedEOF
+ }
+ return
+}
+
+// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code.
+func TestRLEBuffer(t *testing.T) {
+ b := new(rleBuffer)
+ var all []byte
+ writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"}
+ for _, w := range writes {
+ b.Write([]byte(w))
+ all = append(all, w...)
+ }
+ if len(b.buf) != 10 {
+ t.Fatalf("len(b.buf) = %d; want 10", len(b.buf))
+ }
+
+ for i := 0; i < len(all); i++ {
+ for j := 0; j < len(all)-i; j++ {
+ buf := make([]byte, j)
+ n, err := b.ReadAt(buf, int64(i))
+ if err != nil || n != len(buf) {
+ t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf))
+ }
+ if !bytes.Equal(buf, all[i:i+j]) {
+ t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j])
+ }
+ }
+ }
+}
+
+// fakeHash32 is a dummy Hash32 that always returns 0.
+type fakeHash32 struct {
+ hash.Hash32
+}
+
+func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil }
+func (fakeHash32) Sum32() uint32 { return 0 }
+
func TestZip64(t *testing.T) {
if testing.Short() {
t.Skip("slow test; skipping")
}
+ const size = 1 << 32 // before the "END\n" part
+ testZip64(t, size)
+}
+
+func testZip64(t testing.TB, size int64) {
+ const chunkSize = 1024
+ chunks := int(size / chunkSize)
// write 2^32 bytes plus "END\n" to a zip file
- buf := new(bytes.Buffer)
+ buf := new(rleBuffer)
w := NewWriter(buf)
- f, err := w.Create("huge.txt")
+ f, err := w.CreateHeader(&FileHeader{
+ Name: "huge.txt",
+ Method: Store,
+ })
if err != nil {
t.Fatal(err)
}
- chunk := make([]byte, 1024)
+ f.(*fileWriter).crc32 = fakeHash32{}
+ chunk := make([]byte, chunkSize)
for i := range chunk {
chunk[i] = '.'
}
- chunk[len(chunk)-1] = '\n'
- end := []byte("END\n")
- for i := 0; i < (1<<32)/1024; i++ {
+ for i := 0; i < chunks; i++ {
_, err := f.Write(chunk)
if err != nil {
t.Fatal("write chunk:", err)
}
}
+ end := []byte("END\n")
_, err = f.Write(end)
if err != nil {
t.Fatal("write end:", err)
}
// read back zip file and check that we get to the end of it
- r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+ r, err := NewReader(buf, int64(buf.Size()))
if err != nil {
t.Fatal("reader:", err)
}
if err != nil {
t.Fatal("opening:", err)
}
- for i := 0; i < (1<<32)/1024; i++ {
+ rc.(*checksumReader).hash = fakeHash32{}
+ for i := 0; i < chunks; i++ {
_, err := io.ReadFull(rc, chunk)
if err != nil {
t.Fatal("read:", err)
if err != nil {
t.Fatal("closing:", err)
}
- if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
- t.Errorf("UncompressedSize %d, want %d", got, want)
+ if size == 1<<32 {
+ if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
+ t.Errorf("UncompressedSize %d, want %d", got, want)
+ }
}
- if got, want := f0.UncompressedSize64, (1<<32)+uint64(len(end)); got != want {
+ if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want {
t.Errorf("UncompressedSize64 %d, want %d", got, want)
}
}
}
testValidHeader(&h, t)
}
+
+// Just benchmarking how fast the Zip64 test above is. Not related to
+// our zip performance, since the test above disabled CRC32 and flate.
+func BenchmarkZip64Test(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ testZip64(b, 1<<26)
+ }
+}