TARG=compress/gzip
GOFILES=\
gunzip.go\
+ gzip.go\
include ../../../Make.pkg
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// The gzip package implements reading (and eventually writing) of
+// The gzip package implements reading and writing of
// gzip format compressed files, as specified in RFC 1952.
package gzip
"os"
)
+// BUG(nigeltao): Comments and Names don't properly map UTF-8 character codes outside of
+// the 0x00-0x7f range to ISO 8859-1 (Latin-1).
+
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
var HeaderError os.Error = os.ErrorString("invalid gzip header")
var ChecksumError os.Error = os.ErrorString("gzip checksum error")
+// The gzip file stores a header giving metadata about the compressed file.
+// That header is exposed as the fields of the Deflater and Inflater structs.
+type Header struct {
+ Comment string // comment
+ Extra []byte // "extra data"
+ Mtime uint32 // modification time (seconds since January 1, 1970)
+ Name string // file name
+ OS byte // operating system type
+}
+
// An Inflater is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
-// The gzip file stores a header giving metadata about the compressed file.
-// That header is exposed as the fields of the Inflater struct.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Inflater
// returned by Read as tentative until they receive the successful
// (zero length, nil error) Read marking the end of the data.
type Inflater struct {
- Comment string // comment
- Extra []byte // "extra data"
- Mtime uint32 // modification time (seconds since January 1, 1970)
- Name string // file name
- OS byte // operating system type
-
+ Header
r flate.Reader
inflater io.ReadCloser
digest hash.Hash32
flg byte
buf [512]byte
err os.Error
- eof bool
}
// NewInflater creates a new Inflater reading the given reader.
return "", err
}
if z.buf[i] == 0 {
+ // GZIP (RFC 1952) specifies that strings are null-terminated ISO 8859-1 (Latin-1).
+ // TODO(nigeltao): Convert from ISO 8859-1 (Latin-1) to UTF-8.
return string(z.buf[0:i]), nil
}
}
}
func (z *Inflater) read2() (uint32, os.Error) {
- _, err := z.r.Read(z.buf[0:2])
+ _, err := io.ReadFull(z.r, z.buf[0:2])
if err != nil {
return 0, err
}
if z.err != nil {
return 0, z.err
}
- if z.eof || len(p) == 0 {
+ if len(p) == 0 {
return 0, nil
}
"testing"
)
-type gzipTest struct {
+type gunzipTest struct {
name string
desc string
raw string
err os.Error
}
-var gzipTests = []gzipTest{
- gzipTest{ // has 1 empty fixed-huffman block
+var gunzipTests = []gunzipTest{
+ gunzipTest{ // has 1 empty fixed-huffman block
"empty.txt",
"empty.txt",
"",
},
nil,
},
- gzipTest{ // has 1 non-empty fixed huffman block
+ gunzipTest{ // has 1 non-empty fixed huffman block
"hello.txt",
"hello.txt",
"hello world\n",
},
nil,
},
- gzipTest{ // concatenation
+ gunzipTest{ // concatenation
"hello.txt",
"hello.txt x2",
"hello world\n" +
},
nil,
},
- gzipTest{ // has a fixed huffman block with some length-distance pairs
+ gunzipTest{ // has a fixed huffman block with some length-distance pairs
"shesells.txt",
"shesells.txt",
"she sells seashells by the seashore\n",
},
nil,
},
- gzipTest{ // has dynamic huffman blocks
+ gunzipTest{ // has dynamic huffman blocks
"gettysburg",
"gettysburg",
" Four score and seven years ago our fathers brought forth on\n" +
},
nil,
},
- gzipTest{ // has 1 non-empty fixed huffman block then garbage
+ gunzipTest{ // has 1 non-empty fixed huffman block then garbage
"hello.txt",
"hello.txt + garbage",
"hello world\n",
},
HeaderError,
},
- gzipTest{ // has 1 non-empty fixed huffman block not enough header
+ gunzipTest{ // has 1 non-empty fixed huffman block not enough header
"hello.txt",
"hello.txt + garbage",
"hello world\n",
},
io.ErrUnexpectedEOF,
},
- gzipTest{ // has 1 non-empty fixed huffman block but corrupt checksum
+ gunzipTest{ // has 1 non-empty fixed huffman block but corrupt checksum
"hello.txt",
"hello.txt + corrupt checksum",
"hello world\n",
},
ChecksumError,
},
- gzipTest{ // has 1 non-empty fixed huffman block but corrupt size
+ gunzipTest{ // has 1 non-empty fixed huffman block but corrupt size
"hello.txt",
"hello.txt + corrupt size",
"hello world\n",
func TestInflater(t *testing.T) {
b := new(bytes.Buffer)
- for _, tt := range gzipTests {
+ for _, tt := range gunzipTests {
in := bytes.NewBuffer(tt.gzip)
gzip, err := NewInflater(in)
if err != nil {
--- /dev/null
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "compress/flate"
+ "hash"
+ "hash/crc32"
+ "io"
+ "os"
+)
+
+// These constants are copied from the flate package, so that code that imports
+// "compress/gzip" does not also have to import "compress/flate".
+const (
+ NoCompression = flate.NoCompression
+ BestSpeed = flate.BestSpeed
+ BestCompression = flate.BestCompression
+ DefaultCompression = flate.DefaultCompression
+)
+
+// A Deflater is an io.WriteCloser that satisfies writes by compressing data written
+// to its wrapped io.Writer.
+type Deflater struct {
+ Header
+ w io.Writer
+ level int
+ deflater io.WriteCloser
+ digest hash.Hash32
+ size uint32
+ closed bool
+ buf [10]byte
+ err os.Error
+}
+
+// NewDeflater calls NewDeflaterLevel with the default compression level.
+func NewDeflater(w io.Writer) (*Deflater, os.Error) {
+ return NewDeflaterLevel(w, DefaultCompression)
+}
+
+// NewDeflaterLevel creates a new Deflater writing to the given writer.
+// Writes may be buffered and not flushed until Close.
+// Callers that wish to set the fields in Deflater.Header must
+// do so before the first call to Write or Close.
+// It is the caller's responsibility to call Close on the WriteCloser when done.
+// level is the compression level, which can be DefaultCompression, NoCompression,
+// or any integer value between BestSpeed and BestCompression (inclusive).
+func NewDeflaterLevel(w io.Writer, level int) (*Deflater, os.Error) {
+ z := new(Deflater)
+ z.OS = 255 // unknown
+ z.w = w
+ z.level = level
+ z.digest = crc32.NewIEEE()
+ return z, nil
+}
+
+// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
+func put2(p []byte, v uint16) {
+ p[0] = uint8(v >> 0)
+ p[1] = uint8(v >> 8)
+}
+
+func put4(p []byte, v uint32) {
+ p[0] = uint8(v >> 0)
+ p[1] = uint8(v >> 8)
+ p[2] = uint8(v >> 16)
+ p[3] = uint8(v >> 24)
+}
+
+// writeBytes writes a length-prefixed byte slice to z.w.
+func (z *Deflater) writeBytes(b []byte) os.Error {
+ if len(b) > 0xffff {
+ return os.NewError("gzip.Write: Extra data is too large")
+ }
+ put2(z.buf[0:2], uint16(len(b)))
+ _, err := z.w.Write(z.buf[0:2])
+ if err != nil {
+ return err
+ }
+ _, err = z.w.Write(b)
+ return err
+}
+
+// writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w.
+func (z *Deflater) writeString(s string) os.Error {
+ // GZIP (RFC 1952) specifies that strings are null-terminated ISO 8859-1 (Latin-1).
+ // TODO(nigeltao): Convert from UTF-8 to ISO 8859-1 (Latin-1).
+ for _, v := range s {
+ if v > 0x7f {
+ return os.NewError("gzip.Write: Comment/Name character code was outside the 0x00-0x7f range")
+ }
+ }
+ _, err := io.WriteString(z.w, s)
+ if err != nil {
+ return err
+ }
+ // GZIP strings are NUL-terminated.
+ z.buf[0] = 0
+ _, err = z.w.Write(z.buf[0:1])
+ return err
+}
+
+func (z *Deflater) Write(p []byte) (int, os.Error) {
+ if z.err != nil {
+ return 0, z.err
+ }
+ var n int
+ // Write the GZIP header lazily.
+ if z.deflater == nil {
+ z.buf[0] = gzipID1
+ z.buf[1] = gzipID2
+ z.buf[2] = gzipDeflate
+ z.buf[3] = 0
+ if z.Extra != nil {
+ z.buf[3] |= 0x04
+ }
+ if z.Name != "" {
+ z.buf[3] |= 0x08
+ }
+ if z.Comment != "" {
+ z.buf[3] |= 0x10
+ }
+ put4(z.buf[4:8], z.Mtime)
+ if z.level == BestCompression {
+ z.buf[8] = 2
+ } else if z.level == BestSpeed {
+ z.buf[8] = 4
+ } else {
+ z.buf[8] = 0
+ }
+ z.buf[9] = z.OS
+ n, z.err = z.w.Write(z.buf[0:10])
+ if z.err != nil {
+ return n, z.err
+ }
+ if z.Extra != nil {
+ z.err = z.writeBytes(z.Extra)
+ if z.err != nil {
+ return n, z.err
+ }
+ }
+ if z.Name != "" {
+ z.err = z.writeString(z.Name)
+ if z.err != nil {
+ return n, z.err
+ }
+ }
+ if z.Comment != "" {
+ z.err = z.writeString(z.Comment)
+ if z.err != nil {
+ return n, z.err
+ }
+ }
+ z.deflater = flate.NewDeflater(z.w, z.level)
+ }
+ z.size += uint32(len(p))
+ z.digest.Write(p)
+ n, z.err = z.deflater.Write(p)
+ return n, z.err
+}
+
+// Calling Close does not close the wrapped io.Writer originally passed to NewDeflater.
+func (z *Deflater) Close() os.Error {
+ if z.err != nil {
+ return z.err
+ }
+ if z.closed {
+ return nil
+ }
+ z.closed = true
+ if z.deflater == nil {
+ z.Write(nil)
+ if z.err != nil {
+ return z.err
+ }
+ }
+ z.err = z.deflater.Close()
+ if z.err != nil {
+ return z.err
+ }
+ put4(z.buf[0:4], z.digest.Sum32())
+ put4(z.buf[4:8], z.size)
+ _, z.err = z.w.Write(z.buf[0:8])
+ return z.err
+}
--- /dev/null
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "io"
+ "io/ioutil"
+ "strings"
+ "testing"
+)
+
+// Tests that gzipping and then gunzipping is the identity function.
+func TestWriter(t *testing.T) {
+ // Set up the Pipe to do the gzip and gunzip.
+ piper, pipew := io.Pipe()
+ defer piper.Close()
+ go func() {
+ defer pipew.Close()
+ deflater, err := NewDeflater(pipew)
+ if err != nil {
+ t.Errorf("%v", err)
+ return
+ }
+ defer deflater.Close()
+ deflater.Comment = "comment"
+ deflater.Extra = strings.Bytes("extra")
+ deflater.Mtime = 1e8
+ deflater.Name = "name"
+ _, err = deflater.Write(strings.Bytes("payload"))
+ if err != nil {
+ t.Errorf("%v", err)
+ return
+ }
+ }()
+ inflater, err := NewInflater(piper)
+ if err != nil {
+ t.Errorf("%v", err)
+ return
+ }
+ defer inflater.Close()
+
+ // Read and compare to the original input.
+ b, err := ioutil.ReadAll(inflater)
+ if err != nil {
+ t.Errorf(": %v", err)
+ return
+ }
+ if string(b) != "payload" {
+ t.Fatalf("payload is %q, want %q", string(b), "payload")
+ }
+ if inflater.Comment != "comment" {
+ t.Fatalf("comment is %q, want %q", inflater.Comment, "comment")
+ }
+ if string(inflater.Extra) != "extra" {
+ t.Fatalf("extra is %q, want %q", inflater.Extra, "extra")
+ }
+ if inflater.Mtime != 1e8 {
+ t.Fatalf("mtime is %d, want %d", inflater.Mtime, uint32(1e8))
+ }
+ if inflater.Name != "name" {
+ t.Fatalf("name is %q, want %q", inflater.Name, "name")
+ }
+}
return NewDeflaterLevel(w, DefaultCompression)
}
-// NewDeflater creates a new io.WriteCloser that satisfies writes by compressing data written to w.
+// NewDeflaterLevel creates a new io.WriteCloser that satisfies writes by compressing data written to w.
// It is the caller's responsibility to call Close on the WriteCloser when done.
// level is the compression level, which can be DefaultCompression, NoCompression,
// or any integer value between BestSpeed and BestCompression (inclusive).