From: Maksim Meshkov Date: Wed, 24 May 2023 21:18:15 +0000 (+0000) Subject: compress/flate, archive/zip: reduce memory allocations X-Git-Tag: go1.21rc1~254 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=08458804fb6591397fe1c58f4e04fd490e70fbcb;p=gostls13.git compress/flate, archive/zip: reduce memory allocations The existing implementation allocates a new 4KB buffer each time it opens flate-encoded file in a zip archive. This commit allows the flate reader to reuse the buffer on call Reset instead of allocating a new one. It is noticeable when a zip archive contains a huge amount of files, e.g. zip archive has 50_000 files, for each file 4KB buffer is allocated, so it is 200MB memory allocations. If files are read sequentially only one buffer is needed. Fixes #59774 Change-Id: Ib16336b101ba58e8f0f30a45dc5fd4eeebc801a1 GitHub-Last-Rev: f3f395b2ad95b7ad7ce9df6f5e49c7b6a0627627 GitHub-Pull-Request: golang/go#59775 Reviewed-on: https://go-review.googlesource.com/c/go/+/487675 Run-TryBot: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor Reviewed-by: Matthew Dempsky --- diff --git a/src/archive/zip/zip_test.go b/src/archive/zip/zip_test.go index a4b952efcc..7d1de07c98 100644 --- a/src/archive/zip/zip_test.go +++ b/src/archive/zip/zip_test.go @@ -30,7 +30,7 @@ func TestOver65kFiles(t *testing.T) { for i := 0; i < nFiles; i++ { _, err := w.CreateHeader(&FileHeader{ Name: fmt.Sprintf("%d.dat", i), - Method: Store, // avoid Issue 6136 and Issue 6138 + Method: Store, // Deflate is too slow when it is compiled with -race flag }) if err != nil { t.Fatalf("creating file %d: %v", i, err) diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go index 7efd4477ed..d7375f2f1f 100644 --- a/src/compress/flate/inflate.go +++ b/src/compress/flate/inflate.go @@ -267,6 +267,7 @@ type Reader interface { type decompressor struct { // Input source. r Reader + rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader roffset int64 // Input bits, in top of b. @@ -746,11 +747,20 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { } } -func makeReader(r io.Reader) Reader { +func (f *decompressor) makeReader(r io.Reader) { if rr, ok := r.(Reader); ok { - return rr + f.rBuf = nil + f.r = rr + return + } + // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. + if f.rBuf != nil { + f.rBuf.Reset(r) + } else { + // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. + f.rBuf = bufio.NewReader(r) } - return bufio.NewReader(r) + f.r = f.rBuf } func fixedHuffmanDecoderInit() { @@ -775,12 +785,13 @@ func fixedHuffmanDecoderInit() { func (f *decompressor) Reset(r io.Reader, dict []byte) error { *f = decompressor{ - r: makeReader(r), + rBuf: f.rBuf, bits: f.bits, codebits: f.codebits, dict: f.dict, step: (*decompressor).nextBlock, } + f.makeReader(r) f.dict.init(maxMatchOffset, dict) return nil } @@ -797,7 +808,7 @@ func NewReader(r io.Reader) io.ReadCloser { fixedHuffmanDecoderInit() var f decompressor - f.r = makeReader(r) + f.makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock @@ -816,7 +827,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { fixedHuffmanDecoderInit() var f decompressor - f.r = makeReader(r) + f.makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock diff --git a/src/compress/flate/inflate_test.go b/src/compress/flate/inflate_test.go index 9575be1cf2..28a0122ac6 100644 --- a/src/compress/flate/inflate_test.go +++ b/src/compress/flate/inflate_test.go @@ -5,6 +5,7 @@ package flate import ( + "bufio" "bytes" "io" "strings" @@ -95,3 +96,42 @@ func TestResetDict(t *testing.T) { } } } + +func TestReaderReusesReaderBuffer(t *testing.T) { + encodedReader := bytes.NewReader([]byte{}) + encodedNotByteReader := struct{ io.Reader }{encodedReader} + + t.Run("BufferIsReused", func(t *testing.T) { + f := NewReader(encodedNotByteReader).(*decompressor) + bufioR, ok := f.r.(*bufio.Reader) + if !ok { + t.Fatalf("bufio.Reader should be created") + } + f.Reset(encodedNotByteReader, nil) + if bufioR != f.r { + t.Fatalf("bufio.Reader was not reused") + } + }) + t.Run("BufferIsNotReusedWhenGotByteReader", func(t *testing.T) { + f := NewReader(encodedNotByteReader).(*decompressor) + if _, ok := f.r.(*bufio.Reader); !ok { + t.Fatalf("bufio.Reader should be created") + } + f.Reset(encodedReader, nil) + if f.r != encodedReader { + t.Fatalf("provided io.ByteReader should be used directly") + } + }) + t.Run("BufferIsCreatedAfterByteReader", func(t *testing.T) { + for i, r := range []io.Reader{encodedReader, bufio.NewReader(encodedReader)} { + f := NewReader(r).(*decompressor) + if f.r != r { + t.Fatalf("provided io.ByteReader should be used directly, i=%d", i) + } + f.Reset(encodedNotByteReader, nil) + if _, ok := f.r.(*bufio.Reader); !ok { + t.Fatalf("bufio.Reader should be created, i=%d", i) + } + } + }) +}