From 4de15a5cdaf94b9e2269fb79008c8c862f355d2a Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Tue, 1 May 2012 10:51:34 +1000 Subject: [PATCH] compress/flate: optimize history-copy decoding. The forwardCopy function could be re-written in asm, and the copyHuff method could probably be rolled into huffmanBlock and copyHist, but I'm leaving those changes for future CLs. compress/flate benchmarks: benchmark old ns/op new ns/op delta BenchmarkDecoderBestSpeed1K 385327 435140 +12.93% BenchmarkDecoderBestSpeed10K 1245190 1062112 -14.70% BenchmarkDecoderBestSpeed100K 8512365 5833680 -31.47% BenchmarkDecoderDefaultCompression1K 382225 421301 +10.22% BenchmarkDecoderDefaultCompression10K 867950 613890 -29.27% BenchmarkDecoderDefaultCompression100K 5658240 2466726 -56.40% BenchmarkDecoderBestCompression1K 383760 421634 +9.87% BenchmarkDecoderBestCompression10K 867743 614671 -29.16% BenchmarkDecoderBestCompression100K 5660160 2464996 -56.45% image/png benchmarks: benchmark old ns/op new ns/op delta BenchmarkDecodeGray 2540834 2389624 -5.95% BenchmarkDecodeNRGBAGradient 10052700 9534565 -5.15% BenchmarkDecodeNRGBAOpaque 8704710 8163430 -6.22% BenchmarkDecodePaletted 1458779 1325017 -9.17% BenchmarkDecodeRGB 7183606 6794668 -5.41% Wall time for Denis Cheremisov's PNG-decoding program given in https://groups.google.com/group/golang-nuts/browse_thread/thread/22aa8a05040fdd49 Before: 3.07s After: 2.32s Delta: -24% Before profile: Total: 304 samples 159 52.3% 52.3% 251 82.6% compress/flate.(*decompressor).huffmanBlock 58 19.1% 71.4% 76 25.0% compress/flate.(*decompressor).huffSym 32 10.5% 81.9% 32 10.5% hash/adler32.update 16 5.3% 87.2% 22 7.2% bufio.(*Reader).ReadByte 16 5.3% 92.4% 37 12.2% compress/flate.(*decompressor).moreBits 7 2.3% 94.7% 7 2.3% hash/crc32.update 7 2.3% 97.0% 7 2.3% runtime.memmove 5 1.6% 98.7% 5 1.6% scanblock 2 0.7% 99.3% 9 3.0% runtime.copy 1 0.3% 99.7% 1 0.3% compress/flate.(*huffmanDecoder).init After profile: Total: 230 samples 59 25.7% 25.7% 70 30.4% compress/flate.(*decompressor).huffSym 45 19.6% 45.2% 45 19.6% hash/adler32.update 35 15.2% 60.4% 35 15.2% compress/flate.forwardCopy 20 8.7% 69.1% 151 65.7% compress/flate.(*decompressor).huffmanBlock 16 7.0% 76.1% 24 10.4% compress/flate.(*decompressor).moreBits 15 6.5% 82.6% 15 6.5% runtime.memmove 11 4.8% 87.4% 50 21.7% compress/flate.(*decompressor).copyHist 7 3.0% 90.4% 7 3.0% hash/crc32.update 6 2.6% 93.0% 9 3.9% bufio.(*Reader).ReadByte 4 1.7% 94.8% 4 1.7% runtime.slicearray R=rsc, rogpeppe, dave CC=golang-dev, krasin https://golang.org/cl/6127064 --- src/pkg/compress/flate/copy.go | 17 ++++++++ src/pkg/compress/flate/copy_test.go | 42 ++++++++++++++++++++ src/pkg/compress/flate/inflate.go | 60 ++++++++++++++--------------- 3 files changed, 88 insertions(+), 31 deletions(-) create mode 100644 src/pkg/compress/flate/copy.go create mode 100644 src/pkg/compress/flate/copy_test.go diff --git a/src/pkg/compress/flate/copy.go b/src/pkg/compress/flate/copy.go new file mode 100644 index 0000000000..06e5d2e66d --- /dev/null +++ b/src/pkg/compress/flate/copy.go @@ -0,0 +1,17 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// forwardCopy is like the built-in copy function except that it always goes +// forward from the start, even if the dst and src overlap. +func forwardCopy(dst, src []byte) int { + if len(src) > len(dst) { + src = src[:len(dst)] + } + for i, x := range src { + dst[i] = x + } + return len(src) +} diff --git a/src/pkg/compress/flate/copy_test.go b/src/pkg/compress/flate/copy_test.go new file mode 100644 index 0000000000..d13941cf1c --- /dev/null +++ b/src/pkg/compress/flate/copy_test.go @@ -0,0 +1,42 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "testing" +) + +func TestForwardCopy(t *testing.T) { + testCases := []struct { + dst0, dst1 int + src0, src1 int + want string + }{ + {0, 9, 0, 9, "012345678"}, + {0, 5, 4, 9, "45678"}, + {4, 9, 0, 5, "01230"}, + {1, 6, 3, 8, "34567"}, + {3, 8, 1, 6, "12121"}, + {0, 9, 3, 6, "345"}, + {3, 6, 0, 9, "012"}, + {1, 6, 0, 9, "00000"}, + {0, 4, 7, 8, "7"}, + {0, 1, 6, 8, "6"}, + {4, 4, 6, 9, ""}, + {2, 8, 6, 6, ""}, + {0, 0, 0, 0, ""}, + } + for _, tc := range testCases { + b := []byte("012345678") + dst := b[tc.dst0:tc.dst1] + src := b[tc.src0:tc.src1] + n := forwardCopy(dst, src) + got := string(dst[:n]) + if got != tc.want { + t.Errorf("dst=b[%d:%d], src=b[%d:%d]: got %q, want %q", + tc.dst0, tc.dst1, tc.src0, tc.src1, got, tc.want) + } + } +} diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go index 3f2042bfe9..a4be91b6f7 100644 --- a/src/pkg/compress/flate/inflate.go +++ b/src/pkg/compress/flate/inflate.go @@ -505,51 +505,49 @@ func (f *decompressor) huffmanBlock() { return } - p := f.hp - dist - if p < 0 { - p += len(f.hist) - } - for i := 0; i < length; i++ { - f.hist[f.hp] = f.hist[p] - f.hp++ - p++ - if f.hp == len(f.hist) { - // After flush continue copying out of history. - f.copyLen = length - (i + 1) - f.copyDist = dist - f.flush((*decompressor).copyHuff) - return - } - if p == len(f.hist) { - p = 0 - } + f.copyLen, f.copyDist = length, dist + if f.copyHist() { + return } } panic("unreached") } -func (f *decompressor) copyHuff() { - length := f.copyLen - dist := f.copyDist - p := f.hp - dist +// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. +// It reports whether the f.hist buffer is full. +func (f *decompressor) copyHist() bool { + p := f.hp - f.copyDist if p < 0 { p += len(f.hist) } - for i := 0; i < length; i++ { - f.hist[f.hp] = f.hist[p] - f.hp++ - p++ + for f.copyLen > 0 { + n := f.copyLen + if x := len(f.hist) - f.hp; n > x { + n = x + } + if x := len(f.hist) - p; n > x { + n = x + } + forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n]) + p += n + f.hp += n + f.copyLen -= n if f.hp == len(f.hist) { - f.copyLen = length - (i + 1) + // After flush continue copying out of history. f.flush((*decompressor).copyHuff) - return + return true } if p == len(f.hist) { p = 0 } } + return false +} - // Continue processing Huffman block. +func (f *decompressor) copyHuff() { + if f.copyHist() { + return + } f.huffmanBlock() } @@ -584,9 +582,9 @@ func (f *decompressor) dataBlock() { f.copyData() } +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. func (f *decompressor) copyData() { - // Read f.dataLen bytes into history, - // pausing for reads as history fills. n := f.copyLen for n > 0 { m := len(f.hist) - f.hp -- 2.48.1