The forwardCopy function could be re-written in asm, and the copyHuff
method could probably be rolled into huffmanBlock and copyHist, but
I'm leaving those changes for future CLs.
compress/flate benchmarks:
benchmark old ns/op new ns/op delta
BenchmarkDecoderBestSpeed1K 385327 435140 +12.93%
BenchmarkDecoderBestSpeed10K
1245190 1062112 -14.70%
BenchmarkDecoderBestSpeed100K
8512365 5833680 -31.47%
BenchmarkDecoderDefaultCompression1K 382225 421301 +10.22%
BenchmarkDecoderDefaultCompression10K 867950 613890 -29.27%
BenchmarkDecoderDefaultCompression100K
5658240 2466726 -56.40%
BenchmarkDecoderBestCompression1K 383760 421634 +9.87%
BenchmarkDecoderBestCompression10K 867743 614671 -29.16%
BenchmarkDecoderBestCompression100K
5660160 2464996 -56.45%
image/png benchmarks:
benchmark old ns/op new ns/op delta
BenchmarkDecodeGray
2540834 2389624 -5.95%
BenchmarkDecodeNRGBAGradient
10052700 9534565 -5.15%
BenchmarkDecodeNRGBAOpaque
8704710 8163430 -6.22%
BenchmarkDecodePaletted
1458779 1325017 -9.17%
BenchmarkDecodeRGB
7183606 6794668 -5.41%
Wall time for Denis Cheremisov's PNG-decoding program given in
https://groups.google.com/group/golang-nuts/browse_thread/thread/
22aa8a05040fdd49
Before: 3.07s
After: 2.32s
Delta: -24%
Before profile:
Total: 304 samples
159 52.3% 52.3% 251 82.6% compress/flate.(*decompressor).huffmanBlock
58 19.1% 71.4% 76 25.0% compress/flate.(*decompressor).huffSym
32 10.5% 81.9% 32 10.5% hash/adler32.update
16 5.3% 87.2% 22 7.2% bufio.(*Reader).ReadByte
16 5.3% 92.4% 37 12.2% compress/flate.(*decompressor).moreBits
7 2.3% 94.7% 7 2.3% hash/crc32.update
7 2.3% 97.0% 7 2.3% runtime.memmove
5 1.6% 98.7% 5 1.6% scanblock
2 0.7% 99.3% 9 3.0% runtime.copy
1 0.3% 99.7% 1 0.3% compress/flate.(*huffmanDecoder).init
After profile:
Total: 230 samples
59 25.7% 25.7% 70 30.4% compress/flate.(*decompressor).huffSym
45 19.6% 45.2% 45 19.6% hash/adler32.update
35 15.2% 60.4% 35 15.2% compress/flate.forwardCopy
20 8.7% 69.1% 151 65.7% compress/flate.(*decompressor).huffmanBlock
16 7.0% 76.1% 24 10.4% compress/flate.(*decompressor).moreBits
15 6.5% 82.6% 15 6.5% runtime.memmove
11 4.8% 87.4% 50 21.7% compress/flate.(*decompressor).copyHist
7 3.0% 90.4% 7 3.0% hash/crc32.update
6 2.6% 93.0% 9 3.9% bufio.(*Reader).ReadByte
4 1.7% 94.8% 4 1.7% runtime.slicearray
R=rsc, rogpeppe, dave
CC=golang-dev, krasin
https://golang.org/cl/
6127064
--- /dev/null
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// forwardCopy is like the built-in copy function except that it always goes
+// forward from the start, even if the dst and src overlap.
+func forwardCopy(dst, src []byte) int {
+ if len(src) > len(dst) {
+ src = src[:len(dst)]
+ }
+ for i, x := range src {
+ dst[i] = x
+ }
+ return len(src)
+}
--- /dev/null
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "testing"
+)
+
+func TestForwardCopy(t *testing.T) {
+ testCases := []struct {
+ dst0, dst1 int
+ src0, src1 int
+ want string
+ }{
+ {0, 9, 0, 9, "012345678"},
+ {0, 5, 4, 9, "45678"},
+ {4, 9, 0, 5, "01230"},
+ {1, 6, 3, 8, "34567"},
+ {3, 8, 1, 6, "12121"},
+ {0, 9, 3, 6, "345"},
+ {3, 6, 0, 9, "012"},
+ {1, 6, 0, 9, "00000"},
+ {0, 4, 7, 8, "7"},
+ {0, 1, 6, 8, "6"},
+ {4, 4, 6, 9, ""},
+ {2, 8, 6, 6, ""},
+ {0, 0, 0, 0, ""},
+ }
+ for _, tc := range testCases {
+ b := []byte("012345678")
+ dst := b[tc.dst0:tc.dst1]
+ src := b[tc.src0:tc.src1]
+ n := forwardCopy(dst, src)
+ got := string(dst[:n])
+ if got != tc.want {
+ t.Errorf("dst=b[%d:%d], src=b[%d:%d]: got %q, want %q",
+ tc.dst0, tc.dst1, tc.src0, tc.src1, got, tc.want)
+ }
+ }
+}
return
}
- p := f.hp - dist
- if p < 0 {
- p += len(f.hist)
- }
- for i := 0; i < length; i++ {
- f.hist[f.hp] = f.hist[p]
- f.hp++
- p++
- if f.hp == len(f.hist) {
- // After flush continue copying out of history.
- f.copyLen = length - (i + 1)
- f.copyDist = dist
- f.flush((*decompressor).copyHuff)
- return
- }
- if p == len(f.hist) {
- p = 0
- }
+ f.copyLen, f.copyDist = length, dist
+ if f.copyHist() {
+ return
}
}
panic("unreached")
}
-func (f *decompressor) copyHuff() {
- length := f.copyLen
- dist := f.copyDist
- p := f.hp - dist
+// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
+// It reports whether the f.hist buffer is full.
+func (f *decompressor) copyHist() bool {
+ p := f.hp - f.copyDist
if p < 0 {
p += len(f.hist)
}
- for i := 0; i < length; i++ {
- f.hist[f.hp] = f.hist[p]
- f.hp++
- p++
+ for f.copyLen > 0 {
+ n := f.copyLen
+ if x := len(f.hist) - f.hp; n > x {
+ n = x
+ }
+ if x := len(f.hist) - p; n > x {
+ n = x
+ }
+ forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n])
+ p += n
+ f.hp += n
+ f.copyLen -= n
if f.hp == len(f.hist) {
- f.copyLen = length - (i + 1)
+ // After flush continue copying out of history.
f.flush((*decompressor).copyHuff)
- return
+ return true
}
if p == len(f.hist) {
p = 0
}
}
+ return false
+}
- // Continue processing Huffman block.
+func (f *decompressor) copyHuff() {
+ if f.copyHist() {
+ return
+ }
f.huffmanBlock()
}
f.copyData()
}
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
- // Read f.dataLen bytes into history,
- // pausing for reads as history fills.
n := f.copyLen
for n > 0 {
m := len(f.hist) - f.hp