]> Cypherpunks repositories - gostls13.git/commitdiff
compress/flate: optimize history-copy decoding.
authorNigel Tao <nigeltao@golang.org>
Tue, 1 May 2012 00:51:34 +0000 (10:51 +1000)
committerNigel Tao <nigeltao@golang.org>
Tue, 1 May 2012 00:51:34 +0000 (10:51 +1000)
The forwardCopy function could be re-written in asm, and the copyHuff
method could probably be rolled into huffmanBlock and copyHist, but
I'm leaving those changes for future CLs.

compress/flate benchmarks:
benchmark                                 old ns/op    new ns/op    delta
BenchmarkDecoderBestSpeed1K                  385327       435140  +12.93%
BenchmarkDecoderBestSpeed10K                1245190      1062112  -14.70%
BenchmarkDecoderBestSpeed100K               8512365      5833680  -31.47%
BenchmarkDecoderDefaultCompression1K         382225       421301  +10.22%
BenchmarkDecoderDefaultCompression10K        867950       613890  -29.27%
BenchmarkDecoderDefaultCompression100K      5658240      2466726  -56.40%
BenchmarkDecoderBestCompression1K            383760       421634   +9.87%
BenchmarkDecoderBestCompression10K           867743       614671  -29.16%
BenchmarkDecoderBestCompression100K         5660160      2464996  -56.45%

image/png benchmarks:
benchmark                       old ns/op    new ns/op    delta
BenchmarkDecodeGray               2540834      2389624   -5.95%
BenchmarkDecodeNRGBAGradient     10052700      9534565   -5.15%
BenchmarkDecodeNRGBAOpaque        8704710      8163430   -6.22%
BenchmarkDecodePaletted           1458779      1325017   -9.17%
BenchmarkDecodeRGB                7183606      6794668   -5.41%

Wall time for Denis Cheremisov's PNG-decoding program given in
https://groups.google.com/group/golang-nuts/browse_thread/thread/22aa8a05040fdd49
Before: 3.07s
After:  2.32s
Delta:  -24%

Before profile:
Total: 304 samples
         159  52.3%  52.3%      251  82.6% compress/flate.(*decompressor).huffmanBlock
          58  19.1%  71.4%       76  25.0% compress/flate.(*decompressor).huffSym
          32  10.5%  81.9%       32  10.5% hash/adler32.update
          16   5.3%  87.2%       22   7.2% bufio.(*Reader).ReadByte
          16   5.3%  92.4%       37  12.2% compress/flate.(*decompressor).moreBits
           7   2.3%  94.7%        7   2.3% hash/crc32.update
           7   2.3%  97.0%        7   2.3% runtime.memmove
           5   1.6%  98.7%        5   1.6% scanblock
           2   0.7%  99.3%        9   3.0% runtime.copy
           1   0.3%  99.7%        1   0.3% compress/flate.(*huffmanDecoder).init

After profile:
Total: 230 samples
          59  25.7%  25.7%       70  30.4% compress/flate.(*decompressor).huffSym
          45  19.6%  45.2%       45  19.6% hash/adler32.update
          35  15.2%  60.4%       35  15.2% compress/flate.forwardCopy
          20   8.7%  69.1%      151  65.7% compress/flate.(*decompressor).huffmanBlock
          16   7.0%  76.1%       24  10.4% compress/flate.(*decompressor).moreBits
          15   6.5%  82.6%       15   6.5% runtime.memmove
          11   4.8%  87.4%       50  21.7% compress/flate.(*decompressor).copyHist
           7   3.0%  90.4%        7   3.0% hash/crc32.update
           6   2.6%  93.0%        9   3.9% bufio.(*Reader).ReadByte
           4   1.7%  94.8%        4   1.7% runtime.slicearray

R=rsc, rogpeppe, dave
CC=golang-dev, krasin
https://golang.org/cl/6127064

src/pkg/compress/flate/copy.go [new file with mode: 0644]
src/pkg/compress/flate/copy_test.go [new file with mode: 0644]
src/pkg/compress/flate/inflate.go

diff --git a/src/pkg/compress/flate/copy.go b/src/pkg/compress/flate/copy.go
new file mode 100644 (file)
index 0000000..06e5d2e
--- /dev/null
@@ -0,0 +1,17 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// forwardCopy is like the built-in copy function except that it always goes
+// forward from the start, even if the dst and src overlap.
+func forwardCopy(dst, src []byte) int {
+       if len(src) > len(dst) {
+               src = src[:len(dst)]
+       }
+       for i, x := range src {
+               dst[i] = x
+       }
+       return len(src)
+}
diff --git a/src/pkg/compress/flate/copy_test.go b/src/pkg/compress/flate/copy_test.go
new file mode 100644 (file)
index 0000000..d13941c
--- /dev/null
@@ -0,0 +1,42 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+       "testing"
+)
+
+func TestForwardCopy(t *testing.T) {
+       testCases := []struct {
+               dst0, dst1 int
+               src0, src1 int
+               want       string
+       }{
+               {0, 9, 0, 9, "012345678"},
+               {0, 5, 4, 9, "45678"},
+               {4, 9, 0, 5, "01230"},
+               {1, 6, 3, 8, "34567"},
+               {3, 8, 1, 6, "12121"},
+               {0, 9, 3, 6, "345"},
+               {3, 6, 0, 9, "012"},
+               {1, 6, 0, 9, "00000"},
+               {0, 4, 7, 8, "7"},
+               {0, 1, 6, 8, "6"},
+               {4, 4, 6, 9, ""},
+               {2, 8, 6, 6, ""},
+               {0, 0, 0, 0, ""},
+       }
+       for _, tc := range testCases {
+               b := []byte("012345678")
+               dst := b[tc.dst0:tc.dst1]
+               src := b[tc.src0:tc.src1]
+               n := forwardCopy(dst, src)
+               got := string(dst[:n])
+               if got != tc.want {
+                       t.Errorf("dst=b[%d:%d], src=b[%d:%d]: got %q, want %q",
+                               tc.dst0, tc.dst1, tc.src0, tc.src1, got, tc.want)
+               }
+       }
+}
index 3f2042bfe92fd9f90addc256f7512c338ddcd6bb..a4be91b6f78821ceed1ebf2d49f219de38b44e31 100644 (file)
@@ -505,51 +505,49 @@ func (f *decompressor) huffmanBlock() {
                        return
                }
 
-               p := f.hp - dist
-               if p < 0 {
-                       p += len(f.hist)
-               }
-               for i := 0; i < length; i++ {
-                       f.hist[f.hp] = f.hist[p]
-                       f.hp++
-                       p++
-                       if f.hp == len(f.hist) {
-                               // After flush continue copying out of history.
-                               f.copyLen = length - (i + 1)
-                               f.copyDist = dist
-                               f.flush((*decompressor).copyHuff)
-                               return
-                       }
-                       if p == len(f.hist) {
-                               p = 0
-                       }
+               f.copyLen, f.copyDist = length, dist
+               if f.copyHist() {
+                       return
                }
        }
        panic("unreached")
 }
 
-func (f *decompressor) copyHuff() {
-       length := f.copyLen
-       dist := f.copyDist
-       p := f.hp - dist
+// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
+// It reports whether the f.hist buffer is full.
+func (f *decompressor) copyHist() bool {
+       p := f.hp - f.copyDist
        if p < 0 {
                p += len(f.hist)
        }
-       for i := 0; i < length; i++ {
-               f.hist[f.hp] = f.hist[p]
-               f.hp++
-               p++
+       for f.copyLen > 0 {
+               n := f.copyLen
+               if x := len(f.hist) - f.hp; n > x {
+                       n = x
+               }
+               if x := len(f.hist) - p; n > x {
+                       n = x
+               }
+               forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n])
+               p += n
+               f.hp += n
+               f.copyLen -= n
                if f.hp == len(f.hist) {
-                       f.copyLen = length - (i + 1)
+                       // After flush continue copying out of history.
                        f.flush((*decompressor).copyHuff)
-                       return
+                       return true
                }
                if p == len(f.hist) {
                        p = 0
                }
        }
+       return false
+}
 
-       // Continue processing Huffman block.
+func (f *decompressor) copyHuff() {
+       if f.copyHist() {
+               return
+       }
        f.huffmanBlock()
 }
 
@@ -584,9 +582,9 @@ func (f *decompressor) dataBlock() {
        f.copyData()
 }
 
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
 func (f *decompressor) copyData() {
-       // Read f.dataLen bytes into history,
-       // pausing for reads as history fills.
        n := f.copyLen
        for n > 0 {
                m := len(f.hist) - f.hp