]> Cypherpunks repositories - gostls13.git/commitdiff
image/jpeg: move the level-shift and clip out of the idct function,
authorNigel Tao <nigeltao@golang.org>
Sun, 7 Oct 2012 00:32:02 +0000 (11:32 +1100)
committerNigel Tao <nigeltao@golang.org>
Sun, 7 Oct 2012 00:32:02 +0000 (11:32 +1100)
to be consistent with the fdct function, and to ease any future
idct rewrites in assembly.

The BenchmarkIDCT delta is obviously just an accounting change and not
a real saving, but it does give an indication of what proportion of
time was spent in the actual IDCT and what proportion was in shift and
clip. The idct time taken is now comparable to fdct.

The BenchmarkFDCT delta is an estimate of benchmark noise.

benchmark                   old ns/op    new ns/op    delta
BenchmarkFDCT                    3842         3837   -0.13%
BenchmarkIDCT                    5611         3478  -38.01%
BenchmarkDecodeRGBOpaque      2932785      2929751   -0.10%

R=r
CC=golang-dev
https://golang.org/cl/6625057

src/pkg/image/jpeg/dct_test.go
src/pkg/image/jpeg/idct.go
src/pkg/image/jpeg/reader.go

index c7d7cfe55ced45966fc7ae84a5bc3420c306a1b1..770e274bac0123a8c7fbf272918154386ea4c2d0 100644 (file)
@@ -12,7 +12,7 @@ import (
        "testing"
 )
 
-func BenchmarkFDCT(b *testing.B) {
+func benchmarkDCT(b *testing.B, f func(*block)) {
        b.StopTimer()
        blocks := make([]block, 0, b.N*len(testBlocks))
        for i := 0; i < b.N; i++ {
@@ -20,21 +20,16 @@ func BenchmarkFDCT(b *testing.B) {
        }
        b.StartTimer()
        for i := range blocks {
-               fdct(&blocks[i])
+               f(&blocks[i])
        }
 }
 
+func BenchmarkFDCT(b *testing.B) {
+       benchmarkDCT(b, fdct)
+}
+
 func BenchmarkIDCT(b *testing.B) {
-       b.StopTimer()
-       dummy := make([]byte, 64)
-       blocks := make([]block, 0, b.N*len(testBlocks))
-       for i := 0; i < b.N; i++ {
-               blocks = append(blocks, testBlocks[:]...)
-       }
-       b.StartTimer()
-       for i := range blocks {
-               idct(dummy, 8, &blocks[i])
-       }
+       benchmarkDCT(b, idct)
 }
 
 func TestDCT(t *testing.T) {
@@ -85,10 +80,9 @@ func TestDCT(t *testing.T) {
        }
 
        // Check that the optimized and slow IDCT implementations agree.
-       dummy := make([]byte, 64)
        for i, b := range blocks {
                got, want := b, b
-               idct(dummy, 8, &got)
+               idct(&got)
                slowIDCT(&want)
                if differ(&got, &want) {
                        t.Errorf("i=%d: IDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
index 1808bebd1a0b8b348cd05f6c62e90616d36ed5af..92ff1e4b41d3c128b93a29aa4b8f647e798eae2a 100644 (file)
@@ -59,9 +59,7 @@ const (
        r2 = 181 // 256/sqrt(2)
 )
 
-// idct performs a 2-D Inverse Discrete Cosine Transformation, followed by a
-// +128 level shift and a clip to [0, 255], writing the results to dst.
-// stride is the number of elements between successive rows of dst.
+// idct performs a 2-D Inverse Discrete Cosine Transformation.
 //
 // The input coefficients should already have been multiplied by the
 // appropriate quantization table. We use fixed-point computation, with the
@@ -71,7 +69,7 @@ const (
 // For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
 // discrete W transform and for the discrete Fourier transform", IEEE Trans. on
 // ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
-func idct(dst []byte, stride int, src *block) {
+func idct(src *block) {
        // Horizontal 1-D IDCT.
        for y := 0; y < 8; y++ {
                y8 := y * 8
@@ -191,21 +189,4 @@ func idct(dst []byte, stride int, src *block) {
                src[8*6+x] = (y3 - y2) >> 14
                src[8*7+x] = (y7 - y1) >> 14
        }
-
-       // Level shift by +128, clip to [0, 255], and write to dst.
-       for y := 0; y < 8; y++ {
-               y8 := y * 8
-               yStride := y * stride
-               for x := 0; x < 8; x++ {
-                       c := src[y8+x]
-                       if c < -128 {
-                               c = 0
-                       } else if c > 127 {
-                               c = 255
-                       } else {
-                               c += 128
-                       }
-                       dst[yStride+x] = uint8(c)
-               }
-       }
 }
index bdafd5143ef262b4073fa6b3e8d11738b777a509..263ef45aac35ea097524c0c767b8265d700e1067 100644 (file)
@@ -309,8 +309,10 @@ func (d *decoder) processSOS(n int) error {
                                        }
 
                                        // Perform the inverse DCT and store the MCU component to the image.
+                                       idct(&b)
+                                       dst, stride := []byte(nil), 0
                                        if d.nComp == nGrayComponent {
-                                               idct(d.img1.Pix[8*(my*d.img1.Stride+mx):], d.img1.Stride, &b)
+                                               dst, stride = d.img1.Pix[8*(my*d.img1.Stride+mx):], d.img1.Stride
                                        } else {
                                                switch i {
                                                case 0:
@@ -321,11 +323,27 @@ func (d *decoder) processSOS(n int) error {
                                                                mx0 += j % 2
                                                                my0 += j / 2
                                                        }
-                                                       idct(d.img3.Y[8*(my0*d.img3.YStride+mx0):], d.img3.YStride, &b)
+                                                       dst, stride = d.img3.Y[8*(my0*d.img3.YStride+mx0):], d.img3.YStride
                                                case 1:
-                                                       idct(d.img3.Cb[8*(my*d.img3.CStride+mx):], d.img3.CStride, &b)
+                                                       dst, stride = d.img3.Cb[8*(my*d.img3.CStride+mx):], d.img3.CStride
                                                case 2:
-                                                       idct(d.img3.Cr[8*(my*d.img3.CStride+mx):], d.img3.CStride, &b)
+                                                       dst, stride = d.img3.Cr[8*(my*d.img3.CStride+mx):], d.img3.CStride
+                                               }
+                                       }
+                                       // Level shift by +128, clip to [0, 255], and write to dst.
+                                       for y := 0; y < 8; y++ {
+                                               y8 := y * 8
+                                               yStride := y * stride
+                                               for x := 0; x < 8; x++ {
+                                                       c := b[y8+x]
+                                                       if c < -128 {
+                                                               c = 0
+                                                       } else if c > 127 {
+                                                               c = 255
+                                                       } else {
+                                                               c += 128
+                                                       }
+                                                       dst[yStride+x] = uint8(c)
                                                }
                                        }
                                } // for j