From 12e343f372c03b6fec9d5da6cd83833c79812bc9 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Sun, 7 Oct 2012 10:21:17 +1100 Subject: [PATCH] image/jpeg: add DCT tests, do a small optimization (common sub-expression elimination) in idct.go. benchmark old ns/op new ns/op delta BenchmarkIDCT 5649 5610 -0.69% BenchmarkDecodeRGBOpaque 2948607 2941051 -0.26% The "type block" declaration moved so that idct.go is compilable as a stand-alone file: "go tool 6g -S idct.go" works. R=r CC=golang-dev https://golang.org/cl/6619056 --- src/pkg/image/jpeg/dct_test.go | 297 ++++++++++++++++++++++++++++++ src/pkg/image/jpeg/idct.go | 65 ++++--- src/pkg/image/jpeg/reader.go | 4 - src/pkg/image/jpeg/writer_test.go | 17 ++ 4 files changed, 350 insertions(+), 33 deletions(-) create mode 100644 src/pkg/image/jpeg/dct_test.go diff --git a/src/pkg/image/jpeg/dct_test.go b/src/pkg/image/jpeg/dct_test.go new file mode 100644 index 0000000000..c7d7cfe55c --- /dev/null +++ b/src/pkg/image/jpeg/dct_test.go @@ -0,0 +1,297 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jpeg + +import ( + "bytes" + "fmt" + "math" + "math/rand" + "testing" +) + +func BenchmarkFDCT(b *testing.B) { + b.StopTimer() + blocks := make([]block, 0, b.N*len(testBlocks)) + for i := 0; i < b.N; i++ { + blocks = append(blocks, testBlocks[:]...) + } + b.StartTimer() + for i := range blocks { + fdct(&blocks[i]) + } +} + +func BenchmarkIDCT(b *testing.B) { + b.StopTimer() + dummy := make([]byte, 64) + blocks := make([]block, 0, b.N*len(testBlocks)) + for i := 0; i < b.N; i++ { + blocks = append(blocks, testBlocks[:]...) + } + b.StartTimer() + for i := range blocks { + idct(dummy, 8, &blocks[i]) + } +} + +func TestDCT(t *testing.T) { + blocks := make([]block, len(testBlocks)) + copy(blocks, testBlocks[:]) + + // Append some randomly generated blocks of varying sparseness. + r := rand.New(rand.NewSource(123)) + for i := 0; i < 100; i++ { + b := block{} + n := r.Int() % 64 + for j := 0; j < n; j++ { + b[r.Int()%len(b)] = r.Int() % 256 + } + blocks = append(blocks, b) + } + + // Check that the FDCT and IDCT functions are inverses, after a scale and + // level shift. Scaling reduces the rounding errors in the conversion from + // floats to ints. + for i, b := range blocks { + got, want := b, b + for j := range got { + got[j] = (got[j] - 128) * 8 + } + slowFDCT(&got) + slowIDCT(&got) + for j := range got { + got[j] = got[j]/8 + 128 + } + if differ(&got, &want) { + t.Errorf("i=%d: IDCT(FDCT)\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want) + } + } + + // Check that the optimized and slow FDCT implementations agree. + // The fdct function already does a scale and level shift. + for i, b := range blocks { + got, want := b, b + fdct(&got) + for j := range want { + want[j] = (want[j] - 128) * 8 + } + slowFDCT(&want) + if differ(&got, &want) { + t.Errorf("i=%d: FDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want) + } + } + + // Check that the optimized and slow IDCT implementations agree. + dummy := make([]byte, 64) + for i, b := range blocks { + got, want := b, b + idct(dummy, 8, &got) + slowIDCT(&want) + if differ(&got, &want) { + t.Errorf("i=%d: IDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want) + } + } +} + +// differ returns whether any pair-wise elements in b0 and b1 differ by 2 or +// more. That tolerance is because there isn't a single definitive decoding of +// a given JPEG image, even before the YCbCr to RGB conversion; implementations +// can have different IDCT rounding errors. +func differ(b0, b1 *block) bool { + for i := range b0 { + delta := b0[i] - b1[i] + if delta < -2 || +2 < delta { + return true + } + } + return false +} + +// alpha returns 1 if i is 0 and returns √2 otherwise. +func alpha(i int) float64 { + if i == 0 { + return 1 + } + return math.Sqrt2 +} + +// slowFDCT performs the 8*8 2-dimensional forward discrete cosine transform: +// +// dst[u,v] = (1/8) * Σ_x Σ_y alpha(u) * alpha(v) * src[x,y] * +// cos((π/2) * (2*x + 1) * u / 8) * +// cos((π/2) * (2*y + 1) * v / 8) +// +// x and y are in pixel space, and u and v are in transform space. +// +// b acts as both dst and src. +func slowFDCT(b *block) { + var dst [blockSize]float64 + for v := 0; v < 8; v++ { + for u := 0; u < 8; u++ { + sum := 0.0 + for y := 0; y < 8; y++ { + for x := 0; x < 8; x++ { + sum += alpha(u) * alpha(v) * float64(b[8*y+x]) * + math.Cos(math.Pi*float64((2*x+1)*u)/16) * + math.Cos(math.Pi*float64((2*y+1)*v)/16) + } + } + dst[8*v+u] = sum / 8 + } + } + // Convert from float64 to int. + for i := range dst { + b[i] = int(dst[i] + 0.5) + } +} + +// slowIDCT performs the 8*8 2-dimensional inverse discrete cosine transform: +// +// dst[x,y] = (1/8) * Σ_u Σ_v alpha(u) * alpha(v) * src[u,v] * +// cos((π/2) * (2*x + 1) * u / 8) * +// cos((π/2) * (2*y + 1) * v / 8) +// +// x and y are in pixel space, and u and v are in transform space. +// +// b acts as both dst and src. +func slowIDCT(b *block) { + var dst [blockSize]float64 + for y := 0; y < 8; y++ { + for x := 0; x < 8; x++ { + sum := 0.0 + for v := 0; v < 8; v++ { + for u := 0; u < 8; u++ { + sum += alpha(u) * alpha(v) * float64(b[8*v+u]) * + math.Cos(math.Pi*float64((2*x+1)*u)/16) * + math.Cos(math.Pi*float64((2*y+1)*v)/16) + } + } + dst[8*y+x] = sum / 8 + } + } + // Convert from float64 to int. + for i := range dst { + b[i] = int(dst[i] + 0.5) + } +} + +func (b *block) String() string { + s := bytes.NewBuffer(nil) + fmt.Fprintf(s, "{\n") + for y := 0; y < 8; y++ { + fmt.Fprintf(s, "\t") + for x := 0; x < 8; x++ { + fmt.Fprintf(s, "0x%04x, ", uint16(b[8*y+x])) + } + fmt.Fprintln(s) + } + fmt.Fprintf(s, "}") + return s.String() +} + +// testBlocks are the first 10 pre-IDCT blocks from ../testdata/video-001.jpeg. +var testBlocks = [10]block{ + { + 0x7f, 0xf6, 0x01, 0x07, 0xff, 0x00, 0x00, 0x00, + 0xf5, 0x01, 0xfa, 0x01, 0xfe, 0x00, 0x01, 0x00, + 0x05, 0x05, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0xff, 0xf8, 0x00, 0x01, 0xff, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, 0x00, 0xff, 0xff, 0x00, + 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x01, 0xff, 0x01, 0x00, 0xfe, + }, + { + 0x29, 0x07, 0x00, 0xfc, 0x01, 0x01, 0x00, 0x00, + 0x07, 0x00, 0x03, 0x00, 0x01, 0x00, 0xff, 0xff, + 0xff, 0xfd, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x00, 0xff, 0x01, 0x00, 0x00, + 0x01, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x01, 0xfa, 0x01, 0x00, 0x01, 0x00, 0x01, 0xff, + 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x02, + }, + { + 0xc5, 0xfa, 0x01, 0x00, 0x00, 0x01, 0x00, 0xff, + 0x02, 0xff, 0x01, 0x00, 0x01, 0x00, 0xff, 0x00, + 0xff, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, + 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, + 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + { + 0x86, 0x05, 0x00, 0x02, 0x00, 0x00, 0x01, 0x00, + 0xf2, 0x06, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, + 0xf6, 0xfa, 0xf9, 0x00, 0xff, 0x01, 0x00, 0x00, + 0xf9, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, + 0xff, 0x00, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00, + 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x01, 0xff, 0x01, 0x00, 0xff, 0x00, 0x00, + }, + { + 0x24, 0xfe, 0x00, 0xff, 0x00, 0xff, 0xff, 0x00, + 0x08, 0xfd, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00, + 0x06, 0x03, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x01, 0xff, 0x00, 0x01, 0x00, 0x00, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x01, + }, + { + 0xcd, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xff, + }, + { + 0x81, 0xfe, 0x05, 0xff, 0x01, 0xff, 0x01, 0x00, + 0xef, 0xf9, 0x00, 0xf9, 0x00, 0xff, 0x00, 0xff, + 0x05, 0xf9, 0x00, 0xf8, 0x01, 0xff, 0x01, 0xff, + 0x00, 0xff, 0x07, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x01, + 0xff, 0x01, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, + }, + { + 0x28, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0b, 0x02, 0x01, 0x03, 0x00, 0xff, 0x00, 0x01, + 0xfe, 0x02, 0x01, 0x03, 0xff, 0x00, 0x00, 0x00, + 0x01, 0x00, 0xfd, 0x00, 0x01, 0x00, 0xff, 0x00, + 0x01, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0x01, 0x01, 0x00, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x01, + }, + { + 0xdf, 0xf9, 0xfe, 0x00, 0x03, 0x01, 0xff, 0xff, + 0x04, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0xff, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xfe, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x00, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + }, + { + 0x88, 0xfd, 0x00, 0x00, 0xff, 0x00, 0x01, 0xff, + 0xe1, 0x06, 0x06, 0x01, 0xff, 0x00, 0x01, 0x00, + 0x08, 0x00, 0xfa, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x08, 0x01, 0x00, 0xff, 0x01, 0xff, 0x00, 0x00, + 0xf5, 0xff, 0x00, 0x01, 0xff, 0x01, 0x01, 0x00, + 0xff, 0xff, 0x01, 0xff, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x01, 0x01, 0xff, 0x00, 0xff, 0x00, 0x01, + 0x02, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x00, + }, +} diff --git a/src/pkg/image/jpeg/idct.go b/src/pkg/image/jpeg/idct.go index b387dfdffd..1808bebd1a 100644 --- a/src/pkg/image/jpeg/idct.go +++ b/src/pkg/image/jpeg/idct.go @@ -37,6 +37,10 @@ package jpeg * */ +const blockSize = 64 // A DCT block is 8x8. + +type block [blockSize]int + const ( w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16) w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16) @@ -70,30 +74,31 @@ const ( func idct(dst []byte, stride int, src *block) { // Horizontal 1-D IDCT. for y := 0; y < 8; y++ { + y8 := y * 8 // If all the AC components are zero, then the IDCT is trivial. - if src[y*8+1] == 0 && src[y*8+2] == 0 && src[y*8+3] == 0 && - src[y*8+4] == 0 && src[y*8+5] == 0 && src[y*8+6] == 0 && src[y*8+7] == 0 { - dc := src[y*8+0] << 3 - src[y*8+0] = dc - src[y*8+1] = dc - src[y*8+2] = dc - src[y*8+3] = dc - src[y*8+4] = dc - src[y*8+5] = dc - src[y*8+6] = dc - src[y*8+7] = dc + if src[y8+1] == 0 && src[y8+2] == 0 && src[y8+3] == 0 && + src[y8+4] == 0 && src[y8+5] == 0 && src[y8+6] == 0 && src[y8+7] == 0 { + dc := src[y8+0] << 3 + src[y8+0] = dc + src[y8+1] = dc + src[y8+2] = dc + src[y8+3] = dc + src[y8+4] = dc + src[y8+5] = dc + src[y8+6] = dc + src[y8+7] = dc continue } // Prescale. - x0 := (src[y*8+0] << 11) + 128 - x1 := src[y*8+4] << 11 - x2 := src[y*8+6] - x3 := src[y*8+2] - x4 := src[y*8+1] - x5 := src[y*8+7] - x6 := src[y*8+5] - x7 := src[y*8+3] + x0 := (src[y8+0] << 11) + 128 + x1 := src[y8+4] << 11 + x2 := src[y8+6] + x3 := src[y8+2] + x4 := src[y8+1] + x5 := src[y8+7] + x6 := src[y8+5] + x7 := src[y8+3] // Stage 1. x8 := w7 * (x4 + x5) @@ -123,14 +128,14 @@ func idct(dst []byte, stride int, src *block) { x4 = (r2*(x4-x5) + 128) >> 8 // Stage 4. - src[8*y+0] = (x7 + x1) >> 8 - src[8*y+1] = (x3 + x2) >> 8 - src[8*y+2] = (x0 + x4) >> 8 - src[8*y+3] = (x8 + x6) >> 8 - src[8*y+4] = (x8 - x6) >> 8 - src[8*y+5] = (x0 - x4) >> 8 - src[8*y+6] = (x3 - x2) >> 8 - src[8*y+7] = (x7 - x1) >> 8 + src[y8+0] = (x7 + x1) >> 8 + src[y8+1] = (x3 + x2) >> 8 + src[y8+2] = (x0 + x4) >> 8 + src[y8+3] = (x8 + x6) >> 8 + src[y8+4] = (x8 - x6) >> 8 + src[y8+5] = (x0 - x4) >> 8 + src[y8+6] = (x3 - x2) >> 8 + src[y8+7] = (x7 - x1) >> 8 } // Vertical 1-D IDCT. @@ -189,8 +194,10 @@ func idct(dst []byte, stride int, src *block) { // Level shift by +128, clip to [0, 255], and write to dst. for y := 0; y < 8; y++ { + y8 := y * 8 + yStride := y * stride for x := 0; x < 8; x++ { - c := src[y*8+x] + c := src[y8+x] if c < -128 { c = 0 } else if c > 127 { @@ -198,7 +205,7 @@ func idct(dst []byte, stride int, src *block) { } else { c += 128 } - dst[y*stride+x] = uint8(c) + dst[yStride+x] = uint8(c) } } } diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go index 5ed142a6c6..bdafd5143e 100644 --- a/src/pkg/image/jpeg/reader.go +++ b/src/pkg/image/jpeg/reader.go @@ -35,11 +35,7 @@ type component struct { tq uint8 // Quantization table destination selector. } -type block [blockSize]int - const ( - blockSize = 64 // A DCT block is 8x8. - dcTable = 0 acTable = 1 maxTc = 1 diff --git a/src/pkg/image/jpeg/writer_test.go b/src/pkg/image/jpeg/writer_test.go index 8732df8459..c070db00ad 100644 --- a/src/pkg/image/jpeg/writer_test.go +++ b/src/pkg/image/jpeg/writer_test.go @@ -171,6 +171,23 @@ func TestWriter(t *testing.T) { } } +func BenchmarkDecodeRGBOpaque(b *testing.B) { + b.StopTimer() + data, err := ioutil.ReadFile("../testdata/video-001.jpeg") + if err != nil { + b.Fatal(err) + } + cfg, err := DecodeConfig(bytes.NewReader(data)) + if err != nil { + b.Fatal(err) + } + b.SetBytes(int64(cfg.Width * cfg.Height * 4)) + b.StartTimer() + for i := 0; i < b.N; i++ { + Decode(bytes.NewReader(data)) + } +} + func BenchmarkEncodeRGBOpaque(b *testing.B) { b.StopTimer() img := image.NewRGBA(image.Rect(0, 0, 640, 480)) -- 2.48.1