From 2113c9ad0d82b3d1a734c0b5fc0efc9c44a920d5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20Mo=CC=88hrmann?= Date: Tue, 18 Oct 2016 06:18:39 +0200 Subject: [PATCH] image/color: improve speed of RGBA methods MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Apply the optimizations added to color conversion functions in https://go-review.googlesource.com/#/c/21910/ to the RGBA methods. YCbCrToRGBA/0-4 6.32ns ± 3% 6.58ns ± 2% +4.15% (p=0.000 n=20+19) YCbCrToRGBA/128-4 8.02ns ± 2% 5.89ns ± 2% -26.57% (p=0.000 n=20+19) YCbCrToRGBA/255-4 8.06ns ± 2% 6.59ns ± 3% -18.18% (p=0.000 n=20+20) NYCbCrAToRGBA/0-4 8.71ns ± 2% 8.78ns ± 2% +0.86% (p=0.036 n=19+20) NYCbCrAToRGBA/128-4 10.3ns ± 4% 7.9ns ± 2% -23.44% (p=0.000 n=20+20) NYCbCrAToRGBA/255-4 9.64ns ± 2% 8.79ns ± 3% -8.80% (p=0.000 n=20+20) Fixes: #15260 Change-Id: I225efdf74603e8d2b4f063054f7baee7a5029de6 Reviewed-on: https://go-review.googlesource.com/31773 Run-TryBot: Martin Möhrmann TryBot-Result: Gobot Gobot Reviewed-by: Nigel Tao --- src/image/color/ycbcr.go | 89 +++++++++++++++++++++++------------ src/image/color/ycbcr_test.go | 63 ++++++++++++++++++++++--- 2 files changed, 115 insertions(+), 37 deletions(-) diff --git a/src/image/color/ycbcr.go b/src/image/color/ycbcr.go index c3658b0623..18d1a568aa 100644 --- a/src/image/color/ycbcr.go +++ b/src/image/color/ycbcr.go @@ -139,24 +139,39 @@ func (c YCbCr) RGBA() (uint32, uint32, uint32, uint32) { yy1 := int32(c.Y) * 0x10100 // Convert 0x12 to 0x121200. cb1 := int32(c.Cb) - 128 cr1 := int32(c.Cr) - 128 - r := (yy1 + 91881*cr1) >> 8 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 8 - b := (yy1 + 116130*cb1) >> 8 - if r < 0 { - r = 0 - } else if r > 0xffff { - r = 0xffff + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 8 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = 0xffff + // } + // + // but uses fewer branches and is faster. + // The code below to compute g and b uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 8 + } else { + r = ^(r >> 31) & 0xffff } - if g < 0 { - g = 0 - } else if g > 0xffff { - g = 0xffff + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 8 + } else { + g = ^(g >> 31) & 0xffff } - if b < 0 { - b = 0 - } else if b > 0xffff { - b = 0xffff + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 8 + } else { + b = ^(b >> 31) & 0xffff } + return uint32(r), uint32(g), uint32(b), 0xffff } @@ -184,23 +199,37 @@ func (c NYCbCrA) RGBA() (uint32, uint32, uint32, uint32) { yy1 := int32(c.Y) * 0x10100 // Convert 0x12 to 0x121200. cb1 := int32(c.Cb) - 128 cr1 := int32(c.Cr) - 128 - r := (yy1 + 91881*cr1) >> 8 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 8 - b := (yy1 + 116130*cb1) >> 8 - if r < 0 { - r = 0 - } else if r > 0xffff { - r = 0xffff + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 8 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = 0xffff + // } + // + // but uses fewer branches and is faster. + // The code below to compute g and b uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 8 + } else { + r = ^(r >> 31) & 0xffff } - if g < 0 { - g = 0 - } else if g > 0xffff { - g = 0xffff + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 8 + } else { + g = ^(g >> 31) & 0xffff } - if b < 0 { - b = 0 - } else if b > 0xffff { - b = 0xffff + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 8 + } else { + b = ^(b >> 31) & 0xffff } // The second part of this method applies the alpha. diff --git a/src/image/color/ycbcr_test.go b/src/image/color/ycbcr_test.go index 561699f4e0..85c1b984a6 100644 --- a/src/image/color/ycbcr_test.go +++ b/src/image/color/ycbcr_test.go @@ -172,7 +172,8 @@ func TestPalette(t *testing.T) { } } -var sink uint8 +var sink8 uint8 +var sink32 uint32 func BenchmarkYCbCrToRGB(b *testing.B) { // YCbCrToRGB does saturating arithmetic. @@ -180,17 +181,17 @@ func BenchmarkYCbCrToRGB(b *testing.B) { // different paths through the generated code. b.Run("0", func(b *testing.B) { for i := 0; i < b.N; i++ { - sink, sink, sink = YCbCrToRGB(0, 0, 0) + sink8, sink8, sink8 = YCbCrToRGB(0, 0, 0) } }) b.Run("128", func(b *testing.B) { for i := 0; i < b.N; i++ { - sink, sink, sink = YCbCrToRGB(128, 128, 128) + sink8, sink8, sink8 = YCbCrToRGB(128, 128, 128) } }) b.Run("255", func(b *testing.B) { for i := 0; i < b.N; i++ { - sink, sink, sink = YCbCrToRGB(255, 255, 255) + sink8, sink8, sink8 = YCbCrToRGB(255, 255, 255) } }) } @@ -201,17 +202,65 @@ func BenchmarkRGBToYCbCr(b *testing.B) { // through the generated code. b.Run("0", func(b *testing.B) { for i := 0; i < b.N; i++ { - sink, sink, sink = RGBToYCbCr(0, 0, 0) + sink8, sink8, sink8 = RGBToYCbCr(0, 0, 0) } }) b.Run("Cb", func(b *testing.B) { for i := 0; i < b.N; i++ { - sink, sink, sink = RGBToYCbCr(0, 0, 255) + sink8, sink8, sink8 = RGBToYCbCr(0, 0, 255) } }) b.Run("Cr", func(b *testing.B) { for i := 0; i < b.N; i++ { - sink, sink, sink = RGBToYCbCr(255, 0, 0) + sink8, sink8, sink8 = RGBToYCbCr(255, 0, 0) + } + }) +} + +func BenchmarkYCbCrToRGBA(b *testing.B) { + // RGB does saturating arithmetic. + // Low, middle, and high values can take + // different paths through the generated code. + b.Run("0", func(b *testing.B) { + c := YCbCr{0, 0, 0} + for i := 0; i < b.N; i++ { + sink32, sink32, sink32, sink32 = c.RGBA() + } + }) + b.Run("128", func(b *testing.B) { + c := YCbCr{128, 128, 128} + for i := 0; i < b.N; i++ { + sink32, sink32, sink32, sink32 = c.RGBA() + } + }) + b.Run("255", func(b *testing.B) { + c := YCbCr{255, 255, 255} + for i := 0; i < b.N; i++ { + sink32, sink32, sink32, sink32 = c.RGBA() + } + }) +} + +func BenchmarkNYCbCrAToRGBA(b *testing.B) { + // RGBA does saturating arithmetic. + // Low, middle, and high values can take + // different paths through the generated code. + b.Run("0", func(b *testing.B) { + c := NYCbCrA{YCbCr{0, 0, 0}, 0xff} + for i := 0; i < b.N; i++ { + sink32, sink32, sink32, sink32 = c.RGBA() + } + }) + b.Run("128", func(b *testing.B) { + c := NYCbCrA{YCbCr{128, 128, 128}, 0xff} + for i := 0; i < b.N; i++ { + sink32, sink32, sink32, sink32 = c.RGBA() + } + }) + b.Run("255", func(b *testing.B) { + c := NYCbCrA{YCbCr{255, 255, 255}, 0xff} + for i := 0; i < b.N; i++ { + sink32, sink32, sink32, sink32 = c.RGBA() } }) } -- 2.48.1