From f0c5b8b9c9c7900033ddb11b584da6198d599454 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20Mo=CC=88hrmann?= Date: Tue, 12 Apr 2016 21:16:27 +0200 Subject: [PATCH] image/color: optimize YCbCrToRGB MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Use one comparison to detect underflow and overflow simultaneously. Use a shift, bitwise complement and uint8 type conversion to handle clamping to upper and lower bound without additional branching. Overall the new code is faster for a mix of common case, underflow and overflow. name old time/op new time/op delta YCbCr-2 1.12ms ± 0% 0.64ms ± 0% -43.01% (p=0.000 n=48+47) name old time/op new time/op delta YCbCrToRGB/0-2 5.52ns ± 0% 5.77ns ± 0% +4.48% (p=0.000 n=50+49) YCbCrToRGB/128-2 6.05ns ± 0% 5.52ns ± 0% -8.69% (p=0.000 n=39+50) YCbCrToRGB/255-2 5.80ns ± 0% 5.77ns ± 0% -0.58% (p=0.000 n=50+49) Found in collaboration with Josh Bleecher Snyder and Ralph Corderoy. Change-Id: Ic5020320f704966f545fdc1ae6bc24ddb5d3d09a Reviewed-on: https://go-review.googlesource.com/21910 Reviewed-by: Josh Bleecher Snyder Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot --- src/image/color/ycbcr.go | 49 ++++--- src/image/internal/imageutil/gen.go | 48 ++++--- src/image/internal/imageutil/impl.go | 192 ++++++++++++++++++--------- 3 files changed, 193 insertions(+), 96 deletions(-) diff --git a/src/image/color/ycbcr.go b/src/image/color/ycbcr.go index 904434f6a3..d2c5b569a7 100644 --- a/src/image/color/ycbcr.go +++ b/src/image/color/ycbcr.go @@ -44,27 +44,44 @@ func YCbCrToRGB(y, cb, cr uint8) (uint8, uint8, uint8) { // B = Y' + 1.77200*(Cb-128) // http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'. - yy1 := int32(y) * 0x10100 // Convert 0x12 to 0x121200. + yy1 := int32(y) * 0x010100 // Convert 0x12 to 0x121200. cb1 := int32(cb) - 128 cr1 := int32(cr) - 128 - r := (yy1 + 91881*cr1) >> 16 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 - b := (yy1 + 116130*cb1) >> 16 - if r < 0 { - r = 0 - } else if r > 0xff { - r = 0xff + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 16 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = ^int32(0) + // } + // + // but uses fewer branches and is faster. + // Note that the uint8 type conversion in the return + // statement will convert ^int32(0) to 0xff. + // The code below to compute b and g uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 16 + } else { + r = ^(r >> 31) } - if g < 0 { - g = 0 - } else if g > 0xff { - g = 0xff + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 16 + } else { + b = ^(b >> 31) } - if b < 0 { - b = 0 - } else if b > 0xff { - b = 0xff + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 16 + } else { + g = ^(g >> 31) } + return uint8(r), uint8(g), uint8(b) } diff --git a/src/image/internal/imageutil/gen.go b/src/image/internal/imageutil/gen.go index fc1e707f0f..6f8d2b2f5d 100644 --- a/src/image/internal/imageutil/gen.go +++ b/src/image/internal/imageutil/gen.go @@ -95,26 +95,42 @@ const sratioCase = ` %s // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. + yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200. cb1 := int32(src.Cb[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128 - r := (yy1 + 91881*cr1) >> 16 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 - b := (yy1 + 116130*cb1) >> 16 - if r < 0 { - r = 0 - } else if r > 255 { - r = 255 + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 16 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = ^int32(0) + // } + // + // but uses fewer branches and is faster. + // Note that the uint8 type conversion in the return + // statement will convert ^int32(0) to 0xff. + // The code below to compute b and g uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 16 + } else { + r = ^(r >> 31) } - if g < 0 { - g = 0 - } else if g > 255 { - g = 255 + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 16 + } else { + b = ^(b >> 31) } - if b < 0 { - b = 0 - } else if b > 255 { - b = 255 + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 16 + } else { + g = ^(g >> 31) } dpix[x+0] = uint8(r) diff --git a/src/image/internal/imageutil/impl.go b/src/image/internal/imageutil/impl.go index fd7826d4a9..0993f3145c 100644 --- a/src/image/internal/imageutil/impl.go +++ b/src/image/internal/imageutil/impl.go @@ -44,26 +44,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 { // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. + yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200. cb1 := int32(src.Cb[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128 - r := (yy1 + 91881*cr1) >> 16 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 - b := (yy1 + 116130*cb1) >> 16 - if r < 0 { - r = 0 - } else if r > 255 { - r = 255 + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 16 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = ^int32(0) + // } + // + // but uses fewer branches and is faster. + // Note that the uint8 type conversion in the return + // statement will convert ^int32(0) to 0xff. + // The code below to compute b and g uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 16 + } else { + r = ^(r >> 31) } - if g < 0 { - g = 0 - } else if g > 255 { - g = 255 + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 16 + } else { + b = ^(b >> 31) } - if b < 0 { - b = 0 - } else if b > 255 { - b = 255 + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 16 + } else { + g = ^(g >> 31) } dpix[x+0] = uint8(r) @@ -83,26 +99,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po ci := ciBase + sx/2 // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. + yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200. cb1 := int32(src.Cb[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128 - r := (yy1 + 91881*cr1) >> 16 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 - b := (yy1 + 116130*cb1) >> 16 - if r < 0 { - r = 0 - } else if r > 255 { - r = 255 + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 16 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = ^int32(0) + // } + // + // but uses fewer branches and is faster. + // Note that the uint8 type conversion in the return + // statement will convert ^int32(0) to 0xff. + // The code below to compute b and g uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 16 + } else { + r = ^(r >> 31) } - if g < 0 { - g = 0 - } else if g > 255 { - g = 255 + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 16 + } else { + b = ^(b >> 31) } - if b < 0 { - b = 0 - } else if b > 255 { - b = 255 + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 16 + } else { + g = ^(g >> 31) } dpix[x+0] = uint8(r) @@ -122,26 +154,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po ci := ciBase + sx/2 // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. + yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200. cb1 := int32(src.Cb[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128 - r := (yy1 + 91881*cr1) >> 16 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 - b := (yy1 + 116130*cb1) >> 16 - if r < 0 { - r = 0 - } else if r > 255 { - r = 255 + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 16 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = ^int32(0) + // } + // + // but uses fewer branches and is faster. + // Note that the uint8 type conversion in the return + // statement will convert ^int32(0) to 0xff. + // The code below to compute b and g uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 16 + } else { + r = ^(r >> 31) } - if g < 0 { - g = 0 - } else if g > 255 { - g = 255 + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 16 + } else { + b = ^(b >> 31) } - if b < 0 { - b = 0 - } else if b > 255 { - b = 255 + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 16 + } else { + g = ^(g >> 31) } dpix[x+0] = uint8(r) @@ -160,26 +208,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 { // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. + yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200. cb1 := int32(src.Cb[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128 - r := (yy1 + 91881*cr1) >> 16 - g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 - b := (yy1 + 116130*cb1) >> 16 - if r < 0 { - r = 0 - } else if r > 255 { - r = 255 + + // The bit twiddling below is equivalent to + // + // r := (yy1 + 91881*cr1) >> 16 + // if r < 0 { + // r = 0 + // } else if r > 0xff { + // r = ^int32(0) + // } + // + // but uses fewer branches and is faster. + // Note that the uint8 type conversion in the return + // statement will convert ^int32(0) to 0xff. + // The code below to compute b and g uses a similar pattern. + r := yy1 + 91881*cr1 + if uint32(r)&0xff000000 == 0 { + r >>= 16 + } else { + r = ^(r >> 31) } - if g < 0 { - g = 0 - } else if g > 255 { - g = 255 + + b := yy1 + 116130*cb1 + if uint32(b)&0xff000000 == 0 { + b >>= 16 + } else { + b = ^(b >> 31) } - if b < 0 { - b = 0 - } else if b > 255 { - b = 255 + + g := yy1 - 22554*cb1 - 46802*cr1 + if uint32(g)&0xff000000 == 0 { + g >>= 16 + } else { + g = ^(g >> 31) } dpix[x+0] = uint8(r) -- 2.48.1