Cypherpunks repositories - gostls13.git/commitdiff
image/color: optimize YCbCrToRGB
author Martin Möhrmann <martisch@uos.de>
Tue, 12 Apr 2016 19:16:27 +0000 (21:16 +0200)
committer Martin Möhrmann <martisch@uos.de>
Tue, 12 Apr 2016 23:04:26 +0000 (23:04 +0000)
Use one comparison to detect underflow and overflow simultaneously.
Use a shift, bitwise complement and uint8 type conversion to handle
clamping to upper and lower bound without additional branching.

Overall the new code is faster for a mix of
common case, underflow and overflow.

name     old time/op  new time/op  delta
YCbCr-2  1.12ms ± 0%  0.64ms ± 0%  -43.01%  (p=0.000 n=48+47)

name              old time/op  new time/op  delta
YCbCrToRGB/0-2    5.52ns ± 0%  5.77ns ± 0%  +4.48%  (p=0.000 n=50+49)
YCbCrToRGB/128-2  6.05ns ± 0%  5.52ns ± 0%  -8.69%  (p=0.000 n=39+50)
YCbCrToRGB/255-2  5.80ns ± 0%  5.77ns ± 0%  -0.58%  (p=0.000 n=50+49)

Found in collaboration with Josh Bleecher Snyder and Ralph Corderoy.

Change-Id: Ic5020320f704966f545fdc1ae6bc24ddb5d3d09a
Reviewed-on: https://go-review.googlesource.com/21910
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/image/color/ycbcr.go
src/image/internal/imageutil/gen.go
src/image/internal/imageutil/impl.go

index 904434f6a3d8a9830c15befc4623550c737fba3f..d2c5b569a7c8d9930f4117cf16db5c4df5dec8ac 100644 (file)
@@ -44,27 +44,44 @@ func YCbCrToRGB(y, cb, cr uint8) (uint8, uint8, uint8) {
        //      B = Y' + 1.77200*(Cb-128)
        // http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
 
-       yy1 := int32(y) * 0x10100 // Convert 0x12 to 0x121200.
+       yy1 := int32(y) * 0x010100 // Convert 0x12 to 0x121200.
        cb1 := int32(cb) - 128
        cr1 := int32(cr) - 128
-       r := (yy1 + 91881*cr1) >> 16
-       g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-       b := (yy1 + 116130*cb1) >> 16
-       if r < 0 {
-               r = 0
-       } else if r > 0xff {
-               r = 0xff
+
+       // The bit twiddling below is equivalent to
+       //
+       // r := (yy1 + 91881*cr1) >> 16
+       // if r < 0 {
+       //     r = 0
+       // } else if r > 0xff {
+       //     r = ^int32(0)
+       // }
+       //
+       // but uses fewer branches and is faster.
+       // Note that the uint8 type conversion in the return
+       // statement will convert ^int32(0) to 0xff.
+       // The code below to compute b and g uses a similar pattern.
+       r := yy1 + 91881*cr1
+       if uint32(r)&0xff000000 == 0 {
+               r >>= 16
+       } else {
+               r = ^(r >> 31)
        }
-       if g < 0 {
-               g = 0
-       } else if g > 0xff {
-               g = 0xff
+
+       b := yy1 + 116130*cb1
+       if uint32(b)&0xff000000 == 0 {
+               b >>= 16
+       } else {
+               b = ^(b >> 31)
        }
-       if b < 0 {
-               b = 0
-       } else if b > 0xff {
-               b = 0xff
+
+       g := yy1 - 22554*cb1 - 46802*cr1
+       if uint32(g)&0xff000000 == 0 {
+               g >>= 16
+       } else {
+               g = ^(g >> 31)
        }
+
        return uint8(r), uint8(g), uint8(b)
 }
 
index fc1e707f0fd9f2c18a417c728db71600cab4b33b..6f8d2b2f5dc1b2900221e6d1c86d35d3fe8f269b 100644 (file)
@@ -95,26 +95,42 @@ const sratioCase = `
                        %s
 
                                // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-                               yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+                               yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
                                cb1 := int32(src.Cb[ci]) - 128
                                cr1 := int32(src.Cr[ci]) - 128
-                               r := (yy1 + 91881*cr1) >> 16
-                               g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-                               b := (yy1 + 116130*cb1) >> 16
-                               if r < 0 {
-                                       r = 0
-                               } else if r > 255 {
-                                       r = 255
+
+                               // The bit twiddling below is equivalent to
+                               //
+                               // r := (yy1 + 91881*cr1) >> 16
+                               // if r < 0 {
+                               //     r = 0
+                               // } else if r > 0xff {
+                               //     r = ^int32(0)
+                               // }
+                               //
+                               // but uses fewer branches and is faster.
+                               // Note that the uint8 type conversion in the return
+                               // statement will convert ^int32(0) to 0xff.
+                               // The code below to compute b and g uses a similar pattern.
+                               r := yy1 + 91881*cr1
+                               if uint32(r)&0xff000000 == 0 {
+                                       r >>= 16
+                               } else {
+                                       r = ^(r >> 31)
                                }
-                               if g < 0 {
-                                       g = 0
-                               } else if g > 255 {
-                                       g = 255
+
+                               b := yy1 + 116130*cb1
+                               if uint32(b)&0xff000000 == 0 {
+                                       b >>= 16
+                               } else {
+                                       b = ^(b >> 31)
                                }
-                               if b < 0 {
-                                       b = 0
-                               } else if b > 255 {
-                                       b = 255
+
+                               g := yy1 - 22554*cb1 - 46802*cr1
+                               if uint32(g)&0xff000000 == 0 {
+                                       g >>= 16
+                               } else {
+                                       g = ^(g >> 31)
                                }
 
                                dpix[x+0] = uint8(r)
index fd7826d4a972a0b34be5993717fd8f212aa34e48..0993f3145ce885416ece5174089a4b9ef460d7b9 100644 (file)
@@ -44,26 +44,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
                        for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {
 
                                // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-                               yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+                               yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
                                cb1 := int32(src.Cb[ci]) - 128
                                cr1 := int32(src.Cr[ci]) - 128
-                               r := (yy1 + 91881*cr1) >> 16
-                               g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-                               b := (yy1 + 116130*cb1) >> 16
-                               if r < 0 {
-                                       r = 0
-                               } else if r > 255 {
-                                       r = 255
+
+                               // The bit twiddling below is equivalent to
+                               //
+                               // r := (yy1 + 91881*cr1) >> 16
+                               // if r < 0 {
+                               //     r = 0
+                               // } else if r > 0xff {
+                               //     r = ^int32(0)
+                               // }
+                               //
+                               // but uses fewer branches and is faster.
+                               // Note that the uint8 type conversion in the return
+                               // statement will convert ^int32(0) to 0xff.
+                               // The code below to compute b and g uses a similar pattern.
+                               r := yy1 + 91881*cr1
+                               if uint32(r)&0xff000000 == 0 {
+                                       r >>= 16
+                               } else {
+                                       r = ^(r >> 31)
                                }
-                               if g < 0 {
-                                       g = 0
-                               } else if g > 255 {
-                                       g = 255
+
+                               b := yy1 + 116130*cb1
+                               if uint32(b)&0xff000000 == 0 {
+                                       b >>= 16
+                               } else {
+                                       b = ^(b >> 31)
                                }
-                               if b < 0 {
-                                       b = 0
-                               } else if b > 255 {
-                                       b = 255
+
+                               g := yy1 - 22554*cb1 - 46802*cr1
+                               if uint32(g)&0xff000000 == 0 {
+                                       g >>= 16
+                               } else {
+                                       g = ^(g >> 31)
                                }
 
                                dpix[x+0] = uint8(r)
@@ -83,26 +99,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
                                ci := ciBase + sx/2
 
                                // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-                               yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+                               yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
                                cb1 := int32(src.Cb[ci]) - 128
                                cr1 := int32(src.Cr[ci]) - 128
-                               r := (yy1 + 91881*cr1) >> 16
-                               g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-                               b := (yy1 + 116130*cb1) >> 16
-                               if r < 0 {
-                                       r = 0
-                               } else if r > 255 {
-                                       r = 255
+
+                               // The bit twiddling below is equivalent to
+                               //
+                               // r := (yy1 + 91881*cr1) >> 16
+                               // if r < 0 {
+                               //     r = 0
+                               // } else if r > 0xff {
+                               //     r = ^int32(0)
+                               // }
+                               //
+                               // but uses fewer branches and is faster.
+                               // Note that the uint8 type conversion in the return
+                               // statement will convert ^int32(0) to 0xff.
+                               // The code below to compute b and g uses a similar pattern.
+                               r := yy1 + 91881*cr1
+                               if uint32(r)&0xff000000 == 0 {
+                                       r >>= 16
+                               } else {
+                                       r = ^(r >> 31)
                                }
-                               if g < 0 {
-                                       g = 0
-                               } else if g > 255 {
-                                       g = 255
+
+                               b := yy1 + 116130*cb1
+                               if uint32(b)&0xff000000 == 0 {
+                                       b >>= 16
+                               } else {
+                                       b = ^(b >> 31)
                                }
-                               if b < 0 {
-                                       b = 0
-                               } else if b > 255 {
-                                       b = 255
+
+                               g := yy1 - 22554*cb1 - 46802*cr1
+                               if uint32(g)&0xff000000 == 0 {
+                                       g >>= 16
+                               } else {
+                                       g = ^(g >> 31)
                                }
 
                                dpix[x+0] = uint8(r)
@@ -122,26 +154,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
                                ci := ciBase + sx/2
 
                                // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-                               yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+                               yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
                                cb1 := int32(src.Cb[ci]) - 128
                                cr1 := int32(src.Cr[ci]) - 128
-                               r := (yy1 + 91881*cr1) >> 16
-                               g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-                               b := (yy1 + 116130*cb1) >> 16
-                               if r < 0 {
-                                       r = 0
-                               } else if r > 255 {
-                                       r = 255
+
+                               // The bit twiddling below is equivalent to
+                               //
+                               // r := (yy1 + 91881*cr1) >> 16
+                               // if r < 0 {
+                               //     r = 0
+                               // } else if r > 0xff {
+                               //     r = ^int32(0)
+                               // }
+                               //
+                               // but uses fewer branches and is faster.
+                               // Note that the uint8 type conversion in the return
+                               // statement will convert ^int32(0) to 0xff.
+                               // The code below to compute b and g uses a similar pattern.
+                               r := yy1 + 91881*cr1
+                               if uint32(r)&0xff000000 == 0 {
+                                       r >>= 16
+                               } else {
+                                       r = ^(r >> 31)
                                }
-                               if g < 0 {
-                                       g = 0
-                               } else if g > 255 {
-                                       g = 255
+
+                               b := yy1 + 116130*cb1
+                               if uint32(b)&0xff000000 == 0 {
+                                       b >>= 16
+                               } else {
+                                       b = ^(b >> 31)
                                }
-                               if b < 0 {
-                                       b = 0
-                               } else if b > 255 {
-                                       b = 255
+
+                               g := yy1 - 22554*cb1 - 46802*cr1
+                               if uint32(g)&0xff000000 == 0 {
+                                       g >>= 16
+                               } else {
+                                       g = ^(g >> 31)
                                }
 
                                dpix[x+0] = uint8(r)
@@ -160,26 +208,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
                        for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {
 
                                // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-                               yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+                               yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
                                cb1 := int32(src.Cb[ci]) - 128
                                cr1 := int32(src.Cr[ci]) - 128
-                               r := (yy1 + 91881*cr1) >> 16
-                               g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-                               b := (yy1 + 116130*cb1) >> 16
-                               if r < 0 {
-                                       r = 0
-                               } else if r > 255 {
-                                       r = 255
+
+                               // The bit twiddling below is equivalent to
+                               //
+                               // r := (yy1 + 91881*cr1) >> 16
+                               // if r < 0 {
+                               //     r = 0
+                               // } else if r > 0xff {
+                               //     r = ^int32(0)
+                               // }
+                               //
+                               // but uses fewer branches and is faster.
+                               // Note that the uint8 type conversion in the return
+                               // statement will convert ^int32(0) to 0xff.
+                               // The code below to compute b and g uses a similar pattern.
+                               r := yy1 + 91881*cr1
+                               if uint32(r)&0xff000000 == 0 {
+                                       r >>= 16
+                               } else {
+                                       r = ^(r >> 31)
                                }
-                               if g < 0 {
-                                       g = 0
-                               } else if g > 255 {
-                                       g = 255
+
+                               b := yy1 + 116130*cb1
+                               if uint32(b)&0xff000000 == 0 {
+                                       b >>= 16
+                               } else {
+                                       b = ^(b >> 31)
                                }
-                               if b < 0 {
-                                       b = 0
-                               } else if b > 255 {
-                                       b = 255
+
+                               g := yy1 - 22554*cb1 - 46802*cr1
+                               if uint32(g)&0xff000000 == 0 {
+                                       g >>= 16
+                               } else {
+                                       g = ^(g >> 31)
                                }
 
                                dpix[x+0] = uint8(r)