Avoids a lot of redundant bounds checks.
R=nigeltao, rsc
CC=golang-dev
https://golang.org/cl/2678041
                                cr[0][3*x+3] = uint8(b >> 8)
                        }
                case cbP8:
-                       for x := b.Min.X; x < b.Max.X; x++ {
-                               cr[0][x+1] = paletted.ColorIndexAt(x, y)
-                       }
+                       rowOffset := y * paletted.Stride
+                       copy(cr[0][b.Min.X+1:], paletted.Pix[rowOffset+b.Min.X:rowOffset+b.Max.X])
                case cbTCA8:
                        // Convert from image.Image (which is alpha-premultiplied) to PNG's non-alpha-premultiplied.
                        for x := b.Min.X; x < b.Max.X; x++ {
 
 package png
 
 import (
+       "bytes"
        "fmt"
        "image"
        "io"
                }
        }
 }
+
+func BenchmarkEncodePaletted(b *testing.B) {
+       b.StopTimer()
+       img := image.NewPaletted(640, 480,
+               []image.Color{
+                       image.RGBAColor{0, 0, 0, 255},
+                       image.RGBAColor{255, 255, 255, 255},
+               })
+       b.StartTimer()
+       buffer := new(bytes.Buffer)
+       for i := 0; i < b.N; i++ {
+               buffer.Reset()
+               Encode(buffer, img)
+       }
+}