]> Cypherpunks repositories - gostls13.git/commitdiff
image/jpeg: change block from [64]int to [64]int32.
authorNigel Tao <nigeltao@golang.org>
Tue, 30 Oct 2012 00:10:08 +0000 (11:10 +1100)
committerNigel Tao <nigeltao@golang.org>
Tue, 30 Oct 2012 00:10:08 +0000 (11:10 +1100)
On 6g/linux:
benchmark                     old ns/op    new ns/op    delta
BenchmarkFDCT                      4606         4241   -7.92%
BenchmarkIDCT                      4187         3923   -6.31%
BenchmarkDecodeBaseline         3154864      3170224   +0.49%
BenchmarkDecodeProgressive      4072812      4017132   -1.37%
BenchmarkEncode                39406920     34596760  -12.21%

Stack requirements before (from 'go tool 6g -S'):
(scan.go:37) TEXT    (*decoder).processSOS+0(SB),$1352-32
(writer.go:448) TEXT    (*encoder).writeSOS+0(SB),$5344-24

after:
(scan.go:37) TEXT    (*decoder).processSOS+0(SB),$1064-32
(writer.go:448) TEXT    (*encoder).writeSOS+0(SB),$2520-24

Also, in encoder.writeSOS, re-use the yBlock scratch buffer for Cb and
Cr. This reduces the stack requirement slightly, but also avoids an
unlucky coincidence where a BenchmarkEncode stack split lands between
encoder.writeByte and bufio.Writer.WriteByte, which occurs very often
during Huffman encoding and is otherwise disasterous for the final
benchmark number. FWIW, the yBlock re-use *without* the s/int/int32/
change does not have a noticable effect on the benchmarks.

R=r
CC=golang-dev, rsc
https://golang.org/cl/6823043

src/pkg/image/jpeg/dct_test.go
src/pkg/image/jpeg/huffman.go
src/pkg/image/jpeg/idct.go
src/pkg/image/jpeg/reader.go
src/pkg/image/jpeg/scan.go
src/pkg/image/jpeg/writer.go

index 52d0c10dee25115ac90bb1d202d4474b100f62d3..7389f7e4fe81395e81e71a152bfd72808f6f0f6c 100644 (file)
@@ -42,7 +42,7 @@ func TestDCT(t *testing.T) {
                b := block{}
                n := r.Int() % 64
                for j := 0; j < n; j++ {
-                       b[r.Int()%len(b)] = r.Int() % 256
+                       b[r.Int()%len(b)] = r.Int31() % 256
                }
                blocks = append(blocks, b)
        }
@@ -144,9 +144,9 @@ func slowFDCT(b *block) {
                        dst[8*v+u] = sum / 8
                }
        }
-       // Convert from float64 to int.
+       // Convert from float64 to int32.
        for i := range dst {
-               b[i] = int(dst[i] + 0.5)
+               b[i] = int32(dst[i] + 0.5)
        }
 }
 
@@ -174,9 +174,9 @@ func slowIDCT(b *block) {
                        dst[8*y+x] = sum / 8
                }
        }
-       // Convert from float64 to int.
+       // Convert from float64 to int32.
        for i := range dst {
-               b[i] = int(dst[i] + 0.5)
+               b[i] = int32(dst[i] + 0.5)
        }
 }
 
index 1c598f6534e3d04c671da60ccea9c0be230fc582..9393932aafb11dc748ba8509adb2319c84161b8c 100644 (file)
@@ -61,15 +61,15 @@ func (d *decoder) ensureNBits(n int) error {
 }
 
 // The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
-func (d *decoder) receiveExtend(t uint8) (int, error) {
+func (d *decoder) receiveExtend(t uint8) (int32, error) {
        err := d.ensureNBits(int(t))
        if err != nil {
                return 0, err
        }
        d.b.n -= int(t)
        d.b.m >>= t
-       s := 1 << t
-       x := int(d.b.a>>uint8(d.b.n)) & (s - 1)
+       s := int32(1) << t
+       x := int32(d.b.a>>uint8(d.b.n)) & (s - 1)
        if x < s>>1 {
                x += ((-1) << t) + 1
        }
index 92ff1e4b41d3c128b93a29aa4b8f647e798eae2a..46fcaecb798524ab0ec288c985935522a20a5818 100644 (file)
@@ -39,7 +39,7 @@ package jpeg
 
 const blockSize = 64 // A DCT block is 8x8.
 
-type block [blockSize]int
+type block [blockSize]int32
 
 const (
        w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
index c28dc8807c5d47ff92fcaaa6646ffd80773f0017..5b1e61774ecee042a4a33a02e75454b639ad74f6 100644 (file)
@@ -187,7 +187,7 @@ func (d *decoder) processDQT(n int) error {
                        return FormatError("bad Tq value")
                }
                for i := range d.quant[tq] {
-                       d.quant[tq][i] = int(d.tmp[i+1])
+                       d.quant[tq][i] = int32(d.tmp[i+1])
                }
        }
        if n != 0 {
index fd491ed083b7db1030f9b7251ee1d7e1f5c01fd1..e3ae8ae441c6e9d3de7bba45173d6821b2a84a73 100644 (file)
@@ -86,12 +86,12 @@ func (d *decoder) processSOS(n int) error {
        // significant bit.
        //
        // For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
-       zigStart, zigEnd, ah, al := 0, blockSize-1, uint(0), uint(0)
+       zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0)
        if d.progressive {
-               zigStart = int(d.tmp[1+2*nComp])
-               zigEnd = int(d.tmp[2+2*nComp])
-               ah = uint(d.tmp[3+2*nComp] >> 4)
-               al = uint(d.tmp[3+2*nComp] & 0x0f)
+               zigStart = int32(d.tmp[1+2*nComp])
+               zigEnd = int32(d.tmp[2+2*nComp])
+               ah = uint32(d.tmp[3+2*nComp] >> 4)
+               al = uint32(d.tmp[3+2*nComp] & 0x0f)
                if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd {
                        return FormatError("bad spectral selection bounds")
                }
@@ -122,7 +122,7 @@ func (d *decoder) processSOS(n int) error {
        var (
                // b is the decoded coefficients, in natural (not zig-zag) order.
                b  block
-               dc [nColorComponent]int
+               dc [nColorComponent]int32
                // mx0 and my0 are the location of the current (in terms of 8x8 blocks).
                // For example, with 4:2:0 chroma subsampling, the block whose top left
                // pixel co-ordinates are (16, 8) is the third block in the first row:
@@ -218,7 +218,7 @@ func (d *decoder) processSOS(n int) error {
                                                                val0 := value >> 4
                                                                val1 := value & 0x0f
                                                                if val1 != 0 {
-                                                                       zig += int(val0)
+                                                                       zig += int32(val0)
                                                                        if zig > zigEnd {
                                                                                break
                                                                        }
@@ -315,7 +315,7 @@ func (d *decoder) processSOS(n int) error {
                                // Reset the Huffman decoder.
                                d.b = bits{}
                                // Reset the DC components, as per section F.2.1.3.1.
-                               dc = [nColorComponent]int{}
+                               dc = [nColorComponent]int32{}
                                // Reset the progressive decoder state, as per section G.1.2.2.
                                d.eobRun = 0
                        }
@@ -327,7 +327,7 @@ func (d *decoder) processSOS(n int) error {
 
 // refine decodes a successive approximation refinement block, as specified in
 // section G.1.2.
-func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) error {
+func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error {
        // Refining a DC component is trivial.
        if zigStart == 0 {
                if zigEnd != 0 {
@@ -348,7 +348,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
        if d.eobRun == 0 {
        loop:
                for ; zig <= zigEnd; zig++ {
-                       z := 0
+                       z := int32(0)
                        value, err := d.decodeHuffman(h)
                        if err != nil {
                                return err
@@ -382,7 +382,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
                                return FormatError("unexpected Huffman code")
                        }
 
-                       zig, err = d.refineNonZeroes(b, zig, zigEnd, int(val0), delta)
+                       zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta)
                        if err != nil {
                                return err
                        }
@@ -405,7 +405,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
 
 // refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
 // the first nz zero entries are skipped over.
-func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int) (int, error) {
+func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) {
        for ; zig <= zigEnd; zig++ {
                u := unzig[zig]
                if b[u] == 0 {
index 099298e462ec72838529d692c265f850bbfc5fe2..375d8a66d5aadebd4a53fd538d65bb99c9176916 100644 (file)
@@ -21,7 +21,7 @@ func min(x, y int) int {
 }
 
 // div returns a/b rounded to the nearest integer, instead of rounded to zero.
-func div(a int, b int) int {
+func div(a, b int32) int32 {
        if a >= 0 {
                return (a + (b >> 1)) / b
        }
@@ -268,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) {
 }
 
 // emitHuff emits the given value with the given Huffman encoder.
-func (e *encoder) emitHuff(h huffIndex, value int) {
+func (e *encoder) emitHuff(h huffIndex, value int32) {
        x := theHuffmanLUT[h][value]
        e.emit(x&(1<<24-1), x>>24)
 }
 
 // emitHuffRLE emits a run of runLength copies of value encoded with the given
 // Huffman encoder.
-func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
+func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) {
        a, b := value, value
        if a < 0 {
                a, b = -value, value-1
@@ -286,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
        } else {
                nBits = 8 + uint32(bitCount[a>>8])
        }
-       e.emitHuff(h, runLength<<4|int(nBits))
+       e.emitHuff(h, runLength<<4|int32(nBits))
        if nBits > 0 {
                e.emit(uint32(b)&(1<<nBits-1), nBits)
        }
@@ -347,15 +347,15 @@ func (e *encoder) writeDHT() {
 // writeBlock writes a block of pixel data using the given quantization table,
 // returning the post-quantized DC value of the DCT-transformed block.
 // b is in natural (not zig-zag) order.
-func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int) int {
+func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int32) int32 {
        fdct(b)
        // Emit the DC delta.
-       dc := div(b[0], (8 * int(e.quant[q][0])))
+       dc := div(b[0], 8*int32(e.quant[q][0]))
        e.emitHuffRLE(huffIndex(2*q+0), 0, dc-prevDC)
        // Emit the AC components.
-       h, runLength := huffIndex(2*q+1), 0
+       h, runLength := huffIndex(2*q+1), int32(0)
        for zig := 1; zig < blockSize; zig++ {
-               ac := div(b[unzig[zig]], (8 * int(e.quant[q][zig])))
+               ac := div(b[unzig[zig]], 8*int32(e.quant[q][zig]))
                if ac == 0 {
                        runLength++
                } else {
@@ -383,9 +383,9 @@ func toYCbCr(m image.Image, p image.Point, yBlock, cbBlock, crBlock *block) {
                for i := 0; i < 8; i++ {
                        r, g, b, _ := m.At(min(p.X+i, xmax), min(p.Y+j, ymax)).RGBA()
                        yy, cb, cr := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8))
-                       yBlock[8*j+i] = int(yy)
-                       cbBlock[8*j+i] = int(cb)
-                       crBlock[8*j+i] = int(cr)
+                       yBlock[8*j+i] = int32(yy)
+                       cbBlock[8*j+i] = int32(cb)
+                       crBlock[8*j+i] = int32(cr)
                }
        }
 }
@@ -408,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block)
                        }
                        pix := m.Pix[offset+sx*4:]
                        yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
-                       yBlock[8*j+i] = int(yy)
-                       cbBlock[8*j+i] = int(cb)
-                       crBlock[8*j+i] = int(cr)
+                       yBlock[8*j+i] = int32(yy)
+                       cbBlock[8*j+i] = int32(cb)
+                       crBlock[8*j+i] = int32(cr)
                }
        }
 }
@@ -450,12 +450,10 @@ func (e *encoder) writeSOS(m image.Image) {
        var (
                // Scratch buffers to hold the YCbCr values.
                // The blocks are in natural (not zig-zag) order.
-               yBlock  block
-               cbBlock [4]block
-               crBlock [4]block
-               cBlock  block
+               b      block
+               cb, cr [4]block
                // DC components are delta-encoded.
-               prevDCY, prevDCCb, prevDCCr int
+               prevDCY, prevDCCb, prevDCCr int32
        )
        bounds := m.Bounds()
        rgba, _ := m.(*image.RGBA)
@@ -466,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) {
                                yOff := (i & 2) * 4
                                p := image.Pt(x+xOff, y+yOff)
                                if rgba != nil {
-                                       rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i])
+                                       rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
                                } else {
-                                       toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i])
+                                       toYCbCr(m, p, &b, &cb[i], &cr[i])
                                }
-                               prevDCY = e.writeBlock(&yBlock, 0, prevDCY)
+                               prevDCY = e.writeBlock(&b, 0, prevDCY)
                        }
-                       scale(&cBlock, &cbBlock)
-                       prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb)
-                       scale(&cBlock, &crBlock)
-                       prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr)
+                       scale(&b, &cb)
+                       prevDCCb = e.writeBlock(&b, 1, prevDCCb)
+                       scale(&b, &cr)
+                       prevDCCr = e.writeBlock(&b, 1, prevDCCr)
                }
        }
        // Pad the last byte with 1's.