From: Nigel Tao Date: Tue, 30 Oct 2012 00:10:08 +0000 (+1100) Subject: image/jpeg: change block from [64]int to [64]int32. X-Git-Tag: go1.1rc2~2038 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=daf43ba476ac29c9c15b59169a9458900efa0e1d;p=gostls13.git image/jpeg: change block from [64]int to [64]int32. On 6g/linux: benchmark old ns/op new ns/op delta BenchmarkFDCT 4606 4241 -7.92% BenchmarkIDCT 4187 3923 -6.31% BenchmarkDecodeBaseline 3154864 3170224 +0.49% BenchmarkDecodeProgressive 4072812 4017132 -1.37% BenchmarkEncode 39406920 34596760 -12.21% Stack requirements before (from 'go tool 6g -S'): (scan.go:37) TEXT (*decoder).processSOS+0(SB),$1352-32 (writer.go:448) TEXT (*encoder).writeSOS+0(SB),$5344-24 after: (scan.go:37) TEXT (*decoder).processSOS+0(SB),$1064-32 (writer.go:448) TEXT (*encoder).writeSOS+0(SB),$2520-24 Also, in encoder.writeSOS, re-use the yBlock scratch buffer for Cb and Cr. This reduces the stack requirement slightly, but also avoids an unlucky coincidence where a BenchmarkEncode stack split lands between encoder.writeByte and bufio.Writer.WriteByte, which occurs very often during Huffman encoding and is otherwise disasterous for the final benchmark number. FWIW, the yBlock re-use *without* the s/int/int32/ change does not have a noticable effect on the benchmarks. R=r CC=golang-dev, rsc https://golang.org/cl/6823043 --- diff --git a/src/pkg/image/jpeg/dct_test.go b/src/pkg/image/jpeg/dct_test.go index 52d0c10dee..7389f7e4fe 100644 --- a/src/pkg/image/jpeg/dct_test.go +++ b/src/pkg/image/jpeg/dct_test.go @@ -42,7 +42,7 @@ func TestDCT(t *testing.T) { b := block{} n := r.Int() % 64 for j := 0; j < n; j++ { - b[r.Int()%len(b)] = r.Int() % 256 + b[r.Int()%len(b)] = r.Int31() % 256 } blocks = append(blocks, b) } @@ -144,9 +144,9 @@ func slowFDCT(b *block) { dst[8*v+u] = sum / 8 } } - // Convert from float64 to int. + // Convert from float64 to int32. for i := range dst { - b[i] = int(dst[i] + 0.5) + b[i] = int32(dst[i] + 0.5) } } @@ -174,9 +174,9 @@ func slowIDCT(b *block) { dst[8*y+x] = sum / 8 } } - // Convert from float64 to int. + // Convert from float64 to int32. for i := range dst { - b[i] = int(dst[i] + 0.5) + b[i] = int32(dst[i] + 0.5) } } diff --git a/src/pkg/image/jpeg/huffman.go b/src/pkg/image/jpeg/huffman.go index 1c598f6534..9393932aaf 100644 --- a/src/pkg/image/jpeg/huffman.go +++ b/src/pkg/image/jpeg/huffman.go @@ -61,15 +61,15 @@ func (d *decoder) ensureNBits(n int) error { } // The composition of RECEIVE and EXTEND, specified in section F.2.2.1. -func (d *decoder) receiveExtend(t uint8) (int, error) { +func (d *decoder) receiveExtend(t uint8) (int32, error) { err := d.ensureNBits(int(t)) if err != nil { return 0, err } d.b.n -= int(t) d.b.m >>= t - s := 1 << t - x := int(d.b.a>>uint8(d.b.n)) & (s - 1) + s := int32(1) << t + x := int32(d.b.a>>uint8(d.b.n)) & (s - 1) if x < s>>1 { x += ((-1) << t) + 1 } diff --git a/src/pkg/image/jpeg/idct.go b/src/pkg/image/jpeg/idct.go index 92ff1e4b41..46fcaecb79 100644 --- a/src/pkg/image/jpeg/idct.go +++ b/src/pkg/image/jpeg/idct.go @@ -39,7 +39,7 @@ package jpeg const blockSize = 64 // A DCT block is 8x8. -type block [blockSize]int +type block [blockSize]int32 const ( w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16) diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go index c28dc8807c..5b1e61774e 100644 --- a/src/pkg/image/jpeg/reader.go +++ b/src/pkg/image/jpeg/reader.go @@ -187,7 +187,7 @@ func (d *decoder) processDQT(n int) error { return FormatError("bad Tq value") } for i := range d.quant[tq] { - d.quant[tq][i] = int(d.tmp[i+1]) + d.quant[tq][i] = int32(d.tmp[i+1]) } } if n != 0 { diff --git a/src/pkg/image/jpeg/scan.go b/src/pkg/image/jpeg/scan.go index fd491ed083..e3ae8ae441 100644 --- a/src/pkg/image/jpeg/scan.go +++ b/src/pkg/image/jpeg/scan.go @@ -86,12 +86,12 @@ func (d *decoder) processSOS(n int) error { // significant bit. // // For baseline JPEGs, these parameters are hard-coded to 0/63/0/0. - zigStart, zigEnd, ah, al := 0, blockSize-1, uint(0), uint(0) + zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0) if d.progressive { - zigStart = int(d.tmp[1+2*nComp]) - zigEnd = int(d.tmp[2+2*nComp]) - ah = uint(d.tmp[3+2*nComp] >> 4) - al = uint(d.tmp[3+2*nComp] & 0x0f) + zigStart = int32(d.tmp[1+2*nComp]) + zigEnd = int32(d.tmp[2+2*nComp]) + ah = uint32(d.tmp[3+2*nComp] >> 4) + al = uint32(d.tmp[3+2*nComp] & 0x0f) if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd { return FormatError("bad spectral selection bounds") } @@ -122,7 +122,7 @@ func (d *decoder) processSOS(n int) error { var ( // b is the decoded coefficients, in natural (not zig-zag) order. b block - dc [nColorComponent]int + dc [nColorComponent]int32 // mx0 and my0 are the location of the current (in terms of 8x8 blocks). // For example, with 4:2:0 chroma subsampling, the block whose top left // pixel co-ordinates are (16, 8) is the third block in the first row: @@ -218,7 +218,7 @@ func (d *decoder) processSOS(n int) error { val0 := value >> 4 val1 := value & 0x0f if val1 != 0 { - zig += int(val0) + zig += int32(val0) if zig > zigEnd { break } @@ -315,7 +315,7 @@ func (d *decoder) processSOS(n int) error { // Reset the Huffman decoder. d.b = bits{} // Reset the DC components, as per section F.2.1.3.1. - dc = [nColorComponent]int{} + dc = [nColorComponent]int32{} // Reset the progressive decoder state, as per section G.1.2.2. d.eobRun = 0 } @@ -327,7 +327,7 @@ func (d *decoder) processSOS(n int) error { // refine decodes a successive approximation refinement block, as specified in // section G.1.2. -func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) error { +func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error { // Refining a DC component is trivial. if zigStart == 0 { if zigEnd != 0 { @@ -348,7 +348,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro if d.eobRun == 0 { loop: for ; zig <= zigEnd; zig++ { - z := 0 + z := int32(0) value, err := d.decodeHuffman(h) if err != nil { return err @@ -382,7 +382,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro return FormatError("unexpected Huffman code") } - zig, err = d.refineNonZeroes(b, zig, zigEnd, int(val0), delta) + zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta) if err != nil { return err } @@ -405,7 +405,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro // refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0, // the first nz zero entries are skipped over. -func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int) (int, error) { +func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) { for ; zig <= zigEnd; zig++ { u := unzig[zig] if b[u] == 0 { diff --git a/src/pkg/image/jpeg/writer.go b/src/pkg/image/jpeg/writer.go index 099298e462..375d8a66d5 100644 --- a/src/pkg/image/jpeg/writer.go +++ b/src/pkg/image/jpeg/writer.go @@ -21,7 +21,7 @@ func min(x, y int) int { } // div returns a/b rounded to the nearest integer, instead of rounded to zero. -func div(a int, b int) int { +func div(a, b int32) int32 { if a >= 0 { return (a + (b >> 1)) / b } @@ -268,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) { } // emitHuff emits the given value with the given Huffman encoder. -func (e *encoder) emitHuff(h huffIndex, value int) { +func (e *encoder) emitHuff(h huffIndex, value int32) { x := theHuffmanLUT[h][value] e.emit(x&(1<<24-1), x>>24) } // emitHuffRLE emits a run of runLength copies of value encoded with the given // Huffman encoder. -func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) { +func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) { a, b := value, value if a < 0 { a, b = -value, value-1 @@ -286,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) { } else { nBits = 8 + uint32(bitCount[a>>8]) } - e.emitHuff(h, runLength<<4|int(nBits)) + e.emitHuff(h, runLength<<4|int32(nBits)) if nBits > 0 { e.emit(uint32(b)&(1<>8), uint8(g>>8), uint8(b>>8)) - yBlock[8*j+i] = int(yy) - cbBlock[8*j+i] = int(cb) - crBlock[8*j+i] = int(cr) + yBlock[8*j+i] = int32(yy) + cbBlock[8*j+i] = int32(cb) + crBlock[8*j+i] = int32(cr) } } } @@ -408,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block) } pix := m.Pix[offset+sx*4:] yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2]) - yBlock[8*j+i] = int(yy) - cbBlock[8*j+i] = int(cb) - crBlock[8*j+i] = int(cr) + yBlock[8*j+i] = int32(yy) + cbBlock[8*j+i] = int32(cb) + crBlock[8*j+i] = int32(cr) } } } @@ -450,12 +450,10 @@ func (e *encoder) writeSOS(m image.Image) { var ( // Scratch buffers to hold the YCbCr values. // The blocks are in natural (not zig-zag) order. - yBlock block - cbBlock [4]block - crBlock [4]block - cBlock block + b block + cb, cr [4]block // DC components are delta-encoded. - prevDCY, prevDCCb, prevDCCr int + prevDCY, prevDCCb, prevDCCr int32 ) bounds := m.Bounds() rgba, _ := m.(*image.RGBA) @@ -466,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) { yOff := (i & 2) * 4 p := image.Pt(x+xOff, y+yOff) if rgba != nil { - rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i]) + rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i]) } else { - toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i]) + toYCbCr(m, p, &b, &cb[i], &cr[i]) } - prevDCY = e.writeBlock(&yBlock, 0, prevDCY) + prevDCY = e.writeBlock(&b, 0, prevDCY) } - scale(&cBlock, &cbBlock) - prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb) - scale(&cBlock, &crBlock) - prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr) + scale(&b, &cb) + prevDCCb = e.writeBlock(&b, 1, prevDCCb) + scale(&b, &cr) + prevDCCr = e.writeBlock(&b, 1, prevDCCr) } } // Pad the last byte with 1's.