From: Nigel Tao <nigeltao@golang.org>
Date: Tue, 30 Oct 2012 00:10:08 +0000 (+1100)
Subject: image/jpeg: change block from [64]int to [64]int32.
X-Git-Tag: go1.1rc2~2038
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=daf43ba476ac29c9c15b59169a9458900efa0e1d;p=gostls13.git

image/jpeg: change block from [64]int to [64]int32.

On 6g/linux:
benchmark                     old ns/op    new ns/op    delta
BenchmarkFDCT                      4606         4241   -7.92%
BenchmarkIDCT                      4187         3923   -6.31%
BenchmarkDecodeBaseline         3154864      3170224   +0.49%
BenchmarkDecodeProgressive      4072812      4017132   -1.37%
BenchmarkEncode                39406920     34596760  -12.21%

Stack requirements before (from 'go tool 6g -S'):
(scan.go:37) TEXT    (*decoder).processSOS+0(SB),$1352-32
(writer.go:448) TEXT    (*encoder).writeSOS+0(SB),$5344-24

after:
(scan.go:37) TEXT    (*decoder).processSOS+0(SB),$1064-32
(writer.go:448) TEXT    (*encoder).writeSOS+0(SB),$2520-24

Also, in encoder.writeSOS, re-use the yBlock scratch buffer for Cb and
Cr. This reduces the stack requirement slightly, but also avoids an
unlucky coincidence where a BenchmarkEncode stack split lands between
encoder.writeByte and bufio.Writer.WriteByte, which occurs very often
during Huffman encoding and is otherwise disasterous for the final
benchmark number. FWIW, the yBlock re-use *without* the s/int/int32/
change does not have a noticable effect on the benchmarks.

R=r
CC=golang-dev, rsc
https://golang.org/cl/6823043
---

diff --git a/src/pkg/image/jpeg/dct_test.go b/src/pkg/image/jpeg/dct_test.go
index 52d0c10dee..7389f7e4fe 100644
--- a/src/pkg/image/jpeg/dct_test.go
+++ b/src/pkg/image/jpeg/dct_test.go
@@ -42,7 +42,7 @@ func TestDCT(t *testing.T) {
 		b := block{}
 		n := r.Int() % 64
 		for j := 0; j < n; j++ {
-			b[r.Int()%len(b)] = r.Int() % 256
+			b[r.Int()%len(b)] = r.Int31() % 256
 		}
 		blocks = append(blocks, b)
 	}
@@ -144,9 +144,9 @@ func slowFDCT(b *block) {
 			dst[8*v+u] = sum / 8
 		}
 	}
-	// Convert from float64 to int.
+	// Convert from float64 to int32.
 	for i := range dst {
-		b[i] = int(dst[i] + 0.5)
+		b[i] = int32(dst[i] + 0.5)
 	}
 }
 
@@ -174,9 +174,9 @@ func slowIDCT(b *block) {
 			dst[8*y+x] = sum / 8
 		}
 	}
-	// Convert from float64 to int.
+	// Convert from float64 to int32.
 	for i := range dst {
-		b[i] = int(dst[i] + 0.5)
+		b[i] = int32(dst[i] + 0.5)
 	}
 }
 
diff --git a/src/pkg/image/jpeg/huffman.go b/src/pkg/image/jpeg/huffman.go
index 1c598f6534..9393932aaf 100644
--- a/src/pkg/image/jpeg/huffman.go
+++ b/src/pkg/image/jpeg/huffman.go
@@ -61,15 +61,15 @@ func (d *decoder) ensureNBits(n int) error {
 }
 
 // The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
-func (d *decoder) receiveExtend(t uint8) (int, error) {
+func (d *decoder) receiveExtend(t uint8) (int32, error) {
 	err := d.ensureNBits(int(t))
 	if err != nil {
 		return 0, err
 	}
 	d.b.n -= int(t)
 	d.b.m >>= t
-	s := 1 << t
-	x := int(d.b.a>>uint8(d.b.n)) & (s - 1)
+	s := int32(1) << t
+	x := int32(d.b.a>>uint8(d.b.n)) & (s - 1)
 	if x < s>>1 {
 		x += ((-1) << t) + 1
 	}
diff --git a/src/pkg/image/jpeg/idct.go b/src/pkg/image/jpeg/idct.go
index 92ff1e4b41..46fcaecb79 100644
--- a/src/pkg/image/jpeg/idct.go
+++ b/src/pkg/image/jpeg/idct.go
@@ -39,7 +39,7 @@ package jpeg
 
 const blockSize = 64 // A DCT block is 8x8.
 
-type block [blockSize]int
+type block [blockSize]int32
 
 const (
 	w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go
index c28dc8807c..5b1e61774e 100644
--- a/src/pkg/image/jpeg/reader.go
+++ b/src/pkg/image/jpeg/reader.go
@@ -187,7 +187,7 @@ func (d *decoder) processDQT(n int) error {
 			return FormatError("bad Tq value")
 		}
 		for i := range d.quant[tq] {
-			d.quant[tq][i] = int(d.tmp[i+1])
+			d.quant[tq][i] = int32(d.tmp[i+1])
 		}
 	}
 	if n != 0 {
diff --git a/src/pkg/image/jpeg/scan.go b/src/pkg/image/jpeg/scan.go
index fd491ed083..e3ae8ae441 100644
--- a/src/pkg/image/jpeg/scan.go
+++ b/src/pkg/image/jpeg/scan.go
@@ -86,12 +86,12 @@ func (d *decoder) processSOS(n int) error {
 	// significant bit.
 	//
 	// For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
-	zigStart, zigEnd, ah, al := 0, blockSize-1, uint(0), uint(0)
+	zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0)
 	if d.progressive {
-		zigStart = int(d.tmp[1+2*nComp])
-		zigEnd = int(d.tmp[2+2*nComp])
-		ah = uint(d.tmp[3+2*nComp] >> 4)
-		al = uint(d.tmp[3+2*nComp] & 0x0f)
+		zigStart = int32(d.tmp[1+2*nComp])
+		zigEnd = int32(d.tmp[2+2*nComp])
+		ah = uint32(d.tmp[3+2*nComp] >> 4)
+		al = uint32(d.tmp[3+2*nComp] & 0x0f)
 		if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd {
 			return FormatError("bad spectral selection bounds")
 		}
@@ -122,7 +122,7 @@ func (d *decoder) processSOS(n int) error {
 	var (
 		// b is the decoded coefficients, in natural (not zig-zag) order.
 		b  block
-		dc [nColorComponent]int
+		dc [nColorComponent]int32
 		// mx0 and my0 are the location of the current (in terms of 8x8 blocks).
 		// For example, with 4:2:0 chroma subsampling, the block whose top left
 		// pixel co-ordinates are (16, 8) is the third block in the first row:
@@ -218,7 +218,7 @@ func (d *decoder) processSOS(n int) error {
 								val0 := value >> 4
 								val1 := value & 0x0f
 								if val1 != 0 {
-									zig += int(val0)
+									zig += int32(val0)
 									if zig > zigEnd {
 										break
 									}
@@ -315,7 +315,7 @@ func (d *decoder) processSOS(n int) error {
 				// Reset the Huffman decoder.
 				d.b = bits{}
 				// Reset the DC components, as per section F.2.1.3.1.
-				dc = [nColorComponent]int{}
+				dc = [nColorComponent]int32{}
 				// Reset the progressive decoder state, as per section G.1.2.2.
 				d.eobRun = 0
 			}
@@ -327,7 +327,7 @@ func (d *decoder) processSOS(n int) error {
 
 // refine decodes a successive approximation refinement block, as specified in
 // section G.1.2.
-func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) error {
+func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error {
 	// Refining a DC component is trivial.
 	if zigStart == 0 {
 		if zigEnd != 0 {
@@ -348,7 +348,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
 	if d.eobRun == 0 {
 	loop:
 		for ; zig <= zigEnd; zig++ {
-			z := 0
+			z := int32(0)
 			value, err := d.decodeHuffman(h)
 			if err != nil {
 				return err
@@ -382,7 +382,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
 				return FormatError("unexpected Huffman code")
 			}
 
-			zig, err = d.refineNonZeroes(b, zig, zigEnd, int(val0), delta)
+			zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta)
 			if err != nil {
 				return err
 			}
@@ -405,7 +405,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
 
 // refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
 // the first nz zero entries are skipped over.
-func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int) (int, error) {
+func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) {
 	for ; zig <= zigEnd; zig++ {
 		u := unzig[zig]
 		if b[u] == 0 {
diff --git a/src/pkg/image/jpeg/writer.go b/src/pkg/image/jpeg/writer.go
index 099298e462..375d8a66d5 100644
--- a/src/pkg/image/jpeg/writer.go
+++ b/src/pkg/image/jpeg/writer.go
@@ -21,7 +21,7 @@ func min(x, y int) int {
 }
 
 // div returns a/b rounded to the nearest integer, instead of rounded to zero.
-func div(a int, b int) int {
+func div(a, b int32) int32 {
 	if a >= 0 {
 		return (a + (b >> 1)) / b
 	}
@@ -268,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) {
 }
 
 // emitHuff emits the given value with the given Huffman encoder.
-func (e *encoder) emitHuff(h huffIndex, value int) {
+func (e *encoder) emitHuff(h huffIndex, value int32) {
 	x := theHuffmanLUT[h][value]
 	e.emit(x&(1<<24-1), x>>24)
 }
 
 // emitHuffRLE emits a run of runLength copies of value encoded with the given
 // Huffman encoder.
-func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
+func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) {
 	a, b := value, value
 	if a < 0 {
 		a, b = -value, value-1
@@ -286,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
 	} else {
 		nBits = 8 + uint32(bitCount[a>>8])
 	}
-	e.emitHuff(h, runLength<<4|int(nBits))
+	e.emitHuff(h, runLength<<4|int32(nBits))
 	if nBits > 0 {
 		e.emit(uint32(b)&(1<<nBits-1), nBits)
 	}
@@ -347,15 +347,15 @@ func (e *encoder) writeDHT() {
 // writeBlock writes a block of pixel data using the given quantization table,
 // returning the post-quantized DC value of the DCT-transformed block.
 // b is in natural (not zig-zag) order.
-func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int) int {
+func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int32) int32 {
 	fdct(b)
 	// Emit the DC delta.
-	dc := div(b[0], (8 * int(e.quant[q][0])))
+	dc := div(b[0], 8*int32(e.quant[q][0]))
 	e.emitHuffRLE(huffIndex(2*q+0), 0, dc-prevDC)
 	// Emit the AC components.
-	h, runLength := huffIndex(2*q+1), 0
+	h, runLength := huffIndex(2*q+1), int32(0)
 	for zig := 1; zig < blockSize; zig++ {
-		ac := div(b[unzig[zig]], (8 * int(e.quant[q][zig])))
+		ac := div(b[unzig[zig]], 8*int32(e.quant[q][zig]))
 		if ac == 0 {
 			runLength++
 		} else {
@@ -383,9 +383,9 @@ func toYCbCr(m image.Image, p image.Point, yBlock, cbBlock, crBlock *block) {
 		for i := 0; i < 8; i++ {
 			r, g, b, _ := m.At(min(p.X+i, xmax), min(p.Y+j, ymax)).RGBA()
 			yy, cb, cr := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8))
-			yBlock[8*j+i] = int(yy)
-			cbBlock[8*j+i] = int(cb)
-			crBlock[8*j+i] = int(cr)
+			yBlock[8*j+i] = int32(yy)
+			cbBlock[8*j+i] = int32(cb)
+			crBlock[8*j+i] = int32(cr)
 		}
 	}
 }
@@ -408,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block)
 			}
 			pix := m.Pix[offset+sx*4:]
 			yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
-			yBlock[8*j+i] = int(yy)
-			cbBlock[8*j+i] = int(cb)
-			crBlock[8*j+i] = int(cr)
+			yBlock[8*j+i] = int32(yy)
+			cbBlock[8*j+i] = int32(cb)
+			crBlock[8*j+i] = int32(cr)
 		}
 	}
 }
@@ -450,12 +450,10 @@ func (e *encoder) writeSOS(m image.Image) {
 	var (
 		// Scratch buffers to hold the YCbCr values.
 		// The blocks are in natural (not zig-zag) order.
-		yBlock  block
-		cbBlock [4]block
-		crBlock [4]block
-		cBlock  block
+		b      block
+		cb, cr [4]block
 		// DC components are delta-encoded.
-		prevDCY, prevDCCb, prevDCCr int
+		prevDCY, prevDCCb, prevDCCr int32
 	)
 	bounds := m.Bounds()
 	rgba, _ := m.(*image.RGBA)
@@ -466,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) {
 				yOff := (i & 2) * 4
 				p := image.Pt(x+xOff, y+yOff)
 				if rgba != nil {
-					rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i])
+					rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
 				} else {
-					toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i])
+					toYCbCr(m, p, &b, &cb[i], &cr[i])
 				}
-				prevDCY = e.writeBlock(&yBlock, 0, prevDCY)
+				prevDCY = e.writeBlock(&b, 0, prevDCY)
 			}
-			scale(&cBlock, &cbBlock)
-			prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb)
-			scale(&cBlock, &crBlock)
-			prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr)
+			scale(&b, &cb)
+			prevDCCb = e.writeBlock(&b, 1, prevDCCb)
+			scale(&b, &cr)
+			prevDCCr = e.writeBlock(&b, 1, prevDCCr)
 		}
 	}
 	// Pad the last byte with 1's.