// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+package jpeg
+
// This is a Go translation of idct.c from
//
// http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz
*
*/
-package jpeg
-
const (
w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16)
r2 = 181 // 256/sqrt(2)
)
-// 2-D Inverse Discrete Cosine Transformation, followed by a +128 level shift.
+// idct performs a 2-D Inverse Discrete Cosine Transformation, followed by a
+// +128 level shift and a clip to [0, 255], writing the results to dst.
+// stride is the number of elements between successive rows of dst.
//
-// The input coefficients should already have been multiplied by the appropriate quantization table.
-// We use fixed-point computation, with the number of bits for the fractional component varying over the
-// intermediate stages. The final values are expected to range within [0, 255], after a +128 level shift.
+// The input coefficients should already have been multiplied by the
+// appropriate quantization table. We use fixed-point computation, with the
+// number of bits for the fractional component varying over the intermediate
+// stages.
//
-// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the discrete W transform and
-// for the discrete Fourier transform", IEEE Trans. on ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
-func idct(b *block) {
+// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
+// discrete W transform and for the discrete Fourier transform", IEEE Trans. on
+// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
+func idct(dst []byte, stride int, src *block) {
// Horizontal 1-D IDCT.
for y := 0; y < 8; y++ {
// If all the AC components are zero, then the IDCT is trivial.
- if b[y*8+1] == 0 && b[y*8+2] == 0 && b[y*8+3] == 0 &&
- b[y*8+4] == 0 && b[y*8+5] == 0 && b[y*8+6] == 0 && b[y*8+7] == 0 {
- dc := b[y*8+0] << 3
- b[y*8+0] = dc
- b[y*8+1] = dc
- b[y*8+2] = dc
- b[y*8+3] = dc
- b[y*8+4] = dc
- b[y*8+5] = dc
- b[y*8+6] = dc
- b[y*8+7] = dc
+ if src[y*8+1] == 0 && src[y*8+2] == 0 && src[y*8+3] == 0 &&
+ src[y*8+4] == 0 && src[y*8+5] == 0 && src[y*8+6] == 0 && src[y*8+7] == 0 {
+ dc := src[y*8+0] << 3
+ src[y*8+0] = dc
+ src[y*8+1] = dc
+ src[y*8+2] = dc
+ src[y*8+3] = dc
+ src[y*8+4] = dc
+ src[y*8+5] = dc
+ src[y*8+6] = dc
+ src[y*8+7] = dc
continue
}
// Prescale.
- x0 := (b[y*8+0] << 11) + 128
- x1 := b[y*8+4] << 11
- x2 := b[y*8+6]
- x3 := b[y*8+2]
- x4 := b[y*8+1]
- x5 := b[y*8+7]
- x6 := b[y*8+5]
- x7 := b[y*8+3]
+ x0 := (src[y*8+0] << 11) + 128
+ x1 := src[y*8+4] << 11
+ x2 := src[y*8+6]
+ x3 := src[y*8+2]
+ x4 := src[y*8+1]
+ x5 := src[y*8+7]
+ x6 := src[y*8+5]
+ x7 := src[y*8+3]
// Stage 1.
x8 := w7 * (x4 + x5)
x4 = (r2*(x4-x5) + 128) >> 8
// Stage 4.
- b[8*y+0] = (x7 + x1) >> 8
- b[8*y+1] = (x3 + x2) >> 8
- b[8*y+2] = (x0 + x4) >> 8
- b[8*y+3] = (x8 + x6) >> 8
- b[8*y+4] = (x8 - x6) >> 8
- b[8*y+5] = (x0 - x4) >> 8
- b[8*y+6] = (x3 - x2) >> 8
- b[8*y+7] = (x7 - x1) >> 8
+ src[8*y+0] = (x7 + x1) >> 8
+ src[8*y+1] = (x3 + x2) >> 8
+ src[8*y+2] = (x0 + x4) >> 8
+ src[8*y+3] = (x8 + x6) >> 8
+ src[8*y+4] = (x8 - x6) >> 8
+ src[8*y+5] = (x0 - x4) >> 8
+ src[8*y+6] = (x3 - x2) >> 8
+ src[8*y+7] = (x7 - x1) >> 8
}
// Vertical 1-D IDCT.
// we do not bother to check for the all-zero case.
// Prescale.
- y0 := (b[8*0+x] << 8) + 8192
- y1 := b[8*4+x] << 8
- y2 := b[8*6+x]
- y3 := b[8*2+x]
- y4 := b[8*1+x]
- y5 := b[8*7+x]
- y6 := b[8*5+x]
- y7 := b[8*3+x]
+ y0 := (src[8*0+x] << 8) + 8192
+ y1 := src[8*4+x] << 8
+ y2 := src[8*6+x]
+ y3 := src[8*2+x]
+ y4 := src[8*1+x]
+ y5 := src[8*7+x]
+ y6 := src[8*5+x]
+ y7 := src[8*3+x]
// Stage 1.
y8 := w7*(y4+y5) + 4
y4 = (r2*(y4-y5) + 128) >> 8
// Stage 4.
- b[8*0+x] = (y7 + y1) >> 14
- b[8*1+x] = (y3 + y2) >> 14
- b[8*2+x] = (y0 + y4) >> 14
- b[8*3+x] = (y8 + y6) >> 14
- b[8*4+x] = (y8 - y6) >> 14
- b[8*5+x] = (y0 - y4) >> 14
- b[8*6+x] = (y3 - y2) >> 14
- b[8*7+x] = (y7 - y1) >> 14
+ src[8*0+x] = (y7 + y1) >> 14
+ src[8*1+x] = (y3 + y2) >> 14
+ src[8*2+x] = (y0 + y4) >> 14
+ src[8*3+x] = (y8 + y6) >> 14
+ src[8*4+x] = (y8 - y6) >> 14
+ src[8*5+x] = (y0 - y4) >> 14
+ src[8*6+x] = (y3 - y2) >> 14
+ src[8*7+x] = (y7 - y1) >> 14
}
- // Level shift.
- for i := range *b {
- b[i] += 128
+ // Level shift by +128, clip to [0, 255], and write to dst.
+ for y := 0; y < 8; y++ {
+ for x := 0; x < 8; x++ {
+ c := src[y*8+x]
+ if c < -128 {
+ c = 0
+ } else if c > 127 {
+ c = 255
+ } else {
+ c += 128
+ }
+ dst[y*stride+x] = uint8(c)
+ }
}
}
huff [maxTc + 1][maxTh + 1]huffman
quant [maxTq + 1]block
b bits
- blocks [nComponent][maxH * maxV]block
tmp [1024]byte
}
return nil
}
-// Clip x to the range [0, 255] inclusive.
-func clip(x int) uint8 {
- if x < 0 {
- return 0
- }
- if x > 255 {
- return 255
- }
- return uint8(x)
-}
-
-// Store the MCU to the image.
-func (d *decoder) storeMCU(mx, my int) {
- h0, v0 := d.comps[0].h, d.comps[0].v
- // Store the luma blocks.
- for v := 0; v < v0; v++ {
- for h := 0; h < h0; h++ {
- p := 8 * ((v0*my+v)*d.img.YStride + (h0*mx + h))
- for y := 0; y < 8; y++ {
- for x := 0; x < 8; x++ {
- d.img.Y[p] = clip(d.blocks[0][h0*v+h][8*y+x])
- p++
- }
- p += d.img.YStride - 8
- }
- }
- }
- // Store the chroma blocks.
- p := 8 * (my*d.img.CStride + mx)
- for y := 0; y < 8; y++ {
- for x := 0; x < 8; x++ {
- d.img.Cb[p] = clip(d.blocks[1][0][8*y+x])
- d.img.Cr[p] = clip(d.blocks[2][0][8*y+x])
- p++
- }
- p += d.img.CStride - 8
- }
-}
-
// Specified in section B.2.3.
func (d *decoder) processSOS(n int) os.Error {
if n != 4+2*nComponent {
}
mcu, expectedRST := 0, uint8(rst0Marker)
- var allZeroes block
- var dc [nComponent]int
+ var (
+ allZeroes, b block
+ dc [nComponent]int
+ )
for my := 0; my < myy; my++ {
for mx := 0; mx < mxx; mx++ {
for i := 0; i < nComponent; i++ {
qt := &d.quant[d.comps[i].tq]
for j := 0; j < d.comps[i].h*d.comps[i].v; j++ {
- d.blocks[i][j] = allZeroes
+ // TODO(nigeltao): make this a "var b block" once the compiler's escape
+ // analysis is good enough to allocate it on the stack, not the heap.
+ b = allZeroes
// Decode the DC coefficient, as specified in section F.2.2.1.
value, err := d.decodeHuffman(&d.huff[dcTableClass][scanComps[i].td])
return err
}
dc[i] += dcDelta
- d.blocks[i][j][0] = dc[i] * qt[0]
+ b[0] = dc[i] * qt[0]
// Decode the AC coefficients, as specified in section F.2.2.2.
for k := 1; k < blockSize; k++ {
if err != nil {
return err
}
- d.blocks[i][j][unzig[k]] = ac * qt[k]
+ b[unzig[k]] = ac * qt[k]
} else {
if val0 != 0x0f {
break
}
}
- idct(&d.blocks[i][j])
+ // Perform the inverse DCT and store the MCU component to the image.
+ switch i {
+ case 0:
+ mx0 := h0*mx + (j % 2)
+ my0 := v0*my + (j / 2)
+ idct(d.img.Y[8*(my0*d.img.YStride+mx0):], d.img.YStride, &b)
+ case 1:
+ idct(d.img.Cb[8*(my*d.img.CStride+mx):], d.img.CStride, &b)
+ case 2:
+ idct(d.img.Cr[8*(my*d.img.CStride+mx):], d.img.CStride, &b)
+ }
} // for j
} // for i
- d.storeMCU(mx, my)
mcu++
if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy {
// A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input,