image/jpeg: support decoding CMYK and YCbCrK images.

author Nigel Tao <nigeltao@golang.org>

Fri, 13 Feb 2015 07:09:21 +0000 (18:09 +1100)

committer Nigel Tao <nigeltao@golang.org>

Mon, 16 Feb 2015 00:25:47 +0000 (00:25 +0000)
author Nigel Tao <nigeltao@golang.org>
Fri, 13 Feb 2015 07:09:21 +0000 (18:09 +1100)
committer Nigel Tao <nigeltao@golang.org>
Mon, 16 Feb 2015 00:25:47 +0000 (00:25 +0000)
diff --git a/src/image/decode_test.go b/src/image/decode_test.go

index 8dee57ee4678231682c2ec1ebbd3ca611b160575..e88bfdaaaea241a28adb64adb936b6ece38e0f93 100644 (file)
--- a/src/image/decode_test.go
+++ b/src/image/decode_test.go
@@ -32,6 +32,7 @@ var imageTests = []imageTest{
         // JPEG is a lossy format and hence needs a non-zero tolerance.
         {"testdata/video-001.png", "testdata/video-001.jpeg", 8 << 8},
         {"testdata/video-001.png", "testdata/video-001.progressive.jpeg", 8 << 8},
+       {"testdata/video-001.cmyk.png", "testdata/video-001.cmyk.jpeg", 8 << 8},
         // Grayscale images.
         {"testdata/video-005.gray.png", "testdata/video-005.gray.jpeg", 8 << 8},
         {"testdata/video-005.gray.png", "testdata/video-005.gray.png", 0},
diff --git a/src/image/jpeg/reader.go b/src/image/jpeg/reader.go

index 6d8b1d1d036c84b57e53e937f780b8663d999fa3..72c3f064c0bc5a696a21221c23aeb1bd408b31c4 100644 (file)
--- a/src/image/jpeg/reader.go
+++ b/src/image/jpeg/reader.go
@@ -10,6 +10,7 @@ package jpeg
  import (
         "image"
         "image/color"
+       "image/draw"
         "io"
  )
  
@@ -41,10 +42,7 @@ const (
         maxTh   = 3
         maxTq   = 3
  
-       // A grayscale JPEG image has only a Y component.
-       nGrayComponent = 1
-       // A color JPEG image has Y, Cb and Cr components.
-       nColorComponent = 3
+       maxComponents = 4
  
         // We only support 4:4:4, 4:4:0, 4:2:2 and 4:2:0 downsampling, and therefore the
         // number of luma samples per chroma sample is at most 2 in the horizontal
@@ -54,19 +52,30 @@ const (
  )
  
  const (
-       soiMarker   = 0xd8 // Start Of Image.
-       eoiMarker   = 0xd9 // End Of Image.
-       sof0Marker  = 0xc0 // Start Of Frame (Baseline).
-       sof2Marker  = 0xc2 // Start Of Frame (Progressive).
-       dhtMarker   = 0xc4 // Define Huffman Table.
-       dqtMarker   = 0xdb // Define Quantization Table.
-       sosMarker   = 0xda // Start Of Scan.
-       driMarker   = 0xdd // Define Restart Interval.
-       rst0Marker  = 0xd0 // ReSTart (0).
-       rst7Marker  = 0xd7 // ReSTart (7).
-       app0Marker  = 0xe0 // APPlication specific (0).
-       app15Marker = 0xef // APPlication specific (15).
-       comMarker   = 0xfe // COMment.
+       soiMarker  = 0xd8 // Start Of Image.
+       eoiMarker  = 0xd9 // End Of Image.
+       sof0Marker = 0xc0 // Start Of Frame (Baseline).
+       sof2Marker = 0xc2 // Start Of Frame (Progressive).
+       dhtMarker  = 0xc4 // Define Huffman Table.
+       dqtMarker  = 0xdb // Define Quantization Table.
+       sosMarker  = 0xda // Start Of Scan.
+       driMarker  = 0xdd // Define Restart Interval.
+       rst0Marker = 0xd0 // ReSTart (0).
+       rst7Marker = 0xd7 // ReSTart (7).
+       comMarker  = 0xfe // COMment.
+       // "APPlication specific" markers aren't part of the JPEG spec per se,
+       // but in practice, their use is described at
+       // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
+       app0Marker  = 0xe0
+       app14Marker = 0xee
+       app15Marker = 0xef
+)
+
+// See http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
+const (
+       adobeTransformUnknown = 0
+       adobeTransformYCbCr   = 1
+       adobeTransformYCbCrK  = 2
  )
  
  // unzig maps from the zig-zag ordering to the natural ordering. For example,
@@ -114,17 +123,24 @@ type decoder struct {
                 nUnreadable int
         }
         width, height int
-       img1          *image.Gray
-       img3          *image.YCbCr
-       ri            int // Restart Interval.
-       nComp         int
-       progressive   bool
-       eobRun        uint16 // End-of-Band run, specified in section G.1.2.2.
-       comp          [nColorComponent]component
-       progCoeffs    [nColorComponent][]block // Saved state between progressive-mode scans.
-       huff          [maxTc + 1][maxTh + 1]huffman
-       quant         [maxTq + 1]block // Quantization tables, in zig-zag order.
-       tmp           [blockSize + 1]byte
+
+       img1        *image.Gray
+       img3        *image.YCbCr
+       blackPix    []byte
+       blackStride int
+
+       ri                  int // Restart Interval.
+       nComp               int
+       progressive         bool
+       adobeTransformValid bool
+       adobeTransform      uint8
+       eobRun              uint16 // End-of-Band run, specified in section G.1.2.2.
+
+       comp       [maxComponents]component
+       progCoeffs [maxComponents][]block // Saved state between progressive-mode scans.
+       huff       [maxTc + 1][maxTh + 1]huffman
+       quant      [maxTq + 1]block // Quantization tables, in zig-zag order.
+       tmp        [blockSize + 1]byte
  }
  
  // fill fills up the d.bytes.buf buffer from the underlying io.Reader. It
@@ -285,10 +301,12 @@ func (d *decoder) ignore(n int) error {
  // Specified in section B.2.2.
  func (d *decoder) processSOF(n int) error {
         switch n {
-       case 6 + 3*nGrayComponent:
-               d.nComp = nGrayComponent
-       case 6 + 3*nColorComponent:
-               d.nComp = nColorComponent
+       case 6 + 3*1: // Grayscale image.
+               d.nComp = 1
+       case 6 + 3*3: // YCbCr image. (TODO(nigeltao): or RGB image.)
+               d.nComp = 3
+       case 6 + 3*4: // YCbCrK or CMYK image.
+               d.nComp = 4
         default:
                 return UnsupportedError("SOF has wrong length")
         }
@@ -307,7 +325,7 @@ func (d *decoder) processSOF(n int) error {
         for i := 0; i < d.nComp; i++ {
                 d.comp[i].c = d.tmp[6+3*i]
                 d.comp[i].tq = d.tmp[8+3*i]
-               if d.nComp == nGrayComponent {
+               if d.nComp == 1 {
                         // If a JPEG image has only one component, section A.2 says "this data
                         // is non-interleaved by definition" and section A.2.2 says "[in this
                         // case...] the order of data units within a scan shall be left-to-right
@@ -326,16 +344,43 @@ func (d *decoder) processSOF(n int) error {
                 hv := d.tmp[7+3*i]
                 d.comp[i].h = int(hv >> 4)
                 d.comp[i].v = int(hv & 0x0f)
-               // For color images, we only support 4:4:4, 4:4:0, 4:2:2 or 4:2:0 chroma
-               // downsampling ratios. This implies that the (h, v) values for the Y
-               // component are either (1, 1), (1, 2), (2, 1) or (2, 2), and the (h, v)
-               // values for the Cr and Cb components must be (1, 1).
-               if i == 0 {
-                       if hv != 0x11 && hv != 0x21 && hv != 0x22 && hv != 0x12 {
+               switch d.nComp {
+               case 3:
+                       // For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2 or 4:2:0 chroma
+                       // downsampling ratios. This implies that the (h, v) values for the Y
+                       // component are either (1, 1), (1, 2), (2, 1) or (2, 2), and the (h, v)
+                       // values for the Cr and Cb components must be (1, 1).
+                       if i == 0 {
+                               if hv != 0x11 && hv != 0x21 && hv != 0x22 && hv != 0x12 {
+                                       return UnsupportedError("luma/chroma downsample ratio")
+                               }
+                       } else if hv != 0x11 {
                                 return UnsupportedError("luma/chroma downsample ratio")
                         }
-               } else if hv != 0x11 {
-                       return UnsupportedError("luma/chroma downsample ratio")
+               case 4:
+                       // For 4-component images (either CMYK or YCbCrK), we only support two
+                       // hv vectors: [0x11 0x11 0x11 0x11] and [0x22 0x11 0x11 0x22].
+                       // Theoretically, 4-component JPEG images could mix and match hv values
+                       // but in practice, those two combinations are the only ones in use,
+                       // and it simplifies the applyBlack code below if we can assume that:
+                       //      - for CMYK, the C and K channels have full samples, and if the M
+                       //        and Y channels subsample, they subsample both horizontally and
+                       //        vertically.
+                       //      - for YCbCrK, the Y and K channels have full samples.
+                       switch i {
+                       case 0:
+                               if hv != 0x11 && hv != 0x22 {
+                                       return UnsupportedError("luma/chroma downsample ratio")
+                               }
+                       case 1, 2:
+                               if hv != 0x11 {
+                                       return UnsupportedError("luma/chroma downsample ratio")
+                               }
+                       case 3:
+                               if d.comp[0].h != d.comp[3].h || d.comp[0].v != d.comp[3].v {
+                                       return UnsupportedError("luma/chroma downsample ratio")
+                               }
+                       }
                 }
         }
         return nil
@@ -378,6 +423,26 @@ func (d *decoder) processDRI(n int) error {
         return nil
  }
  
+func (d *decoder) processApp14Marker(n int) error {
+       if n < 12 {
+               return d.ignore(n)
+       }
+       if err := d.readFull(d.tmp[:12]); err != nil {
+               return err
+       }
+       n -= 12
+
+       if d.tmp[0] == 'A' && d.tmp[1] == 'd' && d.tmp[2] == 'o' && d.tmp[3] == 'b' && d.tmp[4] == 'e' {
+               d.adobeTransformValid = true
+               d.adobeTransform = d.tmp[11]
+       }
+
+       if n > 0 {
+               return d.ignore(n)
+       }
+       return nil
+}
+
  // decode reads a JPEG image from r and returns it as an image.Image.
  func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
         d.r = r
@@ -459,25 +524,29 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
                         return nil, FormatError("short segment length")
                 }
  
-               switch {
-               case marker == sof0Marker || marker == sof2Marker: // Start Of Frame.
+               switch marker {
+               case sof0Marker, sof2Marker:
                         d.progressive = marker == sof2Marker
                         err = d.processSOF(n)
                         if configOnly {
                                 return nil, err
                         }
-               case marker == dhtMarker: // Define Huffman Table.
+               case dhtMarker:
                         err = d.processDHT(n)
-               case marker == dqtMarker: // Define Quantization Table.
+               case dqtMarker:
                         err = d.processDQT(n)
-               case marker == sosMarker: // Start Of Scan.
+               case sosMarker:
                         err = d.processSOS(n)
-               case marker == driMarker: // Define Restart Interval.
+               case driMarker:
                         err = d.processDRI(n)
-               case app0Marker <= marker && marker <= app15Marker || marker == comMarker: // APPlication specific, or COMment.
-                       err = d.ignore(n)
+               case app14Marker:
+                       err = d.processApp14Marker(n)
                 default:
-                       err = UnsupportedError("unknown marker")
+                       if app0Marker <= marker && marker <= app15Marker || marker == comMarker {
+                               err = d.ignore(n)
+                       } else {
+                               err = UnsupportedError("unknown marker")
+                       }
                 }
                 if err != nil {
                         return nil, err
@@ -487,11 +556,89 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
                 return d.img1, nil
         }
         if d.img3 != nil {
+               if d.blackPix != nil {
+                       return d.applyBlack()
+               }
                 return d.img3, nil
         }
         return nil, FormatError("missing SOS marker")
  }
  
+// applyBlack combines d.img3 and d.blackPix into a CMYK image. The formula
+// used depends on whether the JPEG image is stored as CMYK or YCbCrK,
+// indicated by the APP14 (Adobe) metadata.
+//
+// Adobe CMYK JPEG images are inverted, where 255 means no ink instead of full
+// ink, so we apply "v = 255 - v" at various points. Note that a double
+// inversion is a no-op, so inversions might be implicit in the code below.
+func (d *decoder) applyBlack() (image.Image, error) {
+       if !d.adobeTransformValid {
+               return nil, UnsupportedError("unknown color model: 4-component JPEG doesn't have Adobe APP14 metadata")
+       }
+
+       // If the 4-component JPEG image isn't explicitly marked as "Unknown (RGB
+       // or CMYK)" as per
+       // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
+       // we assume that it is YCbCrK. This matches libjpeg's jdapimin.c.
+       if d.adobeTransform != adobeTransformUnknown {
+               // Convert the YCbCr part of the YCbCrK to RGB, invert the RGB to get
+               // CMY, and patch in the original K. The RGB to CMY inversion cancels
+               // out the 'Adobe inversion' described in the applyBlack doc comment
+               // above, so in practice, only the fourth channel (black) is inverted.
+               bounds := d.img3.Bounds()
+               img := image.NewRGBA(bounds)
+               // TODO(nigeltao): do the draw.Draw YCbCr -> RGB conversion directly,
+               // instead of having the image/jpeg package depend on the image/draw
+               // package.
+               draw.Draw(img, bounds, d.img3, bounds.Min, draw.Src)
+               for iBase, y := 0, bounds.Min.Y; y < bounds.Max.Y; iBase, y = iBase+img.Stride, y+1 {
+                       for i, x := iBase+3, bounds.Min.X; x < bounds.Max.X; i, x = i+4, x+1 {
+                               img.Pix[i] = 255 - d.blackPix[(y-bounds.Min.Y)*d.blackStride+(x-bounds.Min.X)]
+                       }
+               }
+               return &image.CMYK{
+                       Pix:    img.Pix,
+                       Stride: img.Stride,
+                       Rect:   img.Rect,
+               }, nil
+       }
+
+       // The first three channels (cyan, magenta, yellow) of the CMYK
+       // were decoded into d.img3, but each channel was decoded into a separate
+       // []byte slice, and some channels may be subsampled. We interleave the
+       // separate channels into an image.CMYK's single []byte slice containing 4
+       // contiguous bytes per pixel.
+       bounds := d.img3.Bounds()
+       img := image.NewCMYK(bounds)
+
+       translations := [4]struct {
+               src    []byte
+               stride int
+       }{
+               {d.img3.Y, d.img3.YStride},
+               {d.img3.Cb, d.img3.CStride},
+               {d.img3.Cr, d.img3.CStride},
+               {d.blackPix, d.blackStride},
+       }
+       for t, translation := range translations {
+               subsample := d.comp[t].h != d.comp[0].h || d.comp[t].v != d.comp[0].v
+               for iBase, y := 0, bounds.Min.Y; y < bounds.Max.Y; iBase, y = iBase+img.Stride, y+1 {
+                       sy := y - bounds.Min.Y
+                       if subsample {
+                               sy /= 2
+                       }
+                       for i, x := iBase+t, bounds.Min.X; x < bounds.Max.X; i, x = i+4, x+1 {
+                               sx := x - bounds.Min.X
+                               if subsample {
+                                       sx /= 2
+                               }
+                               img.Pix[i] = 255 - translation.src[sy*translation.stride+sx]
+                       }
+               }
+       }
+       return img, nil
+}
+
  // Decode reads a JPEG image from r and returns it as an image.Image.
  func Decode(r io.Reader) (image.Image, error) {
         var d decoder
@@ -506,15 +653,21 @@ func DecodeConfig(r io.Reader) (image.Config, error) {
                 return image.Config{}, err
         }
         switch d.nComp {
-       case nGrayComponent:
+       case 1:
                 return image.Config{
                         ColorModel: color.GrayModel,
                         Width:      d.width,
                         Height:     d.height,
                 }, nil
-       case nColorComponent:
+       case 3:
+               return image.Config{
+                       ColorModel: color.YCbCrModel, // TODO(nigeltao): support RGB JPEGs.
+                       Width:      d.width,
+                       Height:     d.height,
+               }, nil
+       case 4:
                 return image.Config{
-                       ColorModel: color.YCbCrModel,
+                       ColorModel: color.CMYKModel,
                         Width:      d.width,
                         Height:     d.height,
                 }, nil
diff --git a/src/image/jpeg/scan.go b/src/image/jpeg/scan.go

index 2bd1d9d531d825f0cb33f0870e669d6b4f8522c1..41689363125841365ece2ec75fe54925bd3ab03b 100644 (file)
--- a/src/image/jpeg/scan.go
+++ b/src/image/jpeg/scan.go
@@ -10,11 +10,12 @@ import (
  
  // makeImg allocates and initializes the destination image.
  func (d *decoder) makeImg(h0, v0, mxx, myy int) {
-       if d.nComp == nGrayComponent {
+       if d.nComp == 1 {
                 m := image.NewGray(image.Rect(0, 0, 8*mxx, 8*myy))
                 d.img1 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.Gray)
                 return
         }
+
         var subsampleRatio image.YCbCrSubsampleRatio
         switch {
         case h0 == 1 && v0 == 1:
@@ -30,6 +31,12 @@ func (d *decoder) makeImg(h0, v0, mxx, myy int) {
         }
         m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio)
         d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr)
+
+       if d.nComp == 4 {
+               h3, v3 := d.comp[3].h, d.comp[3].v
+               d.blackPix = make([]byte, 8*h3*mxx*8*v3*myy)
+               d.blackStride = 8 * h3 * mxx
+       }
  }
  
  // Specified in section B.2.3.
@@ -47,7 +54,7 @@ func (d *decoder) processSOS(n int) error {
         if n != 4+2*nComp {
                 return FormatError("SOS length inconsistent with number of components")
         }
-       var scan [nColorComponent]struct {
+       var scan [maxComponents]struct {
                 compIndex uint8
                 td        uint8 // DC table selector.
                 ta        uint8 // AC table selector.
@@ -128,7 +135,7 @@ func (d *decoder) processSOS(n int) error {
         var (
                 // b is the decoded coefficients, in natural (not zig-zag) order.
                 b  block
-               dc [nColorComponent]int32
+               dc [maxComponents]int32
                 // bx and by are the location of the current (in terms of 8x8 blocks).
                 // For example, with 4:2:0 chroma subsampling, the block whose top left
                 // pixel co-ordinates are (16, 8) is the third block in the first row:
@@ -276,7 +283,7 @@ func (d *decoder) processSOS(n int) error {
                                         }
                                         idct(&b)
                                         dst, stride := []byte(nil), 0
-                                       if d.nComp == nGrayComponent {
+                                       if d.nComp == 1 {
                                                 dst, stride = d.img1.Pix[8*(by*d.img1.Stride+bx):], d.img1.Stride
                                         } else {
                                                 switch compIndex {
@@ -286,6 +293,8 @@ func (d *decoder) processSOS(n int) error {
                                                         dst, stride = d.img3.Cb[8*(by*d.img3.CStride+bx):], d.img3.CStride
                                                 case 2:
                                                         dst, stride = d.img3.Cr[8*(by*d.img3.CStride+bx):], d.img3.CStride
+                                               case 3:
+                                                       dst, stride = d.blackPix[8*(by*d.blackStride+bx):], d.blackStride
                                                 default:
                                                         return UnsupportedError("too many components")
                                                 }
@@ -325,7 +334,7 @@ func (d *decoder) processSOS(n int) error {
                                 // Reset the Huffman decoder.
                                 d.bits = bits{}
                                 // Reset the DC components, as per section F.2.1.3.1.
-                               dc = [nColorComponent]int32{}
+                               dc = [maxComponents]int32{}
                                 // Reset the progressive decoder state, as per section G.1.2.2.
                                 d.eobRun = 0
                         }
diff --git a/src/image/testdata/video-001.cmyk.jpeg b/src/image/testdata/video-001.cmyk.jpeg

new file mode 100644 (file)

index 0000000..507df84

Binary files /dev/null and b/src/image/testdata/video-001.cmyk.jpeg differ
diff --git a/src/image/testdata/video-001.cmyk.png b/src/image/testdata/video-001.cmyk.png

new file mode 100644 (file)

index 0000000..ef7b2b8

Binary files /dev/null and b/src/image/testdata/video-001.cmyk.png differ
author	Nigel Tao <nigeltao@golang.org>
	Fri, 13 Feb 2015 07:09:21 +0000 (18:09 +1100)
committer	Nigel Tao <nigeltao@golang.org>
	Mon, 16 Feb 2015 00:25:47 +0000 (00:25 +0000)
src/image/decode_test.go		patch \| blob \| history
src/image/jpeg/reader.go		patch \| blob \| history
src/image/jpeg/scan.go		patch \| blob \| history
src/image/testdata/video-001.cmyk.jpeg	[new file with mode: 0644]	patch \| blob
src/image/testdata/video-001.cmyk.png	[new file with mode: 0644]	patch \| blob