mime/multipart: fix handling of empty parts without CRLF before next part

author Brad Fitzpatrick <bradfitz@golang.org>

Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)

committer Brad Fitzpatrick <bradfitz@golang.org>

Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
author Brad Fitzpatrick <bradfitz@golang.org>
Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
committer Brad Fitzpatrick <bradfitz@golang.org>
Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
diff --git a/src/pkg/mime/multipart/multipart.go b/src/pkg/mime/multipart/multipart.go

index 6ace4be564c94a7782e5ab48557cc3b56ac1e975..e9e337b9222bd6124b6f3538bb6d2a8a3a1fc111 100644 (file)
--- a/src/pkg/mime/multipart/multipart.go
+++ b/src/pkg/mime/multipart/multipart.go
@@ -22,11 +22,6 @@ import (
         "net/textproto"
  )
  
-// TODO(bradfitz): inline these once the compiler can inline them in
-// read-only situation (such as bytes.HasSuffix)
-var lf = []byte("\n")
-var crlf = []byte("\r\n")
-
  var emptyParams = make(map[string]string)
  
  // A Part represents a single part in a multipart body.
@@ -36,8 +31,9 @@ type Part struct {
         // i.e. "foo-bar" changes case to "Foo-Bar"
         Header textproto.MIMEHeader
  
-       buffer *bytes.Buffer
-       mr     *Reader
+       buffer    *bytes.Buffer
+       mr        *Reader
+       bytesRead int
  
         disposition       string
         dispositionParams map[string]string
@@ -113,14 +109,26 @@ func (bp *Part) populateHeaders() error {
  // Read reads the body of a part, after its headers and before the
  // next part (if any) begins.
  func (p *Part) Read(d []byte) (n int, err error) {
+       defer func() {
+               p.bytesRead += n
+       }()
         if p.buffer.Len() >= len(d) {
                 // Internal buffer of unconsumed data is large enough for
                 // the read request.  No need to parse more at the moment.
                 return p.buffer.Read(d)
         }
         peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
-       unexpectedEof := err == io.EOF
-       if err != nil && !unexpectedEof {
+
+       // Look for an immediate empty part without a leading \r\n
+       // before the boundary separator.  Some MIME code makes empty
+       // parts like this. Most browsers, however, write the \r\n
+       // before the subsequent boundary even for empty parts and
+       // won't hit this path.
+       if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
+               return 0, io.EOF
+       }
+       unexpectedEOF := err == io.EOF
+       if err != nil && !unexpectedEOF {
                 return 0, fmt.Errorf("multipart: Part Read: %v", err)
         }
         if peek == nil {
@@ -138,7 +146,7 @@ func (p *Part) Read(d []byte) (n int, err error) {
                 foundBoundary = true
         } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
                 nCopy = safeCount
-       } else if unexpectedEof {
+       } else if unexpectedEOF {
                 // If we've run out of peek buffer and the boundary
                 // wasn't found (and can't possibly fit), we must have
                 // hit the end of the file unexpectedly.
@@ -172,7 +180,10 @@ type Reader struct {
         currentPart *Part
         partsRead   int
  
-       nl, nlDashBoundary, dashBoundaryDash, dashBoundary []byte
+       nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
+       nlDashBoundary   []byte // nl + "--boundary"
+       dashBoundaryDash []byte // "--boundary--"
+       dashBoundary     []byte // "--boundary"
  }
  
  // NextPart returns the next part in the multipart or an error.
@@ -185,7 +196,7 @@ func (r *Reader) NextPart() (*Part, error) {
         expectNewPart := false
         for {
                 line, err := r.bufReader.ReadSlice('\n')
-               if err == io.EOF && bytes.Equal(line, r.dashBoundaryDash) {
+               if err == io.EOF && r.isFinalBoundary(line) {
                         // If the buffer ends in "--boundary--" without the
                         // trailing "\r\n", ReadSlice will return an error
                         // (since it's missing the '\n'), but this is a valid
@@ -207,7 +218,7 @@ func (r *Reader) NextPart() (*Part, error) {
                         return bp, nil
                 }
  
-               if hasPrefixThenNewline(line, r.dashBoundaryDash) {
+               if r.isFinalBoundary(line) {
                         // Expected EOF
                         return nil, io.EOF
                 }
@@ -235,7 +246,19 @@ func (r *Reader) NextPart() (*Part, error) {
         panic("unreachable")
  }
  
-func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
+// isFinalBoundary returns whether line is the final boundary line
+// indiciating that all parts are over.
+// It matches `^--boundary--[ \t]*(\r\n)?$`
+func (mr *Reader) isFinalBoundary(line []byte) bool {
+       if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
+               return false
+       }
+       rest := line[len(mr.dashBoundaryDash):]
+       rest = skipLWSPChar(rest)
+       return len(rest) == 0 || bytes.Equal(rest, mr.nl)
+}
+
+func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
         // http://tools.ietf.org/html/rfc2046#section-5.1
         //   The boundary delimiter line is then defined as a line
         //   consisting entirely of two hyphen characters ("-",
@@ -245,32 +268,52 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
         if !bytes.HasPrefix(line, mr.dashBoundary) {
                 return false
         }
-       if bytes.HasSuffix(line, mr.nl) {
-               return onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-len(mr.nl)])
+       rest := line[len(mr.dashBoundary):]
+       rest = skipLWSPChar(rest)
+
+       // On the first part, see our lines are ending in \n instead of \r\n
+       // and switch into that mode if so.  This is a violation of the spec,
+       // but occurs in practice.
+       if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
+               mr.nl = mr.nl[1:]
+               mr.nlDashBoundary = mr.nlDashBoundary[1:]
         }
-       // Violate the spec and also support newlines without the
-       // carriage return...
-       if mr.partsRead == 0 && bytes.HasSuffix(line, lf) {
-               if onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-1]) {
-                       mr.nl = mr.nl[1:]
-                       mr.nlDashBoundary = mr.nlDashBoundary[1:]
-                       return true
-               }
-       }
-       return false
+       return bytes.Equal(rest, mr.nl)
  }
  
-func onlyHorizontalWhitespace(s []byte) bool {
-       for _, b := range s {
-               if b != ' ' && b != '\t' {
-                       return false
-               }
+// peekBufferIsEmptyPart returns whether the provided peek-ahead
+// buffer represents an empty part.  This is only called if we've not
+// already read any bytes in this part and checks for the case of MIME
+// software not writing the \r\n on empty parts. Some does, some
+// doesn't.
+//
+// This checks that what follows the "--boundary" is actually the end
+// ("--boundary--" with optional whitespace) or optional whitespace
+// and then a newline, so we don't catch "--boundaryFAKE", in which
+// case the whole line is part of the data.
+func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
+       // End of parts case.
+       // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
+       if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
+               rest := peek[len(mr.dashBoundaryDash):]
+               rest = skipLWSPChar(rest)
+               return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
         }
-       return true
+       if !bytes.HasPrefix(peek, mr.dashBoundary) {
+               return false
+       }
+       // Test whether rest matches `^[ \t]*\r\n`)
+       rest := peek[len(mr.dashBoundary):]
+       rest = skipLWSPChar(rest)
+       return bytes.HasPrefix(rest, mr.nl)
  }
  
-func hasPrefixThenNewline(s, prefix []byte) bool {
-       return bytes.HasPrefix(s, prefix) &&
-               (len(s) == len(prefix)+1 && s[len(s)-1] == '\n' ||
-                       len(s) == len(prefix)+2 && bytes.HasSuffix(s, crlf))
+// skipLWSPChar returns b with leading spaces and tabs removed.
+// RFC 822 defines:
+//    LWSP-char = SPACE / HTAB
+func skipLWSPChar(b []byte) []byte {
+       for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
+               b = b[1:]
+       }
+       return b
  }
diff --git a/src/pkg/mime/multipart/multipart_test.go b/src/pkg/mime/multipart/multipart_test.go

index ca7108d7ad0a4bd666f9d9364cba70830008f1c2..cd65e177e852e3a80539ec6c688ef0b14338ebbd 100644 (file)
--- a/src/pkg/mime/multipart/multipart_test.go
+++ b/src/pkg/mime/multipart/multipart_test.go
@@ -10,20 +10,13 @@ import (
         "fmt"
         "io"
         "io/ioutil"
+       "net/textproto"
         "os"
+       "reflect"
         "strings"
         "testing"
  )
  
-func TestHorizontalWhitespace(t *testing.T) {
-       if !onlyHorizontalWhitespace([]byte(" \t")) {
-               t.Error("expected pass")
-       }
-       if onlyHorizontalWhitespace([]byte("foo bar")) {
-               t.Error("expected failure")
-       }
-}
-
  func TestBoundaryLine(t *testing.T) {
         mr := NewReader(strings.NewReader(""), "myBoundary")
         if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) {
@@ -319,29 +312,6 @@ Oh no, premature EOF!
         }
  }
  
-func TestZeroLengthBody(t *testing.T) {
-       testBody := strings.Replace(`
-This is a multi-part message.  This line is ignored.
---MyBoundary
-foo: bar
-
-
---MyBoundary--
-`, "\n", "\r\n", -1)
-       r := NewReader(strings.NewReader(testBody), "MyBoundary")
-       part, err := r.NextPart()
-       if err != nil {
-               t.Fatalf("didn't get a part")
-       }
-       n, err := io.Copy(ioutil.Discard, part)
-       if err != nil {
-               t.Errorf("error reading part: %v", err)
-       }
-       if n != 0 {
-               t.Errorf("read %d bytes; expected 0", n)
-       }
-}
-
  type slowReader struct {
         r io.Reader
  }
@@ -427,3 +397,214 @@ func TestNested(t *testing.T) {
                 t.Fatalf("final outer NextPart = %v; want io.EOF", err)
         }
  }
+
+type headerBody struct {
+       header textproto.MIMEHeader
+       body   string
+}
+
+func formData(key, value string) headerBody {
+       return headerBody{
+               textproto.MIMEHeader{
+                       "Content-Type":        {"text/plain; charset=ISO-8859-1"},
+                       "Content-Disposition": {"form-data; name=" + key},
+               },
+               value,
+       }
+}
+
+type parseTest struct {
+       name    string
+       in, sep string
+       want    []headerBody
+}
+
+var parseTests = []parseTest{
+       // Actual body from App Engine on a blob upload. The final part (the
+       // Content-Type: message/external-body) is what App Engine replaces
+       // the uploaded file with.  The other form fields (prefixed with
+       // "other" in their form-data name) are unchanged.  A bug was
+       // reported with blob uploads failing when the other fields were
+       // empty. This was the MIME POST body that previously failed.
+       {
+               name: "App Engine post",
+               sep:  "00151757727e9583fd04bfbca4c6",
+               in:   "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--",
+               want: []headerBody{
+                       formData("otherEmpty1", ""),
+                       formData("otherFoo1", "foo"),
+                       formData("otherFoo2", "foo"),
+                       formData("otherEmpty2", ""),
+                       formData("otherRepeatFoo", "foo"),
+                       formData("otherRepeatFoo", "foo"),
+                       formData("otherRepeatEmpty", ""),
+                       formData("otherRepeatEmpty", ""),
+                       formData("submit", "Submit"),
+                       {textproto.MIMEHeader{
+                               "Content-Type":        {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"},
+                               "Content-Disposition": {"form-data; name=file; filename=\"fall.png\""},
+                       }, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"},
+               },
+       },
+
+       // Single empty part, ended with --boundary immediately after headers.
+       {
+               name: "single empty part, --boundary",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // Single empty part, ended with \r\n--boundary immediately after headers.
+       {
+               name: "single empty part, \r\n--boundary",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // Final part empty.
+       {
+               name: "final part empty",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+                       {textproto.MIMEHeader{"Foo2": {"bar2"}}, ""},
+               },
+       },
+
+       // Final part empty with newlines after final separator.
+       {
+               name: "final part empty then crlf",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // Final part empty with lwsp-chars after final separator.
+       {
+               name: "final part empty then lwsp",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc-- \t",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // No parts (empty form as submitted by Chrome)
+       {
+               name: "no parts",
+               sep:  "----WebKitFormBoundaryQfEAfzFOiSemeHfA",
+               in:   "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n",
+               want: []headerBody{},
+       },
+
+       // Part containing data starting with the boundary, but with additional suffix.
+       {
+               name: "fake separator as data",
+               sep:  "sep",
+               in:   "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"},
+               },
+       },
+
+       // Part containing a boundary with whitespace following it.
+       {
+               name: "boundary with whitespace",
+               sep:  "sep",
+               in:   "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, "text"},
+               },
+       },
+
+       // With ignored leading line.
+       {
+               name: "leading line",
+               sep:  "MyBoundary",
+               in: strings.Replace(`This is a multi-part message.  This line is ignored.
+--MyBoundary
+foo: bar
+
+
+--MyBoundary--`, "\n", "\r\n", -1),
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       roundTripParseTest(),
+}
+
+func TestParse(t *testing.T) {
+Cases:
+       for _, tt := range parseTests {
+               r := NewReader(strings.NewReader(tt.in), tt.sep)
+               got := []headerBody{}
+               for {
+                       p, err := r.NextPart()
+                       if err == io.EOF {
+                               break
+                       }
+                       if err != nil {
+                               t.Errorf("in test %q, NextPart: %v", tt.name, err)
+                               continue Cases
+                       }
+                       pbody, err := ioutil.ReadAll(p)
+                       if err != nil {
+                               t.Errorf("in test %q, error reading part: %v", tt.name, err)
+                               continue Cases
+                       }
+                       got = append(got, headerBody{p.Header, string(pbody)})
+               }
+               if !reflect.DeepEqual(tt.want, got) {
+                       t.Errorf("test %q:\n got: %v\nwant: %v", tt.name, got, tt.want)
+                       if len(tt.want) != len(got) {
+                               t.Errorf("test %q: got %d parts, want %d", tt.name, len(got), len(tt.want))
+                       } else if len(got) > 1 {
+                               for pi, wantPart := range tt.want {
+                                       if !reflect.DeepEqual(wantPart, got[pi]) {
+                                               t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tt.name, pi, got[pi], wantPart)
+                                       }
+                               }
+                       }
+               }
+       }
+}
+
+func roundTripParseTest() parseTest {
+       t := parseTest{
+               name: "round trip",
+               want: []headerBody{
+                       formData("empty", ""),
+                       formData("lf", "\n"),
+                       formData("cr", "\r"),
+                       formData("crlf", "\r\n"),
+                       formData("foo", "bar"),
+               },
+       }
+       var buf bytes.Buffer
+       w := NewWriter(&buf)
+       for _, p := range t.want {
+               pw, err := w.CreatePart(p.header)
+               if err != nil {
+                       panic(err)
+               }
+               _, err = pw.Write([]byte(p.body))
+               if err != nil {
+                       panic(err)
+               }
+       }
+       w.Close()
+       t.in = buf.String()
+       t.sep = w.Boundary()
+       return t
+}
author	Brad Fitzpatrick <bradfitz@golang.org>
	Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
committer	Brad Fitzpatrick <bradfitz@golang.org>
	Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
src/pkg/mime/multipart/multipart.go		patch \| blob \| history
src/pkg/mime/multipart/multipart_test.go		patch \| blob \| history