]> Cypherpunks repositories - gostls13.git/commitdiff
mime/multipart: fix handling of empty parts without CRLF before next part
authorBrad Fitzpatrick <bradfitz@golang.org>
Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
committerBrad Fitzpatrick <bradfitz@golang.org>
Tue, 15 May 2012 01:16:47 +0000 (18:16 -0700)
Empty parts can be either of the form:

a) "--separator\r\n", header (w/ trailing 2xCRLF), \r\n "--separator"...
or
b) "--separator\r\n", header (w/ trailing 2xCRLF), "--separator"...

We never handled case b).  In fact the RFC seems kinda vague about
it, but browsers seem to do a), and App Engine's synthetic POST
bodies after blob uploads is of form b).

So handle them both, and add a bunch of tests.

(I can't promise these are the last fixes to multipart, especially
considering its history, but I'm growing increasingly confident at
least, and I've never submitted a multipart CL with known bugs
outstanding, including this time.)

R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/6212046

src/pkg/mime/multipart/multipart.go
src/pkg/mime/multipart/multipart_test.go

index 6ace4be564c94a7782e5ab48557cc3b56ac1e975..e9e337b9222bd6124b6f3538bb6d2a8a3a1fc111 100644 (file)
@@ -22,11 +22,6 @@ import (
        "net/textproto"
 )
 
-// TODO(bradfitz): inline these once the compiler can inline them in
-// read-only situation (such as bytes.HasSuffix)
-var lf = []byte("\n")
-var crlf = []byte("\r\n")
-
 var emptyParams = make(map[string]string)
 
 // A Part represents a single part in a multipart body.
@@ -36,8 +31,9 @@ type Part struct {
        // i.e. "foo-bar" changes case to "Foo-Bar"
        Header textproto.MIMEHeader
 
-       buffer *bytes.Buffer
-       mr     *Reader
+       buffer    *bytes.Buffer
+       mr        *Reader
+       bytesRead int
 
        disposition       string
        dispositionParams map[string]string
@@ -113,14 +109,26 @@ func (bp *Part) populateHeaders() error {
 // Read reads the body of a part, after its headers and before the
 // next part (if any) begins.
 func (p *Part) Read(d []byte) (n int, err error) {
+       defer func() {
+               p.bytesRead += n
+       }()
        if p.buffer.Len() >= len(d) {
                // Internal buffer of unconsumed data is large enough for
                // the read request.  No need to parse more at the moment.
                return p.buffer.Read(d)
        }
        peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
-       unexpectedEof := err == io.EOF
-       if err != nil && !unexpectedEof {
+
+       // Look for an immediate empty part without a leading \r\n
+       // before the boundary separator.  Some MIME code makes empty
+       // parts like this. Most browsers, however, write the \r\n
+       // before the subsequent boundary even for empty parts and
+       // won't hit this path.
+       if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
+               return 0, io.EOF
+       }
+       unexpectedEOF := err == io.EOF
+       if err != nil && !unexpectedEOF {
                return 0, fmt.Errorf("multipart: Part Read: %v", err)
        }
        if peek == nil {
@@ -138,7 +146,7 @@ func (p *Part) Read(d []byte) (n int, err error) {
                foundBoundary = true
        } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
                nCopy = safeCount
-       } else if unexpectedEof {
+       } else if unexpectedEOF {
                // If we've run out of peek buffer and the boundary
                // wasn't found (and can't possibly fit), we must have
                // hit the end of the file unexpectedly.
@@ -172,7 +180,10 @@ type Reader struct {
        currentPart *Part
        partsRead   int
 
-       nl, nlDashBoundary, dashBoundaryDash, dashBoundary []byte
+       nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
+       nlDashBoundary   []byte // nl + "--boundary"
+       dashBoundaryDash []byte // "--boundary--"
+       dashBoundary     []byte // "--boundary"
 }
 
 // NextPart returns the next part in the multipart or an error.
@@ -185,7 +196,7 @@ func (r *Reader) NextPart() (*Part, error) {
        expectNewPart := false
        for {
                line, err := r.bufReader.ReadSlice('\n')
-               if err == io.EOF && bytes.Equal(line, r.dashBoundaryDash) {
+               if err == io.EOF && r.isFinalBoundary(line) {
                        // If the buffer ends in "--boundary--" without the
                        // trailing "\r\n", ReadSlice will return an error
                        // (since it's missing the '\n'), but this is a valid
@@ -207,7 +218,7 @@ func (r *Reader) NextPart() (*Part, error) {
                        return bp, nil
                }
 
-               if hasPrefixThenNewline(line, r.dashBoundaryDash) {
+               if r.isFinalBoundary(line) {
                        // Expected EOF
                        return nil, io.EOF
                }
@@ -235,7 +246,19 @@ func (r *Reader) NextPart() (*Part, error) {
        panic("unreachable")
 }
 
-func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
+// isFinalBoundary returns whether line is the final boundary line
+// indiciating that all parts are over.
+// It matches `^--boundary--[ \t]*(\r\n)?$`
+func (mr *Reader) isFinalBoundary(line []byte) bool {
+       if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
+               return false
+       }
+       rest := line[len(mr.dashBoundaryDash):]
+       rest = skipLWSPChar(rest)
+       return len(rest) == 0 || bytes.Equal(rest, mr.nl)
+}
+
+func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
        // http://tools.ietf.org/html/rfc2046#section-5.1
        //   The boundary delimiter line is then defined as a line
        //   consisting entirely of two hyphen characters ("-",
@@ -245,32 +268,52 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
        if !bytes.HasPrefix(line, mr.dashBoundary) {
                return false
        }
-       if bytes.HasSuffix(line, mr.nl) {
-               return onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-len(mr.nl)])
+       rest := line[len(mr.dashBoundary):]
+       rest = skipLWSPChar(rest)
+
+       // On the first part, see our lines are ending in \n instead of \r\n
+       // and switch into that mode if so.  This is a violation of the spec,
+       // but occurs in practice.
+       if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
+               mr.nl = mr.nl[1:]
+               mr.nlDashBoundary = mr.nlDashBoundary[1:]
        }
-       // Violate the spec and also support newlines without the
-       // carriage return...
-       if mr.partsRead == 0 && bytes.HasSuffix(line, lf) {
-               if onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-1]) {
-                       mr.nl = mr.nl[1:]
-                       mr.nlDashBoundary = mr.nlDashBoundary[1:]
-                       return true
-               }
-       }
-       return false
+       return bytes.Equal(rest, mr.nl)
 }
 
-func onlyHorizontalWhitespace(s []byte) bool {
-       for _, b := range s {
-               if b != ' ' && b != '\t' {
-                       return false
-               }
+// peekBufferIsEmptyPart returns whether the provided peek-ahead
+// buffer represents an empty part.  This is only called if we've not
+// already read any bytes in this part and checks for the case of MIME
+// software not writing the \r\n on empty parts. Some does, some
+// doesn't.
+//
+// This checks that what follows the "--boundary" is actually the end
+// ("--boundary--" with optional whitespace) or optional whitespace
+// and then a newline, so we don't catch "--boundaryFAKE", in which
+// case the whole line is part of the data.
+func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
+       // End of parts case.
+       // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
+       if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
+               rest := peek[len(mr.dashBoundaryDash):]
+               rest = skipLWSPChar(rest)
+               return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
        }
-       return true
+       if !bytes.HasPrefix(peek, mr.dashBoundary) {
+               return false
+       }
+       // Test whether rest matches `^[ \t]*\r\n`)
+       rest := peek[len(mr.dashBoundary):]
+       rest = skipLWSPChar(rest)
+       return bytes.HasPrefix(rest, mr.nl)
 }
 
-func hasPrefixThenNewline(s, prefix []byte) bool {
-       return bytes.HasPrefix(s, prefix) &&
-               (len(s) == len(prefix)+1 && s[len(s)-1] == '\n' ||
-                       len(s) == len(prefix)+2 && bytes.HasSuffix(s, crlf))
+// skipLWSPChar returns b with leading spaces and tabs removed.
+// RFC 822 defines:
+//    LWSP-char = SPACE / HTAB
+func skipLWSPChar(b []byte) []byte {
+       for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
+               b = b[1:]
+       }
+       return b
 }
index ca7108d7ad0a4bd666f9d9364cba70830008f1c2..cd65e177e852e3a80539ec6c688ef0b14338ebbd 100644 (file)
@@ -10,20 +10,13 @@ import (
        "fmt"
        "io"
        "io/ioutil"
+       "net/textproto"
        "os"
+       "reflect"
        "strings"
        "testing"
 )
 
-func TestHorizontalWhitespace(t *testing.T) {
-       if !onlyHorizontalWhitespace([]byte(" \t")) {
-               t.Error("expected pass")
-       }
-       if onlyHorizontalWhitespace([]byte("foo bar")) {
-               t.Error("expected failure")
-       }
-}
-
 func TestBoundaryLine(t *testing.T) {
        mr := NewReader(strings.NewReader(""), "myBoundary")
        if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) {
@@ -319,29 +312,6 @@ Oh no, premature EOF!
        }
 }
 
-func TestZeroLengthBody(t *testing.T) {
-       testBody := strings.Replace(`
-This is a multi-part message.  This line is ignored.
---MyBoundary
-foo: bar
-
-
---MyBoundary--
-`, "\n", "\r\n", -1)
-       r := NewReader(strings.NewReader(testBody), "MyBoundary")
-       part, err := r.NextPart()
-       if err != nil {
-               t.Fatalf("didn't get a part")
-       }
-       n, err := io.Copy(ioutil.Discard, part)
-       if err != nil {
-               t.Errorf("error reading part: %v", err)
-       }
-       if n != 0 {
-               t.Errorf("read %d bytes; expected 0", n)
-       }
-}
-
 type slowReader struct {
        r io.Reader
 }
@@ -427,3 +397,214 @@ func TestNested(t *testing.T) {
                t.Fatalf("final outer NextPart = %v; want io.EOF", err)
        }
 }
+
+type headerBody struct {
+       header textproto.MIMEHeader
+       body   string
+}
+
+func formData(key, value string) headerBody {
+       return headerBody{
+               textproto.MIMEHeader{
+                       "Content-Type":        {"text/plain; charset=ISO-8859-1"},
+                       "Content-Disposition": {"form-data; name=" + key},
+               },
+               value,
+       }
+}
+
+type parseTest struct {
+       name    string
+       in, sep string
+       want    []headerBody
+}
+
+var parseTests = []parseTest{
+       // Actual body from App Engine on a blob upload. The final part (the
+       // Content-Type: message/external-body) is what App Engine replaces
+       // the uploaded file with.  The other form fields (prefixed with
+       // "other" in their form-data name) are unchanged.  A bug was
+       // reported with blob uploads failing when the other fields were
+       // empty. This was the MIME POST body that previously failed.
+       {
+               name: "App Engine post",
+               sep:  "00151757727e9583fd04bfbca4c6",
+               in:   "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--",
+               want: []headerBody{
+                       formData("otherEmpty1", ""),
+                       formData("otherFoo1", "foo"),
+                       formData("otherFoo2", "foo"),
+                       formData("otherEmpty2", ""),
+                       formData("otherRepeatFoo", "foo"),
+                       formData("otherRepeatFoo", "foo"),
+                       formData("otherRepeatEmpty", ""),
+                       formData("otherRepeatEmpty", ""),
+                       formData("submit", "Submit"),
+                       {textproto.MIMEHeader{
+                               "Content-Type":        {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"},
+                               "Content-Disposition": {"form-data; name=file; filename=\"fall.png\""},
+                       }, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"},
+               },
+       },
+
+       // Single empty part, ended with --boundary immediately after headers.
+       {
+               name: "single empty part, --boundary",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // Single empty part, ended with \r\n--boundary immediately after headers.
+       {
+               name: "single empty part, \r\n--boundary",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // Final part empty.
+       {
+               name: "final part empty",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+                       {textproto.MIMEHeader{"Foo2": {"bar2"}}, ""},
+               },
+       },
+
+       // Final part empty with newlines after final separator.
+       {
+               name: "final part empty then crlf",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // Final part empty with lwsp-chars after final separator.
+       {
+               name: "final part empty then lwsp",
+               sep:  "abc",
+               in:   "--abc\r\nFoo: bar\r\n\r\n--abc-- \t",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       // No parts (empty form as submitted by Chrome)
+       {
+               name: "no parts",
+               sep:  "----WebKitFormBoundaryQfEAfzFOiSemeHfA",
+               in:   "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n",
+               want: []headerBody{},
+       },
+
+       // Part containing data starting with the boundary, but with additional suffix.
+       {
+               name: "fake separator as data",
+               sep:  "sep",
+               in:   "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"},
+               },
+       },
+
+       // Part containing a boundary with whitespace following it.
+       {
+               name: "boundary with whitespace",
+               sep:  "sep",
+               in:   "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--",
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, "text"},
+               },
+       },
+
+       // With ignored leading line.
+       {
+               name: "leading line",
+               sep:  "MyBoundary",
+               in: strings.Replace(`This is a multi-part message.  This line is ignored.
+--MyBoundary
+foo: bar
+
+
+--MyBoundary--`, "\n", "\r\n", -1),
+               want: []headerBody{
+                       {textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+               },
+       },
+
+       roundTripParseTest(),
+}
+
+func TestParse(t *testing.T) {
+Cases:
+       for _, tt := range parseTests {
+               r := NewReader(strings.NewReader(tt.in), tt.sep)
+               got := []headerBody{}
+               for {
+                       p, err := r.NextPart()
+                       if err == io.EOF {
+                               break
+                       }
+                       if err != nil {
+                               t.Errorf("in test %q, NextPart: %v", tt.name, err)
+                               continue Cases
+                       }
+                       pbody, err := ioutil.ReadAll(p)
+                       if err != nil {
+                               t.Errorf("in test %q, error reading part: %v", tt.name, err)
+                               continue Cases
+                       }
+                       got = append(got, headerBody{p.Header, string(pbody)})
+               }
+               if !reflect.DeepEqual(tt.want, got) {
+                       t.Errorf("test %q:\n got: %v\nwant: %v", tt.name, got, tt.want)
+                       if len(tt.want) != len(got) {
+                               t.Errorf("test %q: got %d parts, want %d", tt.name, len(got), len(tt.want))
+                       } else if len(got) > 1 {
+                               for pi, wantPart := range tt.want {
+                                       if !reflect.DeepEqual(wantPart, got[pi]) {
+                                               t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tt.name, pi, got[pi], wantPart)
+                                       }
+                               }
+                       }
+               }
+       }
+}
+
+func roundTripParseTest() parseTest {
+       t := parseTest{
+               name: "round trip",
+               want: []headerBody{
+                       formData("empty", ""),
+                       formData("lf", "\n"),
+                       formData("cr", "\r"),
+                       formData("crlf", "\r\n"),
+                       formData("foo", "bar"),
+               },
+       }
+       var buf bytes.Buffer
+       w := NewWriter(&buf)
+       for _, p := range t.want {
+               pw, err := w.CreatePart(p.header)
+               if err != nil {
+                       panic(err)
+               }
+               _, err = pw.Write([]byte(p.body))
+               if err != nil {
+                       panic(err)
+               }
+       }
+       w.Close()
+       t.in = buf.String()
+       t.sep = w.Boundary()
+       return t
+}