From 9c7aa5fea983fe58d126542013861a022adefa70 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 22 Feb 2013 10:40:23 -0800 Subject: [PATCH] mime/multipart: allow unescaped newlines through in quoted-printable This makes Go's quoted-printable decoder more like other popular ones, allowing through a bare \r or \n, and also passes through \r\n which looked like a real bug before. Fixes #4771 R=minux.ma CC=golang-dev https://golang.org/cl/7300092 --- src/pkg/mime/multipart/quotedprintable.go | 58 +++++-- .../mime/multipart/quotedprintable_test.go | 149 +++++++++++++++++- 2 files changed, 196 insertions(+), 11 deletions(-) diff --git a/src/pkg/mime/multipart/quotedprintable.go b/src/pkg/mime/multipart/quotedprintable.go index 0a60a6ed55..9e18e1ea12 100644 --- a/src/pkg/mime/multipart/quotedprintable.go +++ b/src/pkg/mime/multipart/quotedprintable.go @@ -3,6 +3,10 @@ // license that can be found in the LICENSE file. // The file define a quoted-printable decoder, as specified in RFC 2045. +// Deviations: +// 1. in addition to "=\r\n", "=\n" is also treated as soft line break. +// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent +// with other broken QP encoders & decoders. package multipart @@ -14,14 +18,16 @@ import ( ) type qpReader struct { - br *bufio.Reader - rerr error // last read error - line []byte // to be consumed before more of br + br *bufio.Reader + skipWhite bool + rerr error // last read error + line []byte // to be consumed before more of br } func newQuotedPrintableReader(r io.Reader) io.Reader { return &qpReader{ - br: bufio.NewReader(r), + br: bufio.NewReader(r), + skipWhite: true, } } @@ -49,6 +55,10 @@ func (q *qpReader) readHexByte(v []byte) (b byte, err error) { return hb<<4 | lb, nil } +func isQPSkipWhiteByte(b byte) bool { + return b == ' ' || b == '\t' +} + func isQPDiscardWhitespace(r rune) bool { switch r { case '\n', '\r', ' ', '\t': @@ -57,22 +67,48 @@ func isQPDiscardWhitespace(r rune) bool { return false } +var ( + crlf = []byte("\r\n") + lf = []byte("\n") + softSuffix = []byte("=") +) + func (q *qpReader) Read(p []byte) (n int, err error) { for len(p) > 0 { if len(q.line) == 0 { if q.rerr != nil { return n, q.rerr } + q.skipWhite = true q.line, q.rerr = q.br.ReadSlice('\n') - q.line = bytes.TrimRightFunc(q.line, isQPDiscardWhitespace) + + // Does the line end in CRLF instead of just LF? + hasLF := bytes.HasSuffix(q.line, lf) + hasCR := bytes.HasSuffix(q.line, crlf) + wholeLine := q.line + q.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace) + if bytes.HasSuffix(q.line, softSuffix) { + rightStripped := wholeLine[len(q.line):] + q.line = q.line[:len(q.line)-1] + if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) { + q.rerr = fmt.Errorf("multipart: invalid bytes after =: %q", rightStripped) + } + } else if hasLF { + if hasCR { + q.line = append(q.line, '\r', '\n') + } else { + q.line = append(q.line, '\n') + } + } continue } - if len(q.line) == 1 && q.line[0] == '=' { - // Soft newline; skipped. - q.line = nil + b := q.line[0] + if q.skipWhite && isQPSkipWhiteByte(b) { + q.line = q.line[1:] continue } - b := q.line[0] + q.skipWhite = false + switch { case b == '=': b, err = q.readHexByte(q.line[1:]) @@ -80,7 +116,9 @@ func (q *qpReader) Read(p []byte) (n int, err error) { return n, err } q.line = q.line[2:] // 2 of the 3; other 1 is done below - case b != '\t' && (b < ' ' || b > '~'): + case b == '\t' || b == '\r' || b == '\n': + break + case b < ' ' || b > '~': return n, fmt.Errorf("multipart: invalid unescaped byte 0x%02x in quoted-printable body", b) } p[0] = b diff --git a/src/pkg/mime/multipart/quotedprintable_test.go b/src/pkg/mime/multipart/quotedprintable_test.go index 796a41f42d..7bcf5767be 100644 --- a/src/pkg/mime/multipart/quotedprintable_test.go +++ b/src/pkg/mime/multipart/quotedprintable_test.go @@ -5,11 +5,18 @@ package multipart import ( + "bufio" "bytes" + "errors" + "flag" "fmt" "io" + "os/exec" + "regexp" + "sort" "strings" "testing" + "time" ) func TestQuotedPrintable(t *testing.T) { @@ -17,15 +24,39 @@ func TestQuotedPrintable(t *testing.T) { in, want string err interface{} }{ + {in: "", want: ""}, {in: "foo bar", want: "foo bar"}, {in: "foo bar=3D", want: "foo bar="}, + {in: "foo bar=\n", want: "foo bar"}, + {in: "foo bar\n", want: "foo bar\n"}, // somewhat lax. {in: "foo bar=0", want: "foo bar", err: io.ErrUnexpectedEOF}, {in: "foo bar=ab", want: "foo bar", err: "multipart: invalid quoted-printable hex byte 0x61"}, {in: "foo bar=0D=0A", want: "foo bar\r\n"}, - {in: "foo bar=\r\n baz", want: "foo bar baz"}, + {in: " A B =\r\n C ", want: "A B C"}, + {in: " A B =\n C ", want: "A B C"}, // lax. treating LF as CRLF {in: "foo=\nbar", want: "foobar"}, {in: "foo\x00bar", want: "foo", err: "multipart: invalid unescaped byte 0x00 in quoted-printable body"}, {in: "foo bar\xff", want: "foo bar", err: "multipart: invalid unescaped byte 0xff in quoted-printable body"}, + + // Equal sign. + {in: "=3D30\n", want: "=30\n"}, + {in: "=00=FF0=\n", want: "\x00\xff0"}, + + // Trailing whitespace + {in: "foo \n", want: "foo\n"}, + {in: "foo \n\nfoo =\n\nfoo=20\n\n", want: "foo\n\nfoo \nfoo \n\n"}, + + // Tests that we allow bare \n and \r through, despite it being strictly + // not permitted per RFC 2045, Section 6.7 Page 22 bullet (4). + {in: "foo\nbar", want: "foo\nbar"}, + {in: "foo\rbar", want: "foo\rbar"}, + {in: "foo\r\nbar", want: "foo\r\nbar"}, + + // Different types of soft line-breaks. + {in: "foo=\r\nbar", want: "foobar"}, + {in: "foo=\nbar", want: "foobar"}, + {in: "foo=\rbar", want: "foo", err: "multipart: invalid quoted-printable hex byte 0x0d"}, + {in: "foo=\r\r\r \nbar", want: "foo", err: `multipart: invalid bytes after =: "\r\r\r \n"`}, } for _, tt := range tests { var buf bytes.Buffer @@ -50,3 +81,119 @@ func TestQuotedPrintable(t *testing.T) { } } + +func everySequence(base, alpha string, length int, fn func(string)) { + if len(base) == length { + fn(base) + return + } + for i := 0; i < len(alpha); i++ { + everySequence(base+alpha[i:i+1], alpha, length, fn) + } +} + +var useQprint = flag.Bool("qprint", false, "Compare against the 'qprint' program.") + +var badSoftRx = regexp.MustCompile(`=([^\r\n]+?\n)|([^\r\n]+$)|(\r$)|(\r[^\n]+\n)|( \r\n)`) + +func TestQPExhaustive(t *testing.T) { + if *useQprint { + _, err := exec.LookPath("qprint") + if err != nil { + t.Fatalf("Error looking for qprint: %v", err) + } + } + + var buf bytes.Buffer + res := make(map[string]int) + everySequence("", "0A \r\n=", 6, func(s string) { + if strings.HasSuffix(s, "=") || strings.Contains(s, "==") { + return + } + buf.Reset() + _, err := io.Copy(&buf, newQuotedPrintableReader(strings.NewReader(s))) + if err != nil { + errStr := err.Error() + if strings.Contains(errStr, "invalid bytes after =:") { + errStr = "invalid bytes after =" + } + res[errStr]++ + if strings.Contains(errStr, "invalid quoted-printable hex byte ") { + if strings.HasSuffix(errStr, "0x20") && (strings.Contains(s, "=0 ") || strings.Contains(s, "=A ") || strings.Contains(s, "= ")) { + return + } + if strings.HasSuffix(errStr, "0x3d") && (strings.Contains(s, "=0=") || strings.Contains(s, "=A=")) { + return + } + if strings.HasSuffix(errStr, "0x0a") || strings.HasSuffix(errStr, "0x0d") { + // bunch of cases; since whitespace at the end of of a line before \n is removed. + return + } + } + if strings.Contains(errStr, "unexpected EOF") { + return + } + if errStr == "invalid bytes after =" && badSoftRx.MatchString(s) { + return + } + t.Errorf("decode(%q) = %v", s, err) + return + } + if *useQprint { + cmd := exec.Command("qprint", "-d") + cmd.Stdin = strings.NewReader(s) + stderr, err := cmd.StderrPipe() + if err != nil { + panic(err) + } + qpres := make(chan interface{}, 2) + go func() { + br := bufio.NewReader(stderr) + s, _ := br.ReadString('\n') + if s != "" { + qpres <- errors.New(s) + if cmd.Process != nil { + // It can get stuck on invalid input, like: + // echo -n "0000= " | qprint -d + cmd.Process.Kill() + } + } + }() + go func() { + want, err := cmd.Output() + if err == nil { + qpres <- want + } + }() + select { + case got := <-qpres: + if want, ok := got.([]byte); ok { + if string(want) != buf.String() { + t.Errorf("go decode(%q) = %q; qprint = %q", s, want, buf.String()) + } + } else { + t.Logf("qprint -d(%q) = %v", s, got) + } + case <-time.After(5 * time.Second): + t.Logf("qprint timeout on %q", s) + } + } + res["OK"]++ + }) + var outcomes []string + for k, v := range res { + outcomes = append(outcomes, fmt.Sprintf("%v: %d", k, v)) + } + sort.Strings(outcomes) + got := strings.Join(outcomes, "\n") + want := `OK: 21576 +invalid bytes after =: 3397 +multipart: invalid quoted-printable hex byte 0x0a: 1400 +multipart: invalid quoted-printable hex byte 0x0d: 2700 +multipart: invalid quoted-printable hex byte 0x20: 2490 +multipart: invalid quoted-printable hex byte 0x3d: 440 +unexpected EOF: 3122` + if got != want { + t.Errorf("Got:\n%s\nWant:\n%s", got, want) + } +} -- 2.48.1