import (
"bytes"
+ "fmt"
+ "os"
"strings"
"unicode"
)
params = make(map[string]string)
+ // Map of base parameter name -> parameter name -> value
+ // for parameters containing a '*' character.
+ // Lazily initialized.
+ var continuation map[string]map[string]string
+
v = v[i:]
for len(v) > 0 {
v = strings.TrimLeftFunc(v, unicode.IsSpace)
if len(v) == 0 {
- return
+ break
}
key, value, rest := consumeMediaParam(v)
if key == "" {
// Parse error.
return "", nil
}
- params[key] = value
+
+ pmap := params
+ if idx := strings.Index(key, "*"); idx != -1 {
+ baseName := key[:idx]
+ if continuation == nil {
+ continuation = make(map[string]map[string]string)
+ }
+ var ok bool
+ if pmap, ok = continuation[baseName]; !ok {
+ continuation[baseName] = make(map[string]string)
+ pmap = continuation[baseName]
+ }
+ }
+ if _, exists := pmap[key]; exists {
+ // Duplicate parameter name is bogus.
+ return "", nil
+ }
+ pmap[key] = value
v = rest
}
+
+ // Stitch together any continuations or things with stars
+ // (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
+ var buf bytes.Buffer
+ for key, pieceMap := range continuation {
+ singlePartKey := key + "*"
+ if v, ok := pieceMap[singlePartKey]; ok {
+ decv := decode2231Enc(v)
+ params[key] = decv
+ continue
+ }
+
+ buf.Reset()
+ valid := false
+ for n := 0; ; n++ {
+ simplePart := fmt.Sprintf("%s*%d", key, n)
+ if v, ok := pieceMap[simplePart]; ok {
+ valid = true
+ buf.WriteString(v)
+ continue
+ }
+ encodedPart := simplePart + "*"
+ if v, ok := pieceMap[encodedPart]; ok {
+ valid = true
+ if n == 0 {
+ buf.WriteString(decode2231Enc(v))
+ } else {
+ decv, _ := percentHexUnescape(v)
+ buf.WriteString(decv)
+ }
+ } else {
+ break
+ }
+ }
+ if valid {
+ params[key] = buf.String()
+ }
+ }
+
return
}
+func decode2231Enc(v string) string {
+ sv := strings.Split(v, "'", 3)
+ if len(sv) != 3 {
+ return ""
+ }
+ // Ignoring lang in sv[1] for now.
+ charset := strings.ToLower(sv[0])
+ if charset != "us-ascii" && charset != "utf-8" {
+ // TODO: unsupported encoding
+ return ""
+ }
+ encv, _ := percentHexUnescape(sv[2])
+ return encv
+}
+
// isNotTokenChar reports the logical negation of IsTokenChar for rune.
func isNotTokenChar(rune int) bool {
	isToken := IsTokenChar(rune)
	return !isToken
}
for idx, rune = range rest {
switch {
case nextIsLiteral:
- if rune >= 0x80 {
- return "", v
- }
buffer.WriteRune(rune)
nextIsLiteral = false
case rune == leadQuote:
return buffer.String(), rest[idx+1:]
- case IsQText(rune):
- buffer.WriteRune(rune)
case rune == '\\':
nextIsLiteral = true
+ case rune != '\r' && rune != '\n':
+ buffer.WriteRune(rune)
default:
return "", v
}
if param == "" {
return "", "", v
}
+
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
if !strings.HasPrefix(rest, "=") {
return "", "", v
}
return param, value, rest
}
+
+func percentHexUnescape(s string) (string, os.Error) {
+ // Count %, check that they're well-formed.
+ percents := 0
+ for i := 0; i < len(s); {
+ if s[i] != '%' {
+ i++
+ continue
+ }
+ percents++
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+ s = s[i:]
+ if len(s) > 3 {
+ s = s[0:3]
+ }
+ return "", fmt.Errorf("Bogus characters after %: %q", s)
+ }
+ i += 3
+ }
+ if percents == 0 {
+ return s, nil
+ }
+
+ t := make([]byte, len(s)-2*percents)
+ j := 0
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case '%':
+ t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
+ j++
+ i += 3
+ default:
+ t[j] = s[i]
+ j++
+ i++
+ }
+ }
+ return string(t), nil
+}
+
// ishex reports whether c is an ASCII hexadecimal digit:
// '0'-'9', 'a'-'f' or 'A'-'F'.
func ishex(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	if c >= 'a' && c <= 'f' {
		return true
	}
	return c >= 'A' && c <= 'F'
}
+
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c,
// accepting both lower- and upper-case letters. For any byte that is not a
// hex digit it returns 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
"form-data",
m("key", "value", "blah", "value", "name", "foo")},
+ {`foo; key=val1; key=the-key-appears-again-which-is-bogus`,
+ "", m()},
+
+ // From RFC 2231:
+ {`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`,
+ "application/x-stuff",
+ m("title", "This is ***fun***")},
+
+ {`message/external-body; access-type=URL; ` +
+ `URL*0="ftp://";` +
+ `URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`,
+ "message/external-body",
+ m("access-type", "URL",
+ "URL", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar")},
+
+ {`application/x-stuff; ` +
+ `title*0*=us-ascii'en'This%20is%20even%20more%20; ` +
+ `title*1*=%2A%2A%2Afun%2A%2A%2A%20; ` +
+ `title*2="isn't it!"`,
+ "application/x-stuff",
+ m("title", "This is even more ***fun*** isn't it!")},
+
// Tests from http://greenbytes.de/tech/tc2231/
// TODO(bradfitz): add the rest of the tests from that site.
{`attachment; filename="f\oo.html"`,
"attachment",
m("creation-date", "Wed, 12 Feb 1997 16:29:51 -0500")},
{`foobar`, "foobar", m()},
- // TODO(bradfitz): rest of them, including RFC2231 encoded UTF-8 and
- // other charsets.
+ {`attachment; filename* =UTF-8''foo-%c3%a4.html`,
+ "attachment",
+ m("filename", "foo-ä.html")},
+ {`attachment; filename*=UTF-8''A-%2541.html`,
+ "attachment",
+ m("filename", "A-%41.html")},
+ {`attachment; filename*0="foo."; filename*1="html"`,
+ "attachment",
+ m("filename", "foo.html")},
+ {`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`,
+ "attachment",
+ m("filename", "foo-ä.html")},
+ {`attachment; filename*0="foo"; filename*01="bar"`,
+ "attachment",
+ m("filename", "foo")},
+ {`attachment; filename*0="foo"; filename*2="bar"`,
+ "attachment",
+ m("filename", "foo")},
+ {`attachment; filename*1="foo"; filename*2="bar"`,
+ "attachment", m()},
+ {`attachment; filename*1="bar"; filename*0="foo"`,
+ "attachment",
+ m("filename", "foobar")},
+ {`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`,
+ "attachment",
+ m("filename", "foo-ä.html")},
+ {`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`,
+ "attachment",
+ m("filename", "foo-ä.html")},
+
+ // Browsers also just send UTF-8 directly without RFC 2231,
+ // at least when the source page is served with UTF-8.
+ {`form-data; firstname="Брэд"; lastname="Фицпатрик"`,
+ "form-data",
+ m("firstname", "Брэд", "lastname", "Фицпатрик")},
}
for _, test := range tests {
mt, params := ParseMediaType(test.in)