if !ok {
return m, ProtocolError("malformed MIME header line: " + string(kv))
}
- key := canonicalMIMEHeaderKey(k)
+ key, ok := canonicalMIMEHeaderKey(k)
+ if !ok {
+ return m, ProtocolError("malformed MIME header line: " + string(kv))
+ }
+ for _, c := range v {
+ if !validHeaderValueByte(c) {
+ return m, ProtocolError("malformed MIME header line: " + string(kv))
+ }
+ }
// As per RFC 7230 field-name is a token, tokens consist of one or more chars.
// We could return a ProtocolError here, but better to be liberal in what we
return s
}
if upper && 'a' <= c && c <= 'z' {
- return canonicalMIMEHeaderKey([]byte(s))
+ s, _ = canonicalMIMEHeaderKey([]byte(s))
+ return s
}
if !upper && 'A' <= c && c <= 'Z' {
- return canonicalMIMEHeaderKey([]byte(s))
+ s, _ = canonicalMIMEHeaderKey([]byte(s))
+ return s
}
upper = c == '-'
}
const toLower = 'a' - 'A'
-// validHeaderFieldByte reports whether b is a valid byte in a header
+// validHeaderFieldByte reports whether c is a valid byte in a header
// field name. RFC 7230 says:
//
// header-field = field-name ":" OWS field-value OWS
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
// token = 1*tchar
-func validHeaderFieldByte(b byte) bool {
- return int(b) < len(isTokenTable) && isTokenTable[b]
+func validHeaderFieldByte(c byte) bool {
+ // mask is a 128-bit bitmap with 1s for allowed bytes,
+ // so that the byte c can be tested with a shift and an and.
+ // If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
+ // and this function will return false.
+ const mask = 0 |
+ (1<<(10)-1)<<'0' |
+ (1<<(26)-1)<<'a' |
+ (1<<(26)-1)<<'A' |
+ 1<<'!' |
+ 1<<'#' |
+ 1<<'$' |
+ 1<<'%' |
+ 1<<'&' |
+ 1<<'\'' |
+ 1<<'*' |
+ 1<<'+' |
+ 1<<'-' |
+ 1<<'.' |
+ 1<<'^' |
+ 1<<'_' |
+ 1<<'`' |
+ 1<<'|' |
+ 1<<'~'
+ return ((uint64(1)<<c)&(mask&(1<<64-1)) |
+ (uint64(1)<<(c-64))&(mask>>64)) != 0
+}
+
+// validHeaderValueByte reports whether c is a valid byte in a header
+// field value. RFC 7230 says:
+//
+// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+// field-vchar = VCHAR / obs-text
+// obs-text = %x80-FF
+//
+// RFC 5234 says:
+//
+// HTAB = %x09
+// SP = %x20
+// VCHAR = %x21-7E
+func validHeaderValueByte(c byte) bool {
+ // mask is a 128-bit bitmap with 1s for allowed bytes,
+ // so that the byte c can be tested with a shift and an and.
+ // If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
+ // Since this is the obs-text range, we invert the mask to
+ // create a bitmap with 1s for disallowed bytes.
+ const mask = 0 |
+ (1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
+ 1<<0x20 | // SP: %x20
+ 1<<0x09 // HTAB: %x09
+ return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
+ (uint64(1)<<(c-64))&^(mask>>64)) == 0
}
// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
//
// For invalid inputs (if a contains spaces or non-token bytes), a
// is unchanged and a string copy is returned.
-func canonicalMIMEHeaderKey(a []byte) string {
+//
+// ok is true if the header key contains only valid characters and spaces.
+// ReadMIMEHeader accepts header keys containing spaces, but does not
+// canonicalize them.
+func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) {
// See if a looks like a header key. If not, return it unchanged.
+ noCanon := false
for _, c := range a {
if validHeaderFieldByte(c) {
continue
}
// Don't canonicalize.
- return string(a)
+ if c == ' ' {
+ // We accept invalid headers with a space before the
+ // colon, but must not canonicalize them.
+ // See https://go.dev/issue/34540.
+ noCanon = true
+ continue
+ }
+ return string(a), false
+ }
+ if noCanon {
+ return string(a), true
}
upper := true
// case, so a copy of a's bytes into a new string does not
// happen in this map lookup:
if v := commonHeader[string(a)]; v != "" {
- return v
+ return v, true
}
- return string(a)
+ return string(a), true
}
// commonHeader interns common header strings.
commonHeader[v] = v
}
}
-
-// isTokenTable is a copy of net/http/lex.go's isTokenTable.
-// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
-var isTokenTable = [127]bool{
- '!': true,
- '#': true,
- '$': true,
- '%': true,
- '&': true,
- '\'': true,
- '*': true,
- '+': true,
- '-': true,
- '.': true,
- '0': true,
- '1': true,
- '2': true,
- '3': true,
- '4': true,
- '5': true,
- '6': true,
- '7': true,
- '8': true,
- '9': true,
- 'A': true,
- 'B': true,
- 'C': true,
- 'D': true,
- 'E': true,
- 'F': true,
- 'G': true,
- 'H': true,
- 'I': true,
- 'J': true,
- 'K': true,
- 'L': true,
- 'M': true,
- 'N': true,
- 'O': true,
- 'P': true,
- 'Q': true,
- 'R': true,
- 'S': true,
- 'T': true,
- 'U': true,
- 'W': true,
- 'V': true,
- 'X': true,
- 'Y': true,
- 'Z': true,
- '^': true,
- '_': true,
- '`': true,
- 'a': true,
- 'b': true,
- 'c': true,
- 'd': true,
- 'e': true,
- 'f': true,
- 'g': true,
- 'h': true,
- 'i': true,
- 'j': true,
- 'k': true,
- 'l': true,
- 'm': true,
- 'n': true,
- 'o': true,
- 'p': true,
- 'q': true,
- 'r': true,
- 's': true,
- 't': true,
- 'u': true,
- 'v': true,
- 'w': true,
- 'x': true,
- 'y': true,
- 'z': true,
- '|': true,
- '~': true,
-}
"Foo-\r\n\tBar: foo\r\n\r\n",
"Foo\r\n\t: foo\r\n\r\n",
"Foo-\n\tBar",
+ "Foo \tBar: foo\r\n\r\n",
}
-
for _, input := range inputs {
r := reader(input)
- if m, err := r.ReadMIMEHeader(); err == nil {
+ if m, err := r.ReadMIMEHeader(); err == nil || err == io.EOF {
t.Errorf("ReadMIMEHeader(%q) = %v, %v; want nil, err", input, m, err)
}
}
}
+func TestReadMIMEHeaderBytes(t *testing.T) {
+ for i := 0; i <= 0xff; i++ {
+ s := "Foo" + string(rune(i)) + "Bar: foo\r\n\r\n"
+ r := reader(s)
+ wantErr := true
+ switch {
+ case i >= '0' && i <= '9':
+ wantErr = false
+ case i >= 'a' && i <= 'z':
+ wantErr = false
+ case i >= 'A' && i <= 'Z':
+ wantErr = false
+ case i == '!' || i == '#' || i == '$' || i == '%' || i == '&' || i == '\'' || i == '*' || i == '+' || i == '-' || i == '.' || i == '^' || i == '_' || i == '`' || i == '|' || i == '~':
+ wantErr = false
+ case i == ':':
+ // Special case: "Foo:Bar: foo" is the header "Foo".
+ wantErr = false
+ case i == ' ':
+ wantErr = false
+ }
+ m, err := r.ReadMIMEHeader()
+ if err != nil != wantErr {
+ t.Errorf("ReadMIMEHeader(%q) = %v, %v; want error=%v", s, m, err, wantErr)
+ }
+ }
+ for i := 0; i <= 0xff; i++ {
+ s := "Foo: foo" + string(rune(i)) + "bar\r\n\r\n"
+ r := reader(s)
+ wantErr := true
+ switch {
+ case i >= 0x21 && i <= 0x7e:
+ wantErr = false
+ case i == ' ':
+ wantErr = false
+ case i == '\t':
+ wantErr = false
+ case i >= 0x80 && i <= 0xff:
+ wantErr = false
+ }
+ m, err := r.ReadMIMEHeader()
+ if (err != nil) != wantErr {
+ t.Errorf("ReadMIMEHeader(%q) = %v, %v; want error=%v", s, m, err, wantErr)
+ }
+ }
+}
+
// Test that continued lines are properly trimmed. Issue 11204.
func TestReadMIMEHeaderTrimContinued(t *testing.T) {
// In this header, \n and \r\n terminated lines are mixed on purpose.
b := []byte("content-Length")
want := "Content-Length"
n := testing.AllocsPerRun(200, func() {
- if x := canonicalMIMEHeaderKey(b); x != want {
+ if x, _ := canonicalMIMEHeaderKey(b); x != want {
t.Fatalf("canonicalMIMEHeaderKey(%q) = %q; want %q", b, x, want)
}
})