net/textproto: reject invalid header keys/values in ReadMIMEHeader

author Damien Neil <dneil@google.com>

Mon, 6 Jun 2022 20:54:22 +0000 (13:54 -0700)

committer Damien Neil <dneil@google.com>

Tue, 8 Nov 2022 20:36:15 +0000 (20:36 +0000)
author Damien Neil <dneil@google.com>
Mon, 6 Jun 2022 20:54:22 +0000 (13:54 -0700)
committer Damien Neil <dneil@google.com>
Tue, 8 Nov 2022 20:36:15 +0000 (20:36 +0000)
diff --git a/src/net/http/serve_test.go b/src/net/http/serve_test.go

index a93f6eff1bb260ea036ffdb88020eebf0fcadc97..b6853182bfd580b98d1941107053d780f38211dd 100644 (file)
--- a/src/net/http/serve_test.go
+++ b/src/net/http/serve_test.go
@@ -6165,7 +6165,7 @@ func testUnsupportedTransferEncodingsReturn501(t *testing.T, mode testMode) {
                 "fugazi",
                 "foo-bar",
                 "unknown",
-               "\rchunked",
+               `" chunked"`,
         }
  
         for _, badTE := range unsupportedTEs {
diff --git a/src/net/textproto/reader.go b/src/net/textproto/reader.go

index 1cae6ba1e690d43d82181701b68602771e6d8bb7..4e4999b3c9534d3905865e80c5a294f51b872eeb 100644 (file)
--- a/src/net/textproto/reader.go
+++ b/src/net/textproto/reader.go
@@ -508,7 +508,15 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
                 if !ok {
                         return m, ProtocolError("malformed MIME header line: " + string(kv))
                 }
-               key := canonicalMIMEHeaderKey(k)
+               key, ok := canonicalMIMEHeaderKey(k)
+               if !ok {
+                       return m, ProtocolError("malformed MIME header line: " + string(kv))
+               }
+               for _, c := range v {
+                       if !validHeaderValueByte(c) {
+                               return m, ProtocolError("malformed MIME header line: " + string(kv))
+                       }
+               }
  
                 // As per RFC 7230 field-name is a token, tokens consist of one or more chars.
                 // We could return a ProtocolError here, but better to be liberal in what we
@@ -585,10 +593,12 @@ func CanonicalMIMEHeaderKey(s string) string {
                         return s
                 }
                 if upper && 'a' <= c && c <= 'z' {
-                       return canonicalMIMEHeaderKey([]byte(s))
+                       s, _ = canonicalMIMEHeaderKey([]byte(s))
+                       return s
                 }
                 if !upper && 'A' <= c && c <= 'Z' {
-                       return canonicalMIMEHeaderKey([]byte(s))
+                       s, _ = canonicalMIMEHeaderKey([]byte(s))
+                       return s
                 }
                 upper = c == '-'
         }
@@ -597,7 +607,7 @@ func CanonicalMIMEHeaderKey(s string) string {
  
  const toLower = 'a' - 'A'
  
-// validHeaderFieldByte reports whether b is a valid byte in a header
+// validHeaderFieldByte reports whether c is a valid byte in a header
  // field name. RFC 7230 says:
  //
  //     header-field   = field-name ":" OWS field-value OWS
@@ -605,8 +615,58 @@ const toLower = 'a' - 'A'
  //     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  //             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  //     token = 1*tchar
-func validHeaderFieldByte(b byte) bool {
-       return int(b) < len(isTokenTable) && isTokenTable[b]
+func validHeaderFieldByte(c byte) bool {
+       // mask is a 128-bit bitmap with 1s for allowed bytes,
+       // so that the byte c can be tested with a shift and an and.
+       // If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
+       // and this function will return false.
+       const mask = 0 |
+               (1<<(10)-1)<<'0' |
+               (1<<(26)-1)<<'a' |
+               (1<<(26)-1)<<'A' |
+               1<<'!' |
+               1<<'#' |
+               1<<'$' |
+               1<<'%' |
+               1<<'&' |
+               1<<'\'' |
+               1<<'*' |
+               1<<'+' |
+               1<<'-' |
+               1<<'.' |
+               1<<'^' |
+               1<<'_' |
+               1<<'`' |
+               1<<'|' |
+               1<<'~'
+       return ((uint64(1)<<c)&(mask&(1<<64-1)) |
+               (uint64(1)<<(c-64))&(mask>>64)) != 0
+}
+
+// validHeaderValueByte reports whether c is a valid byte in a header
+// field value. RFC 7230 says:
+//
+//     field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+//     field-vchar    = VCHAR / obs-text
+//     obs-text       = %x80-FF
+//
+// RFC 5234 says:
+//
+//     HTAB           =  %x09
+//     SP             =  %x20
+//     VCHAR          =  %x21-7E
+func validHeaderValueByte(c byte) bool {
+       // mask is a 128-bit bitmap with 1s for allowed bytes,
+       // so that the byte c can be tested with a shift and an and.
+       // If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
+       // Since this is the obs-text range, we invert the mask to
+       // create a bitmap with 1s for disallowed bytes.
+       const mask = 0 |
+               (1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
+               1<<0x20 | // SP: %x20
+               1<<0x09 // HTAB: %x09
+       return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
+               (uint64(1)<<(c-64))&^(mask>>64)) == 0
  }
  
  // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
@@ -615,14 +675,29 @@ func validHeaderFieldByte(b byte) bool {
  //
  // For invalid inputs (if a contains spaces or non-token bytes), a
  // is unchanged and a string copy is returned.
-func canonicalMIMEHeaderKey(a []byte) string {
+//
+// ok is true if the header key contains only valid characters and spaces.
+// ReadMIMEHeader accepts header keys containing spaces, but does not
+// canonicalize them.
+func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) {
         // See if a looks like a header key. If not, return it unchanged.
+       noCanon := false
         for _, c := range a {
                 if validHeaderFieldByte(c) {
                         continue
                 }
                 // Don't canonicalize.
-               return string(a)
+               if c == ' ' {
+                       // We accept invalid headers with a space before the
+                       // colon, but must not canonicalize them.
+                       // See https://go.dev/issue/34540.
+                       noCanon = true
+                       continue
+               }
+               return string(a), false
+       }
+       if noCanon {
+               return string(a), true
         }
  
         upper := true
@@ -644,9 +719,9 @@ func canonicalMIMEHeaderKey(a []byte) string {
         // case, so a copy of a's bytes into a new string does not
         // happen in this map lookup:
         if v := commonHeader[string(a)]; v != "" {
-               return v
+               return v, true
         }
-       return string(a)
+       return string(a), true
  }
  
  // commonHeader interns common header strings.
@@ -700,85 +775,3 @@ func initCommonHeader() {
                 commonHeader[v] = v
         }
  }
-
-// isTokenTable is a copy of net/http/lex.go's isTokenTable.
-// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
-var isTokenTable = [127]bool{
-       '!':  true,
-       '#':  true,
-       '$':  true,
-       '%':  true,
-       '&':  true,
-       '\'': true,
-       '*':  true,
-       '+':  true,
-       '-':  true,
-       '.':  true,
-       '0':  true,
-       '1':  true,
-       '2':  true,
-       '3':  true,
-       '4':  true,
-       '5':  true,
-       '6':  true,
-       '7':  true,
-       '8':  true,
-       '9':  true,
-       'A':  true,
-       'B':  true,
-       'C':  true,
-       'D':  true,
-       'E':  true,
-       'F':  true,
-       'G':  true,
-       'H':  true,
-       'I':  true,
-       'J':  true,
-       'K':  true,
-       'L':  true,
-       'M':  true,
-       'N':  true,
-       'O':  true,
-       'P':  true,
-       'Q':  true,
-       'R':  true,
-       'S':  true,
-       'T':  true,
-       'U':  true,
-       'W':  true,
-       'V':  true,
-       'X':  true,
-       'Y':  true,
-       'Z':  true,
-       '^':  true,
-       '_':  true,
-       '`':  true,
-       'a':  true,
-       'b':  true,
-       'c':  true,
-       'd':  true,
-       'e':  true,
-       'f':  true,
-       'g':  true,
-       'h':  true,
-       'i':  true,
-       'j':  true,
-       'k':  true,
-       'l':  true,
-       'm':  true,
-       'n':  true,
-       'o':  true,
-       'p':  true,
-       'q':  true,
-       'r':  true,
-       's':  true,
-       't':  true,
-       'u':  true,
-       'v':  true,
-       'w':  true,
-       'x':  true,
-       'y':  true,
-       'z':  true,
-       '|':  true,
-       '~':  true,
-}
diff --git a/src/net/textproto/reader_test.go b/src/net/textproto/reader_test.go

index d11d40f1cf10b3121cfaf8833f88b0d8aeef0eed..9618b874e6f8536b240a5227130a272e5c3df051 100644 (file)
--- a/src/net/textproto/reader_test.go
+++ b/src/net/textproto/reader_test.go
@@ -189,16 +189,62 @@ func TestReadMIMEHeaderMalformed(t *testing.T) {
                 "Foo-\r\n\tBar: foo\r\n\r\n",
                 "Foo\r\n\t: foo\r\n\r\n",
                 "Foo-\n\tBar",
+               "Foo \tBar: foo\r\n\r\n",
         }
-
         for _, input := range inputs {
                 r := reader(input)
-               if m, err := r.ReadMIMEHeader(); err == nil {
+               if m, err := r.ReadMIMEHeader(); err == nil || err == io.EOF {
                         t.Errorf("ReadMIMEHeader(%q) = %v, %v; want nil, err", input, m, err)
                 }
         }
  }
  
+func TestReadMIMEHeaderBytes(t *testing.T) {
+       for i := 0; i <= 0xff; i++ {
+               s := "Foo" + string(rune(i)) + "Bar: foo\r\n\r\n"
+               r := reader(s)
+               wantErr := true
+               switch {
+               case i >= '0' && i <= '9':
+                       wantErr = false
+               case i >= 'a' && i <= 'z':
+                       wantErr = false
+               case i >= 'A' && i <= 'Z':
+                       wantErr = false
+               case i == '!' || i == '#' || i == '$' || i == '%' || i == '&' || i == '\'' || i == '*' || i == '+' || i == '-' || i == '.' || i == '^' || i == '_' || i == '`' || i == '|' || i == '~':
+                       wantErr = false
+               case i == ':':
+                       // Special case: "Foo:Bar: foo" is the header "Foo".
+                       wantErr = false
+               case i == ' ':
+                       wantErr = false
+               }
+               m, err := r.ReadMIMEHeader()
+               if err != nil != wantErr {
+                       t.Errorf("ReadMIMEHeader(%q) = %v, %v; want error=%v", s, m, err, wantErr)
+               }
+       }
+       for i := 0; i <= 0xff; i++ {
+               s := "Foo: foo" + string(rune(i)) + "bar\r\n\r\n"
+               r := reader(s)
+               wantErr := true
+               switch {
+               case i >= 0x21 && i <= 0x7e:
+                       wantErr = false
+               case i == ' ':
+                       wantErr = false
+               case i == '\t':
+                       wantErr = false
+               case i >= 0x80 && i <= 0xff:
+                       wantErr = false
+               }
+               m, err := r.ReadMIMEHeader()
+               if (err != nil) != wantErr {
+                       t.Errorf("ReadMIMEHeader(%q) = %v, %v; want error=%v", s, m, err, wantErr)
+               }
+       }
+}
+
  // Test that continued lines are properly trimmed. Issue 11204.
  func TestReadMIMEHeaderTrimContinued(t *testing.T) {
         // In this header, \n and \r\n terminated lines are mixed on purpose.
@@ -317,7 +363,7 @@ func TestCommonHeaders(t *testing.T) {
         b := []byte("content-Length")
         want := "Content-Length"
         n := testing.AllocsPerRun(200, func() {
-               if x := canonicalMIMEHeaderKey(b); x != want {
+               if x, _ := canonicalMIMEHeaderKey(b); x != want {
                         t.Fatalf("canonicalMIMEHeaderKey(%q) = %q; want %q", b, x, want)
                 }
         })
author	Damien Neil <dneil@google.com>
	Mon, 6 Jun 2022 20:54:22 +0000 (13:54 -0700)
committer	Damien Neil <dneil@google.com>
	Tue, 8 Nov 2022 20:36:15 +0000 (20:36 +0000)
src/net/http/serve_test.go		patch \| blob \| history
src/net/textproto/reader.go		patch \| blob \| history
src/net/textproto/reader_test.go		patch \| blob \| history