]> Cypherpunks repositories - gostls13.git/commitdiff
net/textproto: faster header canonicalization with fewer allocations
authorJeff R. Allen <jra@nella.org>
Mon, 12 Nov 2012 23:31:42 +0000 (15:31 -0800)
committerBrad Fitzpatrick <bradfitz@golang.org>
Mon, 12 Nov 2012 23:31:42 +0000 (15:31 -0800)
By keeping a single copy of the strings that commonly show up
in headers, we can avoid one string allocation per header.

benchmark                  old ns/op    new ns/op    delta
BenchmarkReadMIMEHeader        19590        10824  -44.75%
BenchmarkUncommon               3168         1861  -41.26%

benchmark                 old allocs   new allocs    delta
BenchmarkReadMIMEHeader           32           25  -21.88%
BenchmarkUncommon                  5            5    0.00%

R=bradfitz, golang-dev, dave, rsc, jra
CC=golang-dev
https://golang.org/cl/6721055

src/pkg/net/textproto/reader.go
src/pkg/net/textproto/reader_test.go

index 3777424534d7f0ce97f12ff275b381df8d7c6be8..855350c31f05447e3b0809de67f4122c69e76faf 100644 (file)
@@ -486,6 +486,7 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
 // letter and any letter following a hyphen to upper case;
 // the rest are converted to lowercase.  For example, the
 // canonical key for "accept-encoding" is "Accept-Encoding".
+// MIME header keys are assumed to be ASCII only.
 func CanonicalMIMEHeaderKey(s string) string {
        // Quick check for canonical encoding.
        upper := true
@@ -502,28 +503,90 @@ func CanonicalMIMEHeaderKey(s string) string {
        return s
 }
 
+const toLower = 'a' - 'A'
+
 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
 // allowed to mutate the provided byte slice before returning the
 // string.
 func canonicalMIMEHeaderKey(a []byte) string {
-       // Canonicalize: first letter upper case
-       // and upper case after each dash.
-       // (Host, User-Agent, If-Modified-Since).
-       // MIME headers are ASCII only, so no Unicode issues.
+       // Look for it in commonHeaders , so that we can avoid an
+       // allocation by sharing the strings among all users
+       // of textproto. If we don't find it, a has been canonicalized
+       // so just return string(a).
        upper := true
-       for i, v := range a {
-               if v == ' ' {
+       lo := 0
+       hi := len(commonHeaders)
+       for i := 0; i < len(a); i++ {
+               // Canonicalize: first letter upper case
+               // and upper case after each dash.
+               // (Host, User-Agent, If-Modified-Since).
+               // MIME headers are ASCII only, so no Unicode issues.
+               if a[i] == ' ' {
                        a[i] = '-'
                        upper = true
                        continue
                }
-               if upper && 'a' <= v && v <= 'z' {
-                       a[i] = v + 'A' - 'a'
+               c := a[i]
+               if upper && 'a' <= c && c <= 'z' {
+                       c -= toLower
+               } else if !upper && 'A' <= c && c <= 'Z' {
+                       c += toLower
                }
-               if !upper && 'A' <= v && v <= 'Z' {
-                       a[i] = v + 'a' - 'A'
+               a[i] = c
+               upper = c == '-' // for next time
+
+               if lo < hi {
+                       for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
+                               lo++
+                       }
+                       for hi > lo && commonHeaders[hi-1][i] > c {
+                               hi--
+                       }
                }
-               upper = v == '-'
+       }
+       if lo < hi && len(commonHeaders[lo]) == len(a) {
+               return commonHeaders[lo]
        }
        return string(a)
 }
+
+var commonHeaders = []string{
+       "Accept",
+       "Accept-Charset",
+       "Accept-Encoding",
+       "Accept-Language",
+       "Accept-Ranges",
+       "Cache-Control",
+       "Cc",
+       "Connection",
+       "Content-Id",
+       "Content-Language",
+       "Content-Length",
+       "Content-Transfer-Encoding",
+       "Content-Type",
+       "Date",
+       "Dkim-Signature",
+       "Etag",
+       "Expires",
+       "From",
+       "Host",
+       "If-Modified-Since",
+       "If-None-Match",
+       "In-Reply-To",
+       "Last-Modified",
+       "Location",
+       "Message-Id",
+       "Mime-Version",
+       "Pragma",
+       "Received",
+       "Return-Path",
+       "Server",
+       "Set-Cookie",
+       "Subject",
+       "To",
+       "User-Agent",
+       "Via",
+       "X-Forwarded-For",
+       "X-Imforwards",
+       "X-Powered-By",
+}
index 9b6c76a0d085618d79333cd591ba62624283be4c..26987f6113263bbb0f3bf6fb9123c3a2253a6bdc 100644 (file)
@@ -24,6 +24,7 @@ var canonicalHeaderKeyTests = []canonicalHeaderKeyTest{
        {"uSER-aGENT", "User-Agent"},
        {"user-agent", "User-Agent"},
        {"USER-AGENT", "User-Agent"},
+       {"üser-agenT", "üser-Agent"}, // non-ASCII unchanged
 }
 
 func TestCanonicalMIMEHeaderKey(t *testing.T) {
@@ -241,18 +242,94 @@ func TestRFC959Lines(t *testing.T) {
        }
 }
 
+func TestCommonHeaders(t *testing.T) {
+       // need to disable the commonHeaders-based optimization
+       // during this check, or we'd not be testing anything
+       oldch := commonHeaders
+       commonHeaders = []string{}
+       defer func() { commonHeaders = oldch }()
+
+       last := ""
+       for _, h := range oldch {
+               if last > h {
+                       t.Errorf("%v is out of order", h)
+               }
+               if last == h {
+                       t.Errorf("%v is duplicated", h)
+               }
+               if canon := CanonicalMIMEHeaderKey(h); h != canon {
+                       t.Errorf("%v is not canonical", h)
+               }
+               last = h
+       }
+}
+
+var clientHeaders = strings.Replace(`Host: golang.org
+Connection: keep-alive
+Cache-Control: max-age=0
+Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
+User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3
+Accept-Encoding: gzip,deflate,sdch
+Accept-Language: en-US,en;q=0.8,fr-CH;q=0.6
+Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3
+COOKIE: __utma=000000000.0000000000.0000000000.0000000000.0000000000.00; __utmb=000000000.0.00.0000000000; __utmc=000000000; __utmz=000000000.0000000000.00.0.utmcsr=code.google.com|utmccn=(referral)|utmcmd=referral|utmcct=/p/go/issues/detail
+Non-Interned: test
+
+`, "\n", "\r\n", -1)
+
+var serverHeaders = strings.Replace(`Content-Type: text/html; charset=utf-8
+Content-Encoding: gzip
+Date: Thu, 27 Sep 2012 09:03:33 GMT
+Server: Google Frontend
+Cache-Control: private
+Content-Length: 2298
+VIA: 1.1 proxy.example.com:80 (XXX/n.n.n-nnn)
+Connection: Close
+Non-Interned: test
+
+`, "\n", "\r\n", -1)
+
 func BenchmarkReadMIMEHeader(b *testing.B) {
        var buf bytes.Buffer
        br := bufio.NewReader(&buf)
        r := NewReader(br)
        for i := 0; i < b.N; i++ {
-               buf.WriteString("User-Agent: not mozilla\r\nContent-Length: 23452\r\nContent-Type: text/html; charset-utf8\r\nFoo-Bar: foobar\r\nfoo-bar: some more string\r\n\r\n")
+               var want int
+               var find string
+               if (i & 1) == 1 {
+                       buf.WriteString(clientHeaders)
+                       want = 10
+                       find = "Cookie"
+               } else {
+                       buf.WriteString(serverHeaders)
+                       want = 9
+                       find = "Via"
+               }
+               h, err := r.ReadMIMEHeader()
+               if err != nil {
+                       b.Fatal(err)
+               }
+               if len(h) != want {
+                       b.Fatalf("wrong number of headers: got %d, want %d", len(h), want)
+               }
+               if _, ok := h[find]; !ok {
+                       b.Fatalf("did not find key %s", find)
+               }
+       }
+}
+
+func BenchmarkUncommon(b *testing.B) {
+       var buf bytes.Buffer
+       br := bufio.NewReader(&buf)
+       r := NewReader(br)
+       for i := 0; i < b.N; i++ {
+               buf.WriteString("uncommon-header-for-benchmark: foo\r\n\r\n")
                h, err := r.ReadMIMEHeader()
                if err != nil {
                        b.Fatal(err)
                }
-               if len(h) != 4 {
-                       b.Fatalf("want 4")
+               if _, ok := h["Uncommon-Header-For-Benchmark"]; !ok {
+                       b.Fatal("Missing result header.")
                }
        }
 }