]> Cypherpunks repositories - gostls13.git/commitdiff
http: fix scheme-relative URL parsing; add ParseRequestURL
authorBrad Fitzpatrick <brad@danga.com>
Thu, 13 Jan 2011 22:10:02 +0000 (09:10 +1100)
committerAndrew Gerrand <adg@golang.org>
Thu, 13 Jan 2011 22:10:02 +0000 (09:10 +1100)
Also adds some tests for Issue 900 which was the reason
the current URL parsing is broken.  (the previous fix
was wrong)

R=rsc, adg, dangabrad, bradfitzwork
CC=golang-dev
https://golang.org/cl/3910042

src/pkg/http/readrequest_test.go
src/pkg/http/request.go
src/pkg/http/serve_test.go
src/pkg/http/url.go
src/pkg/http/url_test.go

index 067e17ddae57a1a7b7070c9db8aa35447a8daf0a..5e1cbcbcbdcb6b4a6f39608ecac37531a93adb97 100644 (file)
@@ -69,6 +69,41 @@ var reqTests = []reqTest{
 
                "abcdef\n",
        },
+
+       // Tests that we don't parse a path that looks like a
+       // scheme-relative URI as a scheme-relative URI.
+       {
+               "GET //user@host/is/actually/a/path/ HTTP/1.1\r\n" +
+                       "Host: test\r\n\r\n",
+
+               Request{
+                       Method: "GET",
+                       RawURL: "//user@host/is/actually/a/path/",
+                       URL: &URL{
+                               Raw:          "//user@host/is/actually/a/path/",
+                               Scheme:       "",
+                               RawPath:      "//user@host/is/actually/a/path/",
+                               RawAuthority: "",
+                               RawUserinfo:  "",
+                               Host:         "",
+                               Path:         "//user@host/is/actually/a/path/",
+                               RawQuery:     "",
+                               Fragment:     "",
+                       },
+                       Proto:         "HTTP/1.1",
+                       ProtoMajor:    1,
+                       ProtoMinor:    1,
+                       Header:        map[string]string{},
+                       Close:         false,
+                       ContentLength: -1,
+                       Host:          "test",
+                       Referer:       "",
+                       UserAgent:     "",
+                       Form:          map[string][]string{},
+               },
+
+               "",
+       },
 }
 
 func TestReadRequest(t *testing.T) {
index b88689988d846e708ff6a47041c468513c1b7e47..04bebaaf55bc0df244776de2d793b286ccd9cb62 100644 (file)
@@ -504,7 +504,7 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
                return nil, &badStringError{"malformed HTTP version", req.Proto}
        }
 
-       if req.URL, err = ParseURL(req.RawURL); err != nil {
+       if req.URL, err = ParseRequestURL(req.RawURL); err != nil {
                return nil, err
        }
 
index 43e1b93a59fd2a07627970ed09e65803a7707a76..053d6dca448ec9697400938d56901d643b7bfef5 100644 (file)
@@ -7,7 +7,9 @@
 package http
 
 import (
+       "bufio"
        "bytes"
+       "io"
        "os"
        "net"
        "testing"
@@ -133,3 +135,86 @@ func TestConsumingBodyOnNextConn(t *testing.T) {
                t.Errorf("Serve returned %q; expected EOF", serveerr)
        }
 }
+
+type responseWriterMethodCall struct {
+       method                 string
+       headerKey, headerValue string // if method == "SetHeader"
+       bytesWritten           []byte // if method == "Write"
+       responseCode           int    // if method == "WriteHeader"
+}
+
+type recordingResponseWriter struct {
+       log []*responseWriterMethodCall
+}
+
+func (rw *recordingResponseWriter) RemoteAddr() string {
+       return "1.2.3.4"
+}
+
+func (rw *recordingResponseWriter) UsingTLS() bool {
+       return false
+}
+
+func (rw *recordingResponseWriter) SetHeader(k, v string) {
+       rw.log = append(rw.log, &responseWriterMethodCall{method: "SetHeader", headerKey: k, headerValue: v})
+}
+
+func (rw *recordingResponseWriter) Write(buf []byte) (int, os.Error) {
+       rw.log = append(rw.log, &responseWriterMethodCall{method: "Write", bytesWritten: buf})
+       return len(buf), nil
+}
+
+func (rw *recordingResponseWriter) WriteHeader(code int) {
+       rw.log = append(rw.log, &responseWriterMethodCall{method: "WriteHeader", responseCode: code})
+}
+
+func (rw *recordingResponseWriter) Flush() {
+       rw.log = append(rw.log, &responseWriterMethodCall{method: "Flush"})
+}
+
+func (rw *recordingResponseWriter) Hijack() (io.ReadWriteCloser, *bufio.ReadWriter, os.Error) {
+       panic("Not supported")
+}
+
+// Tests for http://code.google.com/p/go/issues/detail?id=900
+func TestMuxRedirectLeadingSlashes(t *testing.T) {
+       paths := []string{"//foo.txt", "///foo.txt", "/../../foo.txt"}
+       for _, path := range paths {
+               req, err := ReadRequest(bufio.NewReader(bytes.NewBufferString("GET " + path + " HTTP/1.1\r\nHost: test\r\n\r\n")))
+               if err != nil {
+                       t.Errorf("%s", err)
+               }
+               mux := NewServeMux()
+               resp := new(recordingResponseWriter)
+               resp.log = make([]*responseWriterMethodCall, 0)
+
+               mux.ServeHTTP(resp, req)
+
+               dumpLog := func() {
+                       t.Logf("For path %q:", path)
+                       for _, call := range resp.log {
+                               t.Logf("Got call: %s, header=%s, value=%s, buf=%q, code=%d", call.method,
+                                       call.headerKey, call.headerValue, call.bytesWritten, call.responseCode)
+                       }
+               }
+
+               if len(resp.log) != 2 {
+                       dumpLog()
+                       t.Errorf("expected 2 calls to response writer; got %d", len(resp.log))
+                       return
+               }
+
+               if resp.log[0].method != "SetHeader" ||
+                       resp.log[0].headerKey != "Location" || resp.log[0].headerValue != "/foo.txt" {
+                       dumpLog()
+                       t.Errorf("Expected SetHeader of Location to /foo.txt")
+                       return
+               }
+
+               if resp.log[1].method != "WriteHeader" || resp.log[1].responseCode != StatusMovedPermanently {
+                       dumpLog()
+                       t.Errorf("Expected WriteHeader of StatusMovedPermanently")
+                       return
+               }
+       }
+}
index f0ac4c1dfd7a5d584469184c53f4f171b0b5454a..e4aa077e52b16db87cfb37d1e9904ecb6a8b6e0f 100644 (file)
@@ -385,7 +385,25 @@ func split(s string, c byte, cutc bool) (string, string) {
 // ParseURL parses rawurl into a URL structure.
 // The string rawurl is assumed not to have a #fragment suffix.
 // (Web browsers strip #fragment before sending the URL to a web server.)
+// The rawurl may be relative or absolute.
 func ParseURL(rawurl string) (url *URL, err os.Error) {
+       return parseURL(rawurl, false)
+}
+
+// ParseRequestURL parses rawurl into a URL structure.  It assumes that
+// rawurl was received from an HTTP request, so the rawurl is interpreted
+// only as an absolute URI or an absolute path.
+// The string rawurl is assumed not to have a #fragment suffix.
+// (Web browsers strip #fragment before sending the URL to a web server.)
+func ParseRequestURL(rawurl string) (url *URL, err os.Error) {
+       return parseURL(rawurl, true)
+}
+
+// parseURL parses a URL from a string in one of two contexts.  If
+// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
+// in which case only absolute URLs or path-absolute relative URLs are allowed.
+// If viaRequest is false, all forms of relative URLs are allowed.
+func parseURL(rawurl string, viaRequest bool) (url *URL, err os.Error) {
        if rawurl == "" {
                err = os.ErrorString("empty url")
                goto Error
@@ -400,7 +418,9 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
                goto Error
        }
 
-       if url.Scheme != "" && (len(path) == 0 || path[0] != '/') {
+       leadingSlash := strings.HasPrefix(path, "/")
+
+       if url.Scheme != "" && !leadingSlash {
                // RFC 2396:
                // Absolute URI (has scheme) with non-rooted path
                // is uninterpreted.  It doesn't even have a ?query.
@@ -412,6 +432,11 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
                }
                url.OpaquePath = true
        } else {
+               if viaRequest && !leadingSlash {
+                       err = os.ErrorString("invalid URI for request")
+                       goto Error
+               }
+
                // Split off query before parsing path further.
                url.RawPath = path
                path, query := split(path, '?', false)
@@ -420,7 +445,8 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
                }
 
                // Maybe path is //authority/path
-               if url.Scheme != "" && len(path) > 2 && path[0:2] == "//" {
+               if (url.Scheme != "" || !viaRequest) &&
+                       strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") {
                        url.RawAuthority, path = split(path[2:], '/', false)
                        url.RawPath = url.RawPath[2+len(url.RawAuthority):]
                }
index 447d5390ef6500a22833c95e39a5e2bf7ce476dc..9a67185d24e7e60de5dfbbf4327366d5da69b123 100644 (file)
@@ -188,14 +188,48 @@ var urltests = []URLTest{
                },
                "",
        },
-       // leading // without scheme shouldn't create an authority
+       // leading // without scheme should create an authority
        {
                "//foo",
                &URL{
-                       Raw:     "//foo",
-                       Scheme:  "",
-                       RawPath: "//foo",
-                       Path:    "//foo",
+                       RawAuthority: "foo",
+                       Raw:          "//foo",
+                       Host:         "foo",
+                       Scheme:       "",
+                       RawPath:      "",
+                       Path:         "",
+               },
+               "",
+       },
+       // leading // without scheme, with userinfo, path, and query
+       {
+               "//user@foo/path?a=b",
+               &URL{
+                       Raw:          "//user@foo/path?a=b",
+                       RawAuthority: "user@foo",
+                       RawUserinfo:  "user",
+                       Scheme:       "",
+                       RawPath:      "/path?a=b",
+                       Path:         "/path",
+                       RawQuery:     "a=b",
+                       Host:         "foo",
+               },
+               "",
+       },
+       // Three leading slashes isn't an authority, but doesn't return an error.
+       // (We can't return an error, as this code is also used via
+       // ServeHTTP -> ReadRequest -> ParseURL, which is arguably a
+       // different URL parsing context, but currently shares the
+       // same codepath)
+       {
+               "///threeslashes",
+               &URL{
+                       RawAuthority: "",
+                       Raw:          "///threeslashes",
+                       Host:         "",
+                       Scheme:       "",
+                       RawPath:      "///threeslashes",
+                       Path:         "///threeslashes",
                },
                "",
        },
@@ -272,7 +306,7 @@ var urlfragtests = []URLTest{
 
 // more useful string for debugging than fmt's struct printer
 func ufmt(u *URL) string {
-       return fmt.Sprintf("%q, %q, %q, %q, %q, %q, %q, %q, %q",
+       return fmt.Sprintf("raw=%q, scheme=%q, rawpath=%q, auth=%q, userinfo=%q, host=%q, path=%q, rawq=%q, frag=%q",
                u.Raw, u.Scheme, u.RawPath, u.RawAuthority, u.RawUserinfo,
                u.Host, u.Path, u.RawQuery, u.Fragment)
 }
@@ -301,6 +335,40 @@ func TestParseURLReference(t *testing.T) {
        DoTest(t, ParseURLReference, "ParseURLReference", urlfragtests)
 }
 
+const pathThatLooksSchemeRelative = "//not.a.user@not.a.host/just/a/path"
+
+var parseRequestUrlTests = []struct {
+       url           string
+       expectedValid bool
+}{
+       {"http://foo.com", true},
+       {"http://foo.com/", true},
+       {"http://foo.com/path", true},
+       {"/", true},
+       {pathThatLooksSchemeRelative, true},
+       {"//not.a.user@%66%6f%6f.com/just/a/path/also", true},
+       {"foo.html", false},
+       {"../dir/", false},
+}
+
+func TestParseRequestURL(t *testing.T) {
+       for _, test := range parseRequestUrlTests {
+               _, err := ParseRequestURL(test.url)
+               valid := err == nil
+               if valid != test.expectedValid {
+                       t.Errorf("Expected valid=%v for %q; got %v", test.expectedValid, test.url, valid)
+               }
+       }
+
+       url, err := ParseRequestURL(pathThatLooksSchemeRelative)
+       if err != nil {
+               t.Fatalf("Unexpected error %v", err)
+       }
+       if url.Path != pathThatLooksSchemeRelative {
+               t.Errorf("Expected path %q; got %q", pathThatLooksSchemeRelative, url.Path)
+       }
+}
+
 func DoTestString(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) {
        for _, tt := range tests {
                u, err := parse(tt.in)