Cypherpunks repositories - gostls13.git/commitdiff
http: correct escaping of different parts of URL
author: Russ Cox <rsc@golang.org>
Tue, 28 Sep 2010 01:54:04 +0000 (21:54 -0400)
committer: Russ Cox <rsc@golang.org>
Tue, 28 Sep 2010 01:54:04 +0000 (21:54 -0400)
Fixes #1076.

R=adg
CC=golang-dev
https://golang.org/cl/2248045

src/pkg/http/client.go
src/pkg/http/readrequest_test.go
src/pkg/http/request.go
src/pkg/http/requestwrite_test.go
src/pkg/http/url.go
src/pkg/http/url_test.go

index 41e571c2c1b7233b6bc8dd40e562fe6e68be6efe..87f5c34d87e822e708c421455332c82aa658f50a 100644 (file)
@@ -45,7 +45,7 @@ func send(req *Request) (resp *Response, err os.Error) {
        if !hasPort(addr) {
                addr += ":" + req.URL.Scheme
        }
-       info := req.URL.Userinfo
+       info := req.URL.RawUserinfo
        if len(info) > 0 {
                enc := base64.URLEncoding
                encoded := make([]byte, enc.EncodedLen(len(info)))
index 7654dbfc74de2e9b060b6909b2307c6614a34808..f3bcc9f3df1a3d118be10bd4ad2762dadbbebf1a 100644 (file)
@@ -37,15 +37,15 @@ var reqTests = []reqTest{
                        Method: "GET",
                        RawURL: "http://www.techcrunch.com/",
                        URL: &URL{
-                               Raw:       "http://www.techcrunch.com/",
-                               Scheme:    "http",
-                               RawPath:   "/",
-                               Authority: "www.techcrunch.com",
-                               Userinfo:  "",
-                               Host:      "www.techcrunch.com",
-                               Path:      "/",
-                               RawQuery:  "",
-                               Fragment:  "",
+                               Raw:          "http://www.techcrunch.com/",
+                               Scheme:       "http",
+                               RawPath:      "/",
+                               RawAuthority: "www.techcrunch.com",
+                               RawUserinfo:  "",
+                               Host:         "www.techcrunch.com",
+                               Path:         "/",
+                               RawQuery:     "",
+                               Fragment:     "",
                        },
                        Proto:      "HTTP/1.1",
                        ProtoMajor: 1,
index 56a930e45146b196268854521773889e888835bc..81d718e97e08b9c08c37ad12596b1beef12c90fe 100644 (file)
@@ -191,7 +191,7 @@ func (req *Request) Write(w io.Writer) os.Error {
 
        uri := req.RawURL
        if uri == "" {
-               uri = valueOrDefault(urlEscape(req.URL.Path, false, false), "/")
+               uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/")
                if req.URL.RawQuery != "" {
                        uri += "?" + req.URL.RawQuery
                }
index 469df69d70fb05f3649cd67e92b2943c4bbff3e0..a74d54783418cf47c4c3b934e79efdef2edcd4cc 100644 (file)
@@ -21,15 +21,15 @@ var reqWriteTests = []reqWriteTest{
                        Method: "GET",
                        RawURL: "http://www.techcrunch.com/",
                        URL: &URL{
-                               Raw:       "http://www.techcrunch.com/",
-                               Scheme:    "http",
-                               RawPath:   "http://www.techcrunch.com/",
-                               Authority: "www.techcrunch.com",
-                               Userinfo:  "",
-                               Host:      "www.techcrunch.com",
-                               Path:      "/",
-                               RawQuery:  "",
-                               Fragment:  "",
+                               Raw:          "http://www.techcrunch.com/",
+                               Scheme:       "http",
+                               RawPath:      "http://www.techcrunch.com/",
+                               RawAuthority: "www.techcrunch.com",
+                               RawUserinfo:  "",
+                               Host:         "www.techcrunch.com",
+                               Path:         "/",
+                               RawQuery:     "",
+                               Fragment:     "",
                        },
                        Proto:      "HTTP/1.1",
                        ProtoMajor: 1,
index c1ede281123ea6aef15b7c8cb65ca7bbd81fac8d..23abc62a97cc5a7f8f87b85e4bd9bb34c3a812b7 100644 (file)
@@ -46,6 +46,17 @@ func unhex(c byte) byte {
        return 0
 }
 
+type encoding int
+
+const (
+       encodePath encoding = 1 + iota
+       encodeUserPassword
+       encodeQueryComponent
+       encodeFragment
+       encodeOpaque
+)
+
+
 type URLEscapeError string
 
 func (e URLEscapeError) String() string {
@@ -55,19 +66,52 @@ func (e URLEscapeError) String() string {
 // Return true if the specified character should be escaped when
 // appearing in a URL string, according to RFC 2396.
 // When 'all' is true the full range of reserved characters are matched.
-func shouldEscape(c byte, all bool) bool {
-       if c <= ' ' || c >= 0x7F {
-               return true
+func shouldEscape(c byte, mode encoding) bool {
+       // RFC 2396 §2.3 Unreserved characters (alphanum)
+       if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
+               return false
        }
        switch c {
-       case '<', '>', '#', '%', '"', // RFC 2396 delims
-               '{', '}', '|', '\\', '^', '[', ']', '`', // RFC2396 unwise
-               '?', '&', '=', '+': // RFC 2396 reserved
-               return true
-       case ';', '/', ':', '@', '$', ',': // RFC 2396 reserved
-               return all
+       case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark)
+               return false
+
+       case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
+               // Different sections of the URL allow a few of
+               // the reserved characters to appear unescaped.
+               switch mode {
+               case encodePath: // §3.3
+                       // The RFC allows : @ & = + $ , but saves / ; for assigning
+                       // meaning to individual path segments.  This package
+                       // only manipulates the path as a whole, so we allow those
+                       // last two as well.  Clients that need to distinguish between
+                       // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath.
+                       // That leaves only ? to escape.
+                       return c == '?'
+
+               case encodeUserPassword: // §3.2.2
+                       // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
+                       // The parsing of userinfo treats : as special so we must escape that too.
+                       return c == '@' || c == '/' || c == ':'
+
+               case encodeQueryComponent: // §3.4
+                       // The RFC reserves (so we must escape) everything.
+                       return true
+
+               case encodeFragment: // §4.1
+                       // The RFC text is silent but the grammar allows
+                       // everything, so escape nothing.
+                       return false
+
+               case encodeOpaque: // §3 opaque_part
+                       // The RFC allows opaque_part to use all characters
+                       // except that the leading / must be escaped.
+                       // (We implement that case in String.)
+                       return false
+               }
        }
-       return false
+
+       // Everything else must be escaped.
+       return true
 }
 
 // CanonicalPath applies the algorithm specified in RFC 2396 to
@@ -127,17 +171,19 @@ func CanonicalPath(path string) string {
        return string(a)
 }
 
-// URLUnescape unescapes a URL-encoded string,
+// URLUnescape unescapes a string in ``URL encoded'' form,
 // converting %AB into the byte 0xAB and '+' into ' ' (space).
 // It returns an error if any % is not followed
 // by two hexadecimal digits.
-func URLUnescape(s string) (string, os.Error) { return urlUnescape(s, true) }
+// Despite the name, this encoding applies only to individual
+// components of the query portion of the URL.
+func URLUnescape(s string) (string, os.Error) {
+       return urlUnescape(s, encodeQueryComponent)
+}
 
-// urlUnescape is like URLUnescape but can be told not to
-// convert + into space.  URLUnescape implements what is
-// called "URL encoding" but that only applies to query strings.
-// Elsewhere in the URL, + does not mean space.
-func urlUnescape(s string, doPlus bool) (string, os.Error) {
+// urlUnescape is like URLUnescape but mode specifies
+// which section of the URL is being unescaped.
+func urlUnescape(s string, mode encoding) (string, os.Error) {
        // Count %, check that they're well-formed.
        n := 0
        hasPlus := false
@@ -154,7 +200,7 @@ func urlUnescape(s string, doPlus bool) (string, os.Error) {
                        }
                        i += 3
                case '+':
-                       hasPlus = doPlus
+                       hasPlus = mode == encodeQueryComponent
                        i++
                default:
                        i++
@@ -174,7 +220,7 @@ func urlUnescape(s string, doPlus bool) (string, os.Error) {
                        j++
                        i += 3
                case '+':
-                       if doPlus {
+                       if mode == encodeQueryComponent {
                                t[j] = ' '
                        } else {
                                t[j] = '+'
@@ -190,15 +236,19 @@ func urlUnescape(s string, doPlus bool) (string, os.Error) {
        return string(t), nil
 }
 
-// URLEscape converts a string into URL-encoded form.
-func URLEscape(s string) string { return urlEscape(s, true, true) }
+// URLEscape converts a string into ``URL encoded'' form.
+// Despite the name, this encoding applies only to individual
+// components of the query portion of the URL.
+func URLEscape(s string) string {
+       return urlEscape(s, encodeQueryComponent)
+}
 
-func urlEscape(s string, doPlus, all bool) string {
+func urlEscape(s string, mode encoding) string {
        spaceCount, hexCount := 0, 0
        for i := 0; i < len(s); i++ {
                c := s[i]
-               if shouldEscape(c, all) {
-                       if c == ' ' && doPlus {
+               if shouldEscape(c, mode) {
+                       if c == ' ' && mode == encodeQueryComponent {
                                spaceCount++
                        } else {
                                hexCount++
@@ -214,10 +264,10 @@ func urlEscape(s string, doPlus, all bool) string {
        j := 0
        for i := 0; i < len(s); i++ {
                switch c := s[i]; {
-               case c == ' ' && doPlus:
+               case c == ' ' && mode == encodeQueryComponent:
                        t[j] = '+'
                        j++
-               case shouldEscape(c, all):
+               case shouldEscape(c, mode):
                        t[j] = '%'
                        t[j+1] = "0123456789abcdef"[c>>4]
                        t[j+2] = "0123456789abcdef"[c&15]
@@ -230,25 +280,64 @@ func urlEscape(s string, doPlus, all bool) string {
        return string(t)
 }
 
+// UnescapeUserinfo parses the RawUserinfo field of a URL
+// as the form user or user:password and unescapes and returns
+// the two halves.
+//
+// This functionality should only be used with legacy web sites.
+// RFC 2396 warns that interpreting Userinfo this way
+// ``is NOT RECOMMENDED, because the passing of authentication
+// information in clear text (such as URI) has proven to be a
+// security risk in almost every case where it has been used.''
+func UnescapeUserinfo(rawUserinfo string) (user, password string, err os.Error) {
+       u, p := split(rawUserinfo, ':', true)
+       if user, err = urlUnescape(u, encodeUserPassword); err != nil {
+               return "", "", err
+       }
+       if password, err = urlUnescape(p, encodeUserPassword); err != nil {
+               return "", "", err
+       }
+       return
+}
+
+// EscapeUserinfo combines user and password in the form
+// user:password (or just user if password is empty) and then
+// escapes it for use as the URL.RawUserinfo field.
+//
+// This functionality should only be used with legacy web sites.
+// RFC 2396 warns that interpreting Userinfo this way
+// ``is NOT RECOMMENDED, because the passing of authentication
+// information in clear text (such as URI) has proven to be a
+// security risk in almost every case where it has been used.''
+func EscapeUserinfo(user, password string) string {
+       raw := urlEscape(user, encodeUserPassword)
+       if password != "" {
+               raw += ":" + urlEscape(password, encodeUserPassword)
+       }
+       return raw
+}
+
 // A URL represents a parsed URL (technically, a URI reference).
 // The general form represented is:
 //     scheme://[userinfo@]host/path[?query][#fragment]
-// The Raw, RawPath, and RawQuery fields are in "wire format" (special
-// characters must be hex-escaped if not meant to have special meaning).
+// The Raw, RawAuthority, RawPath, and RawQuery fields are in "wire format"
+// (special characters must be hex-escaped if not meant to have special meaning).
 // All other fields are logical values; '+' or '%' represent themselves.
 //
-// Note, the reason for using wire format for the query is that it needs
-// to be split into key/value pairs before decoding.
+// The various Raw values are supplied in wire format because
+// clients typically have to split them into pieces before further
+// decoding.
 type URL struct {
-       Raw       string // the original string
-       Scheme    string // scheme
-       Authority string // [userinfo@]host
-       Userinfo  string // userinfo
-       Host      string // host
-       RawPath   string // /path[?query][#fragment]
-       Path      string // /path
-       RawQuery  string // query
-       Fragment  string // fragment
+       Raw          string // the original string
+       Scheme       string // scheme
+       RawAuthority string // [userinfo@]host
+       RawUserinfo  string // userinfo
+       Host         string // host
+       RawPath      string // /path[?query][#fragment]
+       Path         string // /path
+       OpaquePath   bool   // path is opaque (unrooted when scheme is present)
+       RawQuery     string // query
+       Fragment     string // fragment
 }
 
 // Maybe rawurl is of the form scheme:path.
@@ -304,56 +393,63 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
        url = new(URL)
        url.Raw = rawurl
 
-       // split off possible leading "http:", "mailto:", etc.
+       // Split off possible leading "http:", "mailto:", etc.
+       // Cannot contain escaped characters.
        var path string
        if url.Scheme, path, err = getscheme(rawurl); err != nil {
                goto Error
        }
 
-       // RFC 2396: a relative URI (no scheme) has a ?query,
-       // but absolute URIs only have query if path begins with /
-       var query string
-       if url.Scheme == "" || len(path) > 0 && path[0] == '/' {
-               path, query = split(path, '?', false)
+       if url.Scheme != "" && (len(path) == 0 || path[0] != '/') {
+               // RFC 2396:
+               // Absolute URI (has scheme) with non-rooted path
+               // is uninterpreted.  It doesn't even have a ?query.
+               // This is the case that handles mailto:name@example.com.
+               url.RawPath = path
+
+               if url.Path, err = urlUnescape(path, encodeOpaque); err != nil {
+                       goto Error
+               }
+               url.OpaquePath = true
+       } else {
+               // Split off query before parsing path further.
+               url.RawPath = path
+               path, query := split(path, '?', false)
                if len(query) > 1 {
                        url.RawQuery = query[1:]
                }
-       }
 
-       // Maybe path is //authority/path
-       if url.Scheme != "" && len(path) > 2 && path[0:2] == "//" {
-               url.Authority, path = split(path[2:], '/', false)
-       }
-       url.RawPath = path + query
+               // Maybe path is //authority/path
+               if url.Scheme != "" && len(path) > 2 && path[0:2] == "//" {
+                       url.RawAuthority, path = split(path[2:], '/', false)
+                       url.RawPath = url.RawPath[2+len(url.RawAuthority):]
+               }
 
-       // If there's no @, split's default is wrong.  Check explicitly.
-       if strings.Index(url.Authority, "@") < 0 {
-               url.Host = url.Authority
-       } else {
-               url.Userinfo, url.Host = split(url.Authority, '@', true)
-       }
+               // Split authority into userinfo@host.
+               // If there's no @, split's default is wrong.  Check explicitly.
+               var rawHost string
+               if strings.Index(url.RawAuthority, "@") < 0 {
+                       rawHost = url.RawAuthority
+               } else {
+                       url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true)
+               }
 
-       if url.Path, err = urlUnescape(path, false); err != nil {
-               goto Error
-       }
+               // We leave RawAuthority only in raw form because clients
+               // of common protocols should be using Userinfo and Host
+               // instead.  Clients that wish to use RawAuthority will have to
+               // interpret it themselves: RFC 2396 does not define the meaning.
 
-       // Remove escapes from the Authority and Userinfo fields, and verify
-       // that Scheme and Host contain no escapes (that would be illegal).
-       if url.Authority, err = urlUnescape(url.Authority, false); err != nil {
-               goto Error
-       }
-       if url.Userinfo, err = urlUnescape(url.Userinfo, false); err != nil {
-               goto Error
-       }
-       if strings.Index(url.Scheme, "%") >= 0 {
-               err = os.ErrorString("hexadecimal escape in scheme")
-               goto Error
-       }
-       if strings.Index(url.Host, "%") >= 0 {
-               err = os.ErrorString("hexadecimal escape in host")
-               goto Error
-       }
+               if strings.Index(rawHost, "%") >= 0 {
+                       // Host cannot contain escaped characters.
+                       err = os.ErrorString("hexadecimal escape in host")
+                       goto Error
+               }
+               url.Host = rawHost
 
+               if url.Path, err = urlUnescape(path, encodePath); err != nil {
+                       goto Error
+               }
+       }
        return url, nil
 
 Error:
@@ -372,7 +468,7 @@ func ParseURLReference(rawurlref string) (url *URL, err os.Error) {
        url.RawPath += frag
        if len(frag) > 1 {
                frag = frag[1:]
-               if url.Fragment, err = urlUnescape(frag, false); err != nil {
+               if url.Fragment, err = urlUnescape(frag, encodeFragment); err != nil {
                        return nil, &URLError{"parse", rawurl, err}
                }
        }
@@ -382,31 +478,40 @@ func ParseURLReference(rawurlref string) (url *URL, err os.Error) {
 // String reassembles url into a valid URL string.
 //
 // There are redundant fields stored in the URL structure:
-// the String method consults Scheme, Path, Host, Userinfo,
+// the String method consults Scheme, Path, Host, RawUserinfo,
 // RawQuery, and Fragment, but not Raw, RawPath or Authority.
 func (url *URL) String() string {
        result := ""
        if url.Scheme != "" {
                result += url.Scheme + ":"
        }
-       if url.Host != "" || url.Userinfo != "" {
+       if url.Host != "" || url.RawUserinfo != "" {
                result += "//"
-               if url.Userinfo != "" {
+               if url.RawUserinfo != "" {
                        // hide the password, if any
-                       info := url.Userinfo
+                       info := url.RawUserinfo
                        if i := strings.Index(info, ":"); i >= 0 {
                                info = info[0:i] + ":******"
                        }
-                       result += urlEscape(info, false, false) + "@"
+                       result += info + "@"
                }
                result += url.Host
        }
-       result += urlEscape(url.Path, false, false)
+       if url.OpaquePath {
+               path := url.Path
+               if strings.HasPrefix(path, "/") {
+                       result += "%2f"
+                       path = path[1:]
+               }
+               result += urlEscape(path, encodeOpaque)
+       } else {
+               result += urlEscape(url.Path, encodePath)
+       }
        if url.RawQuery != "" {
                result += "?" + url.RawQuery
        }
        if url.Fragment != "" {
-               result += "#" + urlEscape(url.Fragment, false, false)
+               result += "#" + urlEscape(url.Fragment, encodeFragment)
        }
        return result
 }
index 5ab512c4fd55f47c46f4575ca36e3c07bd105647..7bb36a76e3234d77a7c913b9072fa2491353fe74 100644 (file)
@@ -27,10 +27,10 @@ var urltests = []URLTest{
        URLTest{
                "http://www.google.com",
                &URL{
-                       Raw:       "http://www.google.com",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
+                       Raw:          "http://www.google.com",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
                },
                "",
        },
@@ -38,12 +38,12 @@ var urltests = []URLTest{
        URLTest{
                "http://www.google.com/",
                &URL{
-                       Raw:       "http://www.google.com/",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/",
-                       Path:      "/",
+                       Raw:          "http://www.google.com/",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/",
+                       Path:         "/",
                },
                "",
        },
@@ -51,26 +51,26 @@ var urltests = []URLTest{
        URLTest{
                "http://www.google.com/file%20one%26two",
                &URL{
-                       Raw:       "http://www.google.com/file%20one%26two",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/file%20one%26two",
-                       Path:      "/file one&two",
+                       Raw:          "http://www.google.com/file%20one%26two",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/file%20one%26two",
+                       Path:         "/file one&two",
                },
-               "http://www.google.com/file%20one%26two",
+               "http://www.google.com/file%20one&two",
        },
        // user
        URLTest{
                "ftp://webmaster@www.google.com/",
                &URL{
-                       Raw:       "ftp://webmaster@www.google.com/",
-                       Scheme:    "ftp",
-                       Authority: "webmaster@www.google.com",
-                       Userinfo:  "webmaster",
-                       Host:      "www.google.com",
-                       RawPath:   "/",
-                       Path:      "/",
+                       Raw:          "ftp://webmaster@www.google.com/",
+                       Scheme:       "ftp",
+                       RawAuthority: "webmaster@www.google.com",
+                       RawUserinfo:  "webmaster",
+                       Host:         "www.google.com",
+                       RawPath:      "/",
+                       Path:         "/",
                },
                "",
        },
@@ -78,13 +78,13 @@ var urltests = []URLTest{
        URLTest{
                "ftp://john%20doe@www.google.com/",
                &URL{
-                       Raw:       "ftp://john%20doe@www.google.com/",
-                       Scheme:    "ftp",
-                       Authority: "john doe@www.google.com",
-                       Userinfo:  "john doe",
-                       Host:      "www.google.com",
-                       RawPath:   "/",
-                       Path:      "/",
+                       Raw:          "ftp://john%20doe@www.google.com/",
+                       Scheme:       "ftp",
+                       RawAuthority: "john%20doe@www.google.com",
+                       RawUserinfo:  "john%20doe",
+                       Host:         "www.google.com",
+                       RawPath:      "/",
+                       Path:         "/",
                },
                "ftp://john%20doe@www.google.com/",
        },
@@ -92,13 +92,13 @@ var urltests = []URLTest{
        URLTest{
                "http://www.google.com/?q=go+language",
                &URL{
-                       Raw:       "http://www.google.com/?q=go+language",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/?q=go+language",
-                       Path:      "/",
-                       RawQuery:  "q=go+language",
+                       Raw:          "http://www.google.com/?q=go+language",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/?q=go+language",
+                       Path:         "/",
+                       RawQuery:     "q=go+language",
                },
                "",
        },
@@ -106,13 +106,13 @@ var urltests = []URLTest{
        URLTest{
                "http://www.google.com/?q=go%20language",
                &URL{
-                       Raw:       "http://www.google.com/?q=go%20language",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/?q=go%20language",
-                       Path:      "/",
-                       RawQuery:  "q=go%20language",
+                       Raw:          "http://www.google.com/?q=go%20language",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/?q=go%20language",
+                       Path:         "/",
+                       RawQuery:     "q=go%20language",
                },
                "",
        },
@@ -120,26 +120,39 @@ var urltests = []URLTest{
        URLTest{
                "http://www.google.com/a%20b?q=c+d",
                &URL{
-                       Raw:       "http://www.google.com/a%20b?q=c+d",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/a%20b?q=c+d",
-                       Path:      "/a b",
-                       RawQuery:  "q=c+d",
+                       Raw:          "http://www.google.com/a%20b?q=c+d",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/a%20b?q=c+d",
+                       Path:         "/a b",
+                       RawQuery:     "q=c+d",
                },
                "",
        },
-       // path without /, so no query parsing
+       // path without leading /, so no query parsing
        URLTest{
                "http:www.google.com/?q=go+language",
                &URL{
-                       Raw:     "http:www.google.com/?q=go+language",
-                       Scheme:  "http",
-                       RawPath: "www.google.com/?q=go+language",
-                       Path:    "www.google.com/?q=go+language",
+                       Raw:        "http:www.google.com/?q=go+language",
+                       Scheme:     "http",
+                       RawPath:    "www.google.com/?q=go+language",
+                       Path:       "www.google.com/?q=go+language",
+                       OpaquePath: true,
+               },
+               "http:www.google.com/?q=go+language",
+       },
+       // path without leading /, so no query parsing
+       URLTest{
+               "http:%2f%2fwww.google.com/?q=go+language",
+               &URL{
+                       Raw:        "http:%2f%2fwww.google.com/?q=go+language",
+                       Scheme:     "http",
+                       RawPath:    "%2f%2fwww.google.com/?q=go+language",
+                       Path:       "//www.google.com/?q=go+language",
+                       OpaquePath: true,
                },
-               "http:www.google.com/%3fq%3dgo%2blanguage",
+               "http:%2f/www.google.com/?q=go+language",
        },
        // non-authority
        URLTest{
@@ -156,10 +169,11 @@ var urltests = []URLTest{
        URLTest{
                "mailto:webmaster@golang.org",
                &URL{
-                       Raw:     "mailto:webmaster@golang.org",
-                       Scheme:  "mailto",
-                       RawPath: "webmaster@golang.org",
-                       Path:    "webmaster@golang.org",
+                       Raw:        "mailto:webmaster@golang.org",
+                       Scheme:     "mailto",
+                       RawPath:    "webmaster@golang.org",
+                       Path:       "webmaster@golang.org",
+                       OpaquePath: true,
                },
                "",
        },
@@ -188,22 +202,22 @@ var urltests = []URLTest{
        URLTest{
                "http://user:password@google.com",
                &URL{
-                       Raw:       "http://user:password@google.com",
-                       Scheme:    "http",
-                       Authority: "user:password@google.com",
-                       Userinfo:  "user:password",
-                       Host:      "google.com",
+                       Raw:          "http://user:password@google.com",
+                       Scheme:       "http",
+                       RawAuthority: "user:password@google.com",
+                       RawUserinfo:  "user:password",
+                       Host:         "google.com",
                },
                "http://user:******@google.com",
        },
        URLTest{
                "http://user:longerpass@google.com",
                &URL{
-                       Raw:       "http://user:longerpass@google.com",
-                       Scheme:    "http",
-                       Authority: "user:longerpass@google.com",
-                       Userinfo:  "user:longerpass",
-                       Host:      "google.com",
+                       Raw:          "http://user:longerpass@google.com",
+                       Scheme:       "http",
+                       RawAuthority: "user:longerpass@google.com",
+                       RawUserinfo:  "user:longerpass",
+                       Host:         "google.com",
                },
                "http://user:******@google.com",
        },
@@ -213,13 +227,13 @@ var urlnofragtests = []URLTest{
        URLTest{
                "http://www.google.com/?q=go+language#foo",
                &URL{
-                       Raw:       "http://www.google.com/?q=go+language#foo",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/?q=go+language#foo",
-                       Path:      "/",
-                       RawQuery:  "q=go+language#foo",
+                       Raw:          "http://www.google.com/?q=go+language#foo",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/?q=go+language#foo",
+                       Path:         "/",
+                       RawQuery:     "q=go+language#foo",
                },
                "",
        },
@@ -229,37 +243,37 @@ var urlfragtests = []URLTest{
        URLTest{
                "http://www.google.com/?q=go+language#foo",
                &URL{
-                       Raw:       "http://www.google.com/?q=go+language#foo",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/?q=go+language#foo",
-                       Path:      "/",
-                       RawQuery:  "q=go+language",
-                       Fragment:  "foo",
+                       Raw:          "http://www.google.com/?q=go+language#foo",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/?q=go+language#foo",
+                       Path:         "/",
+                       RawQuery:     "q=go+language",
+                       Fragment:     "foo",
                },
                "",
        },
        URLTest{
                "http://www.google.com/?q=go+language#foo%26bar",
                &URL{
-                       Raw:       "http://www.google.com/?q=go+language#foo%26bar",
-                       Scheme:    "http",
-                       Authority: "www.google.com",
-                       Host:      "www.google.com",
-                       RawPath:   "/?q=go+language#foo%26bar",
-                       Path:      "/",
-                       RawQuery:  "q=go+language",
-                       Fragment:  "foo&bar",
+                       Raw:          "http://www.google.com/?q=go+language#foo%26bar",
+                       Scheme:       "http",
+                       RawAuthority: "www.google.com",
+                       Host:         "www.google.com",
+                       RawPath:      "/?q=go+language#foo%26bar",
+                       Path:         "/",
+                       RawQuery:     "q=go+language",
+                       Fragment:     "foo&bar",
                },
-               "",
+               "http://www.google.com/?q=go+language#foo&bar",
        },
 }
 
 // more useful string for debugging than fmt's struct printer
 func ufmt(u *URL) string {
        return fmt.Sprintf("%q, %q, %q, %q, %q, %q, %q, %q, %q",
-               u.Raw, u.Scheme, u.RawPath, u.Authority, u.Userinfo,
+               u.Raw, u.Scheme, u.RawPath, u.RawAuthority, u.RawUserinfo,
                u.Host, u.Path, u.RawQuery, u.Fragment)
 }
 
@@ -307,11 +321,9 @@ func DoTestString(t *testing.T, parse func(string) (*URL, os.Error), name string
 
 func TestURLString(t *testing.T) {
        DoTestString(t, ParseURL, "ParseURL", urltests)
-       DoTestString(t, ParseURL, "ParseURL", urlfragtests)
        DoTestString(t, ParseURL, "ParseURL", urlnofragtests)
        DoTestString(t, ParseURLReference, "ParseURLReference", urltests)
        DoTestString(t, ParseURLReference, "ParseURLReference", urlfragtests)
-       DoTestString(t, ParseURLReference, "ParseURLReference", urlnofragtests)
 }
 
 type URLEscapeTest struct {
@@ -467,3 +479,31 @@ func TestCanonicalPath(t *testing.T) {
                }
        }
 }
+
+type UserinfoTest struct {
+       User     string
+       Password string
+       Raw      string
+}
+
+var userinfoTests = []UserinfoTest{
+       UserinfoTest{"user", "password", "user:password"},
+       UserinfoTest{"foo:bar", "~!@#$%^&*()_+{}|[]\\-=`:;'\"<>?,./",
+               "foo%3abar:~!%40%23$%25%5e&*()_+%7b%7d%7c%5b%5d%5c-=%60%3a;'%22%3c%3e?,.%2f"},
+}
+
+func TestEscapeUserinfo(t *testing.T) {
+       for _, tt := range userinfoTests {
+               if raw := EscapeUserinfo(tt.User, tt.Password); raw != tt.Raw {
+                       t.Errorf("EscapeUserinfo(%q, %q) = %q, want %q", tt.User, tt.Password, raw, tt.Raw)
+               }
+       }
+}
+
+func TestUnescapeUserinfo(t *testing.T) {
+       for _, tt := range userinfoTests {
+               if user, pass, err := UnescapeUserinfo(tt.Raw); user != tt.User || pass != tt.Password || err != nil {
+                       t.Errorf("UnescapeUserinfo(%q) = %q, %q, %v, want %q, %q, nil", tt.Raw, user, pass, err, tt.User, tt.Password)
+               }
+       }
+}