]> Cypherpunks repositories - gostls13.git/commitdiff
net/http: unescape paths and patterns by segment
authorJonathan Amsterdam <jba@google.com>
Fri, 22 Sep 2023 19:57:46 +0000 (15:57 -0400)
committerJonathan Amsterdam <jba@google.com>
Mon, 25 Sep 2023 16:46:51 +0000 (16:46 +0000)
When parsing patterns and matching, split the path into segments at
slashes, then unescape each segment.

This behaves as most people would expect:

- The pattern "/%61" matches the paths "/a" and "/%61".

- The pattern "/%7B" matches the path "/{". (If we did not unescape
  patterns, there would be no way to write that pattern: because "/{"
  is a parse error because it is an invalid wildcard.)

- The pattern "/user/{u}" matches "/user/john%2Fdoe" with u set to
  "john/doe".

- The unexpected redirections of #21955 will not occur.

A later CL will restore the old behavior behind a GODEBUG setting.

Updates #61410.

Fixes #21955.

Change-Id: I99025e149021fc94bf87d351699270460db532d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/530575
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
src/net/http/pattern.go
src/net/http/pattern_test.go
src/net/http/request_test.go
src/net/http/routing_tree.go
src/net/http/routing_tree_test.go
src/net/http/server.go

index 2993aeccb92d26ac8f79bbbc633e0502bb276529..0c8644d9cdc5aa59e9b688ebe2029fb56fb3c00a 100644 (file)
@@ -9,6 +9,7 @@ package http
 import (
        "errors"
        "fmt"
+       "net/url"
        "strings"
        "unicode"
 )
@@ -141,6 +142,7 @@ func parsePattern(s string) (_ *pattern, err error) {
                seg, rest = rest[:i], rest[i:]
                if i := strings.IndexByte(seg, '{'); i < 0 {
                        // Literal.
+                       seg = pathUnescape(seg)
                        p.segments = append(p.segments, segment{s: seg})
                } else {
                        // Wildcard.
@@ -178,6 +180,7 @@ func parsePattern(s string) (_ *pattern, err error) {
        return p, nil
 }
 
+// TODO(jba): remove this; it is unused.
 func isValidHTTPToken(s string) bool {
        if s == "" {
                return false
@@ -204,6 +207,15 @@ func isValidWildcardName(s string) bool {
        return true
 }
 
+func pathUnescape(path string) string {
+       u, err := url.PathUnescape(path)
+       if err != nil {
+               // Invalidly escaped path; use the original
+               return path
+       }
+       return u
+}
+
 // relationship is a relationship between two patterns, p1 and p2.
 type relationship string
 
index abda4d872d49d63d12120698734bc549a9d4223a..b219648f332db5006bfa541b90e28223ebe815fb 100644 (file)
@@ -94,6 +94,10 @@ func TestParsePattern(t *testing.T) {
                        "a.com/foo//",
                        pattern{host: "a.com", segments: []segment{lit("foo"), lit(""), multi("")}},
                },
+               {
+                       "/%61%62/%7b/%",
+                       pattern{segments: []segment{lit("ab"), lit("{"), lit("%")}},
+               },
        } {
                got := mustParsePattern(t, test.in)
                if !got.equal(&test.want) {
@@ -113,6 +117,8 @@ func TestParsePatternError(t *testing.T) {
                {"/{w}x", "at offset 1: bad wildcard segment"},
                {"/x{w}", "at offset 1: bad wildcard segment"},
                {"/{wx", "at offset 1: bad wildcard segment"},
+               {"/a/{/}/c", "at offset 3: bad wildcard segment"},
+               {"/a/{%61}/c", "at offset 3: bad wildcard name"}, // wildcard names aren't unescaped
                {"/{a$}", "at offset 1: bad wildcard name"},
                {"/{}", "at offset 1: empty wildcard"},
                {"POST a.com/x/{}/y", "at offset 13: empty wildcard"},
index 18034ce163f6f6bd037424a34cde6a62f345bb2b..835db91a1a33cb08a0497e956da2861e0ddf97d1 100644 (file)
@@ -1445,23 +1445,22 @@ func TestPathValue(t *testing.T) {
                                "d": "",
                        },
                },
-               // TODO(jba): uncomment these tests when we implement path escaping (forthcoming).
-               // {
-               //      "/names/{name}/{other...}",
-               //      "/names/" + url.PathEscape("/john") + "/address",
-               //      map[string]string{
-               //              "name":  "/john",
-               //              "other": "address",
-               //      },
-               // },
-               // {
-               //      "/names/{name}/{other...}",
-               //      "/names/" + url.PathEscape("john/doe") + "/address",
-               //      map[string]string{
-               //              "name":  "john/doe",
-               //              "other": "address",
-               //      },
-               // },
+               {
+                       "/names/{name}/{other...}",
+                       "/names/%2fjohn/address",
+                       map[string]string{
+                               "name":  "/john",
+                               "other": "address",
+                       },
+               },
+               {
+                       "/names/{name}/{other...}",
+                       "/names/john%2Fdoe/there/is%2F/more",
+                       map[string]string{
+                               "name":  "john/doe",
+                               "other": "there/is//more",
+                       },
+               },
        } {
                mux := NewServeMux()
                mux.HandleFunc(test.pattern, func(w ResponseWriter, r *Request) {
@@ -1555,3 +1554,54 @@ func TestStatus(t *testing.T) {
                }
        }
 }
+
+func TestEscapedPathsAndPatterns(t *testing.T) {
+       matches := []struct {
+               pattern string
+               paths   []string
+       }{
+               {
+                       "/a", // this pattern matches a path that unescapes to "/a"
+                       []string{"/a", "/%61"},
+               },
+               {
+                       "/%62", // patterns are unescaped by segment; matches paths that unescape to "/b"
+                       []string{"/b", "/%62"},
+               },
+               {
+                       "/%7B/%7D", // the only way to write a pattern that matches '{' or '}'
+                       []string{"/{/}", "/%7b/}", "/{/%7d", "/%7B/%7D"},
+               },
+               {
+                       "/%x", // patterns that do not unescape are left unchanged
+                       []string{"/%25x"},
+               },
+       }
+
+       mux := NewServeMux()
+       var gotPattern string
+       for _, m := range matches {
+               mux.HandleFunc(m.pattern, func(w ResponseWriter, r *Request) {
+                       gotPattern = m.pattern
+               })
+       }
+
+       server := httptest.NewServer(mux)
+       defer server.Close()
+
+       for _, m := range matches {
+               for _, p := range m.paths {
+                       res, err := Get(server.URL + p)
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       if res.StatusCode != 200 {
+                               t.Errorf("%s: got code %d, want 200", p, res.StatusCode)
+                               continue
+                       }
+                       if g, w := gotPattern, m.pattern; g != w {
+                               t.Errorf("%s: pattern: got %q, want %q", p, g, w)
+                       }
+               }
+       }
+}
index 46287174a546aaf7ee8b2615936b4015b9f9d5a6..8812ed04e249cf2f502b4c3cf55660a2b81d527a 100644 (file)
@@ -19,7 +19,6 @@
 package http
 
 import (
-       "net/url"
        "strings"
 )
 
@@ -180,7 +179,7 @@ func (n *routingNode) matchPath(path string, matches []string) (*routingNode, []
        // We skip this step if the segment is a trailing slash, because single wildcards
        // don't match trailing slashes.
        if seg != "/" {
-               if n, m := n.emptyChild.matchPath(rest, append(matches, matchValue(seg))); n != nil {
+               if n, m := n.emptyChild.matchPath(rest, append(matches, seg)); n != nil {
                        return n, m
                }
        }
@@ -190,25 +189,17 @@ func (n *routingNode) matchPath(path string, matches []string) (*routingNode, []
                // Don't record a match for a nameless wildcard (which arises from a
                // trailing slash in the pattern).
                if c.pattern.lastSegment().s != "" {
-                       matches = append(matches, matchValue(path[1:])) // remove initial slash
+                       matches = append(matches, pathUnescape(path[1:])) // remove initial slash
                }
                return c, matches
        }
        return nil, nil
 }
 
-func matchValue(path string) string {
-       m, err := url.PathUnescape(path)
-       if err != nil {
-               // Path is not properly escaped, so use the original.
-               return path
-       }
-       return m
-}
-
 // firstSegment splits path into its first segment, and the rest.
 // The path must begin with "/".
 // If path consists of only a slash, firstSegment returns ("/", "").
+// The segment is returned unescaped, if possible.
 func firstSegment(path string) (seg, rest string) {
        if path == "/" {
                return "/", ""
@@ -216,9 +207,9 @@ func firstSegment(path string) (seg, rest string) {
        path = path[1:] // drop initial slash
        i := strings.IndexByte(path, '/')
        if i < 0 {
-               return path, ""
+               i = len(path)
        }
-       return path[:i], path[i:]
+       return pathUnescape(path[:i]), path[i:]
 }
 
 // matchingMethods adds to methodSet all the methods that would result in a
index 149349f39703e09749f0bfab9d38d1a120e65904..2aac8b6cdf12898e0c67a307bfb1391760c1d89f 100644 (file)
@@ -22,6 +22,8 @@ func TestRoutingFirstSegment(t *testing.T) {
                {"/a/b/c", []string{"a", "b", "c"}},
                {"/a/b/", []string{"a", "b", "/"}},
                {"/", []string{"/"}},
+               {"/a/%62/c", []string{"a", "b", "c"}},
+               {"/a%2Fb%2fc", []string{"a/b/c"}},
        } {
                var got []string
                rest := test.in
index b9f4a6b44844d2c9a0a6ade8a92cbe96cb1c22b8..ee02d776ac5fc5e5e011bfcd16cf46e052ee5c9d 100644 (file)
@@ -2422,10 +2422,9 @@ func (mux *ServeMux) Handler(r *Request) (h Handler, pattern string) {
 // after the redirect.
 func (mux *ServeMux) findHandler(r *Request) (h Handler, patStr string, _ *pattern, matches []string) {
        var n *routingNode
-       // TODO(jba): use escaped path. This is an independent change that is also part
-       // of proposal https://go.dev/issue/61410.
-       path := r.URL.Path
        host := r.URL.Host
+       escapedPath := r.URL.EscapedPath()
+       path := escapedPath
        // CONNECT requests are not canonicalized.
        if r.Method == "CONNECT" {
                // If r.URL.Path is /tree and its handler is not registered,
@@ -2451,7 +2450,7 @@ func (mux *ServeMux) findHandler(r *Request) (h Handler, patStr string, _ *patte
                if u != nil {
                        return RedirectHandler(u.String(), StatusMovedPermanently), u.Path, nil, nil
                }
-               if path != r.URL.Path {
+               if path != escapedPath {
                        // Redirect to cleaned path.
                        patStr := ""
                        if n != nil {
@@ -2478,8 +2477,7 @@ func (mux *ServeMux) findHandler(r *Request) (h Handler, patStr string, _ *patte
 }
 
 // matchOrRedirect looks up a node in the tree that matches the host, method and path.
-// The path is known to be in canonical form, except for CONNECT methods.
-
+//
 // If the url argument is non-nil, handler also deals with trailing-slash
 // redirection: when a path doesn't match exactly, the match is tried again
 // after appending "/" to the path. If that second match succeeds, the last
@@ -2496,7 +2494,7 @@ func (mux *ServeMux) matchOrRedirect(host, method, path string, u *url.URL) (_ *
                path += "/"
                n2, _ := mux.tree.match(host, method, path)
                if exactMatch(n2, path) {
-                       return nil, nil, &url.URL{Path: path, RawQuery: u.RawQuery}
+                       return nil, nil, &url.URL{Path: cleanPath(u.Path) + "/", RawQuery: u.RawQuery}
                }
        }
        return n, matches, nil