From: Jonathan Amsterdam Date: Fri, 8 Sep 2023 00:14:47 +0000 (-0400) Subject: net/http: extended routing patterns X-Git-Tag: go1.22rc1~904 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=09465129bd9adc2713c16656ac614df396117355;p=gostls13.git net/http: extended routing patterns This is the first of several CLs implementing the proposal for enhanced ServeMux routing, https://go.dev/issue/61410. Define a type to represent extended routing patterns and a function to parse a string into one. Updates #61410. Change-Id: I779689acf1f14b20d12c9264251f7dc002b68c49 Reviewed-on: https://go-review.googlesource.com/c/go/+/526815 Run-TryBot: Jonathan Amsterdam Reviewed-by: Eli Bendersky TryBot-Result: Gopher Robot --- diff --git a/src/net/http/pattern.go b/src/net/http/pattern.go new file mode 100644 index 0000000000..a04fd901ca --- /dev/null +++ b/src/net/http/pattern.go @@ -0,0 +1,187 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Patterns for ServeMux routing. + +package http + +import ( + "errors" + "fmt" + "strings" + "unicode" +) + +// A pattern is something that can be matched against an HTTP request. +// It has an optional method, an optional host, and a path. +type pattern struct { + str string // original string + method string + host string + // The representation of a path differs from the surface syntax, which + // simplifies most algorithms. + // + // Paths ending in '/' are represented with an anonymous "..." wildcard. + // For example, the path "a/" is represented as a literal segment "a" followed + // by a segment with multi==true. + // + // Paths ending in "{$}" are represented with the literal segment "/". + // For example, the path "a/{$}" is represented as a literal segment "a" followed + // by a literal segment "/". + segments []segment + loc string // source location of registering call, for helpful messages +} + +// A segment is a pattern piece that matches one or more path segments, or +// a trailing slash. +// +// If wild is false, it matches a literal segment, or, if s == "/", a trailing slash. +// Examples: +// +// "a" => segment{s: "a"} +// "/{$}" => segment{s: "/"} +// +// If wild is true and multi is false, it matches a single path segment. +// Example: +// +// "{x}" => segment{s: "x", wild: true} +// +// If both wild and multi are true, it matches all remaining path segments. +// Example: +// +// "{rest...}" => segment{s: "rest", wild: true, multi: true} +type segment struct { + s string // literal or wildcard name or "/" for "/{$}". + wild bool + multi bool // "..." wildcard +} + +// parsePattern parses a string into a Pattern. +// The string's syntax is +// +// [METHOD] [HOST]/[PATH] +// +// where: +// - METHOD is an HTTP method +// - HOST is a hostname +// - PATH consists of slash-separated segments, where each segment is either +// a literal or a wildcard of the form "{name}", "{name...}", or "{$}". +// +// METHOD, HOST and PATH are all optional; that is, the string can be "/". +// If METHOD is present, it must be followed by a single space. +// Wildcard names must be valid Go identifiers. +// The "{$}" and "{name...}" wildcard must occur at the end of PATH. +// PATH may end with a '/'. +// Wildcard names in a path must be distinct. +func parsePattern(s string) (*pattern, error) { + if len(s) == 0 { + return nil, errors.New("empty pattern") + } + // TODO(jba): record the rune offset in s to provide more information in errors. + method, rest, found := strings.Cut(s, " ") + if !found { + rest = method + method = "" + } + if method != "" && !validMethod(method) { + return nil, fmt.Errorf("net/http: invalid method %q", method) + } + p := &pattern{str: s, method: method} + + i := strings.IndexByte(rest, '/') + if i < 0 { + return nil, errors.New("host/path missing /") + } + p.host = rest[:i] + rest = rest[i:] + if strings.IndexByte(p.host, '{') >= 0 { + return nil, errors.New("host contains '{' (missing initial '/'?)") + } + // At this point, rest is the path. + + // An unclean path with a method that is not CONNECT can never match, + // because paths are cleaned before matching. + if method != "" && method != "CONNECT" && rest != cleanPath(rest) { + return nil, errors.New("non-CONNECT pattern with unclean path can never match") + } + + seenNames := map[string]bool{} // remember wildcard names to catch dups + for len(rest) > 0 { + // Invariant: rest[0] == '/'. + rest = rest[1:] + if len(rest) == 0 { + // Trailing slash. + p.segments = append(p.segments, segment{wild: true, multi: true}) + break + } + i := strings.IndexByte(rest, '/') + if i < 0 { + i = len(rest) + } + var seg string + seg, rest = rest[:i], rest[i:] + if i := strings.IndexByte(seg, '{'); i < 0 { + // Literal. + p.segments = append(p.segments, segment{s: seg}) + } else { + // Wildcard. + if i != 0 { + return nil, errors.New("bad wildcard segment (must start with '{')") + } + if seg[len(seg)-1] != '}' { + return nil, errors.New("bad wildcard segment (must end with '}')") + } + name := seg[1 : len(seg)-1] + if name == "$" { + if len(rest) != 0 { + return nil, errors.New("{$} not at end") + } + p.segments = append(p.segments, segment{s: "/"}) + break + } + name, multi := strings.CutSuffix(name, "...") + if multi && len(rest) != 0 { + return nil, errors.New("{...} wildcard not at end") + } + if name == "" { + return nil, errors.New("empty wildcard") + } + if !isValidWildcardName(name) { + return nil, fmt.Errorf("bad wildcard name %q", name) + } + if seenNames[name] { + return nil, fmt.Errorf("duplicate wildcard name %q", name) + } + seenNames[name] = true + p.segments = append(p.segments, segment{s: name, wild: true, multi: multi}) + } + } + return p, nil +} + +func isValidHTTPToken(s string) bool { + if s == "" { + return false + } + // See https://www.rfc-editor.org/rfc/rfc9110#section-5.6.2. + for _, r := range s { + if !unicode.IsLetter(r) && !unicode.IsDigit(r) && !strings.ContainsRune("!#$%&'*+.^_`|~-", r) { + return false + } + } + return true +} + +func isValidWildcardName(s string) bool { + if s == "" { + return false + } + // Valid Go identifier. + for i, c := range s { + if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) { + return false + } + } + return true +} diff --git a/src/net/http/pattern_test.go b/src/net/http/pattern_test.go new file mode 100644 index 0000000000..759e1267f9 --- /dev/null +++ b/src/net/http/pattern_test.go @@ -0,0 +1,167 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "slices" + "strings" + "testing" +) + +func TestParsePattern(t *testing.T) { + lit := func(name string) segment { + return segment{s: name} + } + + wild := func(name string) segment { + return segment{s: name, wild: true} + } + + multi := func(name string) segment { + s := wild(name) + s.multi = true + return s + } + + for _, test := range []struct { + in string + want pattern + }{ + {"/", pattern{segments: []segment{multi("")}}}, + {"/a", pattern{segments: []segment{lit("a")}}}, + { + "/a/", + pattern{segments: []segment{lit("a"), multi("")}}, + }, + {"/path/to/something", pattern{segments: []segment{ + lit("path"), lit("to"), lit("something"), + }}}, + { + "/{w1}/lit/{w2}", + pattern{ + segments: []segment{wild("w1"), lit("lit"), wild("w2")}, + }, + }, + { + "/{w1}/lit/{w2}/", + pattern{ + segments: []segment{wild("w1"), lit("lit"), wild("w2"), multi("")}, + }, + }, + { + "example.com/", + pattern{host: "example.com", segments: []segment{multi("")}}, + }, + { + "GET /", + pattern{method: "GET", segments: []segment{multi("")}}, + }, + { + "POST example.com/foo/{w}", + pattern{ + method: "POST", + host: "example.com", + segments: []segment{lit("foo"), wild("w")}, + }, + }, + { + "/{$}", + pattern{segments: []segment{lit("/")}}, + }, + { + "DELETE example.com/a/{foo12}/{$}", + pattern{method: "DELETE", host: "example.com", segments: []segment{lit("a"), wild("foo12"), lit("/")}}, + }, + { + "/foo/{$}", + pattern{segments: []segment{lit("foo"), lit("/")}}, + }, + { + "/{a}/foo/{rest...}", + pattern{segments: []segment{wild("a"), lit("foo"), multi("rest")}}, + }, + { + "//", + pattern{segments: []segment{lit(""), multi("")}}, + }, + { + "/foo///./../bar", + pattern{segments: []segment{lit("foo"), lit(""), lit(""), lit("."), lit(".."), lit("bar")}}, + }, + { + "a.com/foo//", + pattern{host: "a.com", segments: []segment{lit("foo"), lit(""), multi("")}}, + }, + } { + got := mustParsePattern(t, test.in) + if !got.equal(&test.want) { + t.Errorf("%q:\ngot %#v\nwant %#v", test.in, got, &test.want) + } + } +} + +func TestParsePatternError(t *testing.T) { + for _, test := range []struct { + in string + contains string + }{ + {"", "empty pattern"}, + {"A=B /", "invalid method"}, + {" ", "missing /"}, + {"/{w}x", "bad wildcard segment"}, + {"/x{w}", "bad wildcard segment"}, + {"/{wx", "bad wildcard segment"}, + {"/{a$}", "bad wildcard name"}, + {"/{}", "empty wildcard"}, + {"/{...}", "empty wildcard"}, + {"/{$...}", "bad wildcard"}, + {"/{$}/", "{$} not at end"}, + {"/{$}/x", "{$} not at end"}, + {"/{a...}/", "not at end"}, + {"/{a...}/x", "not at end"}, + {"{a}/b", "missing initial '/'"}, + {"/a/{x}/b/{x...}", "duplicate wildcard name"}, + {"GET //", "unclean path"}, + } { + _, err := parsePattern(test.in) + if err == nil || !strings.Contains(err.Error(), test.contains) { + t.Errorf("%q:\ngot %v, want error containing %q", test.in, err, test.contains) + } + } +} + +func (p1 *pattern) equal(p2 *pattern) bool { + return p1.method == p2.method && p1.host == p2.host && + slices.Equal(p1.segments, p2.segments) +} + +func TestIsValidHTTPToken(t *testing.T) { + for _, test := range []struct { + in string + want bool + }{ + {"", false}, + {"GET", true}, + {"get", true}, + {"white space", false}, + {"#!~", true}, + {"a-b1_2", true}, + {"notok)", false}, + } { + got := isValidHTTPToken(test.in) + if g, w := got, test.want; g != w { + t.Errorf("%q: got %t, want %t", test.in, g, w) + } + } +} + +func mustParsePattern(t *testing.T, s string) *pattern { + t.Helper() + p, err := parsePattern(s) + if err != nil { + t.Fatal(err) + } + return p +}