From c4e910895b3d91e4c7d4d6b5cd0af5e0eb787b72 Mon Sep 17 00:00:00 2001 From: Julien Cretel Date: Thu, 23 Oct 2025 16:44:15 +0000 Subject: [PATCH] net/url: speed up escape and unescape MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This change adds a generated 8-bit bitmask for use in functions shouldEscape and ishex. Function shouldEscape is now inlineable. Function escape is now much faster; function unescape is a bit faster. Here are some benchmark results (no change to allocations): goos: darwin goarch: amd64 pkg: net/url cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz │ old │ new │ │ sec/op │ sec/op vs base │ QueryEscape/#00-8 58.38n ± 1% 35.98n ± 1% -38.38% (p=0.000 n=20) QueryEscape/#01-8 303.50n ± 0% 94.77n ± 0% -68.77% (p=0.000 n=20) QueryEscape/#02-8 202.90n ± 0% 78.66n ± 1% -61.23% (p=0.000 n=20) QueryEscape/#03-8 444.5n ± 0% 145.9n ± 0% -67.17% (p=0.000 n=20) QueryEscape/#04-8 2678.0n ± 0% 913.7n ± 0% -65.88% (p=0.000 n=20) PathEscape/#00-8 81.34n ± 0% 44.64n ± 1% -45.12% (p=0.000 n=20) PathEscape/#01-8 307.65n ± 0% 96.71n ± 1% -68.56% (p=0.000 n=20) PathEscape/#02-8 200.80n ± 1% 78.25n ± 0% -61.03% (p=0.000 n=20) PathEscape/#03-8 450.1n ± 1% 145.5n ± 0% -67.67% (p=0.000 n=20) PathEscape/#04-8 2663.5n ± 0% 876.5n ± 0% -67.09% (p=0.000 n=20) QueryUnescape/#00-8 53.32n ± 1% 51.67n ± 1% -3.09% (p=0.000 n=20) QueryUnescape/#01-8 161.0n ± 1% 136.2n ± 1% -15.40% (p=0.000 n=20) QueryUnescape/#02-8 126.1n ± 1% 118.3n ± 1% -6.23% (p=0.000 n=20) QueryUnescape/#03-8 294.6n ± 0% 273.1n ± 0% -7.30% (p=0.000 n=20) QueryUnescape/#04-8 1.511µ ± 0% 1.411µ ± 0% -6.62% (p=0.000 n=20) PathUnescape/#00-8 63.84n ± 1% 53.59n ± 1% -16.05% (p=0.000 n=20) PathUnescape/#01-8 163.6n ± 3% 137.9n ± 1% -15.71% (p=0.000 n=20) PathUnescape/#02-8 126.4n ± 1% 119.1n ± 1% -5.78% (p=0.000 n=20) PathUnescape/#03-8 294.2n ± 0% 273.3n ± 0% -7.12% (p=0.000 n=20) PathUnescape/#04-8 1.554µ ± 0% 1.417µ ± 0% -8.78% (p=0.000 n=20) geomean 277.8n 162.7n 
-41.44% This change draws heavy inspiration from CL 174998, which showed promise but stalled years ago. Updates #17860 Change-Id: Idcbb1696608998b9e2fc91e1f2a488d8f1f6028c GitHub-Last-Rev: ff360c2f1b51b1e725d10c0864a6b698d3a5ffc3 GitHub-Pull-Request: golang/go#75914 Reviewed-on: https://go-review.googlesource.com/c/go/+/712200 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Jorropo Reviewed-by: Takuto Nagami Reviewed-by: Dmitri Shuralyov --- src/net/url/encoding_table.go | 114 +++++++++++++++ src/net/url/gen_encoding_table.go | 234 ++++++++++++++++++++++++++++++ src/net/url/url.go | 106 +------------- 3 files changed, 354 insertions(+), 100 deletions(-) create mode 100644 src/net/url/encoding_table.go create mode 100644 src/net/url/gen_encoding_table.go diff --git a/src/net/url/encoding_table.go b/src/net/url/encoding_table.go new file mode 100644 index 0000000000..60b3564948 --- /dev/null +++ b/src/net/url/encoding_table.go @@ -0,0 +1,114 @@ +// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT. + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package url + +type encoding uint8 + +const ( + encodePath encoding = 1 << iota + encodePathSegment + encodeHost + encodeZone + encodeUserPassword + encodeQueryComponent + encodeFragment + + // hexChar is actually NOT an encoding mode, but there are only seven + // encoding modes. We might as well abuse the otherwise unused most + // significant bit in uint8 to indicate whether a character is + // hexadecimal. 
+ hexChar +) + +var table = [256]encoding{ + '!': encodeFragment | encodeZone | encodeHost, + '"': encodeZone | encodeHost, + '$': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '&': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '\'': encodeZone | encodeHost, + '(': encodeFragment | encodeZone | encodeHost, + ')': encodeFragment | encodeZone | encodeHost, + '*': encodeFragment | encodeZone | encodeHost, + '+': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + ',': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath, + '-': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '.': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '/': encodeFragment | encodePath, + '0': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '1': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '2': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '3': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '4': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '5': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '6': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '7': hexChar | encodeFragment | encodeQueryComponent | 
encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '8': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '9': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + ':': encodeFragment | encodeZone | encodeHost | encodePathSegment | encodePath, + ';': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath, + '<': encodeZone | encodeHost, + '=': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '>': encodeZone | encodeHost, + '?': encodeFragment, + '@': encodeFragment | encodePathSegment | encodePath, + 'A': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'B': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'C': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'D': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'E': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'F': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'G': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'H': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'I': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'J': encodeFragment | 
encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'K': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'L': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'M': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'N': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'O': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'P': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'Q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'R': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'S': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'T': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'U': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'V': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'W': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'X': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'Y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'Z': 
encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '[': encodeZone | encodeHost, + ']': encodeZone | encodeHost, + '_': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'a': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'b': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'c': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'd': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'e': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'f': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'g': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'h': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'i': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'j': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'k': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'l': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'm': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | 
encodePath, + 'n': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'o': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'p': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'r': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 's': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 't': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'u': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'v': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'w': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'x': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + 'z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, + '~': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath, +} diff --git a/src/net/url/gen_encoding_table.go b/src/net/url/gen_encoding_table.go new file mode 100644 index 0000000000..5defe5046b --- /dev/null +++ b/src/net/url/gen_encoding_table.go @@ -0,0 +1,234 @@ +// Copyright 
2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +package main + +import ( + "bytes" + _ "embed" + "fmt" + "go/format" + "io" + "log" + "maps" + "os" + "slices" + "strconv" + "strings" +) + +// We embed this source file in the resulting code-generation program in order +// to extract the definitions of the encoding type and constants from it and +// include them in the generated file. +// +//go:embed gen_encoding_table.go +var genSource string + +const filename = "encoding_table.go" + +func main() { + var out bytes.Buffer + fmt.Fprintln(&out, "// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.") + fmt.Fprintln(&out) + fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.") + fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style") + fmt.Fprintln(&out, "// license that can be found in the LICENSE file.") + fmt.Fprintln(&out) + fmt.Fprintln(&out, "package url") + fmt.Fprintln(&out) + generateEnc(&out, genSource) + generateTable(&out) + + formatted, err := format.Source(out.Bytes()) + if err != nil { + log.Fatal("format:", err) + } + + err = os.WriteFile(filename, formatted, 0644) + if err != nil { + log.Fatal("WriteFile:", err) + } +} + +func generateEnc(w io.Writer, src string) { + var writeLine bool + for line := range strings.Lines(src) { + if strings.HasPrefix(line, "// START encoding") { + writeLine = true + continue + } + if strings.HasPrefix(line, "// END encoding") { + return + } + if writeLine { + fmt.Fprint(w, line) + } + } +} + +func generateTable(w io.Writer) { + fmt.Fprintln(w, "var table = [256]encoding{") + + // Sort the encodings (in decreasing order) to guarantee a stable output. + sortedEncs := slices.Sorted(maps.Keys(encNames)) + slices.Reverse(sortedEncs) + + for i := range 256 { + c := byte(i) + var lineBuf bytes.Buffer + + // Write key to line buffer. 
+ lineBuf.WriteString(strconv.QuoteRune(rune(c))) + + lineBuf.WriteByte(':') + + // Write value to line buffer. + blankVal := true + if ishex(c) { + // Set the hexChar bit if this char is hexadecimal. + lineBuf.WriteString("hexChar") + blankVal = false + } + for _, enc := range sortedEncs { + if !shouldEscape(c, enc) { + if !blankVal { + lineBuf.WriteByte('|') + } + // Set this encoding mode's bit if this char should NOT be + // escaped. + name := encNames[enc] + lineBuf.WriteString(name) + blankVal = false + } + } + + if !blankVal { + lineBuf.WriteString(",\n") + w.Write(lineBuf.Bytes()) + } + } + fmt.Fprintln(w, "}") +} + +// START encoding (keep this marker comment in sync with generateEnc) +type encoding uint8 + +const ( + encodePath encoding = 1 << iota + encodePathSegment + encodeHost + encodeZone + encodeUserPassword + encodeQueryComponent + encodeFragment + + // hexChar is actually NOT an encoding mode, but there are only seven + // encoding modes. We might as well abuse the otherwise unused most + // significant bit in uint8 to indicate whether a character is + // hexadecimal. + hexChar +) + +// END encoding (keep this marker comment in sync with generateEnc) + +// Keep this in sync with the definitions of encoding mode constants. +var encNames = map[encoding]string{ + encodePath: "encodePath", + encodePathSegment: "encodePathSegment", + encodeHost: "encodeHost", + encodeZone: "encodeZone", + encodeUserPassword: "encodeUserPassword", + encodeQueryComponent: "encodeQueryComponent", + encodeFragment: "encodeFragment", +} + +// Return true if the specified character should be escaped when +// appearing in a URL string, according to RFC 3986. +// +// Please be informed that for now shouldEscape does not check all +// reserved characters correctly. See golang.org/issue/5684. 
+func shouldEscape(c byte, mode encoding) bool { + // §2.3 Unreserved characters (alphanum) + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { + return false + } + + if mode == encodeHost || mode == encodeZone { + // §3.2.2 Host allows + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + // as part of reg-name. + // We add : because we include :port as part of host. + // We add [ ] because we include [ipv6]:port as part of host. + // We add < > because they're the only characters left that + // we could possibly allow, and Parse will reject them if we + // escape them (because hosts can't use %-encoding for + // ASCII bytes). + switch c { + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': + return false + } + } + + switch c { + case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) + return false + + case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) + // Different sections of the URL allow a few of + // the reserved characters to appear unescaped. + switch mode { + case encodePath: // §3.3 + // The RFC allows : @ & = + $ but saves / ; , for assigning + // meaning to individual path segments. This package + // only manipulates the path as a whole, so we allow those + // last three as well. That leaves only ? to escape. + return c == '?' + + case encodePathSegment: // §3.3 + // The RFC allows : @ & = + $ but saves / ; , for assigning + // meaning to individual path segments. + return c == '/' || c == ';' || c == ',' || c == '?' + + case encodeUserPassword: // §3.2.1 + // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in + // userinfo, so we must escape only '@', '/', and '?'. + // The parsing of userinfo treats ':' as special so we must escape + // that too. + return c == '@' || c == '/' || c == '?' || c == ':' + + case encodeQueryComponent: // §3.4 + // The RFC reserves (so we must escape) everything. 
+ return true + + case encodeFragment: // §4.1 + // The RFC text is silent but the grammar allows + // everything, so escape nothing. + return false + } + } + + if mode == encodeFragment { + // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are + // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not + // need to be escaped. To minimize potential breakage, we apply two restrictions: + // (1) we always escape sub-delims outside of the fragment, and (2) we always + // escape single quote to avoid breaking callers that had previously assumed that + // single quotes would be escaped. See issue #19917. + switch c { + case '!', '(', ')', '*': + return false + } + } + + // Everything else must be escaped. + return true +} + +func ishex(c byte) bool { + return '0' <= c && c <= '9' || + 'a' <= c && c <= 'f' || + 'A' <= c && c <= 'F' +} diff --git a/src/net/url/url.go b/src/net/url/url.go index 4508f26608..71fd8f59b3 100644 --- a/src/net/url/url.go +++ b/src/net/url/url.go @@ -7,6 +7,9 @@ // See RFC 3986. This package generally follows RFC 3986, except where // it deviates for compatibility reasons. // RFC 6874 followed for IPv6 zone literals. + +//go:generate go run gen_encoding_table.go + package url // When sending changes, first search old issues for history on decisions. 
@@ -50,15 +53,7 @@ func (e *Error) Temporary() bool { const upperhex = "0123456789ABCDEF" func ishex(c byte) bool { - switch { - case '0' <= c && c <= '9': - return true - case 'a' <= c && c <= 'f': - return true - case 'A' <= c && c <= 'F': - return true - } - return false + return table[c]&hexChar != 0 } func unhex(c byte) byte { @@ -74,18 +69,6 @@ func unhex(c byte) byte { } } -type encoding int - -const ( - encodePath encoding = 1 + iota - encodePathSegment - encodeHost - encodeZone - encodeUserPassword - encodeQueryComponent - encodeFragment -) - type EscapeError string func (e EscapeError) Error() string { @@ -98,86 +81,9 @@ func (e InvalidHostError) Error() string { return "invalid character " + strconv.Quote(string(e)) + " in host name" } -// Return true if the specified character should be escaped when -// appearing in a URL string, according to RFC 3986. -// -// Please be informed that for now shouldEscape does not check all -// reserved characters correctly. See golang.org/issue/5684. +// See the reference implementation in gen_encoding_table.go. func shouldEscape(c byte, mode encoding) bool { - // §2.3 Unreserved characters (alphanum) - if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { - return false - } - - if mode == encodeHost || mode == encodeZone { - // §3.2.2 Host allows - // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" - // as part of reg-name. - // We add : because we include :port as part of host. - // We add [ ] because we include [ipv6]:port as part of host. - // We add < > because they're the only characters left that - // we could possibly allow, and Parse will reject them if we - // escape them (because hosts can't use %-encoding for - // ASCII bytes). 
- switch c { - case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': - return false - } - } - - switch c { - case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) - return false - - case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) - // Different sections of the URL allow a few of - // the reserved characters to appear unescaped. - switch mode { - case encodePath: // §3.3 - // The RFC allows : @ & = + $ but saves / ; , for assigning - // meaning to individual path segments. This package - // only manipulates the path as a whole, so we allow those - // last three as well. That leaves only ? to escape. - return c == '?' - - case encodePathSegment: // §3.3 - // The RFC allows : @ & = + $ but saves / ; , for assigning - // meaning to individual path segments. - return c == '/' || c == ';' || c == ',' || c == '?' - - case encodeUserPassword: // §3.2.1 - // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in - // userinfo, so we must escape only '@', '/', and '?'. - // The parsing of userinfo treats ':' as special so we must escape - // that too. - return c == '@' || c == '/' || c == '?' || c == ':' - - case encodeQueryComponent: // §3.4 - // The RFC reserves (so we must escape) everything. - return true - - case encodeFragment: // §4.1 - // The RFC text is silent but the grammar allows - // everything, so escape nothing. - return false - } - } - - if mode == encodeFragment { - // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are - // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not - // need to be escaped. To minimize potential breakage, we apply two restrictions: - // (1) we always escape sub-delims outside of the fragment, and (2) we always - // escape single quote to avoid breaking callers that had previously assumed that - // single quotes would be escaped. See issue #19917. 
- switch c { - case '!', '(', ')', '*': - return false - } - } - - // Everything else must be escaped. - return true + return table[c]&mode == 0 } // QueryUnescape does the inverse transformation of [QueryEscape], -- 2.52.0