--- /dev/null
+// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package url
+
+type encoding uint8
+
+const (
+ encodePath encoding = 1 << iota
+ encodePathSegment
+ encodeHost
+ encodeZone
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+
+ // hexChar is actually NOT an encoding mode, but there are only seven
+ // encoding modes. We might as well abuse the otherwise unused most
+ // significant bit in uint8 to indicate whether a character is
+ // hexadecimal.
+ hexChar
+)
+
+var table = [256]encoding{
+ '!': encodeFragment | encodeZone | encodeHost,
+ '"': encodeZone | encodeHost,
+ '$': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '&': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '\'': encodeZone | encodeHost,
+ '(': encodeFragment | encodeZone | encodeHost,
+ ')': encodeFragment | encodeZone | encodeHost,
+ '*': encodeFragment | encodeZone | encodeHost,
+ '+': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ',': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
+ '-': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '.': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '/': encodeFragment | encodePath,
+ '0': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '1': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '2': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '3': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '4': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '5': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '6': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '7': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '8': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '9': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ':': encodeFragment | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ';': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
+ '<': encodeZone | encodeHost,
+ '=': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '>': encodeZone | encodeHost,
+ '?': encodeFragment,
+ '@': encodeFragment | encodePathSegment | encodePath,
+ 'A': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'B': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'C': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'D': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'E': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'F': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'G': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'H': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'I': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'J': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'K': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'L': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'M': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'N': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'O': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'P': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'R': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'S': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'T': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'U': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'V': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'W': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'X': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '[': encodeZone | encodeHost,
+ ']': encodeZone | encodeHost,
+ '_': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'a': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'b': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'c': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'd': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'e': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'f': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'g': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'h': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'i': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'j': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'k': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'l': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'm': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'n': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'o': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'p': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'r': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 's': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 't': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'u': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'v': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'w': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'x': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '~': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+}
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+import (
+ "bytes"
+ _ "embed"
+ "fmt"
+ "go/format"
+ "io"
+ "log"
+ "maps"
+ "os"
+ "slices"
+ "strconv"
+ "strings"
+)
+
+// We embed this source file in the resulting code-generation program in order
+// to extract the definitions of the encoding type and constants from it and
+// include them in the generated file.
+//
+//go:embed gen_encoding_table.go
+var genSource string
+
+const filename = "encoding_table.go"
+
+func main() {
+ var out bytes.Buffer
+ fmt.Fprintln(&out, "// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.")
+ fmt.Fprintln(&out)
+ fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.")
+ fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style")
+ fmt.Fprintln(&out, "// license that can be found in the LICENSE file.")
+ fmt.Fprintln(&out)
+ fmt.Fprintln(&out, "package url")
+ fmt.Fprintln(&out)
+ generateEnc(&out, genSource)
+ generateTable(&out)
+
+ formatted, err := format.Source(out.Bytes())
+ if err != nil {
+ log.Fatal("format:", err)
+ }
+
+ err = os.WriteFile(filename, formatted, 0644)
+ if err != nil {
+ log.Fatal("WriteFile:", err)
+ }
+}
+
+func generateEnc(w io.Writer, src string) {
+ var writeLine bool
+ for line := range strings.Lines(src) {
+ if strings.HasPrefix(line, "// START encoding") {
+ writeLine = true
+ continue
+ }
+ if strings.HasPrefix(line, "// END encoding") {
+ return
+ }
+ if writeLine {
+ fmt.Fprint(w, line)
+ }
+ }
+}
+
+func generateTable(w io.Writer) {
+ fmt.Fprintln(w, "var table = [256]encoding{")
+
+ // Sort the encodings (in decreasing order) to guarantee a stable output.
+ sortedEncs := slices.Sorted(maps.Keys(encNames))
+ slices.Reverse(sortedEncs)
+
+ for i := range 256 {
+ c := byte(i)
+ var lineBuf bytes.Buffer
+
+ // Write key to line buffer.
+ lineBuf.WriteString(strconv.QuoteRune(rune(c)))
+
+ lineBuf.WriteByte(':')
+
+ // Write value to line buffer.
+ blankVal := true
+ if ishex(c) {
+ // Set the hexChar bit if this char is hexadecimal.
+ lineBuf.WriteString("hexChar")
+ blankVal = false
+ }
+ for _, enc := range sortedEncs {
+ if !shouldEscape(c, enc) {
+ if !blankVal {
+ lineBuf.WriteByte('|')
+ }
+ // Set this encoding mode's bit if this char should NOT be
+ // escaped.
+ name := encNames[enc]
+ lineBuf.WriteString(name)
+ blankVal = false
+ }
+ }
+
+ if !blankVal {
+ lineBuf.WriteString(",\n")
+ w.Write(lineBuf.Bytes())
+ }
+ }
+ fmt.Fprintln(w, "}")
+}
+
+// START encoding (keep this marker comment in sync with genEnc)
+type encoding uint8
+
+const (
+ encodePath encoding = 1 << iota
+ encodePathSegment
+ encodeHost
+ encodeZone
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+
+ // hexChar is actually NOT an encoding mode, but there are only seven
+ // encoding modes. We might as well abuse the otherwise unused most
+ // significant bit in uint8 to indicate whether a character is
+ // hexadecimal.
+ hexChar
+)
+
+// END encoding (keep this marker comment in sync with genEnc)
+
+// Keep this in sync with the definitions of encoding mode constants.
+var encNames = map[encoding]string{
+ encodePath: "encodePath",
+ encodePathSegment: "encodePathSegment",
+ encodeHost: "encodeHost",
+ encodeZone: "encodeZone",
+ encodeUserPassword: "encodeUserPassword",
+ encodeQueryComponent: "encodeQueryComponent",
+ encodeFragment: "encodeFragment",
+}
+
+// Return true if the specified character should be escaped when
+// appearing in a URL string, according to RFC 3986.
+//
+// Please be informed that for now shouldEscape does not check all
+// reserved characters correctly. See golang.org/issue/5684.
+func shouldEscape(c byte, mode encoding) bool {
+ // §2.3 Unreserved characters (alphanum)
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+ return false
+ }
+
+ if mode == encodeHost || mode == encodeZone {
+ // §3.2.2 Host allows
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ // as part of reg-name.
+ // We add : because we include :port as part of host.
+ // We add [ ] because we include [ipv6]:port as part of host.
+ // We add < > because they're the only characters left that
+ // we could possibly allow, and Parse will reject them if we
+ // escape them (because hosts can't use %-encoding for
+ // ASCII bytes).
+ switch c {
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
+ return false
+ }
+ }
+
+ switch c {
+ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
+ return false
+
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
+ // Different sections of the URL allow a few of
+ // the reserved characters to appear unescaped.
+ switch mode {
+ case encodePath: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments. This package
+ // only manipulates the path as a whole, so we allow those
+ // last three as well. That leaves only ? to escape.
+ return c == '?'
+
+ case encodePathSegment: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments.
+ return c == '/' || c == ';' || c == ',' || c == '?'
+
+ case encodeUserPassword: // §3.2.1
+ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
+ // userinfo, so we must escape only '@', '/', and '?'.
+ // The parsing of userinfo treats ':' as special so we must escape
+ // that too.
+ return c == '@' || c == '/' || c == '?' || c == ':'
+
+ case encodeQueryComponent: // §3.4
+ // The RFC reserves (so we must escape) everything.
+ return true
+
+ case encodeFragment: // §4.1
+ // The RFC text is silent but the grammar allows
+ // everything, so escape nothing.
+ return false
+ }
+ }
+
+ if mode == encodeFragment {
+ // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
+ // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
+ // need to be escaped. To minimize potential breakage, we apply two restrictions:
+ // (1) we always escape sub-delims outside of the fragment, and (2) we always
+ // escape single quote to avoid breaking callers that had previously assumed that
+ // single quotes would be escaped. See issue #19917.
+ switch c {
+ case '!', '(', ')', '*':
+ return false
+ }
+ }
+
+ // Everything else must be escaped.
+ return true
+}
+
+func ishex(c byte) bool {
+ return '0' <= c && c <= '9' ||
+ 'a' <= c && c <= 'f' ||
+ 'A' <= c && c <= 'F'
+}
// See RFC 3986. This package generally follows RFC 3986, except where
// it deviates for compatibility reasons.
// RFC 6874 followed for IPv6 zone literals.
+
+//go:generate go run gen_encoding_table.go
+
package url
// When sending changes, first search old issues for history on decisions.
const upperhex = "0123456789ABCDEF"
func ishex(c byte) bool {
- switch {
- case '0' <= c && c <= '9':
- return true
- case 'a' <= c && c <= 'f':
- return true
- case 'A' <= c && c <= 'F':
- return true
- }
- return false
+ return table[c]&hexChar != 0
}
func unhex(c byte) byte {
}
}
-type encoding int
-
-const (
- encodePath encoding = 1 + iota
- encodePathSegment
- encodeHost
- encodeZone
- encodeUserPassword
- encodeQueryComponent
- encodeFragment
-)
-
type EscapeError string
func (e EscapeError) Error() string {
return "invalid character " + strconv.Quote(string(e)) + " in host name"
}
-// Return true if the specified character should be escaped when
-// appearing in a URL string, according to RFC 3986.
-//
-// Please be informed that for now shouldEscape does not check all
-// reserved characters correctly. See golang.org/issue/5684.
+// See the reference implementation in gen_encoding_table.go.
func shouldEscape(c byte, mode encoding) bool {
- // §2.3 Unreserved characters (alphanum)
- if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
- return false
- }
-
- if mode == encodeHost || mode == encodeZone {
- // §3.2.2 Host allows
- // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
- // as part of reg-name.
- // We add : because we include :port as part of host.
- // We add [ ] because we include [ipv6]:port as part of host.
- // We add < > because they're the only characters left that
- // we could possibly allow, and Parse will reject them if we
- // escape them (because hosts can't use %-encoding for
- // ASCII bytes).
- switch c {
- case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
- return false
- }
- }
-
- switch c {
- case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
- return false
-
- case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
- // Different sections of the URL allow a few of
- // the reserved characters to appear unescaped.
- switch mode {
- case encodePath: // §3.3
- // The RFC allows : @ & = + $ but saves / ; , for assigning
- // meaning to individual path segments. This package
- // only manipulates the path as a whole, so we allow those
- // last three as well. That leaves only ? to escape.
- return c == '?'
-
- case encodePathSegment: // §3.3
- // The RFC allows : @ & = + $ but saves / ; , for assigning
- // meaning to individual path segments.
- return c == '/' || c == ';' || c == ',' || c == '?'
-
- case encodeUserPassword: // §3.2.1
- // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
- // userinfo, so we must escape only '@', '/', and '?'.
- // The parsing of userinfo treats ':' as special so we must escape
- // that too.
- return c == '@' || c == '/' || c == '?' || c == ':'
-
- case encodeQueryComponent: // §3.4
- // The RFC reserves (so we must escape) everything.
- return true
-
- case encodeFragment: // §4.1
- // The RFC text is silent but the grammar allows
- // everything, so escape nothing.
- return false
- }
- }
-
- if mode == encodeFragment {
- // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
- // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
- // need to be escaped. To minimize potential breakage, we apply two restrictions:
- // (1) we always escape sub-delims outside of the fragment, and (2) we always
- // escape single quote to avoid breaking callers that had previously assumed that
- // single quotes would be escaped. See issue #19917.
- switch c {
- case '!', '(', ')', '*':
- return false
- }
- }
-
- // Everything else must be escaped.
- return true
+ return table[c]&mode == 0
}
// QueryUnescape does the inverse transformation of [QueryEscape],