}
func cleanQueryParams(s string) string {
- if strings.Contains(s, ";") {
+ reencode := func(s string) string {
v, _ := url.ParseQuery(s)
return v.Encode()
}
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case ';':
+ return reencode(s)
+ case '%':
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+ return reencode(s)
+ }
+ i += 3
+ default:
+ i++
+ }
+ }
return s
}
+
+func ishex(c byte) bool {
+ switch {
+ case '0' <= c && c <= '9':
+ return true
+ case 'a' <= c && c <= 'f':
+ return true
+ case 'A' <= c && c <= 'F':
+ return true
+ }
+ return false
+}
// unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
func unescape(s string, mode encoding) (string, error) {
- isPercentEscape := func(s string, i int) bool {
- return i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2])
- }
-
// Count %, check that they're well-formed.
n := 0
hasPlus := false
for i := 0; i < len(s); {
switch s[i] {
case '%':
- if !isPercentEscape(s, i) {
- // https://url.spec.whatwg.org/#percent-encoded-bytes
- // says that % followed by non-hex characters
- // should be accepted with no error.
- i++
- continue
- }
n++
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+ s = s[i:]
+ if len(s) > 3 {
+ s = s[:3]
+ }
+ return "", EscapeError(s)
+ }
// Per https://tools.ietf.org/html/rfc3986#page-21
// in the host component %-encoding can only be used
// for non-ASCII bytes.
for i := 0; i < len(s); i++ {
switch s[i] {
case '%':
- if !isPercentEscape(s, i) {
- t.WriteByte('%')
- } else {
- t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
- i += 2
- }
+ t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
+ i += 2
case '+':
if mode == encodeQueryComponent {
t.WriteByte(' ')
// These two cases are valid as textual representations as
// described in RFC 4007, but are not valid as address
// literals with IPv6 zone identifiers in URIs as described in
- // RFC 6874. However, this seems to be overridden by
- // https://url.spec.whatwg.org/#percent-encoded-bytes
- // which permits unencoded % characters.
- {"http://[fe80::1%en0]/", true},
- {"http://[fe80::1%en0]:8080/", true},
+ // RFC 6874.
+ {"http://[fe80::1%en0]/", false},
+ {"http://[fe80::1%en0]:8080/", false},
}
func TestParseRequestURI(t *testing.T) {
},
{
"%", // not enough characters after %
- "%",
- nil,
+ "",
+ EscapeError("%"),
},
{
"%a", // not enough characters after %
- "%a",
- nil,
+ "",
+ EscapeError("%a"),
},
{
"%1", // not enough characters after %
- "%1",
- nil,
+ "",
+ EscapeError("%1"),
},
{
"123%45%6", // not enough characters after %
- "123E%6",
- nil,
+ "",
+ EscapeError("%6"),
},
{
"%zzzzz", // invalid hex digits
- "%zzzzz",
- nil,
+ "",
+ EscapeError("%zz"),
},
{
"a+b",
}
}
+func TestParseFailure(t *testing.T) {
+ // Test that the first parse error is returned.
+ const url = "%gh&%ij"
+ _, err := ParseQuery(url)
+ errStr := fmt.Sprint(err)
+ if !strings.Contains(errStr, "%gh") {
+ t.Errorf(`ParseQuery(%q) returned error %q, want something containing %q"`, url, errStr, "%gh")
+ }
+}
+
func TestParseErrors(t *testing.T) {
tests := []struct {
in string
{
base: "http://[fe80::1%en0]:8080/",
elem: []string{"/go"},
- out: "http://[fe80::1%25en0]:8080/go",
},
{
base: "https://go.googlesource.com",