From 69ca0a859cbd8807b03b7772a11f5a147cd46565 Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Tue, 22 Nov 2022 09:22:13 -0800 Subject: [PATCH] Revert "net/url, net/http/httputil: accept invalid percent encodings" This reverts CL 450375. Reason for revert: This change causes test failures (and possibly other problems) for users depending on the existing validation behavior. Rolling back the change for now to give us more time to consider its impact. This landed late in the cycle and isn't urgent; it can wait for 1.21 if we do want to make the change. Fixes #56884 For #56732 Change-Id: I082023c67f1bbb933a617453ab92b67abba876ef Reviewed-on: https://go-review.googlesource.com/c/go/+/452795 TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor Run-TryBot: Damien Neil Reviewed-by: Heschi Kreinick --- src/net/http/httputil/reverseproxy.go | 27 ++++++++++++++- src/net/http/httputil/reverseproxy_test.go | 2 +- src/net/url/url.go | 26 +++++---------- src/net/url/url_test.go | 39 +++++++++++++--------- 4 files changed, 59 insertions(+), 35 deletions(-) diff --git a/src/net/http/httputil/reverseproxy.go b/src/net/http/httputil/reverseproxy.go index ad0221ff33..190279ca00 100644 --- a/src/net/http/httputil/reverseproxy.go +++ b/src/net/http/httputil/reverseproxy.go @@ -816,9 +816,34 @@ func (c switchProtocolCopier) copyToBackend(errc chan<- error) { } func cleanQueryParams(s string) string { - if strings.Contains(s, ";") { + reencode := func(s string) string { v, _ := url.ParseQuery(s) return v.Encode() } + for i := 0; i < len(s); { + switch s[i] { + case ';': + return reencode(s) + case '%': + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + return reencode(s) + } + i += 3 + default: + i++ + } + } return s } + +func ishex(c byte) bool { + switch { + case '0' <= c && c <= '9': + return true + case 'a' <= c && c <= 'f': + return true + case 'A' <= c && c <= 'F': + return true + } + return false +} diff --git a/src/net/http/httputil/reverseproxy_test.go b/src/net/http/httputil/reverseproxy_test.go index 5a0237494c..5b882d3a45 100644 --- a/src/net/http/httputil/reverseproxy_test.go +++ b/src/net/http/httputil/reverseproxy_test.go @@ -1831,7 +1831,7 @@ func testReverseProxyQueryParameterSmuggling(t *testing.T, wantCleanQuery bool, cleanQuery: "a=1", }, { rawQuery: "a=1&a=%zz&b=3", - cleanQuery: "a=1&a=%zz&b=3", + cleanQuery: "a=1&b=3", }} { res, err := frontend.Client().Get(frontend.URL + "?" + test.rawQuery) if err != nil { diff --git a/src/net/url/url.go b/src/net/url/url.go index e959ed4ee6..d530a50d40 100644 --- a/src/net/url/url.go +++ b/src/net/url/url.go @@ -198,24 +198,20 @@ func PathUnescape(s string) (string, error) { // unescape unescapes a string; the mode specifies // which section of the URL string is being unescaped. func unescape(s string, mode encoding) (string, error) { - isPercentEscape := func(s string, i int) bool { - return i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]) - } - // Count %, check that they're well-formed. n := 0 hasPlus := false for i := 0; i < len(s); { switch s[i] { case '%': - if !isPercentEscape(s, i) { - // https://url.spec.whatwg.org/#percent-encoded-bytes - // says that % followed by non-hex characters - // should be accepted with no error. - i++ - continue - } n++ + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + s = s[i:] + if len(s) > 3 { + s = s[:3] + } + return "", EscapeError(s) + } // Per https://tools.ietf.org/html/rfc3986#page-21 // in the host component %-encoding can only be used // for non-ASCII bytes. @@ -259,12 +255,8 @@ func unescape(s string, mode encoding) (string, error) { for i := 0; i < len(s); i++ { switch s[i] { case '%': - if !isPercentEscape(s, i) { - t.WriteByte('%') - } else { - t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) - i += 2 - } + t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) + i += 2 case '+': if mode == encodeQueryComponent { t.WriteByte(' ') diff --git a/src/net/url/url_test.go b/src/net/url/url_test.go index 899ec99e43..577cf631c8 100644 --- a/src/net/url/url_test.go +++ b/src/net/url/url_test.go @@ -704,11 +704,9 @@ var parseRequestURLTests = []struct { // These two cases are valid as textual representations as // described in RFC 4007, but are not valid as address // literals with IPv6 zone identifiers in URIs as described in - // RFC 6874. However, this seems to be overridden by - // https://url.spec.whatwg.org/#percent-encoded-bytes - // which permits unencoded % characters. - {"http://[fe80::1%en0]/", true}, - {"http://[fe80::1%en0]:8080/", true}, + // RFC 6874. + {"http://[fe80::1%en0]/", false}, + {"http://[fe80::1%en0]:8080/", false}, } func TestParseRequestURI(t *testing.T) { @@ -898,28 +896,28 @@ var unescapeTests = []EscapeTest{ }, { "%", // not enough characters after % - "%", - nil, + "", + EscapeError("%"), }, { "%a", // not enough characters after % - "%a", - nil, + "", + EscapeError("%a"), }, { "%1", // not enough characters after % - "%1", - nil, + "", + EscapeError("%1"), }, { "123%45%6", // not enough characters after % - "123E%6", - nil, + "", + EscapeError("%6"), }, { "%zzzzz", // invalid hex digits - "%zzzzz", - nil, + "", + EscapeError("%zz"), }, { "a+b", @@ -1593,6 +1591,16 @@ func TestRequestURI(t *testing.T) { } } +func TestParseFailure(t *testing.T) { + // Test that the first parse error is returned. + const url = "%gh&%ij" + _, err := ParseQuery(url) + errStr := fmt.Sprint(err) + if !strings.Contains(errStr, "%gh") { + t.Errorf(`ParseQuery(%q) returned error %q, want something containing %q"`, url, errStr, "%gh") + } +} + func TestParseErrors(t *testing.T) { tests := []struct { in string @@ -2110,7 +2118,6 @@ func TestJoinPath(t *testing.T) { { base: "http://[fe80::1%en0]:8080/", elem: []string{"/go"}, - out: "http://[fe80::1%25en0]:8080/go", }, { base: "https://go.googlesource.com", -- 2.50.0