import (
"bytes"
"errors"
+ "fmt"
"sort"
"strconv"
"strings"
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
s = s[i:]
if len(s) > 3 {
- s = s[0:3]
+ s = s[:3]
}
return "", EscapeError(s)
}
if i == 0 {
return "", "", errors.New("missing protocol scheme")
}
- return rawurl[0:i], rawurl[i+1:], nil
+ return rawurl[:i], rawurl[i+1:], nil
default:
// we have encountered an invalid character,
// so there is no valid scheme
return s, ""
}
if cutc {
- return s[0:i], s[i+len(c):]
+ return s[:i], s[i+len(c):]
}
- return s[0:i], s[i:]
+ return s[:i], s[i:]
}
// Parse parses rawurl into a URL structure.
return user, host, nil
}
-// parseHost parses host as an authority without user information.
+// parseHost parses host as an authority without user
+// information. That is, as host[:port].
func parseHost(host string) (string, error) {
litOrName := host
+ var colonPort string // ":80" or ""
if strings.HasPrefix(host, "[") {
// Parse an IP-Literal in RFC 3986 and RFC 6874.
// E.g., "[fe80::1]", "[fe80::1%25en0]"
// RFC 4007 defines "%" as a delimiter character in
// the textual representation of IPv6 addresses.
// Per RFC 6874, in URIs that "%" is encoded as "%25".
- i := strings.LastIndex(host[1:], "]")
+ i := strings.LastIndex(host, "]")
if i < 0 {
return "", errors.New("missing ']' in host")
}
+ colonPort = host[i+1:]
// Parse a host subcomponent without a ZoneID in RFC
// 6874 because the ZoneID is allowed to use the
// percent encoded form.
- j := strings.Index(host[1:1+i], "%25")
+ j := strings.Index(host[:i], "%25")
if j < 0 {
- litOrName = host[1 : 1+i]
+ litOrName = host[1:i]
} else {
- litOrName = host[1 : 1+j]
+ litOrName = host[1:j]
+ }
+ } else {
+ if i := strings.Index(host, ":"); i != -1 {
+ colonPort = host[i:]
}
}
// A URI containing an IP-Literal without a ZoneID or
if strings.Contains(litOrName, "%") {
return "", errors.New("percent-encoded characters in host")
}
+ if !validOptionalPort(colonPort) {
+ return "", fmt.Errorf("invalid port %q after host", colonPort)
+ }
var err error
if host, err = unescape(host, encodeHost); err != nil {
return "", err
return true
}
+// validOptionalPort reports whether port is acceptable as the optional
+// ":port" suffix of a host. It accepts the empty string, or a colon
+// followed by one or more ASCII decimal digits (that is, /^:\d+$/).
+func validOptionalPort(port string) bool {
+	switch {
+	case port == "":
+		// No port component at all is allowed.
+		return true
+	case port[0] != ':':
+		return false
+	}
+	digits := port[1:]
+	if digits == "" {
+		// A bare colon with nothing after it is not a port.
+		return false
+	}
+	for i := 0; i < len(digits); i++ {
+		// Any byte outside '0'..'9' (including every byte of a
+		// multi-byte sequence) disqualifies the port.
+		if c := digits[i]; c < '0' || c > '9' {
+			return false
+		}
+	}
+	return true
+}
+
// String reassembles the URL into a valid URL string.
// The general form of the result is one of:
//
}
}
+// TestParseAuthority checks that Parse accepts or rejects URLs based on
+// the shape of their authority component: bracketed IP literals, zone
+// identifiers, percent-escapes in the host, and the optional ":port".
+// Each case only states whether Parse must return an error.
+func TestParseAuthority(t *testing.T) {
+	tests := []struct {
+		in      string
+		wantErr bool
+	}{
+		{"http://[::1]", false},
+		{"http://[::1]:80", false},
+		{"http://[::1]:namedport", true}, // rfc3986 3.2.3
+		{"http://[::1]/", false},
+		{"http://[::1]a", true},
+		{"http://[::1]%23", true},
+		{"http://[::1%25en0]", false},     // valid zone id
+		{"http://[::1]:", true},           // colon, but no port
+		{"http://[::1]:%38%30", true},     // no hex in port
+		{"http://[::1%25%10]", false},     // TODO: reject the %10 after the valid zone %25 separator?
+		{"http://[%10::1]", true},         // no %xx escapes in IP address
+		{"http://[::1]/%48", false},       // %xx in path is fine
+		{"http://%41:8080/", true},        // TODO: arguably we should accept reg-name with %xx
+	}
+	for _, tt := range tests {
+		u, err := Parse(tt.in)
+		if tt.wantErr {
+			if err == nil {
+				t.Errorf("Parse(%q) = %#v; want an error", tt.in, u)
+			}
+			continue
+		}
+		if err != nil {
+			// Report with Errorf, not Logf: an unexpected error on
+			// a URL that should parse must fail the test.
+			t.Errorf("Parse(%q) = %v; want no error", tt.in, err)
+		}
+	}
+}
+
type shouldEscapeTest struct {
in byte
mode encoding