// along with support code.
package module
+// IMPORTANT NOTE
+//
+// This file essentially defines the set of valid import paths for the go command.
+// There are many subtle considerations, including Unicode ambiguity,
+// security, network, and file system representations.
+//
+// Changes to the semantics in this file require approval from rsc.
+
import (
"fmt"
"sort"
"strings"
- "unicode"
"unicode/utf8"
"cmd/go/internal/semver"
// firstPathOK reports whether r can appear in the first element of a module path.
// The first element of the path must be an LDH domain name, at least for now.
+// To avoid case ambiguity, the domain name must be entirely lower case.
func firstPathOK(r rune) bool {
return r == '-' || r == '.' ||
'0' <= r && r <= '9' ||
- 'A' <= r && r <= 'Z' ||
'a' <= r && r <= 'z'
}
// pathOK reports whether r can appear in a module path.
-// Paths must avoid potentially problematic ASCII punctuation
-// and control characters but otherwise can be any Unicode printable character,
-// as defined by Go's IsPrint.
+// Paths can contain ASCII letters, ASCII digits, and limited ASCII punctuation: + - . / _ and ~.
+// This matches what "go get" has historically recognized in import paths.
+// TODO(rsc): We would like to allow Unicode letters, but that requires additional
+// care in the safe encoding (see note below).
func pathOK(r rune) bool {
if r < utf8.RuneSelf {
- return r == '+' || r == ',' || r == '-' || r == '.' || r == '/' || r == '_' || r == '~' ||
+ return r == '+' || r == '-' || r == '.' || r == '/' || r == '_' || r == '~' ||
'0' <= r && r <= '9' ||
'A' <= r && r <= 'Z' ||
'a' <= r && r <= 'z'
}
- return unicode.IsPrint(r)
+ return false
}
// CheckPath checks that a module path is valid.
func CheckPath(path string) error {
- if !utf8.ValidString(path) {
- return fmt.Errorf("malformed module path %q: invalid UTF-8", path)
+ if err := checkImportPath(path); err != nil {
+ return fmt.Errorf("malformed module path %q: %v", path, err)
}
- if path == "" {
- return fmt.Errorf("malformed module path %q: empty string", path)
- }
-
i := strings.Index(path, "/")
if i < 0 {
i = len(path)
if !strings.Contains(path[:i], ".") {
return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
}
- if path[i-1] == '.' {
- return fmt.Errorf("malformed module path %q: trailing dot in first path element", path)
- }
- if path[0] == '.' {
- return fmt.Errorf("malformed module path %q: leading dot in first path element", path)
- }
if path[0] == '-' {
return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
}
- if strings.Contains(path, "..") {
- return fmt.Errorf("malformed module path %q: double dot", path)
- }
- if strings.Contains(path, "//") {
- return fmt.Errorf("malformed module path %q: double slash", path)
- }
for _, r := range path[:i] {
if !firstPathOK(r) {
return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
}
}
+ if _, _, ok := SplitPathVersion(path); !ok {
+ return fmt.Errorf("malformed module path %q: invalid version %s", path, path[strings.LastIndex(path, "/")+1:])
+ }
+ return nil
+}
+
+// CheckImportPath checks that an import path is valid.
+func CheckImportPath(path string) error {
+ if err := checkImportPath(path); err != nil {
+ return fmt.Errorf("malformed import path %q: %v", path, err)
+ }
+ return nil
+}
+
+// checkImportPath checks that an import path is valid.
+// It returns an error describing why but not mentioning path.
+// Because these checks apply to both module paths and import paths,
+// the caller is expected to add the "malformed ___ path %q: " prefix.
+func checkImportPath(path string) error {
+ if !utf8.ValidString(path) {
+ return fmt.Errorf("invalid UTF-8")
+ }
+ if path == "" {
+ return fmt.Errorf("empty string")
+ }
+ if strings.Contains(path, "..") {
+ return fmt.Errorf("double dot")
+ }
+ if strings.Contains(path, "//") {
+ return fmt.Errorf("double slash")
+ }
if path[len(path)-1] == '/' {
- return fmt.Errorf("malformed module path %q: trailing slash", path)
+ return fmt.Errorf("trailing slash")
}
- for _, r := range path {
+ elemStart := 0
+ for i, r := range path {
if !pathOK(r) {
- return fmt.Errorf("malformed module path %q: invalid char %q", path, r)
+ return fmt.Errorf("invalid char %q", r)
+ }
+ if r == '/' {
+ if err := checkElem(path[elemStart:i]); err != nil {
+ return err
+ }
+ elemStart = i + 1
}
}
- if _, _, ok := SplitPathVersion(path); !ok {
- return fmt.Errorf("malformed module path %q: invalid version %s", path, path[strings.LastIndex(path, "/")+1:])
+ if err := checkElem(path[elemStart:]); err != nil {
+ return err
+ }
+ return nil
+}
+
+// checkElem checks whether an individual path element is valid.
+func checkElem(elem string) error {
+ if elem == "" {
+ return fmt.Errorf("empty path element")
+ }
+ if elem[0] == '.' {
+ return fmt.Errorf("leading dot in path element")
+ }
+ if elem[len(elem)-1] == '.' {
+ return fmt.Errorf("trailing dot in path element")
+ }
+
+ // Windows disallows a bunch of path elements, sadly.
+ // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
+ short := elem
+ if i := strings.Index(short, "."); i >= 0 {
+ short = short[:i]
+ }
+ for _, bad := range badWindowsNames {
+ if strings.EqualFold(bad, short) {
+ return fmt.Errorf("disallowed path element %q", elem)
+ }
}
return nil
}
+// badWindowsNames are the reserved file path elements on Windows.
+// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
+var badWindowsNames = []string{
+ "CON",
+ "PRN",
+ "AUX",
+ "NUL",
+ "COM1",
+ "COM2",
+ "COM3",
+ "COM4",
+ "COM5",
+ "COM6",
+ "COM7",
+ "COM8",
+ "COM9",
+ "LPT1",
+ "LPT2",
+ "LPT3",
+ "LPT4",
+ "LPT5",
+ "LPT6",
+ "LPT7",
+ "LPT8",
+ "LPT9",
+}
+
// SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
// and version is either empty or "/vN" for N >= 2.
// As a special case, gopkg.in paths are recognized directly;
return fi < fj
})
}
+
+// Safe encodings
+//
+// Module paths appear as substrings of file system paths
+// (in the download cache) and of web server URLs in the proxy protocol.
+// In general we cannot rely on file systems to be case-sensitive,
+// nor can we rely on web servers, since they read from file systems.
+// That is, we cannot rely on the file system to keep rsc.io/QUOTE
+// and rsc.io/quote separate. Windows and macOS don't.
+// Instead, we must never require two different casings of a file path.
+// Because we want the download cache to match the proxy protocol,
+// and because we want the proxy protocol to be possible to serve
+// from a tree of static files (which might be stored on a case-insensitive
+// file system), the proxy protocol must never require two different casings
+// of a URL path either.
+//
+// One possibility would be to make the safe encoding be the lowercase
+// hexadecimal encoding of the actual path bytes. This would avoid ever
+// needing different casings of a file path, but it would be fairly illegible
+// to most programmers when those paths appeared in the file system
+// (including in file paths in compiler errors and stack traces),
+// in web server logs, and so on. Instead, we want a safe encoding that
+// leaves most paths unaltered.
+//
+// The safe encoding is this:
+// replace every uppercase letter with an exclamation mark
+// followed by the letter's lowercase equivalent.
+//
+// For example,
+// github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go
+// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
+// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
+//
+// Import paths that avoid upper-case letters are left unchanged.
+// Note that because import paths are ASCII-only and avoid various
+// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
+// and avoids the same problematic punctuation.
+//
+// Import paths have never allowed exclamation marks, so there is no
+// need to define how to encode a literal !.
+//
+// Although paths are disallowed from using Unicode (see pathOK above),
+// the eventual plan is to allow Unicode letters as well, to assume that
+// file systems and URLs are Unicode-safe (storing UTF-8), and apply
+// the !-for-uppercase convention. Note however that not all runes that
+// are different but case-fold equivalent are an upper/lower pair.
+// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
+// are considered to case-fold to each other. When we do add Unicode
+// letters, we must not assume that upper/lower are the only case-equivalent pairs.
+// Perhaps the Kelvin symbol would be disallowed entirely, for example.
+// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
+//
+// Also, it would be nice to allow Unicode marks as well as letters,
+// but marks include combining marks, and then we must deal not
+// only with case folding but also normalization: both U+00E9 ('é')
+// and U+0065 U+0301 ('e' followed by combining acute accent)
+// look the same on the page and are treated by some file systems
+// as the same path. If we do allow Unicode marks in paths, there
+// must be some kind of normalization to allow only one canonical
+// encoding of any character used in an import path.
+
+// EncodePath returns the safe encoding of the given module path.
+// It fails if the module path is invalid.
+func EncodePath(path string) (encoding string, err error) {
+ if err := CheckPath(path); err != nil {
+ return "", err
+ }
+
+ haveUpper := false
+ for _, r := range path {
+ if r == '!' || r >= utf8.RuneSelf {
+ // This should be disallowed by CheckPath, but diagnose anyway.
+ // The correctness of the encoding loop below depends on it.
+ return "", fmt.Errorf("internal error: inconsistency in EncodePath")
+ }
+ if 'A' <= r && r <= 'Z' {
+ haveUpper = true
+ }
+ }
+
+ if !haveUpper {
+ return path, nil
+ }
+
+ var buf []byte
+ for _, r := range path {
+ if 'A' <= r && r <= 'Z' {
+ buf = append(buf, '!', byte(r+'a'-'A'))
+ } else {
+ buf = append(buf, byte(r))
+ }
+ }
+ return string(buf), nil
+}
+
+// DecodePath returns the module path of the given safe encoding.
+// It fails if the encoding is invalid.
+func DecodePath(encoding string) (path string, err error) {
+ var buf []byte
+
+ bang := false
+ for _, r := range encoding {
+ if r >= utf8.RuneSelf {
+ goto BadEncoding
+ }
+ if bang {
+ bang = false
+ if r < 'a' || 'z' < r {
+ goto BadEncoding
+ }
+ buf = append(buf, byte(r+'A'-'a'))
+ continue
+ }
+ if r == '!' {
+ bang = true
+ continue
+ }
+ if 'A' <= r && r <= 'Z' {
+ goto BadEncoding
+ }
+ buf = append(buf, byte(r))
+ }
+ if bang {
+ goto BadEncoding
+ }
+ path = string(buf)
+ if err := CheckPath(path); err != nil {
+ return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
+ }
+ return path, nil
+
+BadEncoding:
+ return "", fmt.Errorf("invalid module path encoding %q", encoding)
+}
}
var checkPathTests = []struct {
- path string
- ok bool
+ path string
+ ok bool
+ importOK bool
}{
- {"x.y/z", true},
- {"x.y", true},
-
- {"", false},
- {"x.y/\xFFz", false},
- {"/x.y/z", false},
- {"x./z", false},
- {".x/z", false},
- {"-x/z", false},
- {"x..y/z", false},
- {"x.y/z/../../w", false},
- {"x.y//z", false},
- {"x.y/z//w", false},
- {"x.y/z/", false},
-
- {"x.y/z/v0", false},
- {"x.y/z/v1", false},
- {"x.y/z/v2", true},
- {"x.y/z/v2.0", false},
-
- {"!x.y/z", false},
- {"_x.y/z", false},
- {"x.y!/z", false},
- {"x.y\"/z", false},
- {"x.y#/z", false},
- {"x.y$/z", false},
- {"x.y%/z", false},
- {"x.y&/z", false},
- {"x.y'/z", false},
- {"x.y(/z", false},
- {"x.y)/z", false},
- {"x.y*/z", false},
- {"x.y+/z", false},
- {"x.y,/z", false},
- {"x.y-/z", true},
- {"x.y./zt", false},
- {"x.y:/z", false},
- {"x.y;/z", false},
- {"x.y</z", false},
- {"x.y=/z", false},
- {"x.y>/z", false},
- {"x.y?/z", false},
- {"x.y@/z", false},
- {"x.y[/z", false},
- {"x.y\\/z", false},
- {"x.y]/z", false},
- {"x.y^/z", false},
- {"x.y_/z", false},
- {"x.y`/z", false},
- {"x.y{/z", false},
- {"x.y}/z", false},
- {"x.y~/z", false},
- {"x.y/z!", false},
- {"x.y/z\"", false},
- {"x.y/z#", false},
- {"x.y/z$", false},
- {"x.y/z%", false},
- {"x.y/z&", false},
- {"x.y/z'", false},
- {"x.y/z(", false},
- {"x.y/z)", false},
- {"x.y/z*", false},
- {"x.y/z+", true},
- {"x.y/z,", true},
- {"x.y/z-", true},
- {"x.y/z.t", true},
- {"x.y/z/t", true},
- {"x.y/z:", false},
- {"x.y/z;", false},
- {"x.y/z<", false},
- {"x.y/z=", false},
- {"x.y/z>", false},
- {"x.y/z?", false},
- {"x.y/z@", false},
- {"x.y/z[", false},
- {"x.y/z\\", false},
- {"x.y/z]", false},
- {"x.y/z^", false},
- {"x.y/z_", true},
- {"x.y/z`", false},
- {"x.y/z{", false},
- {"x.y/z}", false},
- {"x.y/z~", true},
+ {"x.y/z", true, true},
+ {"x.y", true, true},
+
+ {"", false, false},
+ {"x.y/\xFFz", false, false},
+ {"/x.y/z", false, false},
+ {"x./z", false, false},
+ {".x/z", false, false},
+ {"-x/z", false, true},
+ {"x..y/z", false, false},
+ {"x.y/z/../../w", false, false},
+ {"x.y//z", false, false},
+ {"x.y/z//w", false, false},
+ {"x.y/z/", false, false},
+
+ {"x.y/z/v0", false, true},
+ {"x.y/z/v1", false, true},
+ {"x.y/z/v2", true, true},
+ {"x.y/z/v2.0", false, true},
+ {"X.y/z", false, true},
+
+ {"!x.y/z", false, false},
+ {"_x.y/z", false, true},
+ {"x.y!/z", false, false},
+ {"x.y\"/z", false, false},
+ {"x.y#/z", false, false},
+ {"x.y$/z", false, false},
+ {"x.y%/z", false, false},
+ {"x.y&/z", false, false},
+ {"x.y'/z", false, false},
+ {"x.y(/z", false, false},
+ {"x.y)/z", false, false},
+ {"x.y*/z", false, false},
+ {"x.y+/z", false, true},
+ {"x.y,/z", false, false},
+ {"x.y-/z", true, true},
+ {"x.y./zt", false, false},
+ {"x.y:/z", false, false},
+ {"x.y;/z", false, false},
+ {"x.y</z", false, false},
+ {"x.y=/z", false, false},
+ {"x.y>/z", false, false},
+ {"x.y?/z", false, false},
+ {"x.y@/z", false, false},
+ {"x.y[/z", false, false},
+ {"x.y\\/z", false, false},
+ {"x.y]/z", false, false},
+ {"x.y^/z", false, false},
+ {"x.y_/z", false, true},
+ {"x.y`/z", false, false},
+ {"x.y{/z", false, false},
+ {"x.y}/z", false, false},
+ {"x.y~/z", false, true},
+ {"x.y/z!", false, false},
+ {"x.y/z\"", false, false},
+ {"x.y/z#", false, false},
+ {"x.y/z$", false, false},
+ {"x.y/z%", false, false},
+ {"x.y/z&", false, false},
+ {"x.y/z'", false, false},
+ {"x.y/z(", false, false},
+ {"x.y/z)", false, false},
+ {"x.y/z*", false, false},
+ {"x.y/z+", true, true},
+ {"x.y/z,", false, false},
+ {"x.y/z-", true, true},
+ {"x.y/z.t", true, true},
+ {"x.y/z/t", true, true},
+ {"x.y/z:", false, false},
+ {"x.y/z;", false, false},
+ {"x.y/z<", false, false},
+ {"x.y/z=", false, false},
+ {"x.y/z>", false, false},
+ {"x.y/z?", false, false},
+ {"x.y/z@", false, false},
+ {"x.y/z[", false, false},
+ {"x.y/z\\", false, false},
+ {"x.y/z]", false, false},
+ {"x.y/z^", false, false},
+ {"x.y/z_", true, true},
+ {"x.y/z`", false, false},
+ {"x.y/z{", false, false},
+ {"x.y/z}", false, false},
+ {"x.y/z~", true, true},
+ {"x.y/x.foo", true, true},
+ {"x.y/aux.foo", false, false},
+ {"x.y/prn", false, false},
+ {"x.y/prn2", true, true},
+ {"x.y/com", true, true},
+ {"x.y/com1", false, false},
+ {"x.y/com1.txt", false, false},
+ {"x.y/calm1", true, true},
+ {"github.com/!123/logrus", false, false},
+
+ // TODO: CL 41822 allowed Unicode letters in old "go get"
+ // without due consideration of the implications, and only on github.com (!).
+ // For now, we disallow non-ASCII characters in module mode,
+ // in both module paths and general import paths,
+ // until we can get the implications right.
+ // When we do, we'll enable them everywhere, not just for GitHub.
+ {"github.com/user/unicode/испытание", false, false},
}
func TestCheckPath(t *testing.T) {
} else if !tt.ok && err == nil {
t.Errorf("CheckPath(%q) succeeded, wanted error", tt.path)
}
+
+ err = CheckImportPath(tt.path)
+ if tt.importOK && err != nil {
+ t.Errorf("CheckImportPath(%q) = %v, wanted nil error", tt.path, err)
+ } else if !tt.importOK && err == nil {
+ t.Errorf("CheckImportPath(%q) succeeded, wanted error", tt.path)
+ }
}
}
}
}
}
+
+var encodeTests = []struct {
+ path string
+ enc string // empty means same as path
+}{
+ {path: "ascii.com/abcdefghijklmnopqrstuvwxyz.-+/~_0123456789"},
+ {path: "github.com/GoogleCloudPlatform/omega", enc: "github.com/!google!cloud!platform/omega"},
+}
+
+func TestEncodePath(t *testing.T) {
+ // Check invalid paths.
+ for _, tt := range checkPathTests {
+ if !tt.ok {
+ _, err := EncodePath(tt.path)
+ if err == nil {
+ t.Errorf("EncodePath(%q): succeeded, want error (invalid path)", tt.path)
+ }
+ }
+ }
+
+ // Check encodings.
+ for _, tt := range encodeTests {
+ enc, err := EncodePath(tt.path)
+ if err != nil {
+ t.Errorf("EncodePath(%q): unexpected error: %v", tt.path, err)
+ continue
+ }
+ want := tt.enc
+ if want == "" {
+ want = tt.path
+ }
+ if enc != want {
+ t.Errorf("EncodePath(%q) = %q, want %q", tt.path, enc, want)
+ }
+ }
+}
+
+var badDecode = []string{
+ "github.com/GoogleCloudPlatform/omega",
+ "github.com/!google!cloud!platform!/omega",
+ "github.com/!0google!cloud!platform/omega",
+ "github.com/!_google!cloud!platform/omega",
+ "github.com/!!google!cloud!platform/omega",
+ "",
+}
+
+func TestDecodePath(t *testing.T) {
+ // Check invalid decodings.
+ for _, bad := range badDecode {
+ _, err := DecodePath(bad)
+ if err == nil {
+ t.Errorf("DecodePath(%q): succeeded, want error (invalid decoding)", bad)
+ }
+ }
+
+ // Check invalid paths (or maybe decodings).
+ for _, tt := range checkPathTests {
+ if !tt.ok {
+ path, err := DecodePath(tt.path)
+ if err == nil {
+ t.Errorf("DecodePath(%q) = %q, want error (invalid path)", tt.path, path)
+ }
+ }
+ }
+
+ // Check encodings.
+ for _, tt := range encodeTests {
+ enc := tt.enc
+ if enc == "" {
+ enc = tt.path
+ }
+ path, err := DecodePath(enc)
+ if err != nil {
+ t.Errorf("DecodePath(%q): unexpected error: %v", enc, err)
+ continue
+ }
+ if path != tt.path {
+ t.Errorf("DecodePath(%q) = %q, want %q", enc, path, tt.path)
+ }
+ }
+}