From: Brad Fitzpatrick Date: Tue, 6 Mar 2012 06:36:15 +0000 (-0800) Subject: cmd/go: allow go get with arbitrary URLs X-Git-Tag: weekly.2012-03-13~190 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=932c8ddba158a91056eba87045bb6d5ddbeb39f7;p=gostls13.git cmd/go: allow go get with arbitrary URLs This CL permits using arbitrary, non-VCS-qualified URLs as aliases for fully VCS-qualified and/or well-known code hosting sites. Example 1) A VCS-qualified URL can now be shorter. Before: $ go get camlistore.org/r/p/camlistore.git/pkg/blobref After: $ go get camlistore.org/pkg/blobref Example 2) A custom domain can be used as the import, referencing a well-known code hosting site. Before: $ go get github.com/bradfitz/sonden After: $ go get bradfitz.com/pkg/sonden The mechanism used is a tag in the HTML document retrieved from fetching: https://?go-get=1 (preferred) http://?go-get=1 (fallback) The meta tag should look like: The full-repo-root must be a full URL root to a repository containing a scheme and *not* containing a ".vcs" qualifier. The vcs is one of "git", "hg", "svn", etc. The import-alias-prefix must be a prefix or exact match of the package being fetched with "go get". If there are multiple meta tags, only the one with a prefix matching the import path is used. It is an error if multiple go-import values match the import prefix. If the import-alias-prefix is not an exact match for the import, another HTTP fetch is performed, at the declared root (which does *not* need to be the domain's root). For example, assuming that "camlistore.org/pkg/blobref" declares in its HTML head: ... then: $ go get camlistore.org/pkg/blobref ... looks at the following URLs: https://camlistore.org/pkg/blobref?go-get=1 http://camlistore.org/pkg/blobref?go-get=1 https://camlistore.org/?go-get=1 http://camlistore.org/?go-get=1 Ultimately it finds, at the root (camlistore.org/), the same go-import: ... and proceeds to trust it, checking out git //camlistore.org/r/p/camlistore at the import path of "camlistore.org" on disk. Fixes #3099 R=r, rsc, gary.burd, eikeon, untheoretic, n13m3y3r, rsc CC=golang-dev https://golang.org/cl/5660051 --- diff --git a/src/cmd/go/bootstrap.go b/src/cmd/go/bootstrap.go index bc9a3dbbcf..32941404cd 100644 --- a/src/cmd/go/bootstrap.go +++ b/src/cmd/go/bootstrap.go @@ -10,8 +10,21 @@ package main -import "errors" +import ( + "errors" + "io" +) + +var errHTTP = errors.New("no http in bootstrap go command") func httpGET(url string) ([]byte, error) { - return nil, errors.New("no http in bootstrap go command") + return nil, errHTTP +} + +func httpsOrHTTP(importPath string) (string, io.ReadCloser, error) { + return "", nil, errHTTP +} + +func parseMetaGoImports(r io.Reader) (imports []metaImport) { + panic("unreachable") } diff --git a/src/cmd/go/discovery.go b/src/cmd/go/discovery.go new file mode 100644 index 0000000000..d9f930867e --- /dev/null +++ b/src/cmd/go/discovery.go @@ -0,0 +1,63 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !cmd_go_bootstrap + +// This code is compiled into the real 'go' binary, but it is not +// compiled into the binary that is built during all.bash, so as +// to avoid needing to build net (and thus use cgo) during the +// bootstrap process. + +package main + +import ( + "encoding/xml" + "io" + "strings" +) + +// parseMetaGoImports returns meta imports from the HTML in r. +// Parsing ends at the end of the section or the beginning of the . +func parseMetaGoImports(r io.Reader) (imports []metaImport) { + d := xml.NewDecoder(r) + d.Strict = false + for { + t, err := d.Token() + if err != nil { + return + } + if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") { + return + } + if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") { + return + } + e, ok := t.(xml.StartElement) + if !ok || !strings.EqualFold(e.Name.Local, "meta") { + continue + } + if attrValue(e.Attr, "name") != "go-import" { + continue + } + if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 { + imports = append(imports, metaImport{ + Prefix: f[0], + VCS: f[1], + RepoRoot: f[2], + }) + } + } + return +} + +// attrValue returns the attribute value for the case-insensitive key +// `name', or the empty string if nothing is found. +func attrValue(attrs []xml.Attr, name string) string { + for _, a := range attrs { + if strings.EqualFold(a.Name.Local, name) { + return a.Value + } + } + return "" +} diff --git a/src/cmd/go/get.go b/src/cmd/go/get.go index 0ad22adb0d..aa0ab7bd75 100644 --- a/src/cmd/go/get.go +++ b/src/cmd/go/get.go @@ -162,10 +162,11 @@ func downloadPackage(p *Package) error { } else { // Analyze the import path to determine the version control system, // repository, and the import path for the root of the repository. - vcs, repo, rootPath, err = vcsForImportPath(p.ImportPath) - } - if err != nil { - return err + rr, err := repoRootForImportPath(p.ImportPath) + if err != nil { + return err + } + vcs, repo, rootPath = rr.vcs, rr.repo, rr.root } if p.build.SrcRoot == "" { diff --git a/src/cmd/go/http.go b/src/cmd/go/http.go index 8d9b2a1654..834de6cf24 100644 --- a/src/cmd/go/http.go +++ b/src/cmd/go/http.go @@ -13,8 +13,11 @@ package main import ( "fmt" + "io" "io/ioutil" + "log" "net/http" + "net/url" ) // httpGET returns the data from an HTTP GET request for the given URL. @@ -33,3 +36,51 @@ func httpGET(url string) ([]byte, error) { } return b, nil } + +// httpClient is the default HTTP client, but a variable so it can be +// changed by tests, without modifying http.DefaultClient. +var httpClient = http.DefaultClient + +// httpsOrHTTP returns the body of either the importPath's +// https resource or, if unavailable, the http resource. +func httpsOrHTTP(importPath string) (urlStr string, body io.ReadCloser, err error) { + fetch := func(scheme string) (urlStr string, res *http.Response, err error) { + u, err := url.Parse(scheme + "://" + importPath) + if err != nil { + return "", nil, err + } + u.RawQuery = "go-get=1" + urlStr = u.String() + if buildV { + log.Printf("Fetching %s", urlStr) + } + res, err = httpClient.Get(urlStr) + return + } + closeBody := func(res *http.Response) { + if res != nil { + res.Body.Close() + } + } + urlStr, res, err := fetch("https") + if err != nil || res.StatusCode != 200 { + if buildV { + if err != nil { + log.Printf("https fetch failed.") + } else { + log.Printf("ignoring https fetch with status code %d", res.StatusCode) + } + } + closeBody(res) + urlStr, res, err = fetch("http") + } + if err != nil { + closeBody(res) + log.Printf("http fetch failed") + return "", nil, err + } + // Note: accepting a non-200 OK here, so people can serve a + // meta import in their http 404 page. + log.Printf("Parsing meta tags from %s (status code %d)", urlStr, res.StatusCode) + return urlStr, res.Body, nil +} diff --git a/src/cmd/go/vcs.go b/src/cmd/go/vcs.go index cf3410242f..dee7cec37d 100644 --- a/src/cmd/go/vcs.go +++ b/src/cmd/go/vcs.go @@ -7,7 +7,9 @@ package main import ( "bytes" "encoding/json" + "errors" "fmt" + "log" "os" "os/exec" "path/filepath" @@ -302,12 +304,41 @@ func vcsForDir(p *Package) (vcs *vcsCmd, root string, err error) { return nil, "", fmt.Errorf("directory %q is not using a known version control system", dir) } -// vcsForImportPath analyzes importPath to determine the +// repoRoot represents a version control system, a repo, and a root of +// where to put it on disk. +type repoRoot struct { + vcs *vcsCmd + + // repo is the repository URL, including scheme + repo string + + // root is the import path corresponding to the root of the + // repository + root string +} + +// repoRootForImportPath analyzes importPath to determine the // version control system, and code repository to use. -// On return, repo is the repository URL and root is the -// import path corresponding to the root of the repository -// (thus root is a prefix of importPath). -func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err error) { +func repoRootForImportPath(importPath string) (*repoRoot, error) { + rr, err := repoRootForImportPathStatic(importPath, "") + if err == errUnknownSite { + rr, err = repoRootForImportDynamic(importPath) + } + return rr, err +} + +var errUnknownSite = errors.New("dynamic lookup required to find mapping") + +// repoRootForImportPathStatic attempts to map importPath to a +// repoRoot using the commonly-used VCS hosting sites in vcsPaths +// (github.com/user/dir), or from a fully-qualified importPath already +// containing its VCS type (foo.com/repo.git/dir) +// +// If scheme is non-empty, that scheme is forced. +func repoRootForImportPathStatic(importPath, scheme string) (*repoRoot, error) { + if strings.Contains(importPath, "://") { + return nil, fmt.Errorf("invalid import path %q", importPath) + } for _, srv := range vcsPaths { if !strings.HasPrefix(importPath, srv.prefix) { continue @@ -315,7 +346,7 @@ func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err er m := srv.regexp.FindStringSubmatch(importPath) if m == nil { if srv.prefix != "" { - return nil, "", "", fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) + return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) } continue } @@ -338,24 +369,127 @@ func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err er } if srv.check != nil { if err := srv.check(match); err != nil { - return nil, "", "", err + return nil, err } } vcs := vcsByCmd(match["vcs"]) if vcs == nil { - return nil, "", "", fmt.Errorf("unknown version control system %q", match["vcs"]) + return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) } if srv.ping { - for _, scheme := range vcs.scheme { - if vcs.ping(scheme, match["repo"]) == nil { - match["repo"] = scheme + "://" + match["repo"] - break + if scheme != "" { + match["repo"] = scheme + "://" + match["repo"] + } else { + for _, scheme := range vcs.scheme { + if vcs.ping(scheme, match["repo"]) == nil { + match["repo"] = scheme + "://" + match["repo"] + break + } } } } - return vcs, match["repo"], match["root"], nil + rr := &repoRoot{ + vcs: vcs, + repo: match["repo"], + root: match["root"], + } + return rr, nil + } + return nil, errUnknownSite +} + +// repoRootForImportDynamic finds a *repoRoot for a custom domain that's not +// statically known by repoRootForImportPathStatic. +// +// This handles "vanity import paths" like "name.tld/pkg/foo". +func repoRootForImportDynamic(importPath string) (*repoRoot, error) { + slash := strings.Index(importPath, "/") + if slash < 0 { + return nil, fmt.Errorf("missing / in import %q", importPath) + } + urlStr, body, err := httpsOrHTTP(importPath) + if err != nil { + return nil, fmt.Errorf("http/https fetch for import %q: %v", importPath, err) + } + defer body.Close() + metaImport, err := matchGoImport(parseMetaGoImports(body), importPath) + if err != nil { + if err != errNoMatch { + return nil, fmt.Errorf("parse %s: %v", urlStr, err) + } + return nil, fmt.Errorf("parse %s: no go-import meta tags", urlStr) + } + if buildV { + log.Printf("get %q: found meta tag %#v at %s", importPath, metaImport, urlStr) + } + // If the import was "uni.edu/bob/project", which said the + // prefix was "uni.edu" and the RepoRoot was "evilroot.com", + // make sure we don't trust Bob and check out evilroot.com to + // "uni.edu" yet (possibly overwriting/preempting another + // non-evil student). Instead, first verify the root and see + // if it matches Bob's claim. + if metaImport.Prefix != importPath { + if buildV { + log.Printf("get %q: verifying non-authoritative meta tag", importPath) + } + urlStr0 := urlStr + urlStr, body, err = httpsOrHTTP(metaImport.Prefix) + if err != nil { + return nil, fmt.Errorf("fetch %s: %v", urlStr, err) + } + imports := parseMetaGoImports(body) + if len(imports) == 0 { + return nil, fmt.Errorf("fetch %s: no go-import meta tag", urlStr) + } + metaImport2, err := matchGoImport(imports, importPath) + if err != nil || metaImport != metaImport2 { + return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, metaImport.Prefix) + } + } + + if !strings.Contains(metaImport.RepoRoot, "://") { + return nil, fmt.Errorf("%s: invalid repo root %q; no scheme", urlStr, metaImport.RepoRoot) + } + rr := &repoRoot{ + vcs: vcsByCmd(metaImport.VCS), + repo: metaImport.RepoRoot, + root: metaImport.Prefix, + } + if rr.vcs == nil { + return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, metaImport.VCS) + } + return rr, nil +} + +// metaImport represents the parsed tags from HTML files. +type metaImport struct { + Prefix, VCS, RepoRoot string +} + +// errNoMatch is returned from matchGoImport when there's no applicable match. +var errNoMatch = errors.New("no import match") + +// matchGoImport returns the metaImport from imports matching importPath. +// An error is returned if there are multiple matches. +// errNoMatch is returned if none match. +func matchGoImport(imports []metaImport, importPath string) (_ metaImport, err error) { + match := -1 + for i, im := range imports { + if !strings.HasPrefix(importPath, im.Prefix) { + continue + } + if match != -1 { + err = fmt.Errorf("multiple meta tags match import path %q", importPath) + return + } + match = i + } + if match == -1 { + err = errNoMatch + return } - return nil, "", "", fmt.Errorf("unrecognized import path %q", importPath) + return imports[match], nil } // expand rewrites s to replace {k} with match[k] for each key k in match.