From dc2d64bf81b3342d35bdbfa42b971087ade6dfc6 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 10 Apr 2015 15:59:06 +0200 Subject: [PATCH] cmd/go: cache results of HTTP requests done during meta tag discovery Previously, running $ go get -u -v golang.org/x/tools/cmd/godoc would results in dozens of HTTP requests for https://golang.org/x/tools?go-get=1 once per package under x/tools. Now it caches the results. We still end up doing one HTTP request for all the packages under x/tools, but this reduces the total number of HTTP requests in ~half. This also moves the singleflight package back into an internal package. singleflight was originally elsewhere as a package, then got copied into "net" (without its tests). But now that we have internal, put it in its own package, and restore its test. Fixes #9249 Change-Id: Ieb5cf04fc4d0a0c188cb957efdc7ea3068c34e3f Reviewed-on: https://go-review.googlesource.com/8727 Reviewed-by: Ian Lance Taylor Run-TryBot: Brad Fitzpatrick --- src/cmd/dist/build.go | 1 + src/cmd/go/vcs.go | 92 ++++++++++++++----- src/go/build/deps_test.go | 2 +- .../singleflight}/singleflight.go | 34 +++---- .../singleflight/singleflight_test.go | 73 +++++++++++++++ src/net/lookup.go | 9 +- 6 files changed, 170 insertions(+), 41 deletions(-) rename src/{net => internal/singleflight}/singleflight.go (73%) create mode 100644 src/internal/singleflight/singleflight_test.go diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go index ba624aa5ef..6c4a09485f 100644 --- a/src/cmd/dist/build.go +++ b/src/cmd/dist/build.go @@ -859,6 +859,7 @@ var buildorder = []string{ "errors", "sync/atomic", "sync", + "internal/singleflight", "io", "unicode", "unicode/utf8", diff --git a/src/cmd/go/vcs.go b/src/cmd/go/vcs.go index 43027134e1..408104d776 100644 --- a/src/cmd/go/vcs.go +++ b/src/cmd/go/vcs.go @@ -9,12 +9,14 @@ import ( "encoding/json" "errors" "fmt" + "internal/singleflight" "log" "os" "os/exec" "path/filepath" "regexp" "strings" + "sync" ) // A vcsCmd describes how to use a version control system @@ -566,7 +568,7 @@ func repoRootForImportPathStatic(importPath, scheme string) (*repoRoot, error) { // repoRootForImportDynamic finds a *repoRoot for a custom domain that's not // statically known by repoRootForImportPathStatic. // -// This handles "vanity import paths" like "name.tld/pkg/foo". +// This handles custom import paths like "name.tld/pkg/foo". func repoRootForImportDynamic(importPath string) (*repoRoot, error) { slash := strings.Index(importPath, "/") if slash < 0 { @@ -585,7 +587,8 @@ func repoRootForImportDynamic(importPath string) (*repoRoot, error) { if err != nil { return nil, fmt.Errorf("parsing %s: %v", importPath, err) } - metaImport, err := matchGoImport(imports, importPath) + // Find the matched meta import. + mmi, err := matchGoImport(imports, importPath) if err != nil { if err != errNoMatch { return nil, fmt.Errorf("parse %s: %v", urlStr, err) @@ -593,7 +596,7 @@ func repoRootForImportDynamic(importPath string) (*repoRoot, error) { return nil, fmt.Errorf("parse %s: no go-import meta tags", urlStr) } if buildV { - log.Printf("get %q: found meta tag %#v at %s", importPath, metaImport, urlStr) + log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr) } // If the import was "uni.edu/bob/project", which said the // prefix was "uni.edu" and the RepoRoot was "evilroot.com", @@ -601,42 +604,89 @@ func repoRootForImportDynamic(importPath string) (*repoRoot, error) { // "uni.edu" yet (possibly overwriting/preempting another // non-evil student). Instead, first verify the root and see // if it matches Bob's claim. - if metaImport.Prefix != importPath { + if mmi.Prefix != importPath { if buildV { log.Printf("get %q: verifying non-authoritative meta tag", importPath) } urlStr0 := urlStr - urlStr, body, err = httpsOrHTTP(metaImport.Prefix) + var imports []metaImport + urlStr, imports, err = metaImportsForPrefix(mmi.Prefix) if err != nil { - return nil, fmt.Errorf("fetch %s: %v", urlStr, err) - } - imports, err := parseMetaGoImports(body) - if err != nil { - return nil, fmt.Errorf("parsing %s: %v", importPath, err) - } - if len(imports) == 0 { - return nil, fmt.Errorf("fetch %s: no go-import meta tag", urlStr) + return nil, err } metaImport2, err := matchGoImport(imports, importPath) - if err != nil || metaImport != metaImport2 { - return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, metaImport.Prefix) + if err != nil || mmi != metaImport2 { + return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix) } } - if !strings.Contains(metaImport.RepoRoot, "://") { - return nil, fmt.Errorf("%s: invalid repo root %q; no scheme", urlStr, metaImport.RepoRoot) + if !strings.Contains(mmi.RepoRoot, "://") { + return nil, fmt.Errorf("%s: invalid repo root %q; no scheme", urlStr, mmi.RepoRoot) } rr := &repoRoot{ - vcs: vcsByCmd(metaImport.VCS), - repo: metaImport.RepoRoot, - root: metaImport.Prefix, + vcs: vcsByCmd(mmi.VCS), + repo: mmi.RepoRoot, + root: mmi.Prefix, } if rr.vcs == nil { - return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, metaImport.VCS) + return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS) } return rr, nil } +var fetchGroup singleflight.Group +var ( + fetchCacheMu sync.Mutex + fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix +) + +// metaImportsForPrefix takes a package's root import path as declared in a tag +// and returns its HTML discovery URL and the parsed metaImport lines +// found on the page. +// +// The importPath is of the form "golang.org/x/tools". +// It is an error if no imports are found. +// urlStr will still be valid if err != nil. +// The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1" +func metaImportsForPrefix(importPrefix string) (urlStr string, imports []metaImport, err error) { + setCache := func(res fetchResult) (fetchResult, error) { + fetchCacheMu.Lock() + defer fetchCacheMu.Unlock() + fetchCache[importPrefix] = res + return res, nil + } + + resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { + fetchCacheMu.Lock() + if res, ok := fetchCache[importPrefix]; ok { + fetchCacheMu.Unlock() + return res, nil + } + fetchCacheMu.Unlock() + + urlStr, body, err := httpsOrHTTP(importPrefix) + if err != nil { + return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)}) + } + imports, err := parseMetaGoImports(body) + if err != nil { + return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)}) + } + if len(imports) == 0 { + err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr) + } + return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err}) + }) + res := resi.(fetchResult) + return res.urlStr, res.imports, res.err +} + +type fetchResult struct { + urlStr string // e.g. "https://foo.com/x/bar?go-get=1" + imports []metaImport + err error +} + // metaImport represents the parsed tags from HTML files. type metaImport struct { diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index 5719ffcec6..b826cf0c81 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -240,7 +240,7 @@ var pkgDeps = map[string][]string{ // Basic networking. // Because net must be used by any package that wants to // do networking portably, it must have a small dependency set: just L1+basic os. - "net": {"L1", "CGO", "os", "syscall", "time", "internal/syscall/windows"}, + "net": {"L1", "CGO", "os", "syscall", "time", "internal/syscall/windows", "internal/singleflight"}, // NET enables use of basic network-related packages. "NET": { diff --git a/src/net/singleflight.go b/src/internal/singleflight/singleflight.go similarity index 73% rename from src/net/singleflight.go rename to src/internal/singleflight/singleflight.go index bf599f0cc9..f4cb2d670d 100644 --- a/src/net/singleflight.go +++ b/src/internal/singleflight/singleflight.go @@ -2,7 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package net +// Package singleflight provides a duplicate function call suppression +// mechanism. +package singleflight import "sync" @@ -19,22 +21,22 @@ type call struct { // mutex held before the WaitGroup is done, and are read but // not written after the WaitGroup is done. dups int - chans []chan<- singleflightResult + chans []chan<- Result } -// singleflight represents a class of work and forms a namespace in +// Group represents a class of work and forms a namespace in // which units of work can be executed with duplicate suppression. -type singleflight struct { +type Group struct { mu sync.Mutex // protects m m map[string]*call // lazily initialized } -// singleflightResult holds the results of Do, so they can be passed +// Result holds the results of Do, so they can be passed // on a channel. -type singleflightResult struct { - v interface{} - err error - shared bool +type Result struct { + Val interface{} + Err error + Shared bool } // Do executes and returns the results of the given function, making @@ -42,7 +44,7 @@ type singleflightResult struct { // time. If a duplicate comes in, the duplicate caller waits for the // original to complete and receives the same results. // The return value shared indicates whether v was given to multiple callers. -func (g *singleflight) Do(key string, fn func() (interface{}, error)) (v interface{}, err error, shared bool) { +func (g *Group) Do(key string, fn func() (interface{}, error)) (v interface{}, err error, shared bool) { g.mu.Lock() if g.m == nil { g.m = make(map[string]*call) @@ -64,8 +66,8 @@ func (g *singleflight) Do(key string, fn func() (interface{}, error)) (v interfa // DoChan is like Do but returns a channel that will receive the // results when they are ready. -func (g *singleflight) DoChan(key string, fn func() (interface{}, error)) <-chan singleflightResult { - ch := make(chan singleflightResult, 1) +func (g *Group) DoChan(key string, fn func() (interface{}, error)) <-chan Result { + ch := make(chan Result, 1) g.mu.Lock() if g.m == nil { g.m = make(map[string]*call) @@ -76,7 +78,7 @@ func (g *singleflight) DoChan(key string, fn func() (interface{}, error)) <-chan g.mu.Unlock() return ch } - c := &call{chans: []chan<- singleflightResult{ch}} + c := &call{chans: []chan<- Result{ch}} c.wg.Add(1) g.m[key] = c g.mu.Unlock() @@ -87,14 +89,14 @@ func (g *singleflight) DoChan(key string, fn func() (interface{}, error)) <-chan } // doCall handles the single call for a key. -func (g *singleflight) doCall(c *call, key string, fn func() (interface{}, error)) { +func (g *Group) doCall(c *call, key string, fn func() (interface{}, error)) { c.val, c.err = fn() c.wg.Done() g.mu.Lock() delete(g.m, key) for _, ch := range c.chans { - ch <- singleflightResult{c.val, c.err, c.dups > 0} + ch <- Result{c.val, c.err, c.dups > 0} } g.mu.Unlock() } @@ -102,7 +104,7 @@ func (g *singleflight) doCall(c *call, key string, fn func() (interface{}, error // Forget tells the singleflight to forget about a key. Future calls // to Do for this key will call the function rather than waiting for // an earlier call to complete. -func (g *singleflight) Forget(key string) { +func (g *Group) Forget(key string) { g.mu.Lock() delete(g.m, key) g.mu.Unlock() diff --git a/src/internal/singleflight/singleflight_test.go b/src/internal/singleflight/singleflight_test.go new file mode 100644 index 0000000000..30ba7f7ab4 --- /dev/null +++ b/src/internal/singleflight/singleflight_test.go @@ -0,0 +1,73 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package singleflight + +import ( + "errors" + "fmt" + "sync" + "sync/atomic" + "testing" + "time" +) + +func TestDo(t *testing.T) { + var g Group + v, err, _ := g.Do("key", func() (interface{}, error) { + return "bar", nil + }) + if got, want := fmt.Sprintf("%v (%T)", v, v), "bar (string)"; got != want { + t.Errorf("Do = %v; want %v", got, want) + } + if err != nil { + t.Errorf("Do error = %v", err) + } +} + +func TestDoErr(t *testing.T) { + var g Group + someErr := errors.New("Some error") + v, err, _ := g.Do("key", func() (interface{}, error) { + return nil, someErr + }) + if err != someErr { + t.Errorf("Do error = %v; want someErr %v", err, someErr) + } + if v != nil { + t.Errorf("unexpected non-nil value %#v", v) + } +} + +func TestDoDupSuppress(t *testing.T) { + var g Group + c := make(chan string) + var calls int32 + fn := func() (interface{}, error) { + atomic.AddInt32(&calls, 1) + return <-c, nil + } + + const n = 10 + var wg sync.WaitGroup + for i := 0; i < n; i++ { + wg.Add(1) + go func() { + v, err, _ := g.Do("key", fn) + if err != nil { + t.Errorf("Do error: %v", err) + } + if v.(string) != "bar" { + t.Errorf("got %q; want %q", v, "bar") + } + wg.Done() + }() + } + time.Sleep(100 * time.Millisecond) // let goroutines above block + c <- "bar" + wg.Wait() + if got := atomic.LoadInt32(&calls); got != 1 { + t.Errorf("number of calls = %d; want 1", got) + } +} diff --git a/src/net/lookup.go b/src/net/lookup.go index be4b0c2df6..5adcd8bb68 100644 --- a/src/net/lookup.go +++ b/src/net/lookup.go @@ -4,7 +4,10 @@ package net -import "time" +import ( + "internal/singleflight" + "time" +) // protocols contains minimal mappings between internet protocol // names and numbers for platforms that don't have a complete list of @@ -39,7 +42,7 @@ func LookupIP(host string) (ips []IP, err error) { return } -var lookupGroup singleflight +var lookupGroup singleflight.Group // lookupIPMerge wraps lookupIP, but makes sure that for any given // host, only one lookup is in-flight at a time. The returned memory @@ -98,7 +101,7 @@ func lookupIPDeadline(host string, deadline time.Time) (addrs []IPAddr, err erro return nil, errTimeout case r := <-ch: - return lookupIPReturn(r.v, r.err, r.shared) + return lookupIPReturn(r.Val, r.Err, r.Shared) } } -- 2.48.1