From 846f971daa03fda149a3d66a3fda6eb6a2a7484e Mon Sep 17 00:00:00 2001 From: Robert Findley Date: Sun, 15 May 2022 19:31:42 -0400 Subject: [PATCH] go/types, types2: remove Named.once in favor of monotonic state Introduce a monotonic state variable to track the lifecycle of a named type, replacing the existing sync.Once. Having a single guard for the state of underlying and methods will allow for cleaning-up when the type is fully expanded. In the future, this state may also be used for detecting access to information such as underlying or methods before the type is fully set-up, though that will require rethinking our type-checking of invalid cyclic types. Also remove support for type-type inference. If we ever support this feature in the future (inference of missing type arguments for named type instances), it will likely involve additional machinery that does not yet exist. Remove the current partial support to simplify our internal APIs. In particular, this means that Named.resolver is only used for lazy loading. As a result, we can revert the lazy loader signature to its previous form. A lot of exposition is added for how Named types work. Along the way, the terminology we use to describe them is refined. Some microbenchmarks are added that were useful in evaluating the tradeoffs between synchronization mechanisms. Updates #52728 Change-Id: I4e147360bc6e5d8cd4f37e32e86fece0530a6480 Reviewed-on: https://go-review.googlesource.com/c/go/+/404875 Run-TryBot: Robert Findley Reviewed-by: Robert Griesemer TryBot-Result: Gopher Robot --- src/cmd/compile/internal/types2/api_test.go | 2 +- src/cmd/compile/internal/types2/decl.go | 4 +- .../compile/internal/types2/instantiate.go | 3 - src/cmd/compile/internal/types2/named.go | 289 +++++++++++++----- src/cmd/compile/internal/types2/named_test.go | 75 +++++ src/cmd/compile/internal/types2/object.go | 14 +- src/cmd/compile/internal/types2/typexpr.go | 48 +-- src/go/types/api_test.go | 2 +- src/go/types/decl.go | 4 +- src/go/types/instantiate.go | 3 - src/go/types/named.go | 289 +++++++++++++----- src/go/types/named_test.go | 88 ++++++ src/go/types/object.go | 14 +- src/go/types/typexpr.go | 53 +--- 14 files changed, 609 insertions(+), 279 deletions(-) create mode 100644 src/cmd/compile/internal/types2/named_test.go create mode 100644 src/go/types/named_test.go diff --git a/src/cmd/compile/internal/types2/api_test.go b/src/cmd/compile/internal/types2/api_test.go index e6de955a6e..f5526bb25a 100644 --- a/src/cmd/compile/internal/types2/api_test.go +++ b/src/cmd/compile/internal/types2/api_test.go @@ -38,7 +38,7 @@ func pkgFor(path, source string, info *Info) (*Package, error) { return conf.Check(f.PkgName.Value, []*syntax.File{f}, info) } -func mustTypecheck(t *testing.T, path, source string, info *Info) string { +func mustTypecheck(t testing.TB, path, source string, info *Info) string { pkg, err := pkgFor(path, source, info) if err != nil { name := path diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index b6f81aa8a5..008d3698b7 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -522,8 +522,8 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *Named assert(rhs != nil) named.fromRHS = rhs - // If the underlying was not set while type-checking the right-hand side, it - // is invalid and an error should have been reported elsewhere. + // If the underlying type was not set while type-checking the right-hand + // side, it is invalid and an error should have been reported elsewhere. if named.underlying == nil { named.underlying = Typ[Invalid] } diff --git a/src/cmd/compile/internal/types2/instantiate.go b/src/cmd/compile/internal/types2/instantiate.go index bb90ab3736..44ed0319c8 100644 --- a/src/cmd/compile/internal/types2/instantiate.go +++ b/src/cmd/compile/internal/types2/instantiate.go @@ -79,9 +79,6 @@ func (check *Checker) instance(pos syntax.Pos, orig Type, targs []Type, ctxt *Co tname := NewTypeName(pos, orig.obj.pkg, orig.obj.name, nil) named := check.newNamed(tname, orig, nil, nil) // underlying, tparams, and methods are set when named is resolved named.targs = newTypeList(targs) - named.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - return expandNamed(ctxt, n, pos) - } res = named case *Signature: diff --git a/src/cmd/compile/internal/types2/named.go b/src/cmd/compile/internal/types2/named.go index 849398a6f4..0a2b2aa6b1 100644 --- a/src/cmd/compile/internal/types2/named.go +++ b/src/cmd/compile/internal/types2/named.go @@ -5,32 +5,116 @@ package types2 import ( - "cmd/compile/internal/syntax" "sync" + "sync/atomic" ) +// Type-checking Named types is subtle, because they may be recursively +// defined, and because their full details may be spread across multiple +// declarations (via methods). For this reason they are type-checked lazily, +// to avoid information being accessed before it is complete. +// +// Conceptually, it is helpful to think of named types as having two distinct +// sets of information: +// - "LHS" information, defining their identity: Obj() and TypeArgs() +// - "RHS" information, defining their details: TypeParams(), Underlying(), +// and methods. +// +// In this taxonomy, LHS information is available immediately, but RHS +// information is lazy. Specifically, a named type N may be constructed in any +// of the following ways: +// 1. type-checked from the source +// 2. loaded eagerly from export data +// 3. loaded lazily from export data (when using unified IR) +// 4. instantiated from a generic type +// +// In cases 1, 3, and 4, it is possible that the underlying type or methods of +// N may not be immediately available. +// - During type-checking, we allocate N before type-checking its underlying +// type or methods, so that we may resolve recursive references. +// - When loading from export data, we may load its methods and underlying +// type lazily using a provided load function. +// - After instantiating, we lazily expand the underlying type and methods +// (note that instances may be created while still in the process of +// type-checking the original type declaration). +// +// In cases 3 and 4 this lazy construction may also occur concurrently, due to +// concurrent use of the type checker API (after type checking or importing has +// finished). It is critical that we keep track of state, so that Named types +// are constructed exactly once and so that we do not access their details too +// soon. +// +// We achieve this by tracking state with an atomic state variable, and +// guarding potentially concurrent calculations with a mutex. At any point in +// time this state variable determines which data on N may be accessed. As +// state monotonically progresses, any data available at state M may be +// accessed without acquiring the mutex at state N, provided N >= M. +// +// GLOSSARY: Here are a few terms used in this file to describe Named types: +// - We say that a Named type is "instantiated" if it has been constructed by +// instantiating a generic named type with type arguments. +// - We say that a Named type is "declared" if it corresponds to a type +// declaration in the source. Instantiated named types correspond to a type +// instantiation in the source, not a declaration. But their Origin type is +// a declared type. +// - We say that a Named type is "resolved" if its RHS information has been +// loaded or fully type-checked. For Named types constructed from export +// data, this may involve invoking a loader function to extract information +// from export data. For instantiated named types this involves reading +// information from their origin. +// - We say that a Named type is "expanded" if it is an instantiated type and +// type parameters in its underlying type and methods have been substituted +// with the type arguments from the instantiation. A type may be partially +// expanded if some but not all of these details have been substituted. +// Similarly, we refer to these individual details (underlying type or +// method) as being "expanded". +// - When all information is known for a named type, we say it is "complete". +// +// Some invariants to keep in mind: each declared Named type has a single +// corresponding object, and that object's type is the (possibly generic) Named +// type. Declared Named types are identical if and only if their pointers are +// identical. On the other hand, multiple instantiated Named types may be +// identical even though their pointers are not identical. One has to use +// Identical to compare them. For instantiated named types, their obj is a +// synthetic placeholder that records their position of the corresponding +// instantiation in the source (if they were constructed during type checking). + // A Named represents a named (defined) type. type Named struct { - check *Checker - obj *TypeName // corresponding declared object for declared types; placeholder for instantiated types - orig *Named // original, uninstantiated type - fromRHS Type // type (on RHS of declaration) this *Named type is derived from (for cycle reporting) + check *Checker // non-nil during type-checking; nil otherwise + obj *TypeName // corresponding declared object for declared types; see above for instantiated types + orig *Named // origin type for instantiated types, this type for declared types + targs *TypeList // type arguments (after instantiation), or nil + + // fromRHS holds the type (on RHS of declaration) this *Named type is derived + // from (for cycle reporting). Only used by validType, and therefore does not + // require synchronization. + fromRHS Type + + mu sync.Mutex // guards all fields below + state_ uint32 // the current state of this type; must only be accessed atomically underlying Type // possibly a *Named during setup; never a *Named once set up completely tparams *TypeParamList // type parameters, or nil - targs *TypeList // type arguments (after instantiation), or nil // methods declared for this type (not the method set of this type). // Signatures are type-checked lazily. // For non-instantiated types, this is a fully populated list of methods. For - // instantiated types, this is a 'lazy' list, and methods are instantiated - // when they are first accessed. + // instantiated types, this is a 'lazy' list, and methods are individually + // expanded when they are first accessed. methods *methodList - // resolver may be provided to lazily resolve type parameters, underlying, and methods. - resolver func(*Context, *Named) (tparams *TypeParamList, underlying Type, methods *methodList) - once sync.Once // ensures that tparams, underlying, and methods are resolved before accessing + // loader may be provided to lazily load type parameters, underlying, and methods. + loader func(*Named) (tparams []*TypeParam, underlying Type, methods []*Func) } +// namedState represents the possible states that a named type may assume. +type namedState uint32 + +const ( + unresolved namedState = iota // tparams, underlying type and methods might be unavailable + resolved +) + // NewNamed returns a new named type for the given type name, underlying type, and associated methods. // If the given type name obj doesn't have a type yet, its type is set to the returned named type. // The underlying type must not be a *Named. @@ -41,24 +125,74 @@ func NewNamed(obj *TypeName, underlying Type, methods []*Func) *Named { return (*Checker)(nil).newNamed(obj, nil, underlying, newMethodList(methods)) } -func (t *Named) resolve(ctxt *Context) *Named { - if t.resolver == nil { - return t +// resolve resolves the type parameters, methods, and underlying type of n. +// This information may be loaded from a provided loader function, or computed +// from an origin type (in the case of instances). +// +// After resolution, the type parameters, methods, and underlying type of n are +// accessible; but if n is an instantiated type, its methods may still be +// unexpanded. +func (n *Named) resolve(ctxt *Context) *Named { + if n.state() >= resolved { // avoid locking below + return n } - t.once.Do(func() { - // TODO(mdempsky): Since we're passing t to the resolver anyway - // (necessary because types2 expects the receiver type for methods - // on defined interface types to be the Named rather than the - // underlying Interface), maybe it should just handle calling - // SetTypeParams, SetUnderlying, and AddMethod instead? Those - // methods would need to support reentrant calls though. It would - // also make the API more future-proof towards further extensions - // (like SetTypeParams). - t.tparams, t.underlying, t.methods = t.resolver(ctxt, t) - t.fromRHS = t.underlying // for cycle detection - }) - return t + // TODO(rfindley): if n.check is non-nil we can avoid locking here, since + // type-checking is not concurrent. Evaluate if this is worth doing. + n.mu.Lock() + defer n.mu.Unlock() + + if n.state() >= resolved { + return n + } + + if n.TypeArgs().Len() > 0 { + assert(n.underlying == nil) // n is an unresolved instance + assert(n.loader == nil) // instances are created by instantiation, in which case n.loader is nil + n.orig.resolve(ctxt) + + underlying := n.expandUnderlying(ctxt) + + n.tparams = n.orig.tparams + n.underlying = underlying + n.fromRHS = n.orig.fromRHS // for cycle detection + n.methods = newLazyMethodList(n.orig.methods.Len()) + n.setState(resolved) + return n + } + + // TODO(mdempsky): Since we're passing n to the loader anyway + // (necessary because types2 expects the receiver type for methods + // on defined interface types to be the Named rather than the + // underlying Interface), maybe it should just handle calling + // SetTypeParams, SetUnderlying, and AddMethod instead? Those + // methods would need to support reentrant calls though. It would + // also make the API more future-proof towards further extensions. + if n.loader != nil { + assert(n.underlying == nil) + + tparams, underlying, methods := n.loader(n) + + n.tparams = bindTParams(tparams) + n.underlying = underlying + n.fromRHS = underlying // for cycle detection + n.methods = newMethodList(methods) + n.loader = nil + } + + n.setState(resolved) + return n +} + +// state atomically accesses the current state of the receiver. +func (n *Named) state() namedState { + return namedState(atomic.LoadUint32(&n.state_)) +} + +// setState atomically stores the given state for n. +// Must only be called while holding n.mu. +func (n *Named) setState(state namedState) { + atomic.StoreUint32(&n.state_, uint32(state)) } // newNamed is like NewNamed but with a *Checker receiver and additional orig argument. @@ -80,16 +214,16 @@ func (check *Checker) newNamed(obj *TypeName, orig *Named, underlying Type, meth func (t *Named) cleanup() { assert(t.orig.orig == t.orig) // Ensure that every defined type created in the course of type-checking has - // either non-*Named underlying, or is unresolved. + // either non-*Named underlying type, or is unexpanded. // - // This guarantees that we don't leak any types whose underlying is *Named, - // because any unresolved instances will lazily compute their underlying by - // substituting in the underlying of their origin. The origin must have - // either been imported or type-checked and expanded here, and in either case - // its underlying will be fully expanded. + // This guarantees that we don't leak any types whose underlying type is + // *Named, because any unexpanded instances will lazily compute their + // underlying type by substituting in the underlying type of their origin. + // The origin must have either been imported or type-checked and expanded + // here, and in either case its underlying type will be fully expanded. switch t.underlying.(type) { case nil: - if t.resolver == nil { + if t.TypeArgs().Len() == 0 { panic("nil underlying") } case *Named: @@ -134,12 +268,13 @@ func (t *Named) NumMethods() int { return t.resolve(nil).methods.Len() } func (t *Named) Method(i int) *Func { t.resolve(nil) return t.methods.At(i, func() *Func { - return t.instantiateMethod(i) + return t.expandMethod(i) }) } -// instantiateMethod instantiates the i'th method for an instantiated receiver. -func (t *Named) instantiateMethod(i int) *Func { +// expandMethod substitutes type arguments in the i'th method for an +// instantiated receiver. +func (t *Named) expandMethod(i int) *Func { assert(t.TypeArgs().Len() > 0) // t must be an instance // t.orig.methods is not lazy. origm is the method instantiated with its @@ -275,7 +410,7 @@ func (n0 *Named) under() Type { check := n0.check n := n0 - seen := make(map[*Named]int) // types that need their underlying resolved + seen := make(map[*Named]int) // types that need their underlying type resolved var path []Object // objects encountered, for cycle reporting loop: @@ -352,66 +487,64 @@ func (check *Checker) bestContext(ctxt *Context) *Context { return NewContext() } -// expandNamed ensures that the underlying type of n is instantiated. -// The underlying type will be Typ[Invalid] if there was an error. -func expandNamed(ctxt *Context, n *Named, instPos syntax.Pos) (tparams *TypeParamList, underlying Type, methods *methodList) { +// expandUnderlying substitutes type arguments in the underlying type n.orig, +// returning the result. Returns Typ[Invalid] if there was an error. +func (n *Named) expandUnderlying(ctxt *Context) Type { check := n.check if check != nil && check.conf.Trace { - check.trace(instPos, "-- expandNamed %s", n) + check.trace(n.obj.pos, "-- Named.expandUnderlying %s", n) check.indent++ defer func() { check.indent-- - check.trace(instPos, "=> %s (tparams = %s, under = %s)", n, tparams.list(), underlying) + check.trace(n.obj.pos, "=> %s (tparams = %s, under = %s)", n, n.tparams.list(), n.underlying) }() } - n.orig.resolve(ctxt) assert(n.orig.underlying != nil) if _, unexpanded := n.orig.underlying.(*Named); unexpanded { - // We should only get an unexpanded underlying here during type checking + // We should only get a Named underlying type here during type checking // (for example, in recursive type declarations). assert(check != nil) } - // Mismatching arg and tparam length may be checked elsewhere. - if n.orig.tparams.Len() == n.targs.Len() { - // We must always have a context, to avoid infinite recursion. - ctxt = check.bestContext(ctxt) - h := ctxt.instanceHash(n.orig, n.targs.list()) - // ensure that an instance is recorded for h to avoid infinite recursion. - ctxt.update(h, n.orig, n.TypeArgs().list(), n) - - smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) - underlying = n.check.subst(instPos, n.orig.underlying, smap, ctxt) - // If the underlying of n is an interface, we need to set the receiver of - // its methods accurately -- we set the receiver of interface methods on - // the RHS of a type declaration to the defined type. - if iface, _ := underlying.(*Interface); iface != nil { - if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { - // If the underlying doesn't actually use type parameters, it's possible - // that it wasn't substituted. In this case we need to create a new - // *Interface before modifying receivers. - if iface == n.orig.underlying { - old := iface - iface = check.newInterface() - iface.embeddeds = old.embeddeds - iface.complete = old.complete - iface.implicit = old.implicit // should be false but be conservative - underlying = iface - } - iface.methods = methods + if n.orig.tparams.Len() != n.targs.Len() { + // Mismatching arg and tparam length may be checked elsewhere. + return Typ[Invalid] + } + + // We must always have a context, to avoid infinite recursion. + ctxt = check.bestContext(ctxt) + h := ctxt.instanceHash(n.orig, n.targs.list()) + // ensure that an instance is recorded for h to avoid infinite recursion. + ctxt.update(h, n.orig, n.TypeArgs().list(), n) + + smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) + underlying := n.check.subst(n.obj.pos, n.orig.underlying, smap, ctxt) + // If the underlying type of n is an interface, we need to set the receiver + // of its methods accurately -- we set the receiver of interface methods on + // the RHS of a type declaration to the defined type. + if iface, _ := underlying.(*Interface); iface != nil { + if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { + // If the underlying type doesn't actually use type parameters, it's + // possible that it wasn't substituted. In this case we need to create + // a new *Interface before modifying receivers. + if iface == n.orig.underlying { + old := iface + iface = check.newInterface() + iface.embeddeds = old.embeddeds + iface.complete = old.complete + iface.implicit = old.implicit // should be false but be conservative + underlying = iface } + iface.methods = methods } - } else { - underlying = Typ[Invalid] } - - return n.orig.tparams, underlying, newLazyMethodList(n.orig.methods.Len()) + return underlying } -// safeUnderlying returns the underlying of typ without expanding instances, to -// avoid infinite recursion. +// safeUnderlying returns the underlying type of typ without expanding +// instances, to avoid infinite recursion. // // TODO(rfindley): eliminate this function or give it a better name. func safeUnderlying(typ Type) Type { diff --git a/src/cmd/compile/internal/types2/named_test.go b/src/cmd/compile/internal/types2/named_test.go new file mode 100644 index 0000000000..14a982048a --- /dev/null +++ b/src/cmd/compile/internal/types2/named_test.go @@ -0,0 +1,75 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types2_test + +import ( + "testing" + + . "cmd/compile/internal/types2" +) + +func BenchmarkNamed(b *testing.B) { + const src = ` +package p + +type T struct { + P int +} + +func (T) M(int) {} +func (T) N() (i int) { return } + +type G[P any] struct { + F P +} + +func (G[P]) M(P) {} +func (G[P]) N() (p P) { return } + +type Inst = G[int] + ` + pkg, err := pkgFor("p", src, nil) + if err != nil { + b.Fatal(err) + } + + var ( + T = pkg.Scope().Lookup("T").Type() + G = pkg.Scope().Lookup("G").Type() + SrcInst = pkg.Scope().Lookup("Inst").Type() + UserInst = mustInstantiate(b, G, Typ[Int]) + ) + + tests := []struct { + name string + typ Type + }{ + {"nongeneric", T}, + {"generic", G}, + {"src instance", SrcInst}, + {"user instance", UserInst}, + } + + b.Run("Underlying", func(b *testing.B) { + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + // Access underlying once, to trigger any lazy calculation. + _ = test.typ.Underlying() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = test.typ.Underlying() + } + }) + } + }) +} + +func mustInstantiate(tb testing.TB, orig Type, targs ...Type) Type { + inst, err := Instantiate(nil, orig, targs, true) + if err != nil { + tb.Fatal(err) + } + return inst +} diff --git a/src/cmd/compile/internal/types2/object.go b/src/cmd/compile/internal/types2/object.go index 75f7ea5b12..df080f071c 100644 --- a/src/cmd/compile/internal/types2/object.go +++ b/src/cmd/compile/internal/types2/object.go @@ -279,19 +279,7 @@ func NewTypeName(pos syntax.Pos, pkg *Package, name string, typ Type) *TypeName // lazily calls resolve to finish constructing the Named object. func NewTypeNameLazy(pos syntax.Pos, pkg *Package, name string, load func(named *Named) (tparams []*TypeParam, underlying Type, methods []*Func)) *TypeName { obj := NewTypeName(pos, pkg, name, nil) - - resolve := func(_ *Context, t *Named) (*TypeParamList, Type, *methodList) { - tparams, underlying, methods := load(t) - - switch underlying.(type) { - case nil, *Named: - panic(fmt.Sprintf("invalid underlying type %T", t.underlying)) - } - - return bindTParams(tparams), underlying, newMethodList(methods) - } - - NewNamed(obj, nil, nil).resolver = resolve + NewNamed(obj, nil, nil).loader = load return obj } diff --git a/src/cmd/compile/internal/types2/typexpr.go b/src/cmd/compile/internal/types2/typexpr.go index 1f8b40dba6..020653332d 100644 --- a/src/cmd/compile/internal/types2/typexpr.go +++ b/src/cmd/compile/internal/types2/typexpr.go @@ -432,57 +432,20 @@ func (check *Checker) instantiatedType(x syntax.Expr, xlist []syntax.Expr, def * return Typ[Invalid] } - // enableTypeTypeInference controls whether to infer missing type arguments - // using constraint type inference. See issue #51527. - const enableTypeTypeInference = false - // create the instance ctxt := check.bestContext(nil) - h := ctxt.instanceHash(orig, targs) - // targs may be incomplete, and require inference. In any case we should de-duplicate. - inst, _ := ctxt.lookup(h, orig, targs).(*Named) - // If inst is non-nil, we can't just return here. Inst may have been - // constructed via recursive substitution, in which case we wouldn't do the - // validation below. Ensure that the validation (and resulting errors) runs - // for each instantiated type in the source. - if inst == nil { - // x may be a selector for an imported type; use its start pos rather than x.Pos(). - tname := NewTypeName(syntax.StartPos(x), orig.obj.pkg, orig.obj.name, nil) - inst = check.newNamed(tname, orig, nil, nil) // underlying, methods and tparams are set when named is resolved - inst.targs = newTypeList(targs) - inst = ctxt.update(h, orig, targs, inst).(*Named) - } + inst := check.instance(x.Pos(), orig, targs, ctxt).(*Named) def.setUnderlying(inst) - inst.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - tparams := n.orig.TypeParams().list() - - targs := n.targs.list() - if enableTypeTypeInference && len(targs) < len(tparams) { - // If inference fails, len(inferred) will be 0, and inst.underlying will - // be set to Typ[Invalid] in expandNamed. - inferred := check.infer(x.Pos(), tparams, targs, nil, nil) - if len(inferred) > len(targs) { - n.targs = newTypeList(inferred) - } - } - - return expandNamed(ctxt, n, x.Pos()) - } - // orig.tparams may not be set up, so we need to do expansion later. check.later(func() { // This is an instance from the source, not from recursive substitution, // and so it must be resolved during type-checking so that we can report // errors. - inst.resolve(ctxt) - // Since check is non-nil, we can still mutate inst. Unpinning the resolver - // frees some memory. - inst.resolver = nil check.recordInstance(x, inst.TypeArgs().list(), inst) - if check.validateTArgLen(x.Pos(), inst.tparams.Len(), inst.targs.Len()) { - if i, err := check.verify(x.Pos(), inst.tparams.list(), inst.targs.list()); err != nil { + if check.validateTArgLen(x.Pos(), inst.TypeParams().Len(), inst.targs.Len()) { + if i, err := check.verify(x.Pos(), inst.TypeParams().list(), inst.targs.list()); err != nil { // best position for error reporting pos := x.Pos() if i < len(xlist) { @@ -490,10 +453,13 @@ func (check *Checker) instantiatedType(x syntax.Expr, xlist []syntax.Expr, def * } check.softErrorf(pos, "%s", err) } else { - check.mono.recordInstance(check.pkg, x.Pos(), inst.tparams.list(), inst.targs.list(), xlist) + check.mono.recordInstance(check.pkg, x.Pos(), inst.TypeParams().list(), inst.targs.list(), xlist) } } + // TODO(rfindley): remove this call: we don't need to call validType here, + // as cycles can only occur for types used inside a Named type declaration, + // and so it suffices to call validType from declared types. check.validType(inst) }).describef(x, "resolve instance %s", inst) diff --git a/src/go/types/api_test.go b/src/go/types/api_test.go index eb17f9280d..db2ace5feb 100644 --- a/src/go/types/api_test.go +++ b/src/go/types/api_test.go @@ -43,7 +43,7 @@ func pkgForMode(path, source string, info *Info, mode parser.Mode) (*Package, er return conf.Check(f.Name.Name, fset, []*ast.File{f}, info) } -func mustTypecheck(t *testing.T, path, source string, info *Info) string { +func mustTypecheck(t testing.TB, path, source string, info *Info) string { pkg, err := pkgFor(path, source, info) if err != nil { name := path diff --git a/src/go/types/decl.go b/src/go/types/decl.go index 123d296791..61c9696948 100644 --- a/src/go/types/decl.go +++ b/src/go/types/decl.go @@ -579,8 +579,8 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *ast.TypeSpec, def *Named) { assert(rhs != nil) named.fromRHS = rhs - // If the underlying was not set while type-checking the right-hand side, it - // is invalid and an error should have been reported elsewhere. + // If the underlying type was not set while type-checking the right-hand + // side, it is invalid and an error should have been reported elsewhere. if named.underlying == nil { named.underlying = Typ[Invalid] } diff --git a/src/go/types/instantiate.go b/src/go/types/instantiate.go index 964a4f907c..8be0eab407 100644 --- a/src/go/types/instantiate.go +++ b/src/go/types/instantiate.go @@ -79,9 +79,6 @@ func (check *Checker) instance(pos token.Pos, orig Type, targs []Type, ctxt *Con tname := NewTypeName(pos, orig.obj.pkg, orig.obj.name, nil) named := check.newNamed(tname, orig, nil, nil) // underlying, tparams, and methods are set when named is resolved named.targs = newTypeList(targs) - named.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - return expandNamed(ctxt, n, pos) - } res = named case *Signature: diff --git a/src/go/types/named.go b/src/go/types/named.go index a82679eb10..bfb4a11da7 100644 --- a/src/go/types/named.go +++ b/src/go/types/named.go @@ -5,32 +5,116 @@ package types import ( - "go/token" "sync" + "sync/atomic" ) +// Type-checking Named types is subtle, because they may be recursively +// defined, and because their full details may be spread across multiple +// declarations (via methods). For this reason they are type-checked lazily, +// to avoid information being accessed before it is complete. +// +// Conceptually, it is helpful to think of named types as having two distinct +// sets of information: +// - "LHS" information, defining their identity: Obj() and TypeArgs() +// - "RHS" information, defining their details: TypeParams(), Underlying(), +// and methods. +// +// In this taxonomy, LHS information is available immediately, but RHS +// information is lazy. Specifically, a named type N may be constructed in any +// of the following ways: +// 1. type-checked from the source +// 2. loaded eagerly from export data +// 3. loaded lazily from export data (when using unified IR) +// 4. instantiated from a generic type +// +// In cases 1, 3, and 4, it is possible that the underlying type or methods of +// N may not be immediately available. +// - During type-checking, we allocate N before type-checking its underlying +// type or methods, so that we may resolve recursive references. +// - When loading from export data, we may load its methods and underlying +// type lazily using a provided load function. +// - After instantiating, we lazily expand the underlying type and methods +// (note that instances may be created while still in the process of +// type-checking the original type declaration). +// +// In cases 3 and 4 this lazy construction may also occur concurrently, due to +// concurrent use of the type checker API (after type checking or importing has +// finished). It is critical that we keep track of state, so that Named types +// are constructed exactly once and so that we do not access their details too +// soon. +// +// We achieve this by tracking state with an atomic state variable, and +// guarding potentially concurrent calculations with a mutex. At any point in +// time this state variable determines which data on N may be accessed. As +// state monotonically progresses, any data available at state M may be +// accessed without acquiring the mutex at state N, provided N >= M. +// +// GLOSSARY: Here are a few terms used in this file to describe Named types: +// - We say that a Named type is "instantiated" if it has been constructed by +// instantiating a generic named type with type arguments. +// - We say that a Named type is "declared" if it corresponds to a type +// declaration in the source. Instantiated named types correspond to a type +// instantiation in the source, not a declaration. But their Origin type is +// a declared type. +// - We say that a Named type is "resolved" if its RHS information has been +// loaded or fully type-checked. For Named types constructed from export +// data, this may involve invoking a loader function to extract information +// from export data. For instantiated named types this involves reading +// information from their origin. +// - We say that a Named type is "expanded" if it is an instantiated type and +// type parameters in its underlying type and methods have been substituted +// with the type arguments from the instantiation. A type may be partially +// expanded if some but not all of these details have been substituted. +// Similarly, we refer to these individual details (underlying type or +// method) as being "expanded". +// - When all information is known for a named type, we say it is "complete". +// +// Some invariants to keep in mind: each declared Named type has a single +// corresponding object, and that object's type is the (possibly generic) Named +// type. Declared Named types are identical if and only if their pointers are +// identical. On the other hand, multiple instantiated Named types may be +// identical even though their pointers are not identical. One has to use +// Identical to compare them. For instantiated named types, their obj is a +// synthetic placeholder that records their position of the corresponding +// instantiation in the source (if they were constructed during type checking). + // A Named represents a named (defined) type. type Named struct { - check *Checker - obj *TypeName // corresponding declared object for declared types; placeholder for instantiated types - orig *Named // original, uninstantiated type - fromRHS Type // type (on RHS of declaration) this *Named type is derived of (for cycle reporting) + check *Checker // non-nil during type-checking; nil otherwise + obj *TypeName // corresponding declared object for declared types; see above for instantiated types + orig *Named // origin type for instantiated types, this type for declared types + targs *TypeList // type arguments (after instantiation), or nil + + // fromRHS holds the type (on RHS of declaration) this *Named type is derived + // from (for cycle reporting). Only used by validType, and therefore does not + // require synchronization. + fromRHS Type + + mu sync.Mutex // guards all fields below + state_ uint32 // the current state of this type; must only be accessed atomically underlying Type // possibly a *Named during setup; never a *Named once set up completely tparams *TypeParamList // type parameters, or nil - targs *TypeList // type arguments (after instantiation), or nil // methods declared for this type (not the method set of this type). // Signatures are type-checked lazily. // For non-instantiated types, this is a fully populated list of methods. For - // instantiated types, this is a 'lazy' list, and methods are instantiated - // when they are first accessed. + // instantiated types, this is a 'lazy' list, and methods are individually + // expanded when they are first accessed. methods *methodList - // resolver may be provided to lazily resolve type parameters, underlying, and methods. - resolver func(*Context, *Named) (tparams *TypeParamList, underlying Type, methods *methodList) - once sync.Once // ensures that tparams, underlying, and methods are resolved before accessing + // loader may be provided to lazily load type parameters, underlying, and methods. + loader func(*Named) (tparams []*TypeParam, underlying Type, methods []*Func) } +// namedState represents the possible states that a named type may assume. +type namedState uint32 + +const ( + unresolved namedState = iota // tparams, underlying type and methods might be unavailable + resolved +) + // NewNamed returns a new named type for the given type name, underlying type, and associated methods. // If the given type name obj doesn't have a type yet, its type is set to the returned named type. // The underlying type must not be a *Named. @@ -41,24 +125,74 @@ func NewNamed(obj *TypeName, underlying Type, methods []*Func) *Named { return (*Checker)(nil).newNamed(obj, nil, underlying, newMethodList(methods)) } -func (t *Named) resolve(ctxt *Context) *Named { - if t.resolver == nil { - return t +// resolve resolves the type parameters, methods, and underlying type of n. +// This information may be loaded from a provided loader function, or computed +// from an origin type (in the case of instances). +// +// After resolution, the type parameters, methods, and underlying type of n are +// accessible; but if n is an instantiated type, its methods may still be +// unexpanded. +func (n *Named) resolve(ctxt *Context) *Named { + if n.state() >= resolved { // avoid locking below + return n } - t.once.Do(func() { - // TODO(mdempsky): Since we're passing t to the resolver anyway - // (necessary because types2 expects the receiver type for methods - // on defined interface types to be the Named rather than the - // underlying Interface), maybe it should just handle calling - // SetTypeParams, SetUnderlying, and AddMethod instead? Those - // methods would need to support reentrant calls though. It would - // also make the API more future-proof towards further extensions - // (like SetTypeParams). - t.tparams, t.underlying, t.methods = t.resolver(ctxt, t) - t.fromRHS = t.underlying // for cycle detection - }) - return t + // TODO(rfindley): if n.check is non-nil we can avoid locking here, since + // type-checking is not concurrent. Evaluate if this is worth doing. + n.mu.Lock() + defer n.mu.Unlock() + + if n.state() >= resolved { + return n + } + + if n.TypeArgs().Len() > 0 { + assert(n.underlying == nil) // n is an unresolved instance + assert(n.loader == nil) // instances are created by instantiation, in which case n.loader is nil + n.orig.resolve(ctxt) + + underlying := n.expandUnderlying(ctxt) + + n.tparams = n.orig.tparams + n.underlying = underlying + n.fromRHS = n.orig.fromRHS // for cycle detection + n.methods = newLazyMethodList(n.orig.methods.Len()) + n.setState(resolved) + return n + } + + // TODO(mdempsky): Since we're passing n to the loader anyway + // (necessary because types2 expects the receiver type for methods + // on defined interface types to be the Named rather than the + // underlying Interface), maybe it should just handle calling + // SetTypeParams, SetUnderlying, and AddMethod instead? Those + // methods would need to support reentrant calls though. It would + // also make the API more future-proof towards further extensions. + if n.loader != nil { + assert(n.underlying == nil) + + tparams, underlying, methods := n.loader(n) + + n.tparams = bindTParams(tparams) + n.underlying = underlying + n.fromRHS = underlying // for cycle detection + n.methods = newMethodList(methods) + n.loader = nil + } + + n.setState(resolved) + return n +} + +// state atomically accesses the current state of the receiver. +func (n *Named) state() namedState { + return namedState(atomic.LoadUint32(&n.state_)) +} + +// setState atomically stores the given state for n. +// Must only be called while holding n.mu. +func (n *Named) setState(state namedState) { + atomic.StoreUint32(&n.state_, uint32(state)) } // newNamed is like NewNamed but with a *Checker receiver and additional orig argument. @@ -80,16 +214,16 @@ func (check *Checker) newNamed(obj *TypeName, orig *Named, underlying Type, meth func (t *Named) cleanup() { assert(t.orig.orig == t.orig) // Ensure that every defined type created in the course of type-checking has - // either non-*Named underlying, or is unresolved. + // either non-*Named underlying type, or is unexpanded. // - // This guarantees that we don't leak any types whose underlying is *Named, - // because any unresolved instances will lazily compute their underlying by - // substituting in the underlying of their origin. The origin must have - // either been imported or type-checked and expanded here, and in either case - // its underlying will be fully expanded. + // This guarantees that we don't leak any types whose underlying type is + // *Named, because any unexpanded instances will lazily compute their + // underlying type by substituting in the underlying type of their origin. + // The origin must have either been imported or type-checked and expanded + // here, and in either case its underlying type will be fully expanded. switch t.underlying.(type) { case nil: - if t.resolver == nil { + if t.TypeArgs().Len() == 0 { panic("nil underlying") } case *Named: @@ -136,12 +270,13 @@ func (t *Named) NumMethods() int { return t.resolve(nil).methods.Len() } func (t *Named) Method(i int) *Func { t.resolve(nil) return t.methods.At(i, func() *Func { - return t.instantiateMethod(i) + return t.expandMethod(i) }) } -// instantiateMethod instantiates the i'th method for an instantiated receiver. -func (t *Named) instantiateMethod(i int) *Func { +// expandMethod substitutes type arguments in the i'th method for an +// instantiated receiver. +func (t *Named) expandMethod(i int) *Func { assert(t.TypeArgs().Len() > 0) // t must be an instance // t.orig.methods is not lazy. origm is the method instantiated with its @@ -277,7 +412,7 @@ func (n0 *Named) under() Type { check := n0.check n := n0 - seen := make(map[*Named]int) // types that need their underlying resolved + seen := make(map[*Named]int) // types that need their underlying type resolved var path []Object // objects encountered, for cycle reporting loop: @@ -354,66 +489,64 @@ func (check *Checker) bestContext(ctxt *Context) *Context { return NewContext() } -// expandNamed ensures that the underlying type of n is instantiated. -// The underlying type will be Typ[Invalid] if there was an error. -func expandNamed(ctxt *Context, n *Named, instPos token.Pos) (tparams *TypeParamList, underlying Type, methods *methodList) { +// expandUnderlying substitutes type arguments in the underlying type n.orig, +// returning the result. Returns Typ[Invalid] if there was an error. +func (n *Named) expandUnderlying(ctxt *Context) Type { check := n.check if check != nil && trace { - check.trace(instPos, "-- expandNamed %s", n) + check.trace(n.obj.pos, "-- Named.expandUnderlying %s", n) check.indent++ defer func() { check.indent-- - check.trace(instPos, "=> %s (tparams = %s, under = %s)", n, tparams.list(), underlying) + check.trace(n.obj.pos, "=> %s (tparams = %s, under = %s)", n, n.tparams.list(), n.underlying) }() } - n.orig.resolve(ctxt) assert(n.orig.underlying != nil) if _, unexpanded := n.orig.underlying.(*Named); unexpanded { - // We should only get an unexpanded underlying here during type checking + // We should only get a Named underlying type here during type checking // (for example, in recursive type declarations). assert(check != nil) } - // Mismatching arg and tparam length may be checked elsewhere. - if n.orig.tparams.Len() == n.targs.Len() { - // We must always have a context, to avoid infinite recursion. - ctxt = check.bestContext(ctxt) - h := ctxt.instanceHash(n.orig, n.targs.list()) - // ensure that an instance is recorded for h to avoid infinite recursion. - ctxt.update(h, n.orig, n.TypeArgs().list(), n) - - smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) - underlying = n.check.subst(instPos, n.orig.underlying, smap, ctxt) - // If the underlying of n is an interface, we need to set the receiver of - // its methods accurately -- we set the receiver of interface methods on - // the RHS of a type declaration to the defined type. - if iface, _ := underlying.(*Interface); iface != nil { - if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { - // If the underlying doesn't actually use type parameters, it's possible - // that it wasn't substituted. In this case we need to create a new - // *Interface before modifying receivers. - if iface == n.orig.underlying { - old := iface - iface = check.newInterface() - iface.embeddeds = old.embeddeds - iface.complete = old.complete - iface.implicit = old.implicit // should be false but be conservative - underlying = iface - } - iface.methods = methods + if n.orig.tparams.Len() != n.targs.Len() { + // Mismatching arg and tparam length may be checked elsewhere. + return Typ[Invalid] + } + + // We must always have a context, to avoid infinite recursion. + ctxt = check.bestContext(ctxt) + h := ctxt.instanceHash(n.orig, n.targs.list()) + // ensure that an instance is recorded for h to avoid infinite recursion. + ctxt.update(h, n.orig, n.TypeArgs().list(), n) + + smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) + underlying := n.check.subst(n.obj.pos, n.orig.underlying, smap, ctxt) + // If the underlying type of n is an interface, we need to set the receiver + // of its methods accurately -- we set the receiver of interface methods on + // the RHS of a type declaration to the defined type. + if iface, _ := underlying.(*Interface); iface != nil { + if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { + // If the underlying type doesn't actually use type parameters, it's + // possible that it wasn't substituted. In this case we need to create + // a new *Interface before modifying receivers. + if iface == n.orig.underlying { + old := iface + iface = check.newInterface() + iface.embeddeds = old.embeddeds + iface.complete = old.complete + iface.implicit = old.implicit // should be false but be conservative + underlying = iface } + iface.methods = methods } - } else { - underlying = Typ[Invalid] } - - return n.orig.tparams, underlying, newLazyMethodList(n.orig.methods.Len()) + return underlying } -// safeUnderlying returns the underlying of typ without expanding instances, to -// avoid infinite recursion. +// safeUnderlying returns the underlying type of typ without expanding +// instances, to avoid infinite recursion. // // TODO(rfindley): eliminate this function or give it a better name. func safeUnderlying(typ Type) Type { diff --git a/src/go/types/named_test.go b/src/go/types/named_test.go new file mode 100644 index 0000000000..74cdb48889 --- /dev/null +++ b/src/go/types/named_test.go @@ -0,0 +1,88 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types_test + +import ( + "testing" + + . "go/types" +) + +func BenchmarkNamed(b *testing.B) { + const src = ` +package p + +type T struct { + P int +} + +func (T) M(int) {} +func (T) N() (i int) { return } + +type G[P any] struct { + F P +} + +func (G[P]) M(P) {} +func (G[P]) N() (p P) { return } + +type Inst = G[int] + ` + pkg, err := pkgForMode("p", src, nil, 0) + if err != nil { + b.Fatal(err) + } + + var ( + T = pkg.Scope().Lookup("T").Type() + G = pkg.Scope().Lookup("G").Type() + SrcInst = pkg.Scope().Lookup("Inst").Type() + UserInst = mustInstantiate(b, G, Typ[Int]) + ) + + tests := []struct { + name string + typ Type + }{ + {"nongeneric", T}, + {"generic", G}, + {"src instance", SrcInst}, + {"user instance", UserInst}, + } + + b.Run("Underlying", func(b *testing.B) { + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + // Access underlying once, to trigger any lazy calculation. + _ = test.typ.Underlying() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = test.typ.Underlying() + } + }) + } + }) + + b.Run("NewMethodSet", func(b *testing.B) { + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + // Access underlying once, to trigger any lazy calculation. + _ = NewMethodSet(test.typ) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = NewMethodSet(test.typ) + } + }) + } + }) +} + +func mustInstantiate(tb testing.TB, orig Type, targs ...Type) Type { + inst, err := Instantiate(nil, orig, targs, true) + if err != nil { + tb.Fatal(err) + } + return inst +} diff --git a/src/go/types/object.go b/src/go/types/object.go index ae138a5879..f203b0113d 100644 --- a/src/go/types/object.go +++ b/src/go/types/object.go @@ -233,19 +233,7 @@ func NewTypeName(pos token.Pos, pkg *Package, name string, typ Type) *TypeName { // lazily calls resolve to finish constructing the Named object. func _NewTypeNameLazy(pos token.Pos, pkg *Package, name string, load func(named *Named) (tparams []*TypeParam, underlying Type, methods []*Func)) *TypeName { obj := NewTypeName(pos, pkg, name, nil) - - resolve := func(_ *Context, t *Named) (*TypeParamList, Type, *methodList) { - tparams, underlying, methods := load(t) - - switch underlying.(type) { - case nil, *Named: - panic(fmt.Sprintf("invalid underlying type %T", t.underlying)) - } - - return bindTParams(tparams), underlying, newMethodList(methods) - } - - NewNamed(obj, nil, nil).resolver = resolve + NewNamed(obj, nil, nil).loader = load return obj } diff --git a/src/go/types/typexpr.go b/src/go/types/typexpr.go index 7afc66a925..c7161e00a5 100644 --- a/src/go/types/typexpr.go +++ b/src/go/types/typexpr.go @@ -385,14 +385,13 @@ func (check *Checker) typInternal(e0 ast.Expr, def *Named) (T Type) { } func (check *Checker) instantiatedType(ix *typeparams.IndexExpr, def *Named) (res Type) { - pos := ix.X.Pos() if trace { - check.trace(pos, "-- instantiating type %s with %s", ix.X, ix.Indices) + check.trace(ix.Pos(), "-- instantiating type %s with %s", ix.X, ix.Indices) check.indent++ defer func() { check.indent-- // Don't format the underlying here. It will always be nil. - check.trace(pos, "=> %s", res) + check.trace(ix.Pos(), "=> %s", res) }() } @@ -417,57 +416,20 @@ func (check *Checker) instantiatedType(ix *typeparams.IndexExpr, def *Named) (re return Typ[Invalid] } - // enableTypeTypeInference controls whether to infer missing type arguments - // using constraint type inference. See issue #51527. - const enableTypeTypeInference = false - // create the instance ctxt := check.bestContext(nil) - h := ctxt.instanceHash(orig, targs) - // targs may be incomplete, and require inference. In any case we should de-duplicate. - inst, _ := ctxt.lookup(h, orig, targs).(*Named) - // If inst is non-nil, we can't just return here. Inst may have been - // constructed via recursive substitution, in which case we wouldn't do the - // validation below. Ensure that the validation (and resulting errors) runs - // for each instantiated type in the source. - if inst == nil { - // x may be a selector for an imported type; use its start pos rather than x.Pos(). - tname := NewTypeName(ix.Pos(), orig.obj.pkg, orig.obj.name, nil) - inst = check.newNamed(tname, orig, nil, nil) // underlying, methods and tparams are set when named is resolved - inst.targs = newTypeList(targs) - inst = ctxt.update(h, orig, targs, inst).(*Named) - } + inst := check.instance(ix.Pos(), orig, targs, ctxt).(*Named) def.setUnderlying(inst) - inst.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - tparams := n.orig.TypeParams().list() - - targs := n.targs.list() - if enableTypeTypeInference && len(targs) < len(tparams) { - // If inference fails, len(inferred) will be 0, and inst.underlying will - // be set to Typ[Invalid] in expandNamed. - inferred := check.infer(ix.Orig, tparams, targs, nil, nil) - if len(inferred) > len(targs) { - n.targs = newTypeList(inferred) - } - } - - return expandNamed(ctxt, n, pos) - } - // orig.tparams may not be set up, so we need to do expansion later. check.later(func() { // This is an instance from the source, not from recursive substitution, // and so it must be resolved during type-checking so that we can report // errors. - inst.resolve(ctxt) - // Since check is non-nil, we can still mutate inst. Unpinning the resolver - // frees some memory. - inst.resolver = nil check.recordInstance(ix.Orig, inst.TypeArgs().list(), inst) - if check.validateTArgLen(pos, inst.tparams.Len(), inst.targs.Len()) { - if i, err := check.verify(pos, inst.tparams.list(), inst.targs.list()); err != nil { + if check.validateTArgLen(ix.Pos(), inst.TypeParams().Len(), inst.targs.Len()) { + if i, err := check.verify(ix.Pos(), inst.TypeParams().list(), inst.targs.list()); err != nil { // best position for error reporting pos := ix.Pos() if i < len(ix.Indices) { @@ -475,10 +437,13 @@ func (check *Checker) instantiatedType(ix *typeparams.IndexExpr, def *Named) (re } check.softErrorf(atPos(pos), _InvalidTypeArg, err.Error()) } else { - check.mono.recordInstance(check.pkg, pos, inst.tparams.list(), inst.targs.list(), ix.Indices) + check.mono.recordInstance(check.pkg, ix.Pos(), inst.TypeParams().list(), inst.targs.list(), ix.Indices) } } + // TODO(rfindley): remove this call: we don't need to call validType here, + // as cycles can only occur for types used inside a Named type declaration, + // and so it suffices to call validType from declared types. check.validType(inst) }).describef(ix, "resolve instance %s", inst) -- 2.50.0