]> Cypherpunks repositories - gostls13.git/commitdiff
unique: add unique package and implement Make/Handle
authorMichael Anthony Knyszek <mknyszek@google.com>
Thu, 4 Apr 2024 04:51:24 +0000 (04:51 +0000)
committerMichael Knyszek <mknyszek@google.com>
Mon, 22 Apr 2024 18:14:07 +0000 (18:14 +0000)
This change adds the unique package for canonicalizing values, as
described by the proposal in #62483.

Fixes #62483.

Change-Id: I1dc3d34ec12351cb4dc3838a8ea29a5368d59e99
Reviewed-on: https://go-review.googlesource.com/c/go/+/574355
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Ingo Oeser <nightlyone@googlemail.com>
Reviewed-by: David Chase <drchase@google.com>
12 files changed:
api/next/62483.txt [new file with mode: 0644]
doc/next/6-stdlib/2-unique.md [new file with mode: 0644]
doc/next/6-stdlib/99-minor/unique/62483.md [new file with mode: 0644]
src/go/build/deps_test.go
src/go/doc/comment/std.go
src/runtime/mgc.go
src/unique/clone.go [new file with mode: 0644]
src/unique/clone_test.go [new file with mode: 0644]
src/unique/doc.go [new file with mode: 0644]
src/unique/handle.go [new file with mode: 0644]
src/unique/handle_bench_test.go [new file with mode: 0644]
src/unique/handle_test.go [new file with mode: 0644]

diff --git a/api/next/62483.txt b/api/next/62483.txt
new file mode 100644 (file)
index 0000000..11b8ff6
--- /dev/null
@@ -0,0 +1,3 @@
+pkg unique, func Make[$0 comparable]($0) Handle[$0] #62483
+pkg unique, method (Handle[$0]) Value() $0 #62483
+pkg unique, type Handle[$0 comparable] struct #62483
diff --git a/doc/next/6-stdlib/2-unique.md b/doc/next/6-stdlib/2-unique.md
new file mode 100644 (file)
index 0000000..b2c3bdf
--- /dev/null
@@ -0,0 +1,13 @@
+### New unique package
+
+The new [unique](/pkg/unique) package provides facilites for
+canonicalizing values (like "interning" or "hash-consing").
+
+Any value of comparable type may be canonicalized with the new
+`Make[T]` function, which produces a reference to a canonical copy of
+the value in the form of a `Handle[T]`.
+Two `Handle[T]` are equal if and only if the values used to produce the
+handles are equal, allowing programs to deduplicate values and reduce
+their memory footprint.
+Comparing two `Handle[T]` values is efficient, reducing down to a simple
+pointer comparison.
diff --git a/doc/next/6-stdlib/99-minor/unique/62483.md b/doc/next/6-stdlib/99-minor/unique/62483.md
new file mode 100644 (file)
index 0000000..d281ab2
--- /dev/null
@@ -0,0 +1 @@
+<!-- This is a new package; covered in 6-stdlib/2-unique.md. -->
index 595466987476348266b8d6174328fda54eabc273..11b6722e220427e5f3cd4971274e6fdb717a071a 100644 (file)
@@ -164,6 +164,9 @@ var depsRules = `
        bufio, path, strconv
        < STR;
 
+       RUNTIME, internal/concurrent
+       < unique;
+
        # OS is basic OS access, including helpers (path/filepath, os/exec, etc).
        # OS includes string routines, but those must be layered above package os.
        # OS does not include reflection.
index fd8c8ce3c230f5b4a0c53b12038b1db13ce0a5f9..e19792c825ba3093feb30463c19dc22d15fe860e 100644 (file)
@@ -43,5 +43,6 @@ var stdPkgs = []string{
        "testing",
        "time",
        "unicode",
+       "unique",
        "unsafe",
 }
index 3d3ecb0f883044723e14dd58210889ed939d8cb6..83afd55c47cd9d4f6f19ddca885abef00e80e509 100644 (file)
@@ -1694,7 +1694,8 @@ func gcResetMarkState() {
 // Hooks for other packages
 
 var poolcleanup func()
-var boringCaches []unsafe.Pointer // for crypto/internal/boring
+var boringCaches []unsafe.Pointer  // for crypto/internal/boring
+var uniqueMapCleanup chan struct{} // for unique
 
 //go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
 func sync_runtime_registerPoolCleanup(f func()) {
@@ -1706,6 +1707,18 @@ func boring_registerCache(p unsafe.Pointer) {
        boringCaches = append(boringCaches, p)
 }
 
+//go:linkname unique_runtime_registerUniqueMapCleanup unique.runtime_registerUniqueMapCleanup
+func unique_runtime_registerUniqueMapCleanup(f func()) {
+       // Start the goroutine in the runtime so it's counted as a system goroutine.
+       uniqueMapCleanup = make(chan struct{}, 1)
+       go func(cleanup func()) {
+               for {
+                       <-uniqueMapCleanup
+                       cleanup()
+               }
+       }(f)
+}
+
 func clearpools() {
        // clear sync.Pools
        if poolcleanup != nil {
@@ -1717,6 +1730,14 @@ func clearpools() {
                atomicstorep(p, nil)
        }
 
+       // clear unique maps
+       if uniqueMapCleanup != nil {
+               select {
+               case uniqueMapCleanup <- struct{}{}:
+               default:
+               }
+       }
+
        // Clear central sudog cache.
        // Leave per-P caches alone, they have strictly bounded size.
        // Disconnect cached list before dropping it on the floor,
diff --git a/src/unique/clone.go b/src/unique/clone.go
new file mode 100644 (file)
index 0000000..b30d44e
--- /dev/null
@@ -0,0 +1,100 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+       "internal/abi"
+       "unsafe"
+)
+
+// clone makes a copy of value, and may update string values found in value
+// with a cloned version of those strings. The purpose of explicitly cloning
+// strings is to avoid accidentally giving a large string a long lifetime.
+//
+// Note that this will clone strings in structs and arrays found in value,
+// and will clone value if it itself is a string. It will not, however, clone
+// strings if value is of interface or slice type (that is, found via an
+// indirection).
+func clone[T comparable](value T, seq *cloneSeq) T {
+       for _, offset := range seq.stringOffsets {
+               ps := (*string)(unsafe.Pointer(uintptr(unsafe.Pointer(&value)) + offset))
+               *ps = cloneString(*ps)
+       }
+       return value
+}
+
+// singleStringClone describes how to clone a single string.
+var singleStringClone = cloneSeq{stringOffsets: []uintptr{0}}
+
+// cloneSeq describes how to clone a value of a particular type.
+type cloneSeq struct {
+       stringOffsets []uintptr
+}
+
+// makeCloneSeq creates a cloneSeq for a type.
+func makeCloneSeq(typ *abi.Type) cloneSeq {
+       if typ == nil {
+               return cloneSeq{}
+       }
+       if typ.Kind() == abi.String {
+               return singleStringClone
+       }
+       var seq cloneSeq
+       switch typ.Kind() {
+       case abi.Struct:
+               buildStructCloneSeq(typ, &seq, 0)
+       case abi.Array:
+               buildArrayCloneSeq(typ, &seq, 0)
+       }
+       return seq
+}
+
+// buildStructCloneSeq populates a cloneSeq for an abi.Type that has Kind abi.Struct.
+func buildStructCloneSeq(typ *abi.Type, seq *cloneSeq, baseOffset uintptr) {
+       styp := typ.StructType()
+       for i := range styp.Fields {
+               f := &styp.Fields[i]
+               switch f.Typ.Kind() {
+               case abi.String:
+                       seq.stringOffsets = append(seq.stringOffsets, baseOffset+f.Offset)
+               case abi.Struct:
+                       buildStructCloneSeq(f.Typ, seq, baseOffset+f.Offset)
+               case abi.Array:
+                       buildArrayCloneSeq(f.Typ, seq, baseOffset+f.Offset)
+               }
+       }
+}
+
+// buildArrayCloneSeq populates a cloneSeq for an abi.Type that has Kind abi.Array.
+func buildArrayCloneSeq(typ *abi.Type, seq *cloneSeq, baseOffset uintptr) {
+       atyp := typ.ArrayType()
+       etyp := atyp.Elem
+       offset := baseOffset
+       for range atyp.Len {
+               switch etyp.Kind() {
+               case abi.String:
+                       seq.stringOffsets = append(seq.stringOffsets, offset)
+               case abi.Struct:
+                       buildStructCloneSeq(etyp, seq, offset)
+               case abi.Array:
+                       buildArrayCloneSeq(etyp, seq, offset)
+               }
+               offset += etyp.Size()
+               align := uintptr(etyp.FieldAlign())
+               offset = (offset + align - 1) &^ (align - 1)
+       }
+}
+
+// cloneString is a copy of strings.Clone, because we can't depend on the strings
+// package here. Several packages that might make use of unique, like net, explicitly
+// forbid depending on unicode, which strings depends on.
+func cloneString(s string) string {
+       if len(s) == 0 {
+               return ""
+       }
+       b := make([]byte, len(s))
+       copy(b, s)
+       return unsafe.String(&b[0], len(b))
+}
diff --git a/src/unique/clone_test.go b/src/unique/clone_test.go
new file mode 100644 (file)
index 0000000..69a9a54
--- /dev/null
@@ -0,0 +1,37 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+       "internal/abi"
+       "internal/goarch"
+       "reflect"
+       "testing"
+)
+
+func TestMakeCloneSeq(t *testing.T) {
+       testCloneSeq[testString](t, cSeq(0))
+       testCloneSeq[testIntArray](t, cSeq())
+       testCloneSeq[testEface](t, cSeq())
+       testCloneSeq[testStringArray](t, cSeq(0, 2*goarch.PtrSize, 4*goarch.PtrSize))
+       testCloneSeq[testStringStruct](t, cSeq(0))
+       testCloneSeq[testStringStructArrayStruct](t, cSeq(0, 2*goarch.PtrSize))
+       testCloneSeq[testStruct](t, cSeq(8))
+}
+
+func cSeq(stringOffsets ...uintptr) cloneSeq {
+       return cloneSeq{stringOffsets: stringOffsets}
+}
+
+func testCloneSeq[T any](t *testing.T, want cloneSeq) {
+       typName := reflect.TypeFor[T]().Name()
+       typ := abi.TypeOf(*new(T))
+       t.Run(typName, func(t *testing.T) {
+               got := makeCloneSeq(typ)
+               if !reflect.DeepEqual(got, want) {
+                       t.Errorf("unexpected cloneSeq for type %s: got %#v, want %#v", typName, got, want)
+               }
+       })
+}
diff --git a/src/unique/doc.go b/src/unique/doc.go
new file mode 100644 (file)
index 0000000..0133789
--- /dev/null
@@ -0,0 +1,9 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+The unique package provides facilities for canonicalizing ("interning")
+comparable values.
+*/
+package unique
diff --git a/src/unique/handle.go b/src/unique/handle.go
new file mode 100644 (file)
index 0000000..d98f802
--- /dev/null
@@ -0,0 +1,174 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+       "internal/abi"
+       "internal/concurrent"
+       "internal/weak"
+       "runtime"
+       "sync"
+       _ "unsafe"
+)
+
+// Handle is a globally unique identity for some value of type T.
+//
+// Two handles compare equal exactly if the two values used to create the handles
+// would have also compared equal. The comparison of two handles is trivial and
+// typically much more efficient than comparing the values used to create them.
+type Handle[T comparable] struct {
+       value *T
+}
+
+// Value returns a shallow copy of the T value that produced the Handle.
+func (h Handle[T]) Value() T {
+       return *h.value
+}
+
+// Make returns a globally unique handle for a value of type T. Handles
+// are equal if and only if the values used to produce them are equal.
+func Make[T comparable](value T) Handle[T] {
+       // Find the map for type T.
+       typ := abi.TypeOf(value)
+       ma, ok := uniqueMaps.Load(typ)
+       if !ok {
+               // This is a good time to initialize cleanup, since we must go through
+               // this path on the first use of Make, and it's not on the hot path.
+               setupMake.Do(registerCleanup)
+               ma = addUniqueMap[T](typ)
+       }
+       m := ma.(*uniqueMap[T])
+
+       // Keep around any values we allocate for insertion. There
+       // are a few different ways we can race with other threads
+       // and create values that we might discard. By keeping
+       // the first one we make around, we can avoid generating
+       // more than one per racing thread.
+       var (
+               toInsert     *T // Keep this around to keep it alive.
+               toInsertWeak weak.Pointer[T]
+       )
+       newValue := func() weak.Pointer[T] {
+               if toInsert == nil {
+                       toInsert = new(T)
+                       *toInsert = clone(value, &m.cloneSeq)
+                       toInsertWeak = weak.Make(toInsert)
+               }
+               return toInsertWeak
+       }
+       var ptr *T
+       for {
+               // Check the map.
+               wp, ok := m.Load(value)
+               if !ok {
+                       // Try to insert a new value into the map.
+                       wp, _ = m.LoadOrStore(value, newValue())
+               }
+               // Now that we're sure there's a value in the map, let's
+               // try to get the pointer we need out of it.
+               ptr = wp.Strong()
+               if ptr != nil {
+                       break
+               }
+               // The weak pointer is nil, so the old value is truly dead.
+               // Try to remove it and start over.
+               m.CompareAndDelete(value, wp)
+       }
+       runtime.KeepAlive(toInsert)
+       return Handle[T]{ptr}
+}
+
+var (
+       // uniqueMaps is an index of type-specific concurrent maps used for unique.Make.
+       //
+       // The two-level map might seem odd at first since the HashTrieMap could have "any"
+       // as its key type, but the issue is escape analysis. We do not want to force lookups
+       // to escape the argument, and using a type-specific map allows us to avoid that where
+       // possible (for example, for strings and plain-ol'-data structs). We also get the
+       // benefit of not cramming every different type into a single map, but that's certainly
+       // not enough to outweigh the cost of two map lookups. What is worth it though, is saving
+       // on those allocations.
+       uniqueMaps = concurrent.NewHashTrieMap[*abi.Type, any]() // any is always a *uniqueMap[T].
+
+       // cleanupFuncs are functions that clean up dead weak pointers in type-specific
+       // maps in uniqueMaps. We express cleanup this way because there's no way to iterate
+       // over the sync.Map and call functions on the type-specific data structures otherwise.
+       // These cleanup funcs each close over one of these type-specific maps.
+       //
+       // cleanupMu protects cleanupNotify and is held across the entire cleanup. Used for testing.
+       // cleanupNotify is a test-only mechanism that allow tests to wait for the cleanup to run.
+       cleanupMu      sync.Mutex
+       cleanupFuncsMu sync.Mutex
+       cleanupFuncs   []func()
+       cleanupNotify  []func() // One-time notifcations when cleanups finish.
+)
+
+type uniqueMap[T comparable] struct {
+       *concurrent.HashTrieMap[T, weak.Pointer[T]]
+       cloneSeq
+}
+
+func addUniqueMap[T comparable](typ *abi.Type) *uniqueMap[T] {
+       // Create a map for T and try to register it. We could
+       // race with someone else, but that's fine; it's one
+       // small, stray allocation. The number of allocations
+       // this can create is bounded by a small constant.
+       m := &uniqueMap[T]{
+               HashTrieMap: concurrent.NewHashTrieMap[T, weak.Pointer[T]](),
+               cloneSeq:    makeCloneSeq(typ),
+       }
+       a, loaded := uniqueMaps.LoadOrStore(typ, m)
+       if !loaded {
+               // Add a cleanup function for the new map.
+               cleanupFuncsMu.Lock()
+               cleanupFuncs = append(cleanupFuncs, func() {
+                       // Delete all the entries whose weak references are nil and clean up
+                       // deleted entries.
+                       m.Enumerate(func(key T, wp weak.Pointer[T]) bool {
+                               if wp.Strong() == nil {
+                                       m.CompareAndDelete(key, wp)
+                               }
+                               return true
+                       })
+               })
+               cleanupFuncsMu.Unlock()
+       }
+       return a.(*uniqueMap[T])
+}
+
+// setupMake is used to perform initial setup for unique.Make.
+var setupMake sync.Once
+
+// startBackgroundCleanup sets up a background goroutine to occasionally call cleanupFuncs.
+func registerCleanup() {
+       runtime_registerUniqueMapCleanup(func() {
+               // Lock for cleanup.
+               cleanupMu.Lock()
+
+               // Grab funcs to run.
+               cleanupFuncsMu.Lock()
+               cf := cleanupFuncs
+               cleanupFuncsMu.Unlock()
+
+               // Run cleanup.
+               for _, f := range cf {
+                       f()
+               }
+
+               // Run cleanup notifications.
+               for _, f := range cleanupNotify {
+                       f()
+               }
+               cleanupNotify = nil
+
+               // Finished.
+               cleanupMu.Unlock()
+       })
+}
+
+// Implemented in runtime.
+
+//go:linkname runtime_registerUniqueMapCleanup
+func runtime_registerUniqueMapCleanup(cleanup func())
diff --git a/src/unique/handle_bench_test.go b/src/unique/handle_bench_test.go
new file mode 100644 (file)
index 0000000..51f94c3
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+       "fmt"
+       "runtime"
+       "testing"
+)
+
+func BenchmarkMake(b *testing.B) {
+       benchmarkMake(b, []string{"foo"})
+}
+
+func BenchmarkMakeMany(b *testing.B) {
+       benchmarkMake(b, testData[:])
+}
+
+func BenchmarkMakeManyMany(b *testing.B) {
+       benchmarkMake(b, testDataLarge[:])
+}
+
+func benchmarkMake(b *testing.B, testData []string) {
+       handles := make([]Handle[string], 0, len(testData))
+       for i := range testData {
+               handles = append(handles, Make(testData[i]))
+       }
+
+       b.ReportAllocs()
+       b.ResetTimer()
+
+       b.RunParallel(func(pb *testing.PB) {
+               i := 0
+               for pb.Next() {
+                       _ = Make(testData[i])
+                       i++
+                       if i >= len(testData) {
+                               i = 0
+                       }
+               }
+       })
+
+       b.StopTimer()
+
+       runtime.GC()
+       runtime.GC()
+}
+
+var (
+       testData      [128]string
+       testDataLarge [128 << 10]string
+)
+
+func init() {
+       for i := range testData {
+               testData[i] = fmt.Sprintf("%b", i)
+       }
+       for i := range testDataLarge {
+               testDataLarge[i] = fmt.Sprintf("%b", i)
+       }
+}
diff --git a/src/unique/handle_test.go b/src/unique/handle_test.go
new file mode 100644 (file)
index 0000000..dffe10a
--- /dev/null
@@ -0,0 +1,111 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+       "fmt"
+       "internal/abi"
+       "reflect"
+       "runtime"
+       "testing"
+)
+
+// Set up special types. Because the internal maps are sharded by type,
+// this will ensure that we're not overlapping with other tests.
+type testString string
+type testIntArray [4]int
+type testEface any
+type testStringArray [3]string
+type testStringStruct struct {
+       a string
+}
+type testStringStructArrayStruct struct {
+       s [2]testStringStruct
+}
+type testStruct struct {
+       z float64
+       b string
+}
+
+func TestHandle(t *testing.T) {
+       testHandle[testString](t, "foo")
+       testHandle[testString](t, "bar")
+       testHandle[testString](t, "")
+       testHandle[testIntArray](t, [4]int{7, 77, 777, 7777})
+       testHandle[testEface](t, nil)
+       testHandle[testStringArray](t, [3]string{"a", "b", "c"})
+       testHandle[testStringStruct](t, testStringStruct{"x"})
+       testHandle[testStringStructArrayStruct](t, testStringStructArrayStruct{
+               s: [2]testStringStruct{testStringStruct{"y"}, testStringStruct{"z"}},
+       })
+       testHandle[testStruct](t, testStruct{0.5, "184"})
+}
+
+func testHandle[T comparable](t *testing.T, value T) {
+       name := reflect.TypeFor[T]().Name()
+       t.Run(fmt.Sprintf("%s/%#v", name, value), func(t *testing.T) {
+               t.Parallel()
+
+               v0 := Make(value)
+               v1 := Make(value)
+
+               if v0.Value() != v1.Value() {
+                       t.Error("v0.Value != v1.Value")
+               }
+               if v0.Value() != value {
+                       t.Errorf("v0.Value not %#v", value)
+               }
+               if v0 != v1 {
+                       t.Error("v0 != v1")
+               }
+
+               drainMaps(t)
+               checkMapsFor(t, value)
+       })
+}
+
+// drainMaps ensures that the internal maps are drained.
+func drainMaps(t *testing.T) {
+       t.Helper()
+
+       wait := make(chan struct{}, 1)
+
+       // Set up a one-time notification for the next time the cleanup runs.
+       // Note: this will only run if there's no other active cleanup, so
+       // we can be sure that the next time cleanup runs, it'll see the new
+       // notification.
+       cleanupMu.Lock()
+       cleanupNotify = append(cleanupNotify, func() {
+               select {
+               case wait <- struct{}{}:
+               default:
+               }
+       })
+
+       runtime.GC()
+       cleanupMu.Unlock()
+
+       // Wait until cleanup runs.
+       <-wait
+}
+
+func checkMapsFor[T comparable](t *testing.T, value T) {
+       // Manually load the value out of the map.
+       typ := abi.TypeOf(value)
+       a, ok := uniqueMaps.Load(typ)
+       if !ok {
+               return
+       }
+       m := a.(*uniqueMap[T])
+       wp, ok := m.Load(value)
+       if !ok {
+               return
+       }
+       if wp.Strong() != nil {
+               t.Errorf("value %v still referenced a handle (or tiny block?) ", value)
+               return
+       }
+       t.Errorf("failed to drain internal maps of %v", value)
+}