From: Michael Anthony Knyszek
Date: Tue, 18 Feb 2025 03:19:06 +0000 (+0000)
Subject: unique: use a bespoke canonicalization map and runtime.AddCleanup
X-Git-Tag: go1.25rc1~354
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0028532118eed355d0ac6337c63b01219cdc4c17;p=gostls13.git

unique: use a bespoke canonicalization map and runtime.AddCleanup

This change moves the unique package away from using a general-purpose
concurrent map and toward a bespoke concurrent canonicalization map.
The map holds all its keys weakly, though keys may be looked up by
value. The result of a lookup is the strong pointer for the canonical
value. Entries in the map are automatically cleaned up once the
canonical reference no longer exists.

Why do this? There's a problem with the current implementation when it
comes to chains of unique.Handle: because the unique map has a
unique.Handle stored in its keys, each nested handle can only be
cleaned up one GC cycle at a time. It takes N GC cycles, at minimum,
to clean up a nested chain of N handles. This implementation, where
the *only* value in the set is weakly held, does not have this
problem: the entire chain is dropped at once.

The canon map implementation is a stripped-down version of
HashTrieMap. It also has lower memory overhead by virtue of the fact
that keys are stored only weakly: whereas the previous map held both a
T and a weak.Pointer[T] per entry, this one holds *only* a
weak.Pointer[T].

The canonicalization map is a better abstraction overall and
dramatically simplifies the unique.Make code.

While we're here, delete the background cleanup goroutine and switch
to runtime.AddCleanup. This is a step toward fixing #71772. We still
need some kind of back-pressure mechanism, which will be implemented
in a follow-up CL.

For #71772.
Fixes #71846.

Change-Id: I5b2ee04ebfc7f6dd24c2c4a959dd0f6a8af24ca4
Reviewed-on: https://go-review.googlesource.com/c/go/+/650256
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
Auto-Submit: Michael Knyszek
---
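To make the handle-chain scenario above concrete, here is a minimal sketch that uses only the public unique API; the node type, its fields, and the chain length are illustrative and not something this CL adds. Each canonicalized value embeds a Handle to the next one, which is exactly the shape that previously needed one GC cycle per level to reclaim and that the new map can drop all at once.

	package main

	import (
		"fmt"
		"unique"
	)

	// node is an illustrative type: each canonical value holds a Handle
	// to the next one, forming the nested chain described above.
	type node struct {
		label string
		next  unique.Handle[node]
	}

	func main() {
		// Build a chain of three nested handles. With the old map, fully
		// reclaiming an unreferenced chain of N handles took at least N GC
		// cycles; with the weakly-keyed canon map it can collapse in one.
		var h unique.Handle[node]
		for i := range 3 {
			h = unique.Make(node{label: fmt.Sprint(i), next: h})
		}
		fmt.Println(h.Value().label) // prints "2"
	}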
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index 40ebdf4ad0..9add92557c 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -324,7 +324,7 @@ func isGoPointerWithoutSpan(p unsafe.Pointer) bool {
 // blockUntilEmptyFinalizerQueue blocks until either the finalizer
 // queue is emptied (and the finalizers have executed) or the timeout
 // is reached. Returns true if the finalizer queue was emptied.
-// This is used by the runtime and sync tests.
+// This is used by the runtime, sync, and unique tests.
 func blockUntilEmptyFinalizerQueue(timeout int64) bool {
 	start := nanotime()
 	for nanotime()-start < timeout {
@@ -342,6 +342,11 @@ func blockUntilEmptyFinalizerQueue(timeout int64) bool {
 	return false
 }
 
+//go:linkname unique_runtime_blockUntilEmptyFinalizerQueue unique.runtime_blockUntilEmptyFinalizerQueue
+func unique_runtime_blockUntilEmptyFinalizerQueue(timeout int64) bool {
+	return blockUntilEmptyFinalizerQueue(timeout)
+}
+
 // SetFinalizer sets the finalizer associated with obj to the provided
 // finalizer function. When the garbage collector finds an unreachable block
 // with an associated finalizer, it clears the association and runs
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index d5f3403425..cbcd60e281 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1835,8 +1835,7 @@ func gcResetMarkState() {
 // Hooks for other packages
 
 var poolcleanup func()
-var boringCaches []unsafe.Pointer  // for crypto/internal/boring
-var uniqueMapCleanup chan struct{} // for unique
+var boringCaches []unsafe.Pointer // for crypto/internal/boring
 
 // sync_runtime_registerPoolCleanup should be an internal detail,
 // but widely used packages access it using linkname.
@@ -1857,22 +1856,6 @@ func boring_registerCache(p unsafe.Pointer) {
 	boringCaches = append(boringCaches, p)
 }
 
-//go:linkname unique_runtime_registerUniqueMapCleanup unique.runtime_registerUniqueMapCleanup
-func unique_runtime_registerUniqueMapCleanup(f func()) {
-	// Create the channel on the system stack so it doesn't inherit the current G's
-	// synctest bubble (if any).
-	systemstack(func() {
-		uniqueMapCleanup = make(chan struct{}, 1)
-	})
-	// Start the goroutine in the runtime so it's counted as a system goroutine.
-	go func(cleanup func()) {
-		for {
-			<-uniqueMapCleanup
-			cleanup()
-		}
-	}(f)
-}
-
 func clearpools() {
 	// clear sync.Pools
 	if poolcleanup != nil {
@@ -1884,14 +1867,6 @@ func clearpools() {
 		atomicstorep(p, nil)
 	}
 
-	// clear unique maps
-	if uniqueMapCleanup != nil {
-		select {
-		case uniqueMapCleanup <- struct{}{}:
-		default:
-		}
-	}
-
 	// Clear central sudog cache.
 	// Leave per-P caches alone, they have strictly bounded size.
 	// Disconnect cached list before dropping it on the floor,
diff --git a/src/unique/canonmap.go b/src/unique/canonmap.go
new file mode 100644
index 0000000000..a3494eef99
--- /dev/null
+++ b/src/unique/canonmap.go
@@ -0,0 +1,385 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+	"internal/abi"
+	"internal/goarch"
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"unsafe"
+	"weak"
+)
+
+// canonMap is a map of T -> *T. The map controls the creation
+// of a canonical *T, and elements of the map are automatically
+// deleted when the canonical *T is no longer referenced.
+type canonMap[T comparable] struct {
+	root atomic.Pointer[indirect[T]]
+	hash func(unsafe.Pointer, uintptr) uintptr
+	seed uintptr
+}
+
+func newCanonMap[T comparable]() *canonMap[T] {
+	cm := new(canonMap[T])
+	cm.root.Store(newIndirectNode[T](nil))
+
+	var m map[T]struct{}
+	mapType := abi.TypeOf(m).MapType()
+	cm.hash = mapType.Hasher
+	cm.seed = uintptr(runtime_rand())
+	return cm
+}
+
+func (m *canonMap[T]) Load(key T) *T {
+	hash := m.hash(abi.NoEscape(unsafe.Pointer(&key)), m.seed)
+
+	i := m.root.Load()
+	hashShift := 8 * goarch.PtrSize
+	for hashShift != 0 {
+		hashShift -= nChildrenLog2
+
+		n := i.children[(hash>>hashShift)&nChildrenMask].Load()
+		if n == nil {
+			return nil
+		}
+		if n.isEntry {
+			v, _ := n.entry().lookup(key)
+			return v
+		}
+		i = n.indirect()
+	}
+	panic("unique.canonMap: ran out of hash bits while iterating")
+}
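The doc comment above states the map's contract; the following is a usage sketch from inside package unique (the type is unexported), with a hypothetical helper name. It shows the property the rest of the file is built around: LoadOrStore hands back the canonical *T, Load finds the same pointer by value, and the entry disappears on its own once no strong reference to that pointer remains.

	// exampleCanonMap is a hypothetical, test-style helper sketching how
	// the canon map is used. It is not part of the CL.
	func exampleCanonMap() {
		m := newCanonMap[string]()

		a := m.LoadOrStore("gopher") // creates and returns the canonical *string
		b := m.Load("gopher")        // finds the same entry by value
		println(a == b)              // true: one canonical pointer per value

		// The entry lives only as long as some strong reference to the
		// canonical pointer does; after that, the cleanup removes it and
		// Load("gopher") reports nil again.
		runtime.KeepAlive(a)
	}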
+
+func (m *canonMap[T]) LoadOrStore(key T) *T {
+	hash := m.hash(abi.NoEscape(unsafe.Pointer(&key)), m.seed)
+
+	var i *indirect[T]
+	var hashShift uint
+	var slot *atomic.Pointer[node[T]]
+	var n *node[T]
+	for {
+		// Find the key or a candidate location for insertion.
+		i = m.root.Load()
+		hashShift = 8 * goarch.PtrSize
+		haveInsertPoint := false
+		for hashShift != 0 {
+			hashShift -= nChildrenLog2
+
+			slot = &i.children[(hash>>hashShift)&nChildrenMask]
+			n = slot.Load()
+			if n == nil {
+				// We found a nil slot which is a candidate for insertion.
+				haveInsertPoint = true
+				break
+			}
+			if n.isEntry {
+				// We found an existing entry, which is as far as we can go.
+				// If it stays this way, we'll have to replace it with an
+				// indirect node.
+				if v, _ := n.entry().lookup(key); v != nil {
+					return v
+				}
+				haveInsertPoint = true
+				break
+			}
+			i = n.indirect()
+		}
+		if !haveInsertPoint {
+			panic("unique.canonMap: ran out of hash bits while iterating")
+		}
+
+		// Grab the lock and double-check what we saw.
+		i.mu.Lock()
+		n = slot.Load()
+		if (n == nil || n.isEntry) && !i.dead.Load() {
+			// What we saw is still true, so we can continue with the insert.
+			break
+		}
+		// We have to start over.
+		i.mu.Unlock()
+	}
+	// N.B. This lock is held from when we broke out of the outer loop above.
+	// We specifically break this out so that we can use defer here safely.
+	// One option is to break this out into a new function instead, but
+	// there's so much local iteration state used below that this turns out
+	// to be cleaner.
+	defer i.mu.Unlock()
+
+	var oldEntry *entry[T]
+	if n != nil {
+		oldEntry = n.entry()
+		if v, _ := oldEntry.lookup(key); v != nil {
+			// Easy case: by loading again, it turns out exactly what we wanted is here!
+			return v
+		}
+	}
+	newEntry, canon, wp := newEntryNode(key, hash)
+	// Prune dead pointers. This is to avoid O(n) lookups when we store the exact same
+	// value in the set but the cleanup hasn't run yet because it got delayed for some
+	// reason.
+	oldEntry = oldEntry.prune()
+	if oldEntry == nil {
+		// Easy case: create a new entry and store it.
+		slot.Store(&newEntry.node)
+	} else {
+		// We possibly need to expand the entry already there into one or more new nodes.
+		//
+		// Publish the node last, which will make both oldEntry and newEntry visible. We
+		// don't want readers to be able to observe that oldEntry isn't in the tree.
+		slot.Store(m.expand(oldEntry, newEntry, hash, hashShift, i))
+	}
+	runtime.AddCleanup(canon, func(_ struct{}) {
+		m.cleanup(hash, wp)
+	}, struct{}{})
+	return canon
+}
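LoadOrStore ties each entry's lifetime to its canonical pointer with runtime.AddCleanup. Below is a standalone sketch of that pattern with illustrative names (it is not code from this CL); the key point is that the cleanup closure captures only the weak pointer, never the canonical pointer itself, which is why the map's cleanup is keyed by the hash and the weak pointer.

	// exampleAddCleanup is a hypothetical sketch of the AddCleanup pattern above.
	func exampleAddCleanup() {
		canon := new(string)
		*canon = "gopher"
		wp := weak.Make(canon)

		// The cleanup runs some time after *canon becomes unreachable.
		// It captures only the weak pointer, not canon itself, so it
		// does not keep the value alive.
		runtime.AddCleanup(canon, func(_ struct{}) {
			println("canonical value collected:", wp.Value() == nil)
		}, struct{}{})

		runtime.KeepAlive(canon) // the entry stays live until the last strong reference dies
	}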
+
+// expand takes oldEntry and newEntry, whose hashes conflict from bit 64 down to hashShift,
+// and produces a subtree of indirect nodes to hold the two new entries. newHash is the
+// hash of the value in the new entry.
+func (m *canonMap[T]) expand(oldEntry, newEntry *entry[T], newHash uintptr, hashShift uint, parent *indirect[T]) *node[T] {
+	// Check for a hash collision.
+	oldHash := oldEntry.hash
+	if oldHash == newHash {
+		// Store the old entry in the new entry's overflow list, then store
+		// the new entry.
+		newEntry.overflow.Store(oldEntry)
+		return &newEntry.node
+	}
+	// We have to add an indirect node. Worse still, we may need to add more than one.
+	newIndirect := newIndirectNode(parent)
+	top := newIndirect
+	for {
+		if hashShift == 0 {
+			panic("unique.canonMap: ran out of hash bits while inserting")
+		}
+		hashShift -= nChildrenLog2 // hashShift is for the level parent is at. We need to go deeper.
+		oi := (oldHash >> hashShift) & nChildrenMask
+		ni := (newHash >> hashShift) & nChildrenMask
+		if oi != ni {
+			newIndirect.children[oi].Store(&oldEntry.node)
+			newIndirect.children[ni].Store(&newEntry.node)
+			break
+		}
+		nextIndirect := newIndirectNode(newIndirect)
+		newIndirect.children[oi].Store(&nextIndirect.node)
+		newIndirect = nextIndirect
+	}
+	return &top.node
+}
+
+// cleanup deletes the entry corresponding to wp in the canon map, if it's
+// still in the map. wp must have a Value method that returns nil by the
+// time this function is called. hash must be the hash of the value that
+// wp once pointed to (that is, the hash of *wp.Value()).
+func (m *canonMap[T]) cleanup(hash uintptr, wp weak.Pointer[T]) {
+	var i *indirect[T]
+	var hashShift uint
+	var slot *atomic.Pointer[node[T]]
+	var n *node[T]
+	for {
+		// Find wp in the map by following hash.
+		i = m.root.Load()
+		hashShift = 8 * goarch.PtrSize
+		haveEntry := false
+		for hashShift != 0 {
+			hashShift -= nChildrenLog2
+
+			slot = &i.children[(hash>>hashShift)&nChildrenMask]
+			n = slot.Load()
+			if n == nil {
+				// We found a nil slot, already deleted.
+				return
+			}
+			if n.isEntry {
+				if !n.entry().hasWeakPointer(wp) {
+					// The weak pointer was already pruned.
+					return
+				}
+				haveEntry = true
+				break
+			}
+			i = n.indirect()
+		}
+		if !haveEntry {
+			panic("unique.canonMap: ran out of hash bits while iterating")
+		}
+
+		// Grab the lock and double-check what we saw.
+		i.mu.Lock()
+		n = slot.Load()
+		if n != nil && n.isEntry {
+			// Prune the entry node without thinking too hard. If we do
+			// somebody else's work, such as someone trying to insert an
+			// entry with the same hash (probably the same value), then
+			// great, they'll back out without taking the lock.
+			newEntry := n.entry().prune()
+			if newEntry == nil {
+				slot.Store(nil)
+			} else {
+				slot.Store(&newEntry.node)
+			}
+
+			// Delete interior nodes that are empty, up the tree.
+			//
+			// We'll hand-over-hand lock our way up the tree as we do this,
+			// since we need to delete each empty node's link in its parent,
+			// which requires the parent's lock.
+			for i.parent != nil && i.empty() {
+				if hashShift == 8*goarch.PtrSize {
+					panic("unique.canonMap: ran out of hash bits while iterating")
+				}
+				hashShift += nChildrenLog2
+
+				// Delete the current node in the parent.
+				parent := i.parent
+				parent.mu.Lock()
+				i.dead.Store(true) // Could be done outside of parent's lock.
+				parent.children[(hash>>hashShift)&nChildrenMask].Store(nil)
+				i.mu.Unlock()
+				i = parent
+			}
+			i.mu.Unlock()
+			return
+		}
+		// We have to start over.
+		i.mu.Unlock()
+	}
+}
+
+// node is the header for a node. It's polymorphic and
+// is actually either an entry or an indirect.
+type node[T comparable] struct {
+	isEntry bool
+}
+
+func (n *node[T]) entry() *entry[T] {
+	if !n.isEntry {
+		panic("called entry on non-entry node")
+	}
+	return (*entry[T])(unsafe.Pointer(n))
+}
+
+func (n *node[T]) indirect() *indirect[T] {
+	if n.isEntry {
+		panic("called indirect on entry node")
+	}
+	return (*indirect[T])(unsafe.Pointer(n))
+}
+
+const (
+	// 16 children. This seems to be the sweet spot for
+	// load performance: any smaller and we lose out on
+	// 50% or more in CPU performance. Any larger and the
+	// returns are minuscule (~1% improvement for 32 children).
+	nChildrenLog2 = 4
+	nChildren     = 1 << nChildrenLog2
+	nChildrenMask = nChildren - 1
+)
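The constants above fix the trie's fan-out at 16. A small hypothetical helper (not part of this CL) makes the arithmetic concrete: each level consumes nChildrenLog2 bits of the hash, starting from the top of the word, so a 64-bit hash yields 16 levels of indices in [0, 15] before the "ran out of hash bits" panics could ever fire.

	// exampleIndices is a hypothetical helper showing how Load, LoadOrStore,
	// and cleanup all derive child indices from a hash, 4 bits at a time.
	func exampleIndices(hash uintptr) []uintptr {
		var idx []uintptr
		hashShift := 8 * goarch.PtrSize
		for hashShift != 0 {
			hashShift -= nChildrenLog2
			idx = append(idx, (hash>>hashShift)&nChildrenMask)
		}
		return idx // 16 indices on 64-bit platforms, most significant bits first
	}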
+
+// indirect is an internal node in the hash-trie.
+type indirect[T comparable] struct {
+	node[T]
+	dead     atomic.Bool
+	parent   *indirect[T]
+	mu       sync.Mutex // Protects mutation to children and any children that are entry nodes.
+	children [nChildren]atomic.Pointer[node[T]]
+}
+
+func newIndirectNode[T comparable](parent *indirect[T]) *indirect[T] {
+	return &indirect[T]{node: node[T]{isEntry: false}, parent: parent}
+}
+
+func (i *indirect[T]) empty() bool {
+	for j := range i.children {
+		if i.children[j].Load() != nil {
+			return false
+		}
+	}
+	return true
+}
+
+// entry is a leaf node in the hash-trie.
+type entry[T comparable] struct {
+	node[T]
+	overflow atomic.Pointer[entry[T]] // Overflow for hash collisions.
+	key      weak.Pointer[T]
+	hash     uintptr
+}
+
+func newEntryNode[T comparable](key T, hash uintptr) (*entry[T], *T, weak.Pointer[T]) {
+	k := new(T)
+	*k = key
+	wp := weak.Make(k)
+	return &entry[T]{
+		node: node[T]{isEntry: true},
+		key:  wp,
+		hash: hash,
+	}, k, wp
+}
+
+// lookup finds the entry in the overflow chain that has the provided key.
+//
+// Returns the key's canonical pointer and the weak pointer for that canonical pointer.
+func (e *entry[T]) lookup(key T) (*T, weak.Pointer[T]) {
+	for e != nil {
+		s := e.key.Value()
+		if s != nil && *s == key {
+			return s, e.key
+		}
+		e = e.overflow.Load()
+	}
+	return nil, weak.Pointer[T]{}
+}
+
+// hasWeakPointer returns true if the provided weak pointer can be found in the overflow chain.
+func (e *entry[T]) hasWeakPointer(wp weak.Pointer[T]) bool {
+	for e != nil {
+		if e.key == wp {
+			return true
+		}
+		e = e.overflow.Load()
+	}
+	return false
+}
+
+// prune removes all entries in the overflow chain whose keys are nil.
+//
+// The caller must hold the lock on e's parent node.
+func (e *entry[T]) prune() *entry[T] {
+	// Prune the head of the list.
+	for e != nil {
+		if e.key.Value() != nil {
+			break
+		}
+		e = e.overflow.Load()
+	}
+	if e == nil {
+		return nil
+	}
+
+	// Prune individual nodes in the list.
+	newHead := e
+	i := &e.overflow
+	e = i.Load()
+	for e != nil {
+		if e.key.Value() != nil {
+			i = &e.overflow
+		} else {
+			i.Store(e.overflow.Load())
+		}
+		e = e.overflow.Load()
+	}
+	return newHead
+}
+
+// Pull in runtime.rand so that we don't need to take a dependency
+// on math/rand/v2.
+//
+//go:linkname runtime_rand runtime.rand
+func runtime_rand() uint64
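Everything in canonmap.go leans on one property of weak pointers: once the canonical *T is unreachable and the garbage collector has reclaimed it, weak.Pointer.Value reports nil, which is the signal prune and cleanup look for. A self-contained sketch of that behavior follows, with illustrative names and the usual caveat that reclamation timing is not guaranteed.

	package main

	import (
		"fmt"
		"runtime"
		"weak"
	)

	func main() {
		canon := new(int) // stands in for the canonical *T behind a Handle
		*canon = 42
		wp := weak.Make(canon)

		fmt.Println(wp.Value() != nil) // true: a strong reference still exists
		runtime.KeepAlive(canon)

		// Once canon is unreachable, a GC may reclaim it; afterwards
		// wp.Value() returns nil and the map entry keyed by wp is pruned.
		canon = nil
		runtime.GC()
		// Usually true after reclamation; tiny-block batching can delay it,
		// which is the "(or tiny block?)" caveat in handle_test.go.
		fmt.Println(wp.Value() == nil)
	}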
diff --git a/src/unique/canonmap_test.go b/src/unique/canonmap_test.go
new file mode 100644
index 0000000000..e8f56d8e00
--- /dev/null
+++ b/src/unique/canonmap_test.go
@@ -0,0 +1,179 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unique
+
+import (
+	"internal/abi"
+	"runtime"
+	"strconv"
+	"sync"
+	"testing"
+	"unsafe"
+)
+
+func TestCanonMap(t *testing.T) {
+	testCanonMap(t, func() *canonMap[string] {
+		return newCanonMap[string]()
+	})
+}
+
+func TestCanonMapBadHash(t *testing.T) {
+	testCanonMap(t, func() *canonMap[string] {
+		return newBadCanonMap[string]()
+	})
+}
+
+func TestCanonMapTruncHash(t *testing.T) {
+	testCanonMap(t, func() *canonMap[string] {
+		// Stub out the good hash function with a different terrible one
+		// (truncated hash). Everything should still work as expected.
+		// This is useful to test independently to catch issues with
+		// near collisions, where only the last few bits of the hash differ.
+		return newTruncCanonMap[string]()
+	})
+}
+
+func testCanonMap(t *testing.T, newMap func() *canonMap[string]) {
+	t.Run("LoadEmpty", func(t *testing.T) {
+		m := newMap()
+
+		for _, s := range testData {
+			expectMissing(t, s)(m.Load(s))
+		}
+	})
+	t.Run("LoadOrStore", func(t *testing.T) {
+		t.Run("Sequential", func(t *testing.T) {
+			m := newMap()
+
+			var refs []*string
+			for _, s := range testData {
+				expectMissing(t, s)(m.Load(s))
+				refs = append(refs, expectPresent(t, s)(m.LoadOrStore(s)))
+				expectPresent(t, s)(m.Load(s))
+				expectPresent(t, s)(m.LoadOrStore(s))
+			}
+			drainCleanupQueue(t)
+			for _, s := range testData {
+				expectPresent(t, s)(m.Load(s))
+				expectPresent(t, s)(m.LoadOrStore(s))
+			}
+			runtime.KeepAlive(refs)
+			refs = nil
+			drainCleanupQueue(t)
+			for _, s := range testData {
+				expectMissing(t, s)(m.Load(s))
+				expectPresent(t, s)(m.LoadOrStore(s))
+			}
+		})
+		t.Run("ConcurrentUnsharedKeys", func(t *testing.T) {
+			makeKey := func(s string, id int) string {
+				return s + "-" + strconv.Itoa(id)
+			}
+
+			// Expand and shrink the map multiple times to try to get
+			// insertions and cleanups to overlap.
+			m := newMap()
+			gmp := runtime.GOMAXPROCS(-1)
+			for try := range 3 {
+				var wg sync.WaitGroup
+				for i := range gmp {
+					wg.Add(1)
+					go func(id int) {
+						defer wg.Done()
+
+						var refs []*string
+						for _, s := range testData {
+							key := makeKey(s, id)
+							if try == 0 {
+								expectMissing(t, key)(m.Load(key))
+							}
+							refs = append(refs, expectPresent(t, key)(m.LoadOrStore(key)))
+							expectPresent(t, key)(m.Load(key))
+							expectPresent(t, key)(m.LoadOrStore(key))
+						}
+						for i, s := range testData {
+							key := makeKey(s, id)
+							expectPresent(t, key)(m.Load(key))
+							if got, want := expectPresent(t, key)(m.LoadOrStore(key)), refs[i]; got != want {
+								t.Errorf("canonical entry %p did not match ref %p", got, want)
+							}
+						}
+						// N.B. We avoid trying to test entry cleanup here
+						// because it's going to be very flaky, especially
+						// in the bad hash cases.
+					}(i)
+				}
+				wg.Wait()
+			}
+
+			// Drain cleanups so everything is deleted.
+			drainCleanupQueue(t)
+
+			// Double-check that it's all gone.
+			for id := range gmp {
+				makeKey := func(s string) string {
+					return s + "-" + strconv.Itoa(id)
+				}
+				for _, s := range testData {
+					key := makeKey(s)
+					expectMissing(t, key)(m.Load(key))
+				}
+			}
+		})
+	})
+}
+
+func expectMissing[T comparable](t *testing.T, key T) func(got *T) {
+	t.Helper()
+	return func(got *T) {
+		t.Helper()
+
+		if got != nil {
+			t.Errorf("expected key %v to be missing from map, got %p", key, got)
+		}
+	}
+}
+
+func expectPresent[T comparable](t *testing.T, key T) func(got *T) *T {
+	t.Helper()
+	return func(got *T) *T {
+		t.Helper()
+
+		if got == nil {
+			t.Errorf("expected key %v to be present in map, got %p", key, got)
+		}
+		if got != nil && *got != key {
+			t.Errorf("key %v is present in map, but canonical version has the wrong value: got %v, want %v", key, *got, key)
+		}
+		return got
+	}
+}
+
+// newBadCanonMap creates a new canonMap for the provided key type
+// but with an intentionally bad hash function.
+func newBadCanonMap[T comparable]() *canonMap[T] {
+	// Stub out the good hash function with a terrible one.
+	// Everything should still work as expected.
+	m := newCanonMap[T]()
+	m.hash = func(_ unsafe.Pointer, _ uintptr) uintptr {
+		return 0
+	}
+	return m
+}
+
+// newTruncCanonMap creates a new canonMap for the provided key type
+// but with an intentionally truncated hash function.
+func newTruncCanonMap[T comparable]() *canonMap[T] {
+	// Stub out the good hash function with a truncated one.
+	// Everything should still work as expected.
+	m := newCanonMap[T]()
+	var mx map[string]int
+	mapType := abi.TypeOf(mx).MapType()
+	hasher := mapType.Hasher
+	m.hash = func(p unsafe.Pointer, n uintptr) uintptr {
+		return hasher(p, n) & ((uintptr(1) << 4) - 1)
+	}
+	return m
+}
diff --git a/src/unique/handle.go b/src/unique/handle.go
index 520ab70f8c..a107fcbe7a 100644
--- a/src/unique/handle.go
+++ b/src/unique/handle.go
@@ -7,10 +7,7 @@ package unique
 import (
 	"internal/abi"
 	isync "internal/sync"
-	"runtime"
-	"sync"
 	"unsafe"
-	"weak"
 )
 
 var zero uintptr
@@ -41,139 +38,39 @@ func Make[T comparable](value T) Handle[T] {
 	}
 	ma, ok := uniqueMaps.Load(typ)
 	if !ok {
-		// This is a good time to initialize cleanup, since we must go through
-		// this path on the first use of Make, and it's not on the hot path.
-		setupMake.Do(registerCleanup)
-		ma = addUniqueMap[T](typ)
+		m := &uniqueMap[T]{canonMap: newCanonMap[T](), cloneSeq: makeCloneSeq(typ)}
+		ma, _ = uniqueMaps.LoadOrStore(typ, m)
 	}
 	m := ma.(*uniqueMap[T])
-	// Keep around any values we allocate for insertion. There
-	// are a few different ways we can race with other threads
-	// and create values that we might discard. By keeping
-	// the first one we make around, we can avoid generating
-	// more than one per racing thread.
-	var (
-		toInsert     *T // Keep this around to keep it alive.
-		toInsertWeak weak.Pointer[T]
-	)
-	newValue := func() (T, weak.Pointer[T]) {
-		if toInsert == nil {
-			toInsert = new(T)
-			*toInsert = clone(value, &m.cloneSeq)
-			toInsertWeak = weak.Make(toInsert)
-		}
-		return *toInsert, toInsertWeak
+	// Find the value in the map.
+	ptr := m.Load(value)
+	if ptr == nil {
+		// Insert a new value into the map.
+		ptr = m.LoadOrStore(clone(value, &m.cloneSeq))
 	}
-	var ptr *T
-	for {
-		// Check the map.
-		wp, ok := m.Load(value)
-		if !ok {
-			// Try to insert a new value into the map.
-			k, v := newValue()
-			wp, _ = m.LoadOrStore(k, v)
-		}
-		// Now that we're sure there's a value in the map, let's
-		// try to get the pointer we need out of it.
-		ptr = wp.Value()
-		if ptr != nil {
-			break
-		}
-		// The weak pointer is nil, so the old value is truly dead.
-		// Try to remove it and start over.
-		m.CompareAndDelete(value, wp)
-	}
-	runtime.KeepAlive(toInsert)
 	return Handle[T]{ptr}
 }
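For contrast with the retry loop removed above, here is a hedged sketch of the user-visible behavior the simplified Make preserves, written against the public API from a client package (the program itself is illustrative): equal inputs yield equal handles backed by one canonical pointer, and Value returns a copy of the canonical value.

	package main

	import "unique"

	func main() {
		h1 := unique.Make("gopher")
		h2 := unique.Make("gopher")

		println(h1 == h2)               // true: equal values share one canonical pointer
		println(h1.Value() == "gopher") // true: Value returns a copy of the canonical value
	}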
 
-var (
-	// uniqueMaps is an index of type-specific concurrent maps used for unique.Make.
-	//
-	// The two-level map might seem odd at first since the HashTrieMap could have "any"
-	// as its key type, but the issue is escape analysis. We do not want to force lookups
-	// to escape the argument, and using a type-specific map allows us to avoid that where
-	// possible (for example, for strings and plain-ol'-data structs). We also get the
-	// benefit of not cramming every different type into a single map, but that's certainly
-	// not enough to outweigh the cost of two map lookups. What is worth it though, is saving
-	// on those allocations.
-	uniqueMaps isync.HashTrieMap[*abi.Type, any] // any is always a *uniqueMap[T].
-
-	// cleanupFuncs are functions that clean up dead weak pointers in type-specific
-	// maps in uniqueMaps. We express cleanup this way because there's no way to iterate
-	// over the sync.Map and call functions on the type-specific data structures otherwise.
-	// These cleanup funcs each close over one of these type-specific maps.
-	//
-	// cleanupMu protects cleanupNotify and is held across the entire cleanup. Used for testing.
-	// cleanupNotify is a test-only mechanism that allow tests to wait for the cleanup to run.
-	cleanupMu      sync.Mutex
-	cleanupFuncsMu sync.Mutex
-	cleanupFuncs   []func()
-	cleanupNotify  []func() // One-time notifications when cleanups finish.
-)
+// uniqueMaps is an index of type-specific concurrent maps used for unique.Make.
+//
+// The two-level map might seem odd at first since the HashTrieMap could have "any"
+// as its key type, but the issue is escape analysis. We do not want to force lookups
+// to escape the argument, and using a type-specific map allows us to avoid that where
+// possible (for example, for strings and plain-ol'-data structs). We also get the
+// benefit of not cramming every different type into a single map, but that's certainly
+// not enough to outweigh the cost of two map lookups. What is worth it though, is saving
+// on those allocations.
+var uniqueMaps isync.HashTrieMap[*abi.Type, any] // any is always a *uniqueMap[T].
 
 type uniqueMap[T comparable] struct {
-	isync.HashTrieMap[T, weak.Pointer[T]]
+	*canonMap[T]
 	cloneSeq
 }
 
-func addUniqueMap[T comparable](typ *abi.Type) *uniqueMap[T] {
-	// Create a map for T and try to register it. We could
-	// race with someone else, but that's fine; it's one
-	// small, stray allocation. The number of allocations
-	// this can create is bounded by a small constant.
-	m := &uniqueMap[T]{cloneSeq: makeCloneSeq(typ)}
-	a, loaded := uniqueMaps.LoadOrStore(typ, m)
-	if !loaded {
-		// Add a cleanup function for the new map.
-		cleanupFuncsMu.Lock()
-		cleanupFuncs = append(cleanupFuncs, func() {
-			// Delete all the entries whose weak references are nil and clean up
-			// deleted entries.
-			m.All()(func(key T, wp weak.Pointer[T]) bool {
-				if wp.Value() == nil {
-					m.CompareAndDelete(key, wp)
-				}
-				return true
-			})
-		})
-		cleanupFuncsMu.Unlock()
-	}
-	return a.(*uniqueMap[T])
-}
-
-// setupMake is used to perform initial setup for unique.Make.
-var setupMake sync.Once
-
-// startBackgroundCleanup sets up a background goroutine to occasionally call cleanupFuncs.
-func registerCleanup() {
-	runtime_registerUniqueMapCleanup(func() {
-		// Lock for cleanup.
-		cleanupMu.Lock()
-
-		// Grab funcs to run.
-		cleanupFuncsMu.Lock()
-		cf := cleanupFuncs
-		cleanupFuncsMu.Unlock()
-
-		// Run cleanup.
-		for _, f := range cf {
-			f()
-		}
-
-		// Run cleanup notifications.
-		for _, f := range cleanupNotify {
-			f()
-		}
-		cleanupNotify = nil
-
-		// Finished.
-		cleanupMu.Unlock()
-	})
-}
-
 // Implemented in runtime.
-
-//go:linkname runtime_registerUniqueMapCleanup
-func runtime_registerUniqueMapCleanup(cleanup func())
+//
+// Used only by tests.
+//
+//go:linkname runtime_blockUntilEmptyFinalizerQueue
+func runtime_blockUntilEmptyFinalizerQueue(timeout int64) bool
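The linkname hook declared above gives tests a deterministic way to wait for queued cleanups, replacing the old cleanupNotify machinery. A hypothetical helper sketching the pattern (drainCleanupQueue in handle_test.go below is the real version; the name and the 5-second timeout are illustrative):

	// waitForCleanups mirrors drainCleanupQueue below; it is a sketch, not code from this CL.
	func waitForCleanups() bool {
		runtime.GC() // queue cleanups for anything that just became unreachable
		// Block until the runtime has drained the finalizer/cleanup queue,
		// or give up after an arbitrary timeout.
		return runtime_blockUntilEmptyFinalizerQueue(int64(5 * time.Second))
	}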
diff --git a/src/unique/handle_test.go b/src/unique/handle_test.go
index c8fd20b4cb..7cd63c5eeb 100644
--- a/src/unique/handle_test.go
+++ b/src/unique/handle_test.go
@@ -33,6 +33,10 @@ type testStruct struct {
 	b string
 }
 type testZeroSize struct{}
+type testNestedHandle struct {
+	next Handle[testNestedHandle]
+	arr  [6]int
+}
 
 func TestHandle(t *testing.T) {
 	testHandle(t, testString("foo"))
@@ -53,8 +57,6 @@ func TestHandle(t *testing.T) {
 
 func testHandle[T comparable](t *testing.T, value T) {
 	name := reflect.TypeFor[T]().Name()
 	t.Run(fmt.Sprintf("%s/%#v", name, value), func(t *testing.T) {
-		t.Parallel()
-
 		v0 := Make(value)
 		v1 := Make(value)
@@ -80,26 +82,14 @@ func drainMaps[T comparable](t *testing.T) {
 	if unsafe.Sizeof(*(new(T))) == 0 {
 		return // zero-size types are not inserted.
 	}
+	drainCleanupQueue(t)
+}
 
-	wait := make(chan struct{}, 1)
-
-	// Set up a one-time notification for the next time the cleanup runs.
-	// Note: this will only run if there's no other active cleanup, so
-	// we can be sure that the next time cleanup runs, it'll see the new
-	// notification.
-	cleanupMu.Lock()
-	cleanupNotify = append(cleanupNotify, func() {
-		select {
-		case wait <- struct{}{}:
-		default:
-		}
-	})
-
-	runtime.GC()
-	cleanupMu.Unlock()
+func drainCleanupQueue(t *testing.T) {
+	t.Helper()
 
-	// Wait until cleanup runs.
-	<-wait
+	runtime.GC() // Queue up the cleanups.
+	runtime_blockUntilEmptyFinalizerQueue(int64(5 * time.Second))
 }
 
 func checkMapsFor[T comparable](t *testing.T, value T) {
@@ -110,15 +100,10 @@ func checkMapsFor[T comparable](t *testing.T, value T) {
 		return
 	}
 	m := a.(*uniqueMap[T])
-	wp, ok := m.Load(value)
-	if !ok {
-		return
-	}
-	if wp.Value() != nil {
-		t.Errorf("value %v still referenced a handle (or tiny block?) ", value)
-		return
+	p := m.Load(value)
+	if p != nil {
+		t.Errorf("value %v still referenced by a handle (or tiny block?): internal pointer %p", value, p)
 	}
-	t.Errorf("failed to drain internal maps of %v", value)
 }
 
 func TestMakeClonesStrings(t *testing.T) {
@@ -162,3 +147,32 @@ func TestHandleUnsafeString(t *testing.T) {
 		}
 	}
 }
+
+func nestHandle(n testNestedHandle) testNestedHandle {
+	return testNestedHandle{
+		next: Make(n),
+		arr:  n.arr,
+	}
+}
+
+func TestNestedHandle(t *testing.T) {
+	n0 := testNestedHandle{arr: [6]int{1, 2, 3, 4, 5, 6}}
+	n1 := nestHandle(n0)
+	n2 := nestHandle(n1)
+	n3 := nestHandle(n2)
+
+	if v := n3.next.Value(); v != n2 {
+		t.Errorf("n3.Value != n2: %#v vs. %#v", v, n2)
+	}
+	if v := n2.next.Value(); v != n1 {
+		t.Errorf("n2.Value != n1: %#v vs. %#v", v, n1)
+	}
+	if v := n1.next.Value(); v != n0 {
+		t.Errorf("n1.Value != n0: %#v vs. %#v", v, n0)
+	}
+
+	// In a good implementation, the entire chain, down to the bottom-most
+	// value, should all be gone after we drain the maps.
+	drainMaps[testNestedHandle](t)
+	checkMapsFor(t, n0)
+}