From 390ffce7d60abf7d5317c0a78beef64affd0059a Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek
Date: Fri, 21 Mar 2025 16:26:15 +0000
Subject: [PATCH] [release-branch.go1.24] runtime: prevent unnecessary zeroing
 of large objects with pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

CL 614257 refactored mallocgc but lost an optimization: if a span for a
large object is already backed by memory fresh from the OS (and thus
zeroed), we don't need to zero it. CL 614257 unconditionally zeroed
spans for large objects that contain pointers.

This change restores the optimization from before CL 614257, which
seems to matter in some real-world programs.

While we're here, let's also fix a hole where the garbage collector
could observe uninitialized memory of a large object when the object is
observed by the conservative scanner before being published. The gory
details are in a comment in heapSetTypeLarge. In short, this change
makes span.largeType an atomic variable, such that the GC can only
observe initialized memory if span.largeType != nil.

For #72991.
Fixes #73800.

Change-Id: I2048aeb220ab363d252ffda7d980b8788e9674dc
Reviewed-on: https://go-review.googlesource.com/c/go/+/659956
Reviewed-by: Keith Randall
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Keith Randall
Reviewed-by: Felix Geisendörfer
(cherry picked from commit df9888ea4e97feb755e452609be5078686370995)
Reviewed-on: https://go-review.googlesource.com/c/go/+/682356
---
 src/runtime/malloc.go  | 48 ++++++++++++++++++++++-------------
 src/runtime/mbitmap.go | 56 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 73d663f7f5..ba9e02e976 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1544,12 +1544,13 @@ func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uin
 	size = span.elemsize
 	x := unsafe.Pointer(span.base())
 
-	// Ensure that the stores above that initialize x to
-	// type-safe memory and set the heap bits occur before
-	// the caller can make x observable to the garbage
-	// collector. Otherwise, on weakly ordered machines,
-	// the garbage collector could follow a pointer to x,
-	// but see uninitialized memory or stale heap bits.
+	// Ensure that the store above that sets largeType to
+	// nil happens before the caller can make x observable
+	// to the garbage collector.
+	//
+	// Otherwise, on weakly ordered machines, the garbage
+	// collector could follow a pointer to x, but see a stale
+	// largeType value.
 	publicationBarrier()
 	// As x and the heap bits are initialized, update
 	// freeIndexForScan now so x is seen by the GC
@@ -1592,22 +1593,33 @@ func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uin
 	}
 
 	// Objects can be zeroed late in a context where preemption can occur.
-	// If the object contains pointers, its pointer data must be cleared
-	// or otherwise indicate that the GC shouldn't scan it.
+	//
 	// x will keep the memory alive.
-	if noscan := typ == nil || !typ.Pointers(); !noscan || (needzero && span.needzero != 0) {
+	if needzero && span.needzero != 0 {
 		// N.B. size == fullSize always in this case.
 		memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
-
-		// Finish storing the type information for this case.
-		mp := acquirem()
-		if !noscan {
-			getMCache(mp).scanAlloc += heapSetTypeLarge(uintptr(x), size, typ, span)
-		}
-		// Publish the object with the now-zeroed memory.
-		publicationBarrier()
-		releasem(mp)
 	}
+
+	// Set the type and run the publication barrier while non-preemptible. We need to make
+	// sure that between heapSetTypeLarge and publicationBarrier we cannot get preempted,
+	// otherwise the GC could potentially observe non-zeroed memory but largeType set on weak
+	// memory architectures.
+	//
+	// The GC can also potentially observe non-zeroed memory if conservative scanning spuriously
+	// observes a partially-allocated object, see the freeIndexForScan update above. This case is
+	// handled by synchronization inside heapSetTypeLarge.
+	mp = acquirem()
+	if typ != nil && typ.Pointers() {
+		// Finish storing the type information, now that we're certain the memory is zeroed.
+		getMCache(mp).scanAlloc += heapSetTypeLarge(uintptr(x), size, typ, span)
+	}
+	// Publish the object again, now with zeroed memory and initialized type information.
+	//
+	// Even if we didn't update any type information, this is necessary to ensure that, for example,
+	// x written to a global without any synchronization still results in other goroutines observing
+	// zeroed memory.
+	publicationBarrier()
+	releasem(mp)
 
 	return x, size
 }
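The ordering argument in the hunk above -- fully initialize the object's
memory first, then publish it, so that anyone who observes the published
value is guaranteed to observe initialized memory -- can be sketched with
public APIs. The stand-alone program below is a loose userland analogue,
not runtime code: the names object, published, alloc, and reader are
invented for illustration, and sync/atomic's Pointer stands in for both
publicationBarrier and the atomic largeType store.

// Hypothetical userland analogue of the allocator's
// initialize-then-publish pattern. Not runtime code.
package main

import (
	"fmt"
	"sync/atomic"
)

type object struct {
	data [8]int // stands in for the large object's memory
}

// published plays the role of span.largeType: nil means
// "not initialized yet; don't look at the memory".
var published atomic.Pointer[object]

func alloc() *object {
	obj := new(object)
	for i := range obj.data {
		obj.data[i] = i // initialization ("zeroing") happens first...
	}
	// ...and only then is the object published. The atomic store keeps
	// the initialization ordered before the publication, much as the
	// runtime's publicationBarrier and atomic largeType store do.
	published.Store(obj)
	return obj
}

func reader() {
	// Pairs with the Store above: observing a non-nil pointer guarantees
	// the initialization writes are visible, mirroring the GC's rule
	// "only scan the object if largeType != nil".
	if obj := published.Load(); obj != nil {
		fmt.Println(obj.data)
	}
}

func main() {
	done := make(chan struct{})
	go func() { reader(); close(done) }()
	alloc()
	<-done
	reader()
}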
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 148b2d788e..4bffabece2 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -191,7 +191,9 @@ func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
 		typ = *(**_type)(unsafe.Pointer(addr))
 		addr += mallocHeaderSize
 	} else {
-		typ = span.largeType
+		// Synchronize with allocator, in case this came from the conservative scanner.
+		// See heapSetTypeLarge for more details.
+		typ = (*_type)(atomic.Loadp(unsafe.Pointer(&span.largeType)))
 		if typ == nil {
 			// Allow a nil type here for delayed zeroing. See mallocgc.
 			return typePointers{}
@@ -739,8 +741,56 @@ func heapSetTypeSmallHeader(x, dataSize uintptr, typ *_type, header **_type, spa
 
 func heapSetTypeLarge(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
 	gctyp := typ
-	// Write out the header.
-	span.largeType = gctyp
+	// Write out the header atomically to synchronize with the garbage collector.
+	//
+	// This atomic store is paired with an atomic load in typePointersOfUnchecked.
+	// This store ensures that initializing x's memory cannot be reordered after
+	// this store. Meanwhile the load in typePointersOfUnchecked ensures that
+	// reading x's memory cannot be reordered before largeType is loaded. Together,
+	// these two operations guarantee that the garbage collector can only see
+	// initialized memory if largeType is non-nil.
+	//
+	// Gory details below...
+	//
+	// Ignoring conservative scanning for a moment, this store need not be atomic
+	// if we have a publication barrier on our side. This is because the garbage
+	// collector cannot observe x unless:
+	//   1. It stops this goroutine and scans its stack, or
+	//   2. We return from mallocgc and publish the pointer somewhere.
+	// Either case requires a write on our side, followed by some synchronization
+	// followed by a read by the garbage collector.
+	//
+	// In case (1), the garbage collector can only observe a nil largeType, since it
+	// had to stop our goroutine when it was preemptible during zeroing. For the
+	// duration of the zeroing, largeType is nil and the object has nothing interesting
+	// for the garbage collector to look at, so the garbage collector will not access
+	// the object at all.
+	//
+	// In case (2), the garbage collector can also observe a nil largeType. This
+	// might happen if the object was newly allocated, and a new GC cycle didn't start
+	// (that would require a global barrier, STW). In this case, the garbage collector
+	// will once again ignore the object, and that's safe because objects are
+	// allocate-black.
+	//
+	// However, the garbage collector can also observe a non-nil largeType in case (2).
+	// This is still okay, since to access the object's memory, it must have first
+	// loaded the object's pointer from somewhere. This makes the access of the object's
+	// memory a data-dependent load, and our publication barrier in the allocator
+	// guarantees that a data-dependent load must observe a version of the object's
+	// data from after the publication barrier executed.
+	//
+	// Unfortunately, conservative scanning is a problem. There's no guarantee of a
+	// data dependency as in case (2) because conservative scanning can produce pointers
+	// 'out of thin air' in that it need not have been written somewhere by the allocating
+	// thread first. It might not even be a pointer, or it could be a pointer written to
+	// some stack location long ago. This is the fundamental reason why we need
+	// explicit synchronization somewhere in this whole mess. We choose to put that
+	// synchronization on largeType.
+	//
+	// As described at the very top, treating largeType as an atomic variable, on
+	// both the reader and writer side, is sufficient to ensure that only initialized
+	// memory at x will be observed if largeType is non-nil.
+	atomic.StorepNoWB(unsafe.Pointer(&span.largeType), unsafe.Pointer(gctyp))
 	if doubleCheckHeapSetType {
 		doubleCheckHeapType(x, dataSize, typ, &span.largeType, span)
 	}
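To see why the reader side had to change from a plain load of
span.largeType to an atomic one, consider this stand-alone sketch of the
old, unsynchronized shape of the code; every name here is hypothetical,
not the runtime's. Run with `go run -race`, the race detector flags the
pair of plain accesses, which is the userland shadow of the reordering
the new comment describes; the patch closes exactly this window by making
both sides atomic.

// Hypothetical sketch of the pre-patch, unsynchronized variant.
// `go run -race` reports a data race on s.largeType.
package main

import (
	"fmt"
	"time"
)

type typeInfo struct{ name string }

type span struct {
	largeType *typeInfo // written by the "allocator", read by the "scanner"
}

func main() {
	s := new(span)
	go func() {
		// Plain store, standing in for the old `span.largeType = gctyp`.
		s.largeType = &typeInfo{name: "published"}
	}()
	time.Sleep(time.Millisecond) // not a synchronization edge!
	// Plain load, standing in for the old `typ = span.largeType`.
	if t := s.largeType; t != nil {
		fmt.Println(t.name)
	}
}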
-- 
2.50.0
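As a closing illustration of the invariant the patch establishes -- a
scanner that observes a non-nil type must also observe fully initialized
memory -- here is a hedged stress-test sketch. The types spanLike,
allocate, and scan are invented names; sync/atomic's Pointer provides
roughly the release/acquire pairing that the runtime builds from
publicationBarrier, atomic.StorepNoWB, and atomic.Loadp.

// Hypothetical stress test of the patch's invariant:
// non-nil type observed => fully initialized memory observed.
package main

import (
	"fmt"
	"sync/atomic"
)

type big struct{ words [1024]uint64 }

type spanLike struct {
	mem       *big
	largeType atomic.Pointer[string] // nil until mem is fully initialized
}

func allocate(s *spanLike) {
	s.mem = &big{}
	for i := range s.mem.words {
		s.mem.words[i] = 1 // stands in for the delayed zeroing
	}
	t := "T"
	s.largeType.Store(&t) // publish: all writes above are now visible
}

func scan(s *spanLike) {
	if s.largeType.Load() == nil {
		return // analogue of "nil largeType: skip the object entirely"
	}
	for i, w := range s.mem.words {
		if w != 1 {
			panic(fmt.Sprintf("observed uninitialized word %d", i))
		}
	}
}

func main() {
	for iter := 0; iter < 1000; iter++ {
		s := new(spanLike)
		done := make(chan struct{})
		go func() { scan(s); close(done) }()
		allocate(s)
		<-done
		scan(s) // must see fully initialized memory
	}
	fmt.Println("invariant held")
}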