From 24ea1aa25c954bbbe9968d735795a649833b0b1c Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 13 May 2025 20:09:57 +0000 Subject: [PATCH] runtime: only update freeIndexForScan outside of the mark phase Currently, it's possible for asynchronous preemption to observe a partially initialized object. The sequence of events goes like this: - The GC is in the mark phase. - Thread T1 is allocating object O1. - Thread T1 zeroes the allocation, runs the publication barrier, and updates freeIndexForScan. It has not yet updated the mark bit on O1. - Thread T2 is conservatively scanning some stack frame. That stack frame has a dead pointer with the same address as O1. - T2 picks up the pointer, checks isFree (which checks freeIndexForScan without an import barrier), and sees that O1 is allocated. It marks and queues O1. - T2 then goes to scan O1, and observes uninitialized memory. Although a publication barrier was executed, T2 did not have an import barrier. T2 may thus observe T1's writes to zero the object out-of-order with the write to freeIndexForScan. Normally this would be impossible if T2 got a pointer to O1 from somewhere written by T1. The publication barrier guarantees that if the read side is data-dependent on the write side then we'd necessarily observe all writes to O1 before T1 published it. However, T2 got the pointer 'out of thin air' by scanning a stack frame with a dead pointer on it. One fix to this problem would be to add the import barrier in the conservative scanner. We would then also need to put freeIndexForScan behind the publication barrier, or make the write to freeIndexForScan exactly that barrier. However, there's a simpler way. We don't actually care if conservative scanning observes a stale freeIndexForScan during the mark phase. Newly-allocated memory is always marked at the point of allocation (the allocate-black policy part of the GC's design). So it doesn't actually matter that if the garbage collector scans that memory or not. This change modifies the allocator to only update freeIndexForScan outside the mark phase. This means freeIndexForScan is essentially a snapshot of freeindex at the point the mark phase started. Because there's no more race between conservative scanning and newly-allocated objects, the complicated scenario above is no longer a possibility. One thing we do have to be careful of is other callers of isFree. Previously freeIndexForScan would always track freeindex, now it no longer does. This change thus introduces isFreeOrNewlyAllocated which is used by the conservative scanner, and uses freeIndexForScan. Meanwhile isFree goes back to using freeindex like it used to. This change also documents the requirement on isFree that the caller must have obtained the pointer not 'out of thin air' but after the object was published. isFree is not currently used anywhere particularly sensitive (heap dump and checkmark mode, where the world is stopped in both cases) so using freeindex is both conceptually simple and also safe. Change-Id: If66b8c536b775971203fb4358c17d711c2944723 Reviewed-on: https://go-review.googlesource.com/c/go/+/672340 Reviewed-by: David Chase Reviewed-by: Cherry Mui Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/runtime/malloc.go | 150 ++++++++++++++++++++--------------------- src/runtime/mbitmap.go | 25 +++++++ src/runtime/mgcmark.go | 7 +- 3 files changed, 105 insertions(+), 77 deletions(-) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index ccdebb26fb..7f85dfd0ed 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1196,23 +1196,23 @@ func mallocgcTiny(size uintptr, typ *_type) (unsafe.Pointer, uintptr) { // the garbage collector could follow a pointer to x, // but see uninitialized memory or stale heap bits. publicationBarrier() - // As x and the heap bits are initialized, update - // freeIndexForScan now so x is seen by the GC - // (including conservative scan) as an allocated object. - // While this pointer can't escape into user code as a - // _live_ pointer until we return, conservative scanning - // may find a dead pointer that happens to point into this - // object. Delaying this update until now ensures that - // conservative scanning considers this pointer dead until - // this point. - span.freeIndexForScan = span.freeindex - - // Allocate black during GC. - // All slots hold nil so no scanning is needed. - // This may be racing with GC so do it atomically if there can be - // a race marking the bit. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex } // Note cache c only valid while m acquired; see #47302 @@ -1298,23 +1298,23 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe // the garbage collector could follow a pointer to x, // but see uninitialized memory or stale heap bits. publicationBarrier() - // As x and the heap bits are initialized, update - // freeIndexForScan now so x is seen by the GC - // (including conservative scan) as an allocated object. - // While this pointer can't escape into user code as a - // _live_ pointer until we return, conservative scanning - // may find a dead pointer that happens to point into this - // object. Delaying this update until now ensures that - // conservative scanning considers this pointer dead until - // this point. - span.freeIndexForScan = span.freeindex - - // Allocate black during GC. - // All slots hold nil so no scanning is needed. - // This may be racing with GC so do it atomically if there can be - // a race marking the bit. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex } // Note cache c only valid while m acquired; see #47302 @@ -1389,23 +1389,23 @@ func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintpt // the garbage collector could follow a pointer to x, // but see uninitialized memory or stale heap bits. publicationBarrier() - // As x and the heap bits are initialized, update - // freeIndexForScan now so x is seen by the GC - // (including conservative scan) as an allocated object. - // While this pointer can't escape into user code as a - // _live_ pointer until we return, conservative scanning - // may find a dead pointer that happens to point into this - // object. Delaying this update until now ensures that - // conservative scanning considers this pointer dead until - // this point. - span.freeIndexForScan = span.freeindex - - // Allocate black during GC. - // All slots hold nil so no scanning is needed. - // This may be racing with GC so do it atomically if there can be - // a race marking the bit. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex } // Note cache c only valid while m acquired; see #47302 @@ -1482,23 +1482,23 @@ func mallocgcSmallScanHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) // the garbage collector could follow a pointer to x, // but see uninitialized memory or stale heap bits. publicationBarrier() - // As x and the heap bits are initialized, update - // freeIndexForScan now so x is seen by the GC - // (including conservative scan) as an allocated object. - // While this pointer can't escape into user code as a - // _live_ pointer until we return, conservative scanning - // may find a dead pointer that happens to point into this - // object. Delaying this update until now ensures that - // conservative scanning considers this pointer dead until - // this point. - span.freeIndexForScan = span.freeindex - - // Allocate black during GC. - // All slots hold nil so no scanning is needed. - // This may be racing with GC so do it atomically if there can be - // a race marking the bit. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex } // Note cache c only valid while m acquired; see #47302 @@ -1555,23 +1555,23 @@ func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uin // the garbage collector could follow a pointer to x, // but see uninitialized memory or stale heap bits. publicationBarrier() - // As x and the heap bits are initialized, update - // freeIndexForScan now so x is seen by the GC - // (including conservative scan) as an allocated object. - // While this pointer can't escape into user code as a - // _live_ pointer until we return, conservative scanning - // may find a dead pointer that happens to point into this - // object. Delaying this update until now ensures that - // conservative scanning considers this pointer dead until - // this point. - span.freeIndexForScan = span.freeindex - - // Allocate black during GC. - // All slots hold nil so no scanning is needed. - // This may be racing with GC so do it atomically if there can be - // a race marking the bit. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex } // Note cache c only valid while m acquired; see #47302 diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index f9a4c4ce3d..11720f840e 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1095,7 +1095,32 @@ func (s *mspan) nextFreeIndex() uint16 { // The caller must ensure s.state is mSpanInUse, and there must have // been no preemption points since ensuring this (which could allow a // GC transition, which would allow the state to change). +// +// Callers must ensure that the index passed here must not have been +// produced from a pointer that came from 'thin air', as might happen +// with conservative scanning. func (s *mspan) isFree(index uintptr) bool { + if index < uintptr(s.freeindex) { + return false + } + bytep, mask := s.allocBits.bitp(index) + return *bytep&mask == 0 +} + +// isFreeOrNewlyAllocated reports whether the index'th object in s is +// either unallocated or has been allocated since the beginning of the +// last mark phase. +// +// The caller must ensure s.state is mSpanInUse, and there must have +// been no preemption points since ensuring this (which could allow a +// GC transition, which would allow the state to change). +// +// Callers must ensure that the index passed here must not have been +// produced from a pointer that came from 'thin air', as might happen +// with conservative scanning, unless the GC is currently in the mark +// phase. If the GC is currently in the mark phase, this function is +// safe to call for out-of-thin-air pointers. +func (s *mspan) isFreeOrNewlyAllocated(index uintptr) bool { if index < uintptr(s.freeIndexForScan) { return false } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 5aabc14b40..171d76d32a 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1555,7 +1555,7 @@ func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackSca return ' ' } idx := span.objIndex(val) - if span.isFree(idx) { + if span.isFreeOrNewlyAllocated(idx) { return ' ' } return '*' @@ -1608,8 +1608,11 @@ func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackSca } // Check if val points to an allocated object. + // + // Ignore objects allocated during the mark phase, they've + // been allocated black. idx := span.objIndex(val) - if span.isFree(idx) { + if span.isFreeOrNewlyAllocated(idx) { continue } -- 2.51.0