// BSS root.
rootBlockBytes = 256 << 10
- // rootBlockSpans is the number of spans to scan per span
- // root.
- rootBlockSpans = 8 * 1024 // 64MB worth of spans
-
// maxObletBytes is the maximum bytes of an object to scan at
// once. Larger objects will be split up into "oblets" of at
// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
// scan preemption at ~100 µs.
maxObletBytes = 128 << 10

// drainCheckThreshold specifies how many units of work to do
// between self-preemption checks in gcDrain. Lower values have
// higher overhead in the scan loop (the scheduler check may perform
// a syscall, so its overhead is nontrivial). Higher values
// make the system less responsive to incoming work.
drainCheckThreshold = 100000
+
+ // pagesPerSpanRoot indicates how many pages to scan from a span root
+ // at a time. Used by special root marking.
+ //
+ // Higher values improve throughput by increasing locality, but
+ // increase the minimum latency of a marking operation.
+ //
+ // Must be a multiple of the pageInUse bitmap element size and
+ // must also evenly divide pagesPerArena.
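+ //
+ // (Illustrative note: with the runtime's 8 KB pages, 512 pages
+ // corresponds to 4 MB of arena address space per chunk.)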
+ pagesPerSpanRoot = 512
+
+ // go115NewMarkrootSpans is a feature flag that indicates whether
+ // to use the new bitmap-based markrootSpans implementation.
+ go115NewMarkrootSpans = true
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
// The world must be stopped.
-//
-//go:nowritebarrier
func gcMarkRootPrepare() {
work.nFlushCacheRoots = 0
//
// We depend on addfinalizer to mark objects that get
// finalizers after root marking.
- //
- // We're only interested in scanning the in-use spans,
- // which will all be swept at this point. More spans
- // may be added to this list during concurrent GC, but
- // we only care about spans that were allocated before
- // this mark phase.
- work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+ if go115NewMarkrootSpans {
+ // We're going to scan the whole heap (that was available at the time the
+ // mark phase started, i.e. markArenas) for in-use spans which have specials.
+ //
+ // Break up the work into arenas, and further into chunks.
+ //
+ // Snapshot allArenas as markArenas. This snapshot is safe because allArenas
+ // is append-only.
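+ //
+ // (For example, with 64 MB arenas and 8 KB pages, pagesPerArena is
+ // 8192, so each arena contributes 8192/512 = 16 span root jobs.)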
+ mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
+ work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
+ } else {
+ // We're only interested in scanning the in-use spans,
+ // which will all be swept at this point. More spans
+ // may be added to this list during concurrent GC, but
+ // we only care about spans that were allocated before
+ // this mark phase.
+ work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+ }
// Scan stacks.
//
unlock(&sched.gFree.lock)
}
-// markrootSpans marks roots for one shard of work.spans.
+// markrootSpans marks roots for one shard of markArenas.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
+ if !go115NewMarkrootSpans {
+ oldMarkrootSpans(gcw, shard)
+ return
+ }
+ // Objects with finalizers have two GC-related invariants:
+ //
+ // 1) Everything reachable from the object must be marked.
+ // This ensures that when we pass the object to its finalizer,
+ // everything the finalizer can reach will be retained.
+ //
+ // 2) Finalizer specials (which are not in the garbage
+ // collected heap) are roots. In practice, this means the fn
+ // field must be scanned.
+ sg := mheap_.sweepgen
+
+ // Find the arena and page index into that arena for this shard.
+ ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
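+ // (For example, assuming pagesPerArena = 8192: shard 17 selects
+ // markArenas[17/16] = markArenas[1] and starts at arenaPage
+ // 17*512%8192 = 512.)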
+
+ // Construct slice of bitmap which we'll iterate over.
+ specialsbits := ha.pageSpecials[arenaPage/8:]
+ specialsbits = specialsbits[:pagesPerSpanRoot/8]
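+ // Each bitmap byte covers 8 pages, so this slice is
+ // pagesPerSpanRoot/8 = 64 bytes long.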
+ for i := range specialsbits {
+ // Find set bits, which correspond to spans with specials.
+ specials := atomic.Load8(&specialsbits[i])
+ if specials == 0 {
+ continue
+ }
+ for j := uint(0); j < 8; j++ {
+ if specials&(1<<j) == 0 {
+ continue
+ }
+ // Find the span for this bit.
+ //
+ // This value is guaranteed to be non-nil because having
+ // specials implies that the span is in-use, and since we're
+ // currently marking we can be sure that we don't have to worry
+ // about the span being freed and re-used.
+ s := ha.spans[arenaPage+uint(i)*8+j]
+
+ // The state must be mSpanInUse if the specials bit is set, so
+ // sanity check that.
+ if state := s.state.get(); state != mSpanInUse {
+ print("s.state = ", state, "\n")
+ throw("non in-use span found with specials bit set")
+ }
+ // Check that this span was swept (it may be cached or uncached).
+ if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
+ // sweepgen was updated (+2) during non-checkmark GC pass
+ print("sweep ", s.sweepgen, " ", sg, "\n")
+ throw("gc: unswept span")
+ }
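+ // (Here s.sweepgen == sg means the span was swept this cycle, and
+ // sg+3 means it was swept and then cached; see the sweepgen
+ // encoding documented on mspan in mheap.go.)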
+
+ // Lock the specials to prevent a special from being
+ // removed from the list while we're traversing it.
+ lock(&s.speciallock)
+ for sp := s.specials; sp != nil; sp = sp.next {
+ if sp.kind != _KindSpecialFinalizer {
+ continue
+ }
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf := (*specialfinalizer)(unsafe.Pointer(sp))
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
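+ // (For example, with elemsize 48 and a special at offset 100,
+ // p = base + 100/48*48 = base + 96, the start of the object.)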
+
+ // Mark everything that can be reached from
+ // the object (but *not* the object itself or
+ // we'll never collect it).
+ scanobject(p, gcw)
+
+ // The special itself is a root.
+ scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
+ }
+ unlock(&s.speciallock)
+ }
+ }
+}
+
+// oldMarkrootSpans marks roots for one shard of work.spans.
+//
+// For go115NewMarkrootSpans = false.
+//
+//go:nowritebarrier
+func oldMarkrootSpans(gcw *gcWork, shard int) {
// Objects with finalizers have two GC-related invariants:
//
// 1) Everything reachable from the object must be marked.
// maxPhysHugePageSize sets an upper-bound on the maximum huge page size
// that the runtime supports.
maxPhysHugePageSize = pallocChunkBytes
+
+ // pagesPerReclaimerChunk indicates how many pages to scan from the
+ // pageInUse bitmap at a time. Used by the page reclaimer.
+ //
+ // Higher values reduce contention on scanning indexes (such as
+ // h.reclaimIndex), but increase the minimum latency of the
+ // operation.
+ //
+ // The time required to scan this many pages can vary a lot depending
+ // on how many spans are actually freed. Experimentally, it can
+ // scan for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
+ // free spans at ~32 MB/ms. Using 512 pages bounds this at
+ // roughly 100µs.
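+ // (With 8 KB pages, 512 pages is 4 MB; 4 MB at ~32 MB/ms is ~125 µs.)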
+ //
+ // Must be a multiple of the pageInUse bitmap element size and
+ // must also evenly divide pagesPerArena.
+ pagesPerReclaimerChunk = 512
)
// Main malloc heap.
// simply blocking GC (by disabling preemption).
sweepArenas []arenaIdx
+ // markArenas is a snapshot of allArenas taken at the beginning
+ // of the mark cycle. Because allArenas is append-only, neither
+ // this slice nor its contents will change during the mark, so
+ // it can be read safely.
+ markArenas []arenaIdx
+
// curArena is the arena that the heap is currently growing
// into. This should always be physPageSize-aligned.
curArena struct {
base, end uintptr
}
- _ uint32 // ensure 64-bit alignment of central
+ // _ uint32 // ensure 64-bit alignment of central
// central free lists for small size classes.
// the padding makes sure that the mcentrals are
// operations.
pageMarks [pagesPerArena / 8]uint8
+ // pageSpecials is a bitmap that indicates which spans have
+ // specials (finalizers or other). Like pageInUse, only the bit
+ // corresponding to the first page in each span is used.
+ //
+ // Writes are done atomically whenever a special is added to
+ // a span and whenever the last special is removed from a span.
+ // Reads are done atomically to find spans containing specials
+ // during marking.
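+ //
+ // (This bitmap costs pagesPerArena/8 bytes per arena, e.g. 1 KB for
+ // the 64 MB arenas used on most 64-bit platforms.)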
+ pageSpecials [pagesPerArena / 8]uint8
+
// zeroedBase marks the first byte of the first page in this
// arena which hasn't been used yet and is therefore already
// zero. zeroedBase is relative to the arena base.
//
// h must NOT be locked.
func (h *mheap) reclaim(npage uintptr) {
- // This scans pagesPerChunk at a time. Higher values reduce
- // contention on h.reclaimPos, but increase the minimum
- // latency of performing a reclaim.
- //
- // Must be a multiple of the pageInUse bitmap element size.
- //
- // The time required by this can vary a lot depending on how
- // many spans are actually freed. Experimentally, it can scan
- // for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
- // free spans at ~32 MB/ms. Using 512 pages bounds this at
- // roughly 100µs.
- //
// TODO(austin): Half of the time spent freeing spans is in
// locking/unlocking the heap (even with low contention). We
// could make the slow path here several times faster by
// batching heap frees.
- const pagesPerChunk = 512
// Bail early if there's no more reclaim work.
if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
}
// Claim a chunk of work.
- idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+ idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
if idx/pagesPerArena >= uintptr(len(arenas)) {
// Page reclaiming is done.
atomic.Store64(&h.reclaimIndex, 1<<63)
}
// Scan this chunk.
- nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+ nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
if nfound <= npage {
npage -= nfound
} else {
kind byte // kind of special
}
+// spanHasSpecials marks a span as having specials in the arena bitmap.
+func spanHasSpecials(s *mspan) {
+ arenaPage := (s.base() / pageSize) % pagesPerArena
+ ai := arenaIndex(s.base())
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
+}
+
+// spanHasNoSpecials marks a span as having no specials in the arena bitmap.
+func spanHasNoSpecials(s *mspan) {
+ arenaPage := (s.base() / pageSize) % pagesPerArena
+ ai := arenaIndex(s.base())
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
+}
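+
+ // Note: in this change both helpers are called from addspecial and
+ // removespecial with the span's speciallock held, so the bitmap stays
+ // consistent with the specials list; the atomic Or8/And8 writes let
+ // the garbage collector read the bits concurrently during marking.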
+
// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
s.offset = uint16(offset)
s.next = *t
*t = s
+ if go115NewMarkrootSpans {
+ spanHasSpecials(span)
+ }
unlock(&span.speciallock)
releasem(mp)
offset := uintptr(p) - span.base()
+ var result *special
lock(&span.speciallock)
t := &span.specials
for {
// "interior" specials (p must be exactly equal to s->offset).
if offset == uintptr(s.offset) && kind == s.kind {
*t = s.next
- unlock(&span.speciallock)
- releasem(mp)
- return s
+ result = s
+ break
}
t = &s.next
}
+ if go115NewMarkrootSpans && span.specials == nil {
+ spanHasNoSpecials(span)
+ }
unlock(&span.speciallock)
releasem(mp)
- return nil
+ return result
}
// The described object has a finalizer set for it.