From eacdf76b93174484ffc526d9c45f4836f0738dee Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek
Date: Thu, 20 Feb 2020 20:03:39 +0000
Subject: [PATCH] runtime: add bitmap-based markrootSpans implementation

Currently markrootSpans, the scanning routine which scans span specials
(particularly finalizers) as roots, uses sweepSpans to shard work and
find spans to mark. However, as part of a future CL to change span
ownership and how mcentral works, we want to avoid having markrootSpans
use the sweep bufs to find specials, so in this change we introduce a
new mechanism.

Much like for the page reclaimer, we set up a per-page bitmap where the
first page for a span is marked if the span contains any specials, and
unmarked if it has no specials. This bitmap is updated by addspecial,
removespecial, and during sweeping.

markrootSpans then shards this bitmap into mark work and markers iterate
over the bitmap looking for spans with specials to mark. Unlike the page
reclaimer, we don't need to use the pageInUse bits because having a
special implies that a span is in-use.

While in terms of computational complexity this design is technically
worse, because it needs to iterate over the mapped heap, in practice
this iteration is very fast (we can skip over large swathes of the heap
very quickly) and we only look at spans that have any specials at all,
rather than having to touch each span.

This new implementation of markrootSpans is behind a feature flag called
go115NewMarkrootSpans.

Updates #37487.

Change-Id: I8ea07b6c11059f6d412fe419e0ab512d989377b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/221178
Run-TryBot: Michael Knyszek
TryBot-Result: Gobot Gobot
Reviewed-by: Austin Clements
---
 src/runtime/malloc.go   |   8 +++
 src/runtime/mgcmark.go  | 133 +++++++++++++++++++++++++++++++++++-----
 src/runtime/mgcsweep.go |   4 ++
 src/runtime/mheap.go    |  82 +++++++++++++++++++------
 4 files changed, 193 insertions(+), 34 deletions(-)

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index e1ec5e6496..29e0071b3c 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -465,6 +465,14 @@ func mallocinit() {
 			physHugePageShift++
 		}
 	}
+	if pagesPerArena%pagesPerSpanRoot != 0 {
+		print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerSpanRoot (", pagesPerSpanRoot, ")\n")
+		throw("bad pagesPerSpanRoot")
+	}
+	if pagesPerArena%pagesPerReclaimerChunk != 0 {
+		print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
+		throw("bad pagesPerReclaimerChunk")
+	}
 
 	// Initialize the heap.
 	mheap_.init()
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 301d8020f1..ea73ccc1b1 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -21,10 +21,6 @@ const (
 	// BSS root.
 	rootBlockBytes = 256 << 10
 
-	// rootBlockSpans is the number of spans to scan per span
-	// root.
-	rootBlockSpans = 8 * 1024 // 64MB worth of spans
-
 	// maxObletBytes is the maximum bytes of an object to scan at
 	// once. Larger objects will be split up into "oblets" of at
 	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
 	// a syscall, so its overhead is nontrivial). Higher values
 	// make the system less responsive to incoming work.
 	drainCheckThreshold = 100000
+
+	// pagesPerSpanRoot indicates how many pages to scan from a span root
+	// at a time. Used by special root marking.
+	//
+	// Higher values improve throughput by increasing locality, but
+	// increase the minimum latency of a marking operation.
+	//
+	// Must be a multiple of the pageInUse bitmap element size and
+	// must also evenly divide pagesPerArena.
+	pagesPerSpanRoot = 512
+
+	// go115NewMarkrootSpans is a feature flag that indicates whether
+	// to use the new bitmap-based markrootSpans implementation.
+	go115NewMarkrootSpans = true
 )
 
 // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
 // some miscellany) and initializes scanning-related state.
 //
 // The world must be stopped.
-//
-//go:nowritebarrier
 func gcMarkRootPrepare() {
 	work.nFlushCacheRoots = 0
@@ -79,13 +87,24 @@ func gcMarkRootPrepare() {
 	//
 	// We depend on addfinalizer to mark objects that get
 	// finalizers after root marking.
-	//
-	// We're only interested in scanning the in-use spans,
-	// which will all be swept at this point. More spans
-	// may be added to this list during concurrent GC, but
-	// we only care about spans that were allocated before
-	// this mark phase.
-	work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+	if go115NewMarkrootSpans {
+		// We're going to scan the whole heap (that was available at the time the
+		// mark phase started, i.e. markArenas) for in-use spans which have specials.
+		//
+		// Break up the work into arenas, and further into chunks.
+		//
+		// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
+		// is append-only.
+		mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
+		work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
+	} else {
+		// We're only interested in scanning the in-use spans,
+		// which will all be swept at this point. More spans
+		// may be added to this list during concurrent GC, but
+		// we only care about spans that were allocated before
+		// this mark phase.
+		work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+	}
 
 	// Scan stacks.
 	//
@@ -293,10 +312,96 @@ func markrootFreeGStacks() {
 	unlock(&sched.gFree.lock)
 }
 
-// markrootSpans marks roots for one shard of work.spans.
+// markrootSpans marks roots for one shard of markArenas.
 //
 //go:nowritebarrier
 func markrootSpans(gcw *gcWork, shard int) {
+	if !go115NewMarkrootSpans {
+		oldMarkrootSpans(gcw, shard)
+		return
+	}
+	// Objects with finalizers have two GC-related invariants:
+	//
+	// 1) Everything reachable from the object must be marked.
+	// This ensures that when we pass the object to its finalizer,
+	// everything the finalizer can reach will be retained.
+	//
+	// 2) Finalizer specials (which are not in the garbage
+	// collected heap) are roots. In practice, this means the fn
+	// field must be scanned.
+	sg := mheap_.sweepgen
+
+	// Find the arena and page index into that arena for this shard.
+	ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
+	ha := mheap_.arenas[ai.l1()][ai.l2()]
+	arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
+
+	// Construct slice of bitmap which we'll iterate over.
+	specialsbits := ha.pageSpecials[arenaPage/8:]
+	specialsbits = specialsbits[:pagesPerSpanRoot/8]
+	for i := range specialsbits {
+		// Find set bits, which correspond to spans with specials.
+		specials := atomic.Load8(&specialsbits[i])
+		if specials == 0 {
+			continue
+		}
+		for j := uint(0); j < 8; j++ {
+			if specials&(1<
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -755,7 +775,7 @@ func (h *mheap) reclaim(npage uintptr) {
 		}
 
 		// Claim a chunk of work.
-		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
 		if idx/pagesPerArena >= uintptr(len(arenas)) {
 			// Page reclaiming is done.
 			atomic.Store64(&h.reclaimIndex, 1<<63)
@@ -769,7 +789,7 @@ func (h *mheap) reclaim(npage uintptr) {
 		}
 
 		// Scan this chunk.
-		nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+		nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
 		if nfound <= npage {
 			npage -= nfound
 		} else {
@@ -1593,6 +1613,22 @@ type special struct {
 	kind   byte // kind of special
 }
 
+// spanHasSpecials marks a span as having specials in the arena bitmap.
+func spanHasSpecials(s *mspan) {
+	arenaPage := (s.base() / pageSize) % pagesPerArena
+	ai := arenaIndex(s.base())
+	ha := mheap_.arenas[ai.l1()][ai.l2()]
+	atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
+}
+
+// spanHasNoSpecials marks a span as having no specials in the arena bitmap.
+func spanHasNoSpecials(s *mspan) {
+	arenaPage := (s.base() / pageSize) % pagesPerArena
+	ai := arenaIndex(s.base())
+	ha := mheap_.arenas[ai.l1()][ai.l2()]
+	atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
+}
+
 // Adds the special record s to the list of special records for
 // the object p. All fields of s should be filled in except for
 // offset & next, which this routine will fill in.
@@ -1638,6 +1674,9 @@ func addspecial(p unsafe.Pointer, s *special) bool {
 	s.offset = uint16(offset)
 	s.next = *t
 	*t = s
+	if go115NewMarkrootSpans {
+		spanHasSpecials(span)
+	}
 	unlock(&span.speciallock)
 	releasem(mp)
 
@@ -1661,6 +1700,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
 	offset := uintptr(p) - span.base()
 
+	var result *special
 	lock(&span.speciallock)
 	t := &span.specials
 	for {
@@ -1672,15 +1712,17 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
 		// "interior" specials (p must be exactly equal to s->offset).
 		if offset == uintptr(s.offset) && kind == s.kind {
 			*t = s.next
-			unlock(&span.speciallock)
-			releasem(mp)
-			return s
+			result = s
+			break
 		}
 		t = &s.next
 	}
+	if go115NewMarkrootSpans && span.specials == nil {
+		spanHasNoSpecials(span)
+	}
 	unlock(&span.speciallock)
 	releasem(mp)
-	return nil
+	return result
 }
 
 // The described object has a finalizer set for it.
-- 
2.50.0
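
Illustrative sketch (not part of the patch): the following standalone Go
program mimics, in ordinary user-level code, the bookkeeping that
spanHasSpecials, spanHasNoSpecials, and the new markrootSpans perform on
heapArena.pageSpecials: one bit per page, set when the span whose first page
it is carries specials, and sharded into pagesPerSpanRoot-page chunks for
marking. The names arena, setHasSpecials, clearHasSpecials, and scanShard
are invented for the example, pagesPerArena = 8192 is an assumption (64 MB
arenas with 8 KB pages), and the runtime's atomic Or8/And8 updates are
replaced by plain bit operations in this single-threaded sketch.

// sketch.go: per-page "has specials" bitmap, sharded for marking.
package main

import "fmt"

const (
	pagesPerArena    = 8192 // assumed: 64 MB arenas with 8 KB pages
	pagesPerSpanRoot = 512  // pages scanned per span root, as in the patch
)

// arena stands in for heapArena; pageSpecials holds one bit per page.
type arena struct {
	pageSpecials [pagesPerArena / 8]uint8
}

// setHasSpecials plays the role of spanHasSpecials for a span whose
// first page within the arena is arenaPage.
func (a *arena) setHasSpecials(arenaPage uint) {
	a.pageSpecials[arenaPage/8] |= 1 << (arenaPage % 8)
}

// clearHasSpecials plays the role of spanHasNoSpecials.
func (a *arena) clearHasSpecials(arenaPage uint) {
	a.pageSpecials[arenaPage/8] &^= 1 << (arenaPage % 8)
}

// scanShard visits every marked page in one pagesPerSpanRoot-page shard,
// the way markrootSpans walks its slice of the bitmap: whole zero bytes
// (8 pages at a time) are skipped without touching any span.
func (a *arena) scanShard(shard int, visit func(arenaPage uint)) {
	start := uint(shard) * pagesPerSpanRoot % pagesPerArena
	bits := a.pageSpecials[start/8 : start/8+pagesPerSpanRoot/8]
	for i, b := range bits {
		if b == 0 {
			continue // no specials anywhere in these 8 pages
		}
		for j := uint(0); j < 8; j++ {
			if b&(1<<j) != 0 {
				visit(start + uint(i)*8 + j)
			}
		}
	}
}

func main() {
	var a arena
	a.setHasSpecials(3)   // e.g. addspecial on a span starting at page 3
	a.setHasSpecials(700) // and on another span starting at page 700
	a.clearHasSpecials(3) // e.g. removespecial dropped the last special

	// Shard 0 covers pages 0-511: nothing left to visit.
	a.scanShard(0, func(p uint) { fmt.Println("shard 0: specials on page", p) })
	// Shard 1 covers pages 512-1023: finds page 700.
	a.scanShard(1, func(p uint) { fmt.Println("shard 1: specials on page", p) })
}

The byte-at-a-time skip is what makes the whole-heap iteration cheap in
practice, as the commit message argues: shards whose bitmap bytes are all
zero are dismissed without ever looking at an individual span.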