From: Michael Anthony Knyszek
Date: Fri, 23 Sep 2022 16:32:34 +0000 (+0000)
Subject: runtime: manage huge pages explicitly
X-Git-Tag: go1.21rc1~846
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8fa9e3beee8b0e6baa7333740996181268b60a3a;p=gostls13.git

runtime: manage huge pages explicitly

This change makes it so that on Linux the Go runtime explicitly marks
page heap memory as either available to be backed by hugepages or not,
using heuristics based on density.

The motivation behind this change is twofold:

1. In default Linux configurations, khugepaged can recoalesce hugepages
   even after the scavenger breaks them up, resulting in significant
   overheads for programs with small heaps when those heaps shrink.
2. The Go runtime already has some heuristics about this, but those
   heuristics appear to have bit-rotted and result in haphazard
   hugepage management. Unlucky (but otherwise fairly dense) regions of
   memory end up not backed by huge pages, while sparse regions end up
   accidentally marked MADV_HUGEPAGE and are not later broken up by the
   scavenger, because it already got the memory it needed from denser
   sections (this is more likely to happen with small heaps that go
   idle).

In this change, the runtime uses a new policy:

1. Mark all new memory MADV_HUGEPAGE.
2. Track whether each page chunk (4 MiB) became dense during the GC
   cycle. Mark those chunks MADV_HUGEPAGE and hide them from the
   scavenger.
3. If a chunk is not dense for 1 full GC cycle, make it visible to the
   scavenger.
4. The scavenger marks a chunk MADV_NOHUGEPAGE before it scavenges it.

This policy is intended to back memory that is a good candidate for
huge pages (high occupancy) with huge pages, and to give memory that is
not (low occupancy) to the scavenger. Occupancy is defined not just by
occupancy at any instant of time, but also by occupancy in the near
future. It's generally true that by the end of a GC cycle the heap gets
quite dense (from the perspective of the page allocator).

Because we want scavenging and huge page management to happen together
(the right time to MADV_NOHUGEPAGE is just before scavenging, in order
to break up huge pages and keep them that way), and because the cost of
applying MADV_HUGEPAGE and MADV_NOHUGEPAGE is somewhat high, the
scavenger avoids releasing memory in dense page chunks. All of this
together means the scavenger will now generally release memory on a
~1 GC cycle delay.

Notably, this has implications for scavenging to maintain the memory
limit and for the runtime/debug.FreeOSMemory API. This change makes it
so that in these cases all memory is visible to the scavenger
regardless of sparseness, and the page allocator delays re-marking this
memory with MADV_HUGEPAGE for around 1 GC cycle to mitigate churn.

The end result of this change should be little-to-no performance
difference for dense heaps (MADV_HUGEPAGE works a lot like the default
unmarked state), but it should allow the scavenger to more effectively
take back fragments of huge pages.

The main risk here is churn, because MADV_HUGEPAGE usually forces the
kernel to immediately back memory with a huge page. That's the reason
for the large amount of hysteresis (1 full GC cycle) and why the
definition of high density is 96% occupancy.

Fixes #55328.
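
For illustration, the density policy described above boils down to a few
dozen lines. The sketch below is not the runtime's implementation:
chunkState, observe, dense, and scavengeable are invented names, and the
thresholds are simplified. It only shows how tracking per-chunk occupancy
across two GC generations yields the behavior in points 2-4 (dense chunks
stay hugepage-backed and hidden; chunks that stay sparse for a full cycle
become scavengeable, and the scavenger would apply MADV_NOHUGEPAGE just
before releasing their pages).

	package main

	import "fmt"

	const (
		chunkPages  = 512                   // pages per 4 MiB chunk (8 KiB pages)
		hiOccupancy = chunkPages * 96 / 100 // the ~96% density threshold
	)

	// chunkState is the per-chunk bookkeeping: current occupancy, the
	// occupancy carried over from the previous GC generation, and the
	// generation in which the chunk was last touched.
	type chunkState struct {
		inUse     int
		lastInUse int
		gen       uint32
	}

	// observe records that n pages were allocated (n > 0) or freed (n < 0)
	// in generation g, rolling inUse into lastInUse on a generation change.
	func (c *chunkState) observe(n int, g uint32) {
		if c.gen != g {
			c.lastInUse, c.gen = c.inUse, g
		}
		c.inUse += n
	}

	// dense reports whether the chunk currently qualifies for MADV_HUGEPAGE.
	func (c *chunkState) dense() bool {
		return c.inUse >= hiOccupancy
	}

	// scavengeable reports whether the scavenger may release pages from the
	// chunk in generation g: the chunk must not have been dense in this
	// cycle or the previous one.
	func (c *chunkState) scavengeable(g uint32) bool {
		if c.gen == g {
			return c.inUse < hiOccupancy && c.lastInUse < hiOccupancy
		}
		// No activity since an older generation: inUse is still current.
		return c.inUse < hiOccupancy
	}

	func main() {
		var c chunkState
		c.observe(500, 1)  // chunk becomes dense in generation 1
		c.observe(-450, 2) // most of it is freed in generation 2
		fmt.Println(c.dense())         // false
		fmt.Println(c.scavengeable(2)) // false: it was dense last cycle
		fmt.Println(c.scavengeable(3)) // true: not dense for a full cycle
	}

In the actual change, this state lives in scavChunkData (mgcscavenge.go,
below) and is updated under the pageAlloc lock by the index's alloc and
free hooks.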
Change-Id: I8da7998f1a31b498a9cc9bc662c1ae1a6bf64630 Reviewed-on: https://go-review.googlesource.com/c/go/+/436395 Reviewed-by: Michael Pratt Run-TryBot: Michael Knyszek TryBot-Result: Gopher Robot --- diff --git a/src/runtime/debug/garbage_test.go b/src/runtime/debug/garbage_test.go index 7213bbe641..cd91782d27 100644 --- a/src/runtime/debug/garbage_test.go +++ b/src/runtime/debug/garbage_test.go @@ -146,7 +146,7 @@ func TestFreeOSMemory(t *testing.T) { return } if after.HeapReleased-before.HeapReleased < bigBytes-slack { - t.Fatalf("less than %d released: %d -> %d", bigBytes, before.HeapReleased, after.HeapReleased) + t.Fatalf("less than %d released: %d -> %d", bigBytes-slack, before.HeapReleased, after.HeapReleased) } } diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 498c63f5b6..1045d510ef 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -831,7 +831,7 @@ func (p *PageAlloc) Free(base, npages uintptr) { // None of the tests need any higher-level locking, so we just // take the lock internally. lock(pp.mheapLock) - pp.free(base, npages, true) + pp.free(base, npages) unlock(pp.mheapLock) }) } @@ -841,7 +841,7 @@ func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { func (p *PageAlloc) Scavenge(nbytes uintptr) (r uintptr) { pp := (*pageAlloc)(p) systemstack(func() { - r = pp.scavenge(nbytes, nil) + r = pp.scavenge(nbytes, nil, true) }) return } @@ -995,9 +995,8 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { p := new(pageAlloc) // We've got an entry, so initialize the pageAlloc. - p.init(new(mutex), testSysStat) + p.init(new(mutex), testSysStat, true) lockInit(p.mheapLock, lockRankMheap) - p.test = true for i, init := range chunks { addr := chunkBase(chunkIdx(i)) @@ -1009,11 +1008,18 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { }) // Initialize the bitmap and update pageAlloc metadata. - chunk := p.chunkOf(chunkIndex(addr)) + ci := chunkIndex(addr) + chunk := p.chunkOf(ci) // Clear all the scavenged bits which grow set. chunk.scavenged.clearRange(0, pallocChunkPages) + // Simulate the allocation and subsequent free of all pages in + // the chunk for the scavenge index. This sets the state equivalent + // with all pages within the index being free. + p.scav.index.alloc(ci, pallocChunkPages) + p.scav.index.free(ci, 0, pallocChunkPages) + // Apply scavenge state if applicable. if scav != nil { if scvg, ok := scav[i]; ok { @@ -1033,19 +1039,10 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { // it and it's a no-op anyway. if s.N != 0 { chunk.allocRange(s.I, s.N) - } - } - // Make sure the scavenge index is updated. - // - // This is an inefficient way to do it, but it's also the simplest way. - minPages := physPageSize / pageSize - if minPages < 1 { - minPages = 1 - } - _, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, minPages) - if npages != 0 { - p.scav.index.mark(addr, addr+pallocChunkBytes) + // Make sure the scavenge index is updated. + p.scav.index.alloc(ci, s.N) + } } // Update heap metadata for the allocRange calls above. @@ -1070,8 +1067,6 @@ func FreePageAlloc(pp *PageAlloc) { for l := 0; l < summaryLevels; l++ { sysFreeOS(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes) } - // Only necessary on 64-bit. This is a global on 32-bit. 
- sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks))) } else { resSize := uintptr(0) for _, s := range p.summary { @@ -1080,6 +1075,9 @@ func FreePageAlloc(pp *PageAlloc) { sysFreeOS(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize)) } + // Free extra data structures. + sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks))*unsafe.Sizeof(atomicScavChunkData{})) + // Subtract back out whatever we mapped for the summaries. // sysUsed adds to p.sysStat and memstats.mappedReady no matter what // (and in anger should actually be accounted for), and there's no other @@ -1629,23 +1627,96 @@ type ScavengeIndex struct { func NewScavengeIndex(min, max ChunkIdx) *ScavengeIndex { s := new(ScavengeIndex) - s.i.chunks = make([]atomic.Uint8, uintptr(1<= end { return r, 0 @@ -639,17 +665,17 @@ func bgscavenge(c chan int) { // scavenge scavenges nbytes worth of free pages, starting with the // highest address first. Successive calls continue from where it left -// off until the heap is exhausted. Call scavengeStartGen to bring it -// back to the top of the heap. +// off until the heap is exhausted. force makes all memory available to +// scavenge, ignoring huge page heuristics. // // Returns the amount of memory scavenged in bytes. // // scavenge always tries to scavenge nbytes worth of memory, and will // only fail to do so if the heap is exhausted for now. -func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool) uintptr { +func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool, force bool) uintptr { released := uintptr(0) for released < nbytes { - ci, pageIdx := p.scav.index.find() + ci, pageIdx := p.scav.index.find(force) if ci == 0 { break } @@ -737,10 +763,14 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt // Mark the range we're about to scavenge as allocated, because // we don't want any allocating goroutines to grab it while - // the scavenging is in progress. - if scav := p.allocRange(addr, uintptr(npages)); scav != 0 { - throw("double scavenge") - } + // the scavenging is in progress. Be careful here -- just do the + // bare minimum to avoid stepping on our own scavenging stats. + p.chunkOf(ci).allocRange(base, npages) + p.update(addr, uintptr(npages), true, true) + + // Grab whether the chunk is hugepage backed and if it is, + // clear it. We're about to break up this huge page. + shouldNoHugePage := p.scav.index.setNoHugePage(ci) // With that done, it's safe to unlock. unlock(p.mheapLock) @@ -748,13 +778,16 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt if !p.test { pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages)) - // Only perform the actual scavenging if we're not in a test. + // Only perform sys* operations if we're not in a test. // It's dangerous to do so otherwise. + if shouldNoHugePage { + sysNoHugePage(unsafe.Pointer(chunkBase(ci)), pallocChunkBytes) + } sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) // Update global accounting only when not in test, otherwise // the runtime's accounting will be wrong. - nbytes := int64(npages) * pageSize + nbytes := int64(npages * pageSize) gcController.heapReleased.add(nbytes) gcController.heapFree.add(-nbytes) @@ -767,7 +800,11 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt // Relock the heap, because now we need to make these pages // available allocation. Free them back to the page allocator. 
lock(p.mheapLock) - p.free(addr, uintptr(npages), true) + if b := (offAddr{addr}); b.lessThan(p.searchAddr) { + p.searchAddr = b + } + p.chunkOf(ci).free(base, npages) + p.update(addr, uintptr(npages), true, false) // Mark the range as scavenged. p.chunkOf(ci).scavenged.setRange(base, npages) @@ -777,7 +814,7 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt } } // Mark this chunk as having no free pages. - p.scav.index.clear(ci) + p.scav.index.setEmpty(ci) unlock(p.mheapLock) return 0 @@ -965,27 +1002,33 @@ func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (ui // scavengeIndex is a structure for efficiently managing which pageAlloc chunks have // memory available to scavenge. type scavengeIndex struct { - // chunks is a bitmap representing the entire address space. Each bit represents - // a single chunk, and a 1 value indicates the presence of pages available for - // scavenging. Updates to the bitmap are serialized by the pageAlloc lock. + // chunks is a scavChunkData-per-chunk structure that indicates the presence of pages + // available for scavenging. Updates to the index are serialized by the pageAlloc lock. // - // The underlying storage of chunks is platform dependent and may not even be - // totally mapped read/write. min and max reflect the extent that is safe to access. - // min is inclusive, max is exclusive. + // It tracks chunk occupancy and a generation counter per chunk. If a chunk's occupancy + // never exceeds pallocChunkDensePages over the course of a single GC cycle, the chunk + // becomes eligible for scavenging on the next cycle. If a chunk ever hits this density + // threshold it immediately becomes unavailable for scavenging in the current cycle as + // well as the next. // - // searchAddr is the maximum address (in the offset address space, so we have a linear + // For a chunk size of 4 MiB this structure will only use 2 MiB for a 1 TiB contiguous heap. + chunks []atomicScavChunkData + min, max atomic.Uintptr + + // searchAddr* is the maximum address (in the offset address space, so we have a linear // view of the address space; see mranges.go:offAddr) containing memory available to // scavenge. It is a hint to the find operation to avoid O(n^2) behavior in repeated lookups. // - // searchAddr is always inclusive and should be the base address of the highest runtime + // searchAddr* is always inclusive and should be the base address of the highest runtime // page available for scavenging. // - // searchAddr is managed by both find and mark. + // searchAddrForce is managed by find and free. + // searchAddrBg is managed by find and nextGen. // - // Normally, find monotonically decreases searchAddr as it finds no more free pages to + // Normally, find monotonically decreases searchAddr* as it finds no more free pages to // scavenge. However, mark, when marking a new chunk at an index greater than the current // searchAddr, sets searchAddr to the *negative* index into chunks of that page. The trick here - // is that concurrent calls to find will fail to monotonically decrease searchAddr, and so they + // is that concurrent calls to find will fail to monotonically decrease searchAddr*, and so they // won't barge over new memory becoming available to scavenge. Furthermore, this ensures // that some future caller of find *must* observe the new high index. 
That caller // (or any other racing with it), then makes searchAddr positive before continuing, bringing @@ -994,47 +1037,52 @@ type scavengeIndex struct { // A pageAlloc lock serializes updates between min, max, and searchAddr, so abs(searchAddr) // is always guaranteed to be >= min and < max (converted to heap addresses). // - // TODO(mknyszek): Ideally we would use something bigger than a uint8 for faster - // iteration like uint32, but we lack the bit twiddling intrinsics. We'd need to either - // copy them from math/bits or fix the fact that we can't import math/bits' code from - // the runtime due to compiler instrumentation. - searchAddr atomicOffAddr - chunks []atomic.Uint8 - minHeapIdx atomic.Int32 - min, max atomic.Int32 + // searchAddrBg is increased only on each new generation and is mainly used by the + // background scavenger and heap-growth scavenging. searchAddrForce is increased continuously + // as memory gets freed and is mainly used by eager memory reclaim such as debug.FreeOSMemory + // and scavenging to maintain the memory limit. + searchAddrBg atomicOffAddr + searchAddrForce atomicOffAddr + + // freeHWM is the highest address (in offset address space) that was freed + // this generation. + freeHWM offAddr + + // Generation counter. Updated by nextGen at the end of each mark phase. + gen uint32 + + // test indicates whether or not we're in a test. + test bool } // find returns the highest chunk index that may contain pages available to scavenge. // It also returns an offset to start searching in the highest chunk. -func (s *scavengeIndex) find() (chunkIdx, uint) { - searchAddr, marked := s.searchAddr.Load() +func (s *scavengeIndex) find(force bool) (chunkIdx, uint) { + cursor := &s.searchAddrBg + if force { + cursor = &s.searchAddrForce + } + searchAddr, marked := cursor.Load() if searchAddr == minOffAddr.addr() { // We got a cleared search addr. return 0, 0 } - // Starting from searchAddr's chunk, and moving down to minHeapIdx, - // iterate until we find a chunk with pages to scavenge. - min := s.minHeapIdx.Load() - searchChunk := chunkIndex(uintptr(searchAddr)) - start := int32(searchChunk / 8) + // Starting from searchAddr's chunk, iterate until we find a chunk with pages to scavenge. + gen := s.gen + min := chunkIdx(s.min.Load()) + start := chunkIndex(uintptr(searchAddr)) for i := start; i >= min; i-- { - // Skip over irrelevant address space. - chunks := s.chunks[i].Load() - if chunks == 0 { + // Skip over chunks. + if !s.chunks[i].load().shouldScavenge(gen, force) { continue } - // Note that we can't have 8 leading zeroes here because - // we necessarily skipped that case. So, what's left is - // an index. If there are no zeroes, we want the 7th - // index, if 1 zero, the 6th, and so on. - n := 7 - sys.LeadingZeros8(chunks) - ci := chunkIdx(uint(i)*8 + uint(n)) - if searchChunk == ci { - return ci, chunkPageIndex(uintptr(searchAddr)) + // We're still scavenging this chunk. + if i == start { + return i, chunkPageIndex(uintptr(searchAddr)) } // Try to reduce searchAddr to newSearchAddr. - newSearchAddr := chunkBase(ci) + pallocChunkBytes - pageSize + newSearchAddr := chunkBase(i) + pallocChunkBytes - pageSize if marked { // Attempt to be the first one to decrease the searchAddr // after an increase. If we fail, that means there was another @@ -1042,78 +1090,273 @@ func (s *scavengeIndex) find() (chunkIdx, uint) { // it doesn't matter. We may lose some performance having an // incorrect search address, but it's far more important that // we don't miss updates. 
- s.searchAddr.StoreUnmark(searchAddr, newSearchAddr) + cursor.StoreUnmark(searchAddr, newSearchAddr) } else { // Decrease searchAddr. - s.searchAddr.StoreMin(newSearchAddr) + cursor.StoreMin(newSearchAddr) } - return ci, pallocChunkPages - 1 + return i, pallocChunkPages - 1 } // Clear searchAddr, because we've exhausted the heap. - s.searchAddr.Clear() + cursor.Clear() return 0, 0 } -// mark sets the inclusive range of chunks between indices start and end as -// containing pages available to scavenge. +// alloc updates metadata for chunk at index ci with the fact that +// an allocation of npages occurred. // -// Must be serialized with other mark, markRange, and clear calls. -func (s *scavengeIndex) mark(base, limit uintptr) { - start, end := chunkIndex(base), chunkIndex(limit-pageSize) - if start == end { - // Within a chunk. - mask := uint8(1 << (start % 8)) - s.chunks[start/8].Or(mask) - } else if start/8 == end/8 { - // Within the same byte in the index. - mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8) - s.chunks[start/8].Or(mask) - } else { - // Crosses multiple bytes in the index. - startAligned := chunkIdx(alignUp(uintptr(start), 8)) - endAligned := chunkIdx(alignDown(uintptr(end), 8)) - - // Do the end of the first byte first. - if width := startAligned - start; width > 0 { - mask := uint8(uint16(1< 0 { - mask := uint8(uint16(1< scavChunkHiOccPages { + // Mark dense chunks as specifically backed by huge pages. + sc.setHugePage() + if !s.test { + sysHugePage(unsafe.Pointer(chunkBase(ci)), pallocChunkBytes) } } - newSearchAddr := limit - pageSize - searchAddr, _ := s.searchAddr.Load() - // N.B. Because mark is serialized, it's not necessary to do a - // full CAS here. mark only ever increases searchAddr, while + s.chunks[ci].store(sc) +} + +// free updates metadata for chunk at index ci with the fact that +// a free of npages occurred. +// +// free may only run concurrently with find. +func (s *scavengeIndex) free(ci chunkIdx, page, npages uint) { + sc := s.chunks[ci].load() + sc.free(npages, s.gen) + s.chunks[ci].store(sc) + + // Update scavenge search addresses. + addr := chunkBase(ci) + uintptr(page+npages-1)*pageSize + if s.freeHWM.lessThan(offAddr{addr}) { + s.freeHWM = offAddr{addr} + } + // N.B. Because free is serialized, it's not necessary to do a + // full CAS here. free only ever increases searchAddr, while // find only ever decreases it. Since we only ever race with // decreases, even if the value we loaded is stale, the actual // value will never be larger. - if (offAddr{searchAddr}).lessThan(offAddr{newSearchAddr}) { - s.searchAddr.StoreMarked(newSearchAddr) + searchAddr, _ := s.searchAddrForce.Load() + if (offAddr{searchAddr}).lessThan(offAddr{addr}) { + s.searchAddrForce.StoreMarked(addr) + } +} + +// nextGen moves the scavenger forward one generation. Must be called +// once per GC cycle, but may be called more often to force more memory +// to be released. +// +// nextGen may only run concurrently with find. +func (s *scavengeIndex) nextGen() { + s.gen++ + searchAddr, _ := s.searchAddrBg.Load() + if (offAddr{searchAddr}).lessThan(s.freeHWM) { + s.searchAddrBg.StoreMarked(s.freeHWM.addr()) } + s.freeHWM = minOffAddr +} + +// setEmpty marks that the scavenger has finished looking at ci +// for now to prevent the scavenger from getting stuck looking +// at the same chunk. +// +// setEmpty may only run concurrently with find. 
+func (s *scavengeIndex) setEmpty(ci chunkIdx) { + val := s.chunks[ci].load() + val.setEmpty() + s.chunks[ci].store(val) } -// clear sets the chunk at index ci as not containing pages available to scavenge. +// setNoHugePage updates the backed-by-hugepages status of a particular chunk. +// Returns true if the set was successful (not already backed by huge pages). // -// Must be serialized with other mark, markRange, and clear calls. -func (s *scavengeIndex) clear(ci chunkIdx) { - s.chunks[ci/8].And(^uint8(1 << (ci % 8))) +// setNoHugePage may only run concurrently with find. +func (s *scavengeIndex) setNoHugePage(ci chunkIdx) bool { + val := s.chunks[ci].load() + if !val.isHugePage() { + return false + } + val.setNoHugePage() + s.chunks[ci].store(val) + return true +} + +// atomicScavChunkData is an atomic wrapper around a scavChunkData +// that stores it in its packed form. +type atomicScavChunkData struct { + value atomic.Uint64 +} + +// load loads and unpacks a scavChunkData. +func (sc *atomicScavChunkData) load() scavChunkData { + return unpackScavChunkData(sc.value.Load()) +} + +// store packs and writes a new scavChunkData. store must be serialized +// with other calls to store. +func (sc *atomicScavChunkData) store(ssc scavChunkData) { + sc.value.Store(ssc.pack()) +} + +// scavChunkData tracks information about a palloc chunk for +// scavenging. It packs well into 64 bits. +// +// The zero value always represents a valid newly-grown chunk. +type scavChunkData struct { + // inUse indicates how many pages in this chunk are currently + // allocated. + // + // Only the first 10 bits are used. + inUse uint16 + + // lastInUse indicates how many pages in this chunk were allocated + // when we transitioned from gen-1 to gen. + // + // Only the first 10 bits are used. + lastInUse uint16 + + // gen is the generation counter from a scavengeIndex from the + // last time this scavChunkData was updated. + gen uint32 + + // scavChunkFlags represents additional flags + // + // Note: only 6 bits are available. + scavChunkFlags +} + +// unpackScavChunkData unpacks a scavChunkData from a uint64. +func unpackScavChunkData(sc uint64) scavChunkData { + return scavChunkData{ + inUse: uint16(sc), + lastInUse: uint16(sc>>16) & scavChunkInUseMask, + gen: uint32(sc >> 32), + scavChunkFlags: scavChunkFlags(uint8(sc>>(16+logScavChunkInUseMax)) & scavChunkFlagsMask), + } +} + +// pack returns sc packed into a uint64. +func (sc scavChunkData) pack() uint64 { + return uint64(sc.inUse) | + (uint64(sc.lastInUse) << 16) | + (uint64(sc.scavChunkFlags) << (16 + logScavChunkInUseMax)) | + (uint64(sc.gen) << 32) +} + +const ( + // scavChunkHasFree indicates whether the chunk has anything left to + // scavenge. This is the opposite of "empty," used elsewhere in this + // file. The reason we say "HasFree" here is so the zero value is + // correct for a newly-grown chunk. (New memory is scavenged.) + scavChunkHasFree scavChunkFlags = 1 << iota + // scavChunkNoHugePage indicates whether this chunk has been marked + // sysNoHugePage. If not set, it means the chunk is marked sysHugePage. + // The negative here is unfortunate, but necessary to make it so that + // the zero value of scavChunkData accurately represents the state of + // a newly-grown chunk. (New memory is marked as backed by huge pages.) + scavChunkNoHugePage + + // scavChunkMaxFlags is the maximum number of flags we can have, given how + // a scavChunkData is packed into 8 bytes. 
+ scavChunkMaxFlags = 6 + scavChunkFlagsMask = (1 << scavChunkMaxFlags) - 1 + + // logScavChunkInUseMax is the number of bits needed to represent the number + // of pages allocated in a single chunk. This is 1 more than log2 of the + // number of pages in the chunk because we need to represent a fully-allocated + // chunk. + logScavChunkInUseMax = logPallocChunkPages + 1 + scavChunkInUseMask = (1 << logScavChunkInUseMax) - 1 +) + +// scavChunkFlags is a set of bit-flags for the scavenger for each palloc chunk. +type scavChunkFlags uint8 + +// isEmpty returns true if the hasFree flag is unset. +func (sc *scavChunkFlags) isEmpty() bool { + return (*sc)&scavChunkHasFree == 0 +} + +// setEmpty clears the hasFree flag. +func (sc *scavChunkFlags) setEmpty() { + *sc &^= scavChunkHasFree +} + +// setNonEmpty sets the hasFree flag. +func (sc *scavChunkFlags) setNonEmpty() { + *sc |= scavChunkHasFree +} + +// isHugePage returns false if the noHugePage flag is set. +func (sc *scavChunkFlags) isHugePage() bool { + return (*sc)&scavChunkNoHugePage == 0 +} + +// setHugePage clears the noHugePage flag. +func (sc *scavChunkFlags) setHugePage() { + *sc &^= scavChunkNoHugePage +} + +// setNoHugePage sets the noHugePage flag. +func (sc *scavChunkFlags) setNoHugePage() { + *sc |= scavChunkNoHugePage +} + +// shouldScavenge returns true if the corresponding chunk should be interrogated +// by the scavenger. +func (sc scavChunkData) shouldScavenge(currGen uint32, force bool) bool { + if sc.isEmpty() { + // Nothing to scavenge. + return false + } + if force { + // We're forcing the memory to be scavenged. + return true + } + if sc.gen == currGen { + // In the current generation, if either the current or last generation + // is dense, then skip scavenging. Inverting that, we should scavenge + // if both the current and last generation were not dense. + return sc.inUse < scavChunkHiOccPages && sc.lastInUse < scavChunkHiOccPages + } + // If we're one or more generations ahead, we know inUse represents the current + // state of the chunk, since otherwise it would've been updated already. + return sc.inUse < scavChunkHiOccPages +} + +// alloc updates sc given that npages were allocated in the corresponding chunk. +func (sc *scavChunkData) alloc(npages uint, newGen uint32) { + if uint(sc.inUse)+npages > pallocChunkPages { + print("runtime: inUse=", sc.inUse, " npages=", npages, "\n") + throw("too many pages allocated in chunk?") + } + if sc.gen != newGen { + sc.lastInUse = sc.inUse + sc.gen = newGen + } + sc.inUse += uint16(npages) + if sc.inUse == pallocChunkPages { + // There's nothing for the scavenger to take from here. + sc.setEmpty() + } +} + +// free updates sc given that npages was freed in the corresponding chunk. +func (sc *scavChunkData) free(npages uint, newGen uint32) { + if uint(sc.inUse) < npages { + print("runtime: inUse=", sc.inUse, " npages=", npages, "\n") + throw("allocated pages below zero?") + } + if sc.gen != newGen { + sc.lastInUse = sc.inUse + sc.gen = newGen + } + sc.inUse -= uint16(npages) + // The scavenger can no longer be done with this chunk now that + // new memory has been freed into it. 
+ sc.setNonEmpty() } type piController struct { diff --git a/src/runtime/mgcscavenge_test.go b/src/runtime/mgcscavenge_test.go index c436ff060f..d7624d6d72 100644 --- a/src/runtime/mgcscavenge_test.go +++ b/src/runtime/mgcscavenge_test.go @@ -564,149 +564,278 @@ func TestScavenger(t *testing.T) { } func TestScavengeIndex(t *testing.T) { - setup := func(t *testing.T) (func(ChunkIdx, uint), func(uintptr, uintptr)) { + // This test suite tests the scavengeIndex data structure. + + // markFunc is a function that makes the address range [base, limit) + // available for scavenging in a test index. + type markFunc func(base, limit uintptr) + + // findFunc is a function that searches for the next available page + // to scavenge in the index. It asserts that the page is found in + // chunk "ci" at page "offset." + type findFunc func(ci ChunkIdx, offset uint) + + // The structure of the tests below is as follows: + // + // setup creates a fake scavengeIndex that can be mutated and queried by + // the functions it returns. Those functions capture the testing.T that + // setup is called with, so they're bound to the subtest they're created in. + // + // Tests are then organized into test cases which mark some pages as + // scavenge-able then try to find them. Tests expect that the initial + // state of the scavengeIndex has all of the chunks as dense in the last + // generation and empty to the scavenger. + // + // There are a few additional tests that interleave mark and find operations, + // so they're defined separately, but use the same infrastructure. + setup := func(t *testing.T, force bool) (mark markFunc, find findFunc, nextGen func()) { t.Helper() // Pick some reasonable bounds. We don't need a huge range just to test. si := NewScavengeIndex(BaseChunkIdx, BaseChunkIdx+64) - find := func(want ChunkIdx, wantOffset uint) { + + // Initialize all the chunks as dense and empty. + // + // Also, reset search addresses so that we can get page offsets. + si.AllocRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+64, 0)) + si.NextGen() + si.FreeRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+64, 0)) + for ci := BaseChunkIdx; ci < BaseChunkIdx+64; ci++ { + si.SetEmpty(ci) + } + si.ResetSearchAddrs() + + // Create and return test functions. 
+ mark = func(base, limit uintptr) { t.Helper() - got, gotOffset := si.Find() + si.AllocRange(base, limit) + si.FreeRange(base, limit) + } + find = func(want ChunkIdx, wantOffset uint) { + t.Helper() + + got, gotOffset := si.Find(force) if want != got { t.Errorf("find: wanted chunk index %d, got %d", want, got) } - if want != got { + if wantOffset != gotOffset { t.Errorf("find: wanted page offset %d, got %d", wantOffset, gotOffset) } if t.Failed() { t.FailNow() } - si.Clear(got) + si.SetEmpty(got) } - mark := func(base, limit uintptr) { + nextGen = func() { t.Helper() - si.Mark(base, limit) + si.NextGen() } - return find, mark + return } - t.Run("Uninitialized", func(t *testing.T) { - find, _ := setup(t) - find(0, 0) - }) - t.Run("OnePage", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4)) - find(BaseChunkIdx, 3) - find(0, 0) - }) - t.Run("FirstPage", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1)) - find(BaseChunkIdx, 0) - find(0, 0) - }) - t.Run("SeveralPages", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14)) - find(BaseChunkIdx, 13) - find(0, 0) - }) - t.Run("WholeChunk", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) - find(BaseChunkIdx, PallocChunkPages-1) - find(0, 0) - }) - t.Run("LastPage", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0)) - find(BaseChunkIdx, PallocChunkPages-1) - find(0, 0) - }) - t.Run("TwoChunks", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128)) - find(BaseChunkIdx+1, 127) - find(BaseChunkIdx, PallocChunkPages-1) - find(0, 0) - }) - t.Run("TwoChunksOffset", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129)) - find(BaseChunkIdx+8, 128) - find(BaseChunkIdx+7, PallocChunkPages-1) - find(0, 0) - }) - t.Run("SevenChunksOffset", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15)) - find(BaseChunkIdx+13, 14) - for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- { - find(i, PallocChunkPages-1) - } - find(0, 0) - }) - t.Run("ThirtyTwoChunks", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) - for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { - find(i, PallocChunkPages-1) - } - find(0, 0) - }) - t.Run("ThirtyTwoChunksOffset", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0)) - for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- { - find(i, PallocChunkPages-1) - } - find(0, 0) - }) - t.Run("Mark", func(t *testing.T) { - find, mark := setup(t) + + // Each of these test cases calls mark and then find once. 
+ type testCase struct { + name string + mark func(markFunc) + find func(findFunc) + } + for _, test := range []testCase{ + { + name: "Uninitialized", + mark: func(_ markFunc) {}, + find: func(_ findFunc) {}, + }, + { + name: "OnePage", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4)) + }, + find: func(find findFunc) { + find(BaseChunkIdx, 3) + }, + }, + { + name: "FirstPage", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1)) + }, + find: func(find findFunc) { + find(BaseChunkIdx, 0) + }, + }, + { + name: "SeveralPages", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14)) + }, + find: func(find findFunc) { + find(BaseChunkIdx, 13) + }, + }, + { + name: "WholeChunk", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + }, + find: func(find findFunc) { + find(BaseChunkIdx, PallocChunkPages-1) + }, + }, + { + name: "LastPage", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0)) + }, + find: func(find findFunc) { + find(BaseChunkIdx, PallocChunkPages-1) + }, + }, + { + name: "TwoChunks", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128)) + }, + find: func(find findFunc) { + find(BaseChunkIdx+1, 127) + find(BaseChunkIdx, PallocChunkPages-1) + }, + }, + { + name: "TwoChunksOffset", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129)) + }, + find: func(find findFunc) { + find(BaseChunkIdx+8, 128) + find(BaseChunkIdx+7, PallocChunkPages-1) + }, + }, + { + name: "SevenChunksOffset", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15)) + }, + find: func(find findFunc) { + find(BaseChunkIdx+13, 14) + for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- { + find(i, PallocChunkPages-1) + } + }, + }, + { + name: "ThirtyTwoChunks", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + }, + find: func(find findFunc) { + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + }, + }, + { + name: "ThirtyTwoChunksOffset", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0)) + }, + find: func(find findFunc) { + for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- { + find(i, PallocChunkPages-1) + } + }, + }, + { + name: "Mark", + mark: func(mark markFunc) { + for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { + mark(PageBase(i, 0), PageBase(i+1, 0)) + } + }, + find: func(find findFunc) { + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + }, + }, + { + name: "MarkIdempotentOneChunk", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + }, + find: func(find findFunc) { + find(BaseChunkIdx, PallocChunkPages-1) + }, + }, + { + name: "MarkIdempotentThirtyTwoChunks", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + }, + find: func(find findFunc) { + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + }, + }, + { + name: "MarkIdempotentThirtyTwoChunksOffset", + mark: func(mark markFunc) { + mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0)) + 
mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0)) + }, + find: func(find findFunc) { + for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- { + find(i, PallocChunkPages-1) + } + }, + }, + } { + test := test + t.Run("Bg/"+test.name, func(t *testing.T) { + mark, find, nextGen := setup(t, false) + test.mark(mark) + find(0, 0) // Make sure we find nothing at this point. + nextGen() // Move to the next generation. + test.find(find) // Now we should be able to find things. + find(0, 0) // The test should always fully exhaust the index. + }) + t.Run("Force/"+test.name, func(t *testing.T) { + mark, find, _ := setup(t, true) + test.mark(mark) + test.find(find) // Finding should always work when forced. + find(0, 0) // The test should always fully exhaust the index. + }) + } + t.Run("Bg/MarkInterleaved", func(t *testing.T) { + mark, find, nextGen := setup(t, false) for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { mark(PageBase(i, 0), PageBase(i+1, 0)) - } - for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + nextGen() find(i, PallocChunkPages-1) } find(0, 0) }) - t.Run("MarkInterleaved", func(t *testing.T) { - find, mark := setup(t) + t.Run("Force/MarkInterleaved", func(t *testing.T) { + mark, find, _ := setup(t, true) for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { mark(PageBase(i, 0), PageBase(i+1, 0)) find(i, PallocChunkPages-1) } find(0, 0) }) - t.Run("MarkIdempotentOneChunk", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) - find(BaseChunkIdx, PallocChunkPages-1) - find(0, 0) - }) - t.Run("MarkIdempotentThirtyTwoChunks", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) - mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) - for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { - find(i, PallocChunkPages-1) - } - find(0, 0) - }) - t.Run("MarkIdempotentThirtyTwoChunksOffset", func(t *testing.T) { - find, mark := setup(t) - mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0)) - mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0)) - for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- { - find(i, PallocChunkPages-1) - } - find(0, 0) - }) +} + +func TestScavChunkDataPack(t *testing.T) { + if !CheckPackScavChunkData(1918237402, 512, 512, 0b11) { + t.Error("failed pack/unpack check for scavChunkData 1") + } + if !CheckPackScavChunkData(^uint32(0), 12, 0, 0b00) { + t.Error("failed pack/unpack check for scavChunkData 2") + } } func FuzzPIController(f *testing.F) { diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 773e27e646..febe519750 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -260,9 +260,11 @@ func finishsweep_m() { c.fullUnswept(sg).reset() } - // Sweeping is done, so if the scavenger isn't already awake, - // wake it up. There's definitely work for it to do at this - // point. + // Sweeping is done, so there won't be any new memory to + // scavenge for a bit. + // + // If the scavenger isn't already awake, wake it up. There's + // definitely work for it to do at this point. 
scavenger.wake() nextMarkBitArenaEpoch() diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 06592fe95b..ee005978fb 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -773,7 +773,7 @@ func (h *mheap) init() { h.central[i].mcentral.init(spanClass(i)) } - h.pages.init(&h.lock, &memstats.gcMiscSys) + h.pages.init(&h.lock, &memstats.gcMiscSys, false) } // reclaim sweeps and reclaims at least npage pages into the heap. @@ -1274,6 +1274,7 @@ HaveSpan: // pages not to get touched until we return. Simultaneously, it's important // to do this before calling sysUsed because that may commit address space. bytesToScavenge := uintptr(0) + forceScavenge := false if limit := gcController.memoryLimit.Load(); !gcCPULimiter.limiting() { // Assist with scavenging to maintain the memory limit by the amount // that we expect to page in. @@ -1282,6 +1283,7 @@ HaveSpan: // someone can set a really big memory limit that isn't maxInt64. if uint64(scav)+inuse > uint64(limit) { bytesToScavenge = uintptr(uint64(scav) + inuse - uint64(limit)) + forceScavenge = true } } if goal := scavenge.gcPercentGoal.Load(); goal != ^uint64(0) && growth > 0 { @@ -1323,7 +1325,7 @@ HaveSpan: // Scavenge, but back out if the limiter turns on. h.pages.scavenge(bytesToScavenge, func() bool { return gcCPULimiter.limiting() - }) + }, forceScavenge) // Finish up accounting. now = nanotime() @@ -1629,7 +1631,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { memstats.heapStats.release() // Mark the space as free. - h.pages.free(s.base(), s.npages, false) + h.pages.free(s.base(), s.npages) // Free the span structure. We no longer have a use for it. s.state.set(mSpanDead) @@ -1639,6 +1641,10 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // scavengeAll acquires the heap lock (blocking any additional // manipulation of the page allocator) and iterates over the whole // heap, scavenging every free page available. +// +// Must run on the system stack because it acquires the heap lock. +// +//go:systemstack func (h *mheap) scavengeAll() { // Disallow malloc or panic while holding the heap lock. We do // this here because this is a non-mallocgc entry-point to @@ -1646,7 +1652,8 @@ func (h *mheap) scavengeAll() { gp := getg() gp.m.mallocing++ - released := h.pages.scavenge(^uintptr(0), nil) + // Force scavenge everything. + released := h.pages.scavenge(^uintptr(0), nil, true) gp.m.mallocing-- diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 4f35cafc24..da1b14e5a4 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -257,11 +257,9 @@ type pageAlloc struct { // known by the page allocator to be currently in-use (passed // to grow). // - // This field is currently unused on 32-bit architectures but - // is harmless to track. We care much more about having a - // contiguous heap in these cases and take additional measures - // to ensure that, so in nearly all cases this should have just - // 1 element. + // We care much more about having a contiguous heap in these cases + // and take additional measures to ensure that, so in nearly all + // cases this should have just 1 element. // // All access is protected by the mheapLock. 
inUse addrRanges @@ -300,7 +298,7 @@ type pageAlloc struct { test bool } -func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) { +func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat, test bool) { if levelLogPages[0] > logMaxPackedValue { // We can't represent 1< 0 { need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex-1])) } @@ -188,17 +185,18 @@ func (p *pageAlloc) sysGrow(base, limit uintptr) { } // Update the scavenge index. - p.summaryMappedReady += p.scav.index.grow(base, limit, p.sysStat) + p.summaryMappedReady += p.scav.index.sysGrow(base, limit, p.sysStat) } -// grow increases the index's backing store in response to a heap growth. +// sysGrow increases the index's backing store in response to a heap growth. // // Returns the amount of memory added to sysStat. -func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr { +func (s *scavengeIndex) sysGrow(base, limit uintptr, sysStat *sysMemStat) uintptr { if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 { print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n") throw("sysGrow bounds not aligned to pallocChunkBytes") } + scSize := unsafe.Sizeof(atomicScavChunkData{}) // Map and commit the pieces of chunks that we need. // // We always map the full range of the minimum heap address to the @@ -212,24 +210,24 @@ func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr { // index. haveMin := s.min.Load() haveMax := s.max.Load() - needMin := int32(alignDown(uintptr(chunkIndex(base)/8), physPageSize)) - needMax := int32(alignUp(uintptr((chunkIndex(limit)+7)/8), physPageSize)) + needMin := alignDown(uintptr(chunkIndex(base)), physPageSize/scSize) + needMax := alignUp(uintptr(chunkIndex(limit)), physPageSize/scSize) // Extend the range down to what we have, if there's no overlap. if needMax < haveMin { needMax = haveMin } - if needMin > haveMax { + if haveMax != 0 && needMin > haveMax { needMin = haveMax } have := makeAddrRange( // Avoid a panic from indexing one past the last element. - uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin), - uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax), + uintptr(unsafe.Pointer(&s.chunks[0]))+haveMin*scSize, + uintptr(unsafe.Pointer(&s.chunks[0]))+haveMax*scSize, ) need := makeAddrRange( // Avoid a panic from indexing one past the last element. - uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin), - uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax), + uintptr(unsafe.Pointer(&s.chunks[0]))+needMin*scSize, + uintptr(unsafe.Pointer(&s.chunks[0]))+needMax*scSize, ) // Subtract any overlap from rounding. We can't re-map memory because // it'll be zeroed. @@ -247,11 +245,14 @@ func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr { s.max.Store(needMax) } } - // Update minHeapIdx. Note that even if there's no mapping work to do, - // we may still have a new, lower minimum heap address. - minHeapIdx := s.minHeapIdx.Load() - if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx { - s.minHeapIdx.Store(baseIdx) - } return need.size() } + +// sysInit initializes the scavengeIndex' chunks array. +func (s *scavengeIndex) sysInit() { + n := uintptr(1<
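
For reference, here is a standalone sketch of the 64-bit packing used by
scavChunkData above, assuming 512 pages per chunk. The constant and field
names are illustrative rather than the runtime's, and the example values
only loosely mirror those exercised by TestScavChunkDataPack.

	package main

	import "fmt"

	const (
		logChunkPages = 9                      // 512 pages per 4 MiB chunk
		logInUseMax   = logChunkPages + 1      // one extra bit to represent a full chunk (512)
		inUseMask     = (1 << logInUseMax) - 1 // low 10 bits
		flagsShift    = 16 + logInUseMax       // flags live just above lastInUse
		flagsMask     = (1 << 6) - 1           // 6 flag bits fit before the generation
	)

	// chunkData mirrors the unpacked form: two 10-bit occupancy counts, a
	// 32-bit generation, and a small set of flags.
	type chunkData struct {
		inUse, lastInUse uint16
		gen              uint32
		flags            uint8
	}

	// pack lays the fields out as inUse [0,16), lastInUse [16,26),
	// flags [26,32), gen [32,64), in the same order the diff uses.
	func pack(c chunkData) uint64 {
		return uint64(c.inUse) |
			uint64(c.lastInUse)<<16 |
			uint64(c.flags)<<flagsShift |
			uint64(c.gen)<<32
	}

	// unpack reverses pack.
	func unpack(v uint64) chunkData {
		return chunkData{
			inUse:     uint16(v) & inUseMask,
			lastInUse: uint16(v>>16) & inUseMask,
			flags:     uint8(v>>flagsShift) & flagsMask,
			gen:       uint32(v >> 32),
		}
	}

	func main() {
		c := chunkData{inUse: 12, lastInUse: 512, gen: 1918237402, flags: 0b11}
		if unpack(pack(c)) != c {
			panic("pack/unpack mismatch")
		}
		fmt.Printf("packed: %#016x\n", pack(c))
	}

Packing the whole record into one uint64 is what lets atomicScavChunkData
expose it through a single atomic.Uint64, so find can load a consistent
snapshot of a chunk's state while alloc, free, and the other serialized
writers update it.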