runtime: redesign scavenging algorithm

author Michael Anthony Knyszek <mknyszek@google.com>

Sun, 10 Apr 2022 20:34:17 +0000 (20:34 +0000)

committer Michael Knyszek <mknyszek@google.com>

Tue, 3 May 2022 15:13:53 +0000 (15:13 +0000)
author Michael Anthony Knyszek <mknyszek@google.com>
Sun, 10 Apr 2022 20:34:17 +0000 (20:34 +0000)
committer Michael Knyszek <mknyszek@google.com>
Tue, 3 May 2022 15:13:53 +0000 (15:13 +0000)
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go

index f1bdf93f46271561a1979373d81fe3c703f95b38..0d17ddfe30c9b054ad645cc6e210c65d9eafda6b 100644 (file)
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -968,7 +968,6 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
         p.init(new(mutex), testSysStat)
         lockInit(p.mheapLock, lockRankMheap)
         p.test = true
-
         for i, init := range chunks {
                 addr := chunkBase(chunkIdx(i))
  
@@ -1007,6 +1006,18 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
                         }
                 }
  
+               // Make sure the scavenge index is updated.
+               //
+               // This is an inefficient way to do it, but it's also the simplest way.
+               minPages := physPageSize / pageSize
+               if minPages < 1 {
+                       minPages = 1
+               }
+               _, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, minPages)
+               if npages != 0 {
+                       p.scav.index.mark(addr, addr+pallocChunkBytes)
+               }
+
                 // Update heap metadata for the allocRange calls above.
                 systemstack(func() {
                         lock(p.mheapLock)
@@ -1015,12 +1026,6 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
                 })
         }
  
-       systemstack(func() {
-               lock(p.mheapLock)
-               p.scavengeStartGen()
-               unlock(p.mheapLock)
-       })
-
         return (*PageAlloc)(p)
  }
  
@@ -1035,6 +1040,8 @@ func FreePageAlloc(pp *PageAlloc) {
                 for l := 0; l < summaryLevels; l++ {
                         sysFreeOS(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes)
                 }
+               // Only necessary on 64-bit. This is a global on 32-bit.
+               sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks)))
         } else {
                 resSize := uintptr(0)
                 for _, s := range p.summary {
@@ -1042,6 +1049,7 @@ func FreePageAlloc(pp *PageAlloc) {
                 }
                 sysFreeOS(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize))
         }
+
         // Subtract back out whatever we mapped for the summaries.
         // sysUsed adds to p.sysStat and memstats.mappedReady no matter what
         // (and in anger should actually be accounted for), and there's no other
@@ -1550,3 +1558,28 @@ func (s *Scavenger) Stop() {
         s.Wake()
         <-s.done
  }
+
+type ScavengeIndex struct {
+       i scavengeIndex
+}
+
+func NewScavengeIndex(min, max ChunkIdx) *ScavengeIndex {
+       s := new(ScavengeIndex)
+       s.i.chunks = make([]atomic.Uint8, uintptr(1<<heapAddrBits/pallocChunkBytes/8))
+       s.i.min.Store(int32(min / 8))
+       s.i.max.Store(int32(max / 8))
+       return s
+}
+
+func (s *ScavengeIndex) Find() (ChunkIdx, uint) {
+       ci, off := s.i.find()
+       return ChunkIdx(ci), off
+}
+
+func (s *ScavengeIndex) Mark(base, limit uintptr) {
+       s.i.mark(base, limit)
+}
+
+func (s *ScavengeIndex) Clear(ci ChunkIdx) {
+       s.i.clear(chunkIdx(ci))
+}
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go

index 9423db271b51011bee3fbbf09f31b15df15857f8..d22e6635f810189e6a3dc2a84146d3c831fcf9b3 100644 (file)
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -118,11 +118,6 @@ const (
         // This ratio is used as part of multiplicative factor to help the scavenger account
         // for the additional costs of using scavenged memory in its pacing.
         scavengeCostRatio = 0.7 * (goos.IsDarwin + goos.IsIos)
-
-       // scavengeReservationShards determines the amount of memory the scavenger
-       // should reserve for scavenging at a time. Specifically, the amount of
-       // memory reserved is (heap size in bytes) / scavengeReservationShards.
-       scavengeReservationShards = 64
  )
  
  // heapRetained returns an estimate of the current heap RSS.
@@ -642,26 +637,16 @@ func bgscavenge(c chan int) {
  // scavenge always tries to scavenge nbytes worth of memory, and will
  // only fail to do so if the heap is exhausted for now.
  func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
-       var (
-               addrs addrRange
-               gen   uint32
-       )
         released := uintptr(0)
         for released < nbytes {
-               if addrs.size() == 0 {
-                       if addrs, gen = p.scavengeReserve(); addrs.size() == 0 {
-                               break
-                       }
+               ci, pageIdx := p.scav.index.find()
+               if ci == 0 {
+                       break
                 }
                 systemstack(func() {
-                       r, a := p.scavengeOne(addrs, nbytes-released)
-                       released += r
-                       addrs = a
+                       released += p.scavengeOne(ci, pageIdx, nbytes-released)
                 })
         }
-       // Only unreserve the space which hasn't been scavenged or searched
-       // to ensure we always make progress.
-       p.scavengeUnreserve(addrs, gen)
         return released
  }
  
@@ -672,11 +657,11 @@ func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
  // application.
  //
  // scavenger.lock must be held.
-func printScavTrace(gen uint32, released uintptr, forced bool) {
+func printScavTrace(released uintptr, forced bool) {
         assertLockHeld(&scavenger.lock)
  
         printlock()
-       print("scav ", gen, " ",
+       print("scav ",
                 released>>10, " KiB work, ",
                 gcController.heapReleased.load()>>10, " KiB total, ",
                 (gcController.heapInUse.load()*100)/heapRetained(), "% util",
@@ -691,130 +676,20 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
         printunlock()
  }
  
-// scavengeStartGen starts a new scavenge generation, resetting
-// the scavenger's search space to the full in-use address space.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
-func (p *pageAlloc) scavengeStartGen() {
-       assertLockHeld(p.mheapLock)
-
-       lock(&p.scav.lock)
-       if debug.scavtrace > 0 {
-               printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false)
-       }
-       p.inUse.cloneInto(&p.scav.inUse)
-
-       // Pick the new starting address for the scavenger cycle.
-       var startAddr offAddr
-       if p.scav.scavLWM.lessThan(p.scav.freeHWM) {
-               // The "free" high watermark exceeds the "scavenged" low watermark,
-               // so there are free scavengable pages in parts of the address space
-               // that the scavenger already searched, the high watermark being the
-               // highest one. Pick that as our new starting point to ensure we
-               // see those pages.
-               startAddr = p.scav.freeHWM
-       } else {
-               // The "free" high watermark does not exceed the "scavenged" low
-               // watermark. This means the allocator didn't free any memory in
-               // the range we scavenged last cycle, so we might as well continue
-               // scavenging from where we were.
-               startAddr = p.scav.scavLWM
-       }
-       p.scav.inUse.removeGreaterEqual(startAddr.addr())
-
-       // reservationBytes may be zero if p.inUse.totalBytes is small, or if
-       // scavengeReservationShards is large. This case is fine as the scavenger
-       // will simply be turned off, but it does mean that scavengeReservationShards,
-       // in concert with pallocChunkBytes, dictates the minimum heap size at which
-       // the scavenger triggers. In practice this minimum is generally less than an
-       // arena in size, so virtually every heap has the scavenger on.
-       p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
-       p.scav.gen++
-       atomic.Storeuintptr(&p.scav.released, 0)
-       p.scav.freeHWM = minOffAddr
-       p.scav.scavLWM = maxOffAddr
-       unlock(&p.scav.lock)
-}
-
-// scavengeReserve reserves a contiguous range of the address space
-// for scavenging. The maximum amount of space it reserves is proportional
-// to the size of the heap. The ranges are reserved from the high addresses
-// first.
-//
-// Returns the reserved range and the scavenge generation number for it.
-func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
-       lock(&p.scav.lock)
-       gen := p.scav.gen
-
-       // Start by reserving the minimum.
-       r := p.scav.inUse.removeLast(p.scav.reservationBytes)
-
-       // Return early if the size is zero; we don't want to use
-       // the bogus address below.
-       if r.size() == 0 {
-               unlock(&p.scav.lock)
-               return r, gen
-       }
-
-       // The scavenger requires that base be aligned to a
-       // palloc chunk because that's the unit of operation for
-       // the scavenger, so align down, potentially extending
-       // the range.
-       newBase := alignDown(r.base.addr(), pallocChunkBytes)
-
-       // Remove from inUse however much extra we just pulled out.
-       p.scav.inUse.removeGreaterEqual(newBase)
-       unlock(&p.scav.lock)
-
-       r.base = offAddr{newBase}
-       return r, gen
-}
-
-// scavengeUnreserve returns an unscavenged portion of a range that was
-// previously reserved with scavengeReserve.
-func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
-       if r.size() == 0 {
-               return
-       }
-       if r.base.addr()%pallocChunkBytes != 0 {
-               throw("unreserving unaligned region")
-       }
-       lock(&p.scav.lock)
-       if gen == p.scav.gen {
-               p.scav.inUse.add(r)
-       }
-       unlock(&p.scav.lock)
-}
-
-// scavengeOne walks over address range work until it finds
+// scavengeOne walks over the chunk at chunk index ci and searches for
  // a contiguous run of pages to scavenge. It will try to scavenge
  // at most max bytes at once, but may scavenge more to avoid
  // breaking huge pages. Once it scavenges some memory it returns
  // how much it scavenged in bytes.
  //
-// Returns the number of bytes scavenged and the part of work
-// which was not yet searched.
+// searchIdx is the page index to start searching from in ci.
  //
-// work's base address must be aligned to pallocChunkBytes.
+// Returns the number of bytes scavenged.
  //
  // Must run on the systemstack because it acquires p.mheapLock.
  //
  //go:systemstack
-func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) {
-       // Defensively check if we've received an empty address range.
-       // If so, just return.
-       if work.size() == 0 {
-               // Nothing to do.
-               return 0, work
-       }
-       // Check the prerequisites of work.
-       if work.base.addr()%pallocChunkBytes != 0 {
-               throw("scavengeOne called with unaligned work region")
-       }
+func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintptr {
         // Calculate the maximum number of pages to scavenge.
         //
         // This should be alignUp(max, pageSize) / pageSize but max can and will
@@ -836,168 +711,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange
                 minPages = 1
         }
  
-       // Fast path: check the chunk containing the top-most address in work.
-       if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 {
-               return r, w
-       } else {
-               work = w
-       }
-
-       // findCandidate finds the next scavenge candidate in work optimistically.
-       //
-       // Returns the candidate chunk index and true on success, and false on failure.
-       //
-       // The heap need not be locked.
-       findCandidate := func(work addrRange) (chunkIdx, bool) {
-               // Iterate over this work's chunks.
-               for i := chunkIndex(work.limit.addr() - 1); i >= chunkIndex(work.base.addr()); i-- {
-                       // If this chunk is totally in-use or has no unscavenged pages, don't bother
-                       // doing a more sophisticated check.
-                       //
-                       // Note we're accessing the summary and the chunks without a lock, but
-                       // that's fine. We're being optimistic anyway.
-
-                       // Check quickly if there are enough free pages at all.
-                       if p.summary[len(p.summary)-1][i].max() < uint(minPages) {
-                               continue
-                       }
-
-                       // Run over the chunk looking harder for a candidate. Again, we could
-                       // race with a lot of different pieces of code, but we're just being
-                       // optimistic. Make sure we load the l2 pointer atomically though, to
-                       // avoid races with heap growth. It may or may not be possible to also
-                       // see a nil pointer in this case if we do race with heap growth, but
-                       // just defensively ignore the nils. This operation is optimistic anyway.
-                       l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&p.chunks[i.l1()])))
-                       if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) {
-                               return i, true
-                       }
-               }
-               return 0, false
-       }
-
-       // Slow path: iterate optimistically over the in-use address space
-       // looking for any free and unscavenged page. If we think we see something,
-       // lock and verify it!
-       for work.size() != 0 {
-
-               // Search for the candidate.
-               candidateChunkIdx, ok := findCandidate(work)
-               if !ok {
-                       // We didn't find a candidate, so we're done.
-                       work.limit = work.base
-                       break
-               }
-
-               // Lock, so we can verify what we found.
-               lock(p.mheapLock)
-
-               // Find, verify, and scavenge if we can.
-               chunk := p.chunkOf(candidateChunkIdx)
-               base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
-               if npages > 0 {
-                       work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
-                       unlock(p.mheapLock)
-                       return uintptr(npages) * pageSize, work
-               }
-               unlock(p.mheapLock)
-
-               // We were fooled, so let's continue from where we left off.
-               work.limit = offAddr{chunkBase(candidateChunkIdx)}
-       }
-       return 0, work
-}
-
-// scavengeOneFast is the fast path for scavengeOne, which just checks the top
-// chunk of work for some pages to scavenge.
-//
-// Must run on the system stack because it acquires the heap lock.
-//
-//go:systemstack
-func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) {
-       maxAddr := work.limit.addr() - 1
-       maxChunk := chunkIndex(maxAddr)
-
         lock(p.mheapLock)
-       if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
+       if p.summary[len(p.summary)-1][ci].max() >= uint(minPages) {
                 // We only bother looking for a candidate if there are at least
                 // minPages free pages at all.
-               base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+               base, npages := p.chunkOf(ci).findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
  
                 // If we found something, scavenge it and return!
                 if npages != 0 {
-                       work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
+                       // Compute the full address for the start of the range.
+                       addr := chunkBase(ci) + uintptr(base)*pageSize
+
+                       // Mark the range we're about to scavenge as allocated, because
+                       // we don't want any allocating goroutines to grab it while
+                       // the scavenging is in progress.
+                       if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
+                               throw("double scavenge")
+                       }
+
+                       // With that done, it's safe to unlock.
                         unlock(p.mheapLock)
-                       return uintptr(npages) * pageSize, work
-               }
-       }
-       unlock(p.mheapLock)
  
-       // Update the limit to reflect the fact that we checked maxChunk already.
-       work.limit = offAddr{chunkBase(maxChunk)}
-       return 0, work
-}
+                       if !p.test {
+                               // Only perform the actual scavenging if we're not in a test.
+                               // It's dangerous to do so otherwise.
+                               sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
+
+                               // Update global accounting only when not in test, otherwise
+                               // the runtime's accounting will be wrong.
+                               nbytes := int64(npages) * pageSize
+                               gcController.heapReleased.add(nbytes)
+                               gcController.heapFree.add(-nbytes)
+
+                               stats := memstats.heapStats.acquire()
+                               atomic.Xaddint64(&stats.committed, -nbytes)
+                               atomic.Xaddint64(&stats.released, nbytes)
+                               memstats.heapStats.release()
+                       }
  
-// scavengeRangeLocked scavenges the given region of memory.
-// The region of memory is described by its chunk index (ci),
-// the starting page index of the region relative to that
-// chunk (base), and the length of the region in pages (npages).
-//
-// Returns the base address of the scavenged region.
-//
-// p.mheapLock must be held. Unlocks p.mheapLock but reacquires
-// it before returning. Must be run on the systemstack as a result.
-//
-//go:systemstack
-func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
-       assertLockHeld(p.mheapLock)
+                       // Relock the heap, because now we need to make these pages
+                       // available for allocation. Free them back to the page allocator.
+                       lock(p.mheapLock)
+                       p.free(addr, uintptr(npages), true)
  
-       // Compute the full address for the start of the range.
-       addr := chunkBase(ci) + uintptr(base)*pageSize
+                       // Mark the range as scavenged.
+                       p.chunkOf(ci).scavenged.setRange(base, npages)
+                       unlock(p.mheapLock)
  
-       // Mark the range we're about to scavenge as allocated, because
-       // we don't want any allocating goroutines to grab it while
-       // the scavenging is in progress.
-       if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
-               throw("double scavenge")
+                       return uintptr(npages) * pageSize
+               }
         }
-
-       // With that done, it's safe to unlock.
+       // Mark this chunk as having no free pages.
+       p.scav.index.clear(ci)
         unlock(p.mheapLock)
  
-       // Update the scavenge low watermark.
-       lock(&p.scav.lock)
-       if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
-               p.scav.scavLWM = oAddr
-       }
-       unlock(&p.scav.lock)
-
-       if !p.test {
-               // Only perform the actual scavenging if we're not in a test.
-               // It's dangerous to do so otherwise.
-               sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
-
-               // Update global accounting only when not in test, otherwise
-               // the runtime's accounting will be wrong.
-               nbytes := int64(npages) * pageSize
-               gcController.heapReleased.add(nbytes)
-               gcController.heapFree.add(-nbytes)
-
-               // Update consistent accounting too.
-               stats := memstats.heapStats.acquire()
-               atomic.Xaddint64(&stats.committed, -nbytes)
-               atomic.Xaddint64(&stats.released, nbytes)
-               memstats.heapStats.release()
-       }
-
-       // Relock the heap, because now we need to make these pages
-       // available allocation. Free them back to the page allocator.
-       lock(p.mheapLock)
-       p.free(addr, uintptr(npages), true)
-
-       // Mark the range as scavenged.
-       p.chunkOf(ci).scavenged.setRange(base, npages)
-       return addr
+       return 0
  }
  
  // fillAligned returns x but with all zeroes in m-aligned
@@ -1059,38 +827,6 @@ func fillAligned(x uint64, m uint) uint64 {
         return ^((x - (x >> (m - 1))) | x)
  }
  
-// hasScavengeCandidate returns true if there's any min-page-aligned groups of
-// min pages of free-and-unscavenged memory in the region represented by this
-// pallocData.
-//
-// min must be a non-zero power of 2 <= maxPagesPerPhysPage.
-func (m *pallocData) hasScavengeCandidate(min uintptr) bool {
-       if min&(min-1) != 0 || min == 0 {
-               print("runtime: min = ", min, "\n")
-               throw("min must be a non-zero power of 2")
-       } else if min > maxPagesPerPhysPage {
-               print("runtime: min = ", min, "\n")
-               throw("min too large")
-       }
-
-       // The goal of this search is to see if the chunk contains any free and unscavenged memory.
-       for i := len(m.scavenged) - 1; i >= 0; i-- {
-               // 1s are scavenged OR non-free => 0s are unscavenged AND free
-               //
-               // TODO(mknyszek): Consider splitting up fillAligned into two
-               // functions, since here we technically could get by with just
-               // the first half of its computation. It'll save a few instructions
-               // but adds some additional code complexity.
-               x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
-
-               // Quickly skip over chunks of non-free or scavenged pages.
-               if x != ^uint64(0) {
-                       return true
-               }
-       }
-       return false
-}
-
  // findScavengeCandidate returns a start index and a size for this pallocData
  // segment which represents a contiguous region of free and unscavenged memory.
  //
@@ -1210,3 +946,157 @@ func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (ui
         }
         return start, size
  }
+
+// scavengeIndex is a structure for efficiently managing which pageAlloc chunks have
+// memory available to scavenge.
+type scavengeIndex struct {
+       // chunks is a bitmap representing the entire address space. Each bit represents
+       // a single chunk, and a 1 value indicates the presence of pages available for
+       // scavenging. Updates to the bitmap are serialized by the pageAlloc lock.
+       //
+       // The underlying storage of chunks is platform dependent and may not even be
+       // totally mapped read/write. min and max reflect the extent that is safe to access.
+       // min is inclusive, max is exclusive.
+       //
+       // searchAddr is the maximum address (in the offset address space, so we have a linear
+       // view of the address space; see mranges.go:offAddr) containing memory available to
+       // scavenge. It is a hint to the find operation to avoid O(n^2) behavior in repeated lookups.
+       //
+       // searchAddr is always inclusive and should be the base address of the highest runtime
+       // page available for scavenging.
+       //
+       // searchAddr is managed by both find and mark.
+       //
+       // Normally, find monotonically decreases searchAddr as it finds no more free pages to
+       // scavenge. However, mark, when marking a new chunk at an index greater than the current
+       // searchAddr, sets searchAddr to the *negative* index into chunks of that page. The trick here
+       // is that concurrent calls to find will fail to monotonically decrease searchAddr, and so they
+       // won't barge over new memory becoming available to scavenge. Furthermore, this ensures
+       // that some future caller of find *must* observe the new high index. That caller
+       // (or any other racing with it), then makes searchAddr positive before continuing, bringing
+       // us back to our monotonically decreasing steady-state.
+       //
+       // A pageAlloc lock serializes updates between min, max, and searchAddr, so abs(searchAddr)
+       // is always guaranteed to be >= min and < max (converted to heap addresses).
+       //
+       // TODO(mknyszek): Ideally we would use something bigger than a uint8 for faster
+       // iteration like uint32, but we lack the bit twiddling intrinsics. We'd need to either
+       // copy them from math/bits or fix the fact that we can't import math/bits' code from
+       // the runtime due to compiler instrumentation.
+       searchAddr atomicOffAddr
+       chunks     []atomic.Uint8
+       minHeapIdx atomic.Int32
+       min, max   atomic.Int32
+}
+
+// find returns the highest chunk index that may contain pages available to scavenge.
+// It also returns an offset to start searching in the highest chunk.
+func (s *scavengeIndex) find() (chunkIdx, uint) {
+       searchAddr, marked := s.searchAddr.Load()
+       if searchAddr == minOffAddr.addr() {
+               // We got a cleared search addr.
+               return 0, 0
+       }
+
+       // Starting from searchAddr's chunk, and moving down to minHeapIdx,
+       // iterate until we find a chunk with pages to scavenge.
+       min := s.minHeapIdx.Load()
+       searchChunk := chunkIndex(uintptr(searchAddr))
+       start := int32(searchChunk / 8)
+       for i := start; i >= min; i-- {
+               // Skip over irrelevant address space.
+               chunks := s.chunks[i].Load()
+               if chunks == 0 {
+                       continue
+               }
+               // Note that we can't have 8 leading zeroes here because
+               // we necessarily skipped that case. So, what's left is
+               // an index. If there are no zeroes, we want the 7th
+               // index, if 1 zero, the 6th, and so on.
+               n := 7 - sys.LeadingZeros8(chunks)
+               ci := chunkIdx(uint(i)*8 + uint(n))
+               if searchChunk == ci {
+                       return ci, chunkPageIndex(uintptr(searchAddr))
+               }
+               // Try to reduce searchAddr to newSearchAddr.
+               newSearchAddr := chunkBase(ci) + pallocChunkBytes - pageSize
+               if marked {
+                       // Attempt to be the first one to decrease the searchAddr
+                       // after an increase. If we fail, that means there was another
+                       // increase, or somebody else got to it before us. Either way,
+                       // it doesn't matter. We may lose some performance having an
+                       // incorrect search address, but it's far more important that
+                       // we don't miss updates.
+                       s.searchAddr.StoreUnmark(searchAddr, newSearchAddr)
+               } else {
+                       // Decrease searchAddr.
+                       s.searchAddr.StoreMin(newSearchAddr)
+               }
+               return ci, pallocChunkPages - 1
+       }
+       // Clear searchAddr, because we've exhausted the heap.
+       s.searchAddr.Clear()
+       return 0, 0
+}
+
+// mark sets the chunks covering the address range [base, limit) as
+// containing pages available to scavenge.
+//
+// Must be serialized with other mark, markRange, and clear calls.
+func (s *scavengeIndex) mark(base, limit uintptr) {
+       start, end := chunkIndex(base), chunkIndex(limit-pageSize)
+       if start == end {
+               // Within a chunk.
+               mask := uint8(1 << (start % 8))
+               s.chunks[start/8].Or(mask)
+       } else if start/8 == end/8 {
+               // Within the same byte in the index.
+               mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8)
+               s.chunks[start/8].Or(mask)
+       } else {
+               // Crosses multiple bytes in the index.
+               startAligned := chunkIdx(alignUp(uintptr(start), 8))
+               endAligned := chunkIdx(alignDown(uintptr(end), 8))
+
+               // Do the end of the first byte first.
+               if width := startAligned - start; width > 0 {
+                       mask := uint8(uint16(1<<width)-1) << (start % 8)
+                       s.chunks[start/8].Or(mask)
+               }
+               // Do the middle aligned sections that take up a whole
+               // byte.
+               for ci := startAligned; ci < endAligned; ci += 8 {
+                       s.chunks[ci/8].Store(^uint8(0))
+               }
+               // Do the end of the last byte.
+               //
+               // This width check doesn't match the one above
+               // for start because aligning down into the endAligned
+               // block means we always have at least one chunk in this
+               // block (note that end is *inclusive*). This also means
+               // that if end == endAligned+n, then what we really want
+               // is to fill n+1 chunks, i.e. width n+1. By induction,
+               // this is true for all n.
+               if width := end - endAligned + 1; width > 0 {
+                       mask := uint8(uint16(1<<width) - 1)
+                       s.chunks[end/8].Or(mask)
+               }
+       }
+       newSearchAddr := limit - pageSize
+       searchAddr, _ := s.searchAddr.Load()
+       // N.B. Because mark is serialized, it's not necessary to do a
+       // full CAS here. mark only ever increases searchAddr, while
+       // find only ever decreases it. Since we only ever race with
+       // decreases, even if the value we loaded is stale, the actual
+       // value will never be larger.
+       if (offAddr{searchAddr}).lessThan(offAddr{newSearchAddr}) {
+               s.searchAddr.StoreMarked(newSearchAddr)
+       }
+}
+
+// clear sets the chunk at index ci as not containing pages available to scavenge.
+//
+// Must be serialized with other mark, markRange, and clear calls.
+func (s *scavengeIndex) clear(ci chunkIdx) {
+       s.chunks[ci/8].And(^uint8(1 << (ci % 8)))
+}
diff --git a/src/runtime/mgcscavenge_test.go b/src/runtime/mgcscavenge_test.go

index 8d922959614ae883b396309f36c40200ad936430..620392f867433ef65721db08b81dc212cde88b29 100644 (file)
--- a/src/runtime/mgcscavenge_test.go
+++ b/src/runtime/mgcscavenge_test.go
@@ -561,3 +561,149 @@ func TestScavenger(t *testing.T) {
         // Clean up.
         s.Stop()
  }
+
+func TestScavengeIndex(t *testing.T) {
+       setup := func(t *testing.T) (func(ChunkIdx, uint), func(uintptr, uintptr)) {
+               t.Helper()
+
+               // Pick some reasonable bounds. We don't need a huge range just to test.
+               si := NewScavengeIndex(BaseChunkIdx, BaseChunkIdx+64)
+               find := func(want ChunkIdx, wantOffset uint) {
+                       t.Helper()
+
+                       got, gotOffset := si.Find()
+                       if want != got {
+                               t.Errorf("find: wanted chunk index %d, got %d", want, got)
+                       }
+                       if want != got {
+                               t.Errorf("find: wanted page offset %d, got %d", wantOffset, gotOffset)
+                       }
+                       if t.Failed() {
+                               t.FailNow()
+                       }
+                       si.Clear(got)
+               }
+               mark := func(base, limit uintptr) {
+                       t.Helper()
+
+                       si.Mark(base, limit)
+               }
+               return find, mark
+       }
+       t.Run("Uninitialized", func(t *testing.T) {
+               find, _ := setup(t)
+               find(0, 0)
+       })
+       t.Run("OnePage", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4))
+               find(BaseChunkIdx, 3)
+               find(0, 0)
+       })
+       t.Run("FirstPage", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1))
+               find(BaseChunkIdx, 0)
+               find(0, 0)
+       })
+       t.Run("SeveralPages", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14))
+               find(BaseChunkIdx, 13)
+               find(0, 0)
+       })
+       t.Run("WholeChunk", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+               find(BaseChunkIdx, PallocChunkPages-1)
+               find(0, 0)
+       })
+       t.Run("LastPage", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0))
+               find(BaseChunkIdx, PallocChunkPages-1)
+               find(0, 0)
+       })
+       t.Run("TwoChunks", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
+               find(BaseChunkIdx+1, 127)
+               find(BaseChunkIdx, PallocChunkPages-1)
+               find(0, 0)
+       })
+       t.Run("TwoChunksOffset", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
+               find(BaseChunkIdx+8, 128)
+               find(BaseChunkIdx+7, PallocChunkPages-1)
+               find(0, 0)
+       })
+       t.Run("SevenChunksOffset", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15))
+               find(BaseChunkIdx+13, 14)
+               for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- {
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+       t.Run("ThirtyTwoChunks", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+       t.Run("ThirtyTwoChunksOffset", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0))
+               for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- {
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+       t.Run("Mark", func(t *testing.T) {
+               find, mark := setup(t)
+               for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
+                       mark(PageBase(i, 0), PageBase(i+1, 0))
+               }
+               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+       t.Run("MarkInterleaved", func(t *testing.T) {
+               find, mark := setup(t)
+               for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
+                       mark(PageBase(i, 0), PageBase(i+1, 0))
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+       t.Run("MarkIdempotentOneChunk", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+               find(BaseChunkIdx, PallocChunkPages-1)
+               find(0, 0)
+       })
+       t.Run("MarkIdempotentThirtyTwoChunks", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+       t.Run("MarkIdempotentThirtyTwoChunksOffset", func(t *testing.T) {
+               find, mark := setup(t)
+               mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0))
+               mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0))
+               for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- {
+                       find(i, PallocChunkPages-1)
+               }
+               find(0, 0)
+       })
+}
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go

index 0a53cd451b4ac31a74106a71eaf6f68a170d8bae..698b7bff314086aa9011b3bae2448aeb2fd47c1d 100644 (file)
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -398,11 +398,15 @@ func sweepone() uintptr {
                 // between sweep done and sweep termination (e.g. not enough
                 // allocations to trigger a GC) which would be nice to fill in
                 // with scavenging work.
-               systemstack(func() {
-                       lock(&mheap_.lock)
-                       mheap_.pages.scavengeStartGen()
-                       unlock(&mheap_.lock)
-               })
+               if debug.scavtrace > 0 {
+                       systemstack(func() {
+                               lock(&mheap_.lock)
+                               released := atomic.Loaduintptr(&mheap_.pages.scav.released)
+                               printScavTrace(released, false)
+                               atomic.Storeuintptr(&mheap_.pages.scav.released, 0)
+                               unlock(&mheap_.lock)
+                       })
+               }
                 scavenger.ready()
         }
  
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index ac4f99b57de70e7204b688dc3f30c02b414952c0..ff681a19cd2b2f0db01faff92dc8f0d0972759fb 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -62,7 +62,10 @@ const (
  type mheap struct {
         // lock must only be acquired on the system stack, otherwise a g
         // could self-deadlock if its stack grows with the lock held.
-       lock  mutex
+       lock mutex
+
+       _ uint32 // 8-byte align pages so its alignment is consistent with tests.
+
         pages pageAlloc // page allocation data structure
  
         sweepgen uint32 // sweep generation, see comment in mspan; written during STW
@@ -1548,22 +1551,12 @@ func (h *mheap) scavengeAll() {
         gp := getg()
         gp.m.mallocing++
  
-       lock(&h.lock)
-       // Start a new scavenge generation so we have a chance to walk
-       // over the whole heap.
-       h.pages.scavengeStartGen()
-       unlock(&h.lock)
-
         released := h.pages.scavenge(^uintptr(0))
  
-       lock(&h.pages.scav.lock)
-       gen := h.pages.scav.gen
-       unlock(&h.pages.scav.lock)
-
         gp.m.mallocing--
  
         if debug.scavtrace > 0 {
-               printScavTrace(gen, released, true)
+               printScavTrace(released, true)
         }
  }
  
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go

index 38819747423d94a2ee3feea0349b06aacee462e5..c85da15ff2cdfc4ecf2d8ae95911f2c616568ede 100644 (file)
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -262,45 +262,20 @@ type pageAlloc struct {
         // All access is protected by the mheapLock.
         inUse addrRanges
  
+       _ uint32 // Align scav so it's easier to reason about alignment within scav.
+
         // scav stores the scavenger state.
         scav struct {
-               lock mutex
-
-               // inUse is a slice of ranges of address space which have not
-               // yet been looked at by the scavenger.
-               //
-               // Protected by lock.
-               inUse addrRanges
-
-               // gen is the scavenge generation number.
-               //
-               // Protected by lock.
-               gen uint32
-
-               // reservationBytes is how large of a reservation should be made
-               // in bytes of address space for each scavenge iteration.
-               //
-               // Protected by lock.
-               reservationBytes uintptr
+               // index is an efficient index of chunks that have pages available to
+               // scavenge.
+               index scavengeIndex
  
                 // released is the amount of memory released this generation.
                 //
                 // Updated atomically.
                 released uintptr
  
-               // scavLWM is the lowest (offset) address that the scavenger reached this
-               // scavenge generation.
-               //
-               // Protected by lock.
-               scavLWM offAddr
-
-               // freeHWM is the highest (offset) address of a page that was freed to
-               // the page allocator this scavenge generation.
-               //
-               // Protected by mheapLock.
-               freeHWM offAddr
-
-               _ uint32 // Align assistTime for atomics.
+               _ uint32 // Align assistTime for atomics on 32-bit platforms.
  
                 // scavengeAssistTime is the time spent scavenging in the last GC cycle.
                 //
@@ -348,12 +323,6 @@ func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) {
  
         // Set the mheapLock.
         p.mheapLock = mheapLock
-
-       // Initialize p.scav.inUse.
-       p.scav.inUse.init(sysStat)
-
-       // Initialize scavenge tracking state.
-       p.scav.scavLWM = maxSearchAddr
  }
  
  // tryChunkOf returns the bitmap data for the given chunk.
@@ -903,10 +872,7 @@ func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
         }
         limit := base + npages*pageSize - 1
         if !scavenged {
-               // Update the free high watermark for the scavenger.
-               if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
-                       p.scav.freeHWM = offLimit
-               }
+               p.scav.index.mark(base, limit+1)
         }
         if npages == 1 {
                 // Fast path: we're clearing a single bit, and we know exactly
diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go

index e072f70cd7631f91d02b6c093f11990e9ab1243f..859c61d8a5e78e6ce47c3db9aa608c5605464fd2 100644 (file)
--- a/src/runtime/mpagealloc_32bit.go
+++ b/src/runtime/mpagealloc_32bit.go
@@ -11,7 +11,10 @@
  
  package runtime
  
-import "unsafe"
+import (
+       "runtime/internal/atomic"
+       "unsafe"
+)
  
  const (
         // The number of levels in the radix tree.
@@ -53,6 +56,10 @@ var levelLogPages = [summaryLevels]uint{
         logPallocChunkPages,
  }
  
+// scavengeIndexArray is the backing store for p.scav.index.chunks.
+// On 32-bit platforms, it's small enough to just be a global.
+var scavengeIndexArray [((1 << heapAddrBits) / pallocChunkBytes) / 8]atomic.Uint8
+
  // See mpagealloc_64bit.go for details.
  func (p *pageAlloc) sysInit() {
         // Calculate how much memory all our entries will take up.
@@ -87,6 +94,9 @@ func (p *pageAlloc) sysInit() {
  
                 reservation = add(reservation, uintptr(entries)*pallocSumBytes)
         }
+
+       // Set up the scavenge index.
+       p.scav.index.chunks = scavengeIndexArray[:]
  }
  
  // See mpagealloc_64bit.go for details.
diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go

index 0b99209d994409cd65b50bae07a56cd9c8136394..bfc3e0ad90d728354867339ea3137edad03a9f45 100644 (file)
--- a/src/runtime/mpagealloc_64bit.go
+++ b/src/runtime/mpagealloc_64bit.go
@@ -6,7 +6,10 @@
  
  package runtime
  
-import "unsafe"
+import (
+       "runtime/internal/atomic"
+       "unsafe"
+)
  
  const (
         // The number of levels in the radix tree.
@@ -83,6 +86,12 @@ func (p *pageAlloc) sysInit() {
                 sl := notInHeapSlice{(*notInHeap)(r), 0, entries}
                 p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
         }
+
+       // Set up the scavenge index.
+       nbytes := uintptr(1<<heapAddrBits) / pallocChunkBytes / 8
+       r := sysReserve(nil, nbytes)
+       sl := notInHeapSlice{(*notInHeap)(r), int(nbytes), int(nbytes)}
+       p.scav.index.chunks = *(*[]atomic.Uint8)(unsafe.Pointer(&sl))
  }
  
  // sysGrow performs architecture-dependent operations on heap
@@ -177,4 +186,72 @@ func (p *pageAlloc) sysGrow(base, limit uintptr) {
                 sysUsed(unsafe.Pointer(need.base.addr()), need.size(), need.size())
                 p.summaryMappedReady += need.size()
         }
+
+       // Update the scavenge index.
+       p.summaryMappedReady += p.scav.index.grow(base, limit, p.sysStat)
+}
+
+// grow increases the index's backing store in response to a heap growth.
+//
+// base and limit bound the new heap region and must both be multiples of
+// pallocChunkBytes; grow throws otherwise.
+//
+// Returns the amount of memory added to sysStat, i.e. the number of bytes
+// of the backing store that were newly mapped (zero if nothing was mapped).
+func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr {
+       if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
+               print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
+               throw("sysGrow bounds not aligned to pallocChunkBytes")
+       }
+       // Map and commit the pieces of chunks that we need.
+       //
+       // We always map the full range of the minimum heap address to the
+       // maximum heap address. We don't do this for the summary structure
+       // because it's quite large and a discontiguous heap could cause a
+       // lot of memory to be used. In this situation, the worst case overhead
+       // is in the single-digit MiB if we map the whole thing.
+       //
+       // The base address of the backing store is always page-aligned,
+       // because it comes from the OS, so it's sufficient to align the
+       // index.
+       haveMin := s.min.Load()
+       haveMax := s.max.Load()
+       // needMin and needMax are byte offsets into s.chunks (one byte covers
+       // 8 chunks), rounded outward to physPageSize. The +7 rounds the upper
+       // bound up to a whole byte before aligning.
+       needMin := int32(alignDown(uintptr(chunkIndex(base)/8), physPageSize))
+       needMax := int32(alignUp(uintptr((chunkIndex(limit)+7)/8), physPageSize))
+       // Extend the range down to what we have, if there's no overlap.
+       // This makes the needed range touch the existing range, so the gap
+       // between them gets mapped as well.
+       if needMax < haveMin {
+               needMax = haveMin
+       }
+       if needMin > haveMax {
+               needMin = haveMax
+       }
+       have := makeAddrRange(
+               // Avoid a panic from indexing one past the last element.
+               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin),
+               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax),
+       )
+       need := makeAddrRange(
+               // Avoid a panic from indexing one past the last element.
+               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin),
+               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax),
+       )
+       // Subtract any overlap from rounding. We can't re-map memory because
+       // it'll be zeroed.
+       need = need.subtract(have)
+
+       // If we've got something to map, map it, and update the slice bounds.
+       if need.size() != 0 {
+               sysMap(unsafe.Pointer(need.base.addr()), need.size(), sysStat)
+               sysUsed(unsafe.Pointer(need.base.addr()), need.size(), need.size())
+               // Update the indices only after the new memory is valid.
+               // NOTE(review): a stored bound of 0 appears to be treated as
+               // "not yet set" here; confirm against the field documentation.
+               if haveMin == 0 || needMin < haveMin {
+                       s.min.Store(needMin)
+               }
+               if haveMax == 0 || needMax > haveMax {
+                       s.max.Store(needMax)
+               }
+       }
+       // Update minHeapIdx. Note that even if there's no mapping work to do,
+       // we may still have a new, lower minimum heap address.
+       // baseIdx is the (unaligned) byte index in s.chunks covering base.
+       minHeapIdx := s.minHeapIdx.Load()
+       if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx {
+               s.minHeapIdx.Store(baseIdx)
+       }
+       return need.size()
  }
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go

index e0be1e134ed71b9585d7df38a87887c95901563f..9cf83cc6131dc1231da2edac61fbfe1a2cf9f431 100644 (file)
--- a/src/runtime/mranges.go
+++ b/src/runtime/mranges.go
@@ -11,6 +11,7 @@ package runtime
  
  import (
         "internal/goarch"
+       "runtime/internal/atomic"
         "unsafe"
  )
  
@@ -141,6 +142,69 @@ func (l offAddr) addr() uintptr {
         return l.a
  }
  
+// atomicOffAddr is like offAddr, but operations on it are atomic.
+// It also contains operations to be able to store marked addresses
+// to ensure that they're not overridden until they've been seen.
+//
+// A marked address is represented by storing the negation of its
+// offset-space value (see StoreMarked and Load).
+type atomicOffAddr struct {
+       // a contains the offset address, unlike offAddr.
+       // A negative value denotes a marked address.
+       a atomic.Int64
+}
+
+// Clear attempts to store minOffAddr in atomicOffAddr. It may fail
+// if a marked value is placed in the box in the meanwhile.
+func (b *atomicOffAddr) Clear() {
+       for {
+               old := b.a.Load()
+               if old < 0 {
+                       return
+               }
+               if b.a.CompareAndSwap(old, int64(minOffAddr.addr()-arenaBaseOffset)) {
+                       return
+               }
+       }
+}
+
+// StoreMin stores addr if it's less than the current value in the
+// offset address space if the current value is not marked.
+func (b *atomicOffAddr) StoreMin(addr uintptr) {
+       new := int64(addr - arenaBaseOffset)
+       for {
+               old := b.a.Load()
+               if old < new {
+                       return
+               }
+               if b.a.CompareAndSwap(old, new) {
+                       return
+               }
+       }
+}
+
+// StoreUnmark attempts to unmark the value in atomicOffAddr and
+// replace it with newAddr. markedAddr must be a marked address
+// returned by Load. This function will not store newAddr if the
+// box no longer contains markedAddr.
+func (b *atomicOffAddr) StoreUnmark(markedAddr, newAddr uintptr) {
+       b.a.CompareAndSwap(-int64(markedAddr-arenaBaseOffset), int64(newAddr-arenaBaseOffset))
+}
+
+// StoreMarked stores addr but first converted to the offset address
+// space and then negated.
+func (b *atomicOffAddr) StoreMarked(addr uintptr) {
+       b.a.Store(-int64(addr - arenaBaseOffset))
+}
+
+// Load returns the address in the box as a virtual address. It also
+// returns if the value was marked or not.
+func (b *atomicOffAddr) Load() (uintptr, bool) {
+       v := b.a.Load()
+       wasMarked := false
+       if v < 0 {
+               wasMarked = true
+               v = -v
+       }
+       return uintptr(v) + arenaBaseOffset, wasMarked
+}
+
  // addrRanges is a data structure holding a collection of ranges of
  // address space.
  //
author	Michael Anthony Knyszek <mknyszek@google.com>
	Sun, 10 Apr 2022 20:34:17 +0000 (20:34 +0000)
committer	Michael Knyszek <mknyszek@google.com>
	Tue, 3 May 2022 15:13:53 +0000 (15:13 +0000)
src/runtime/export_test.go		patch \| blob \| history
src/runtime/mgcscavenge.go		patch \| blob \| history
src/runtime/mgcscavenge_test.go		patch \| blob \| history
src/runtime/mgcsweep.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history
src/runtime/mpagealloc.go		patch \| blob \| history
src/runtime/mpagealloc_32bit.go		patch \| blob \| history
src/runtime/mpagealloc_64bit.go		patch \| blob \| history
src/runtime/mranges.go		patch \| blob \| history