From 697644070c6e335b9c3dffdd4e82feb8038c3f22 Mon Sep 17 00:00:00 2001
From: Sven Anderson
Date: Fri, 5 May 2023 00:15:07 +0200
Subject: [PATCH] runtime: improve Pinner with gcBits

This change replaces the statically sized pinnerBits with gcBits-based
ones, which are copied in each GC cycle if they exist. The pinnerBits
now include a second bit per object that indicates whether a pin
counter for multi-pins exists, in order to avoid unnecessary iterations
over the specials list.

This is a follow-up to CL 367296.

Change-Id: I82e38cecd535e18c3b3ae54b5cc67d3aeeaafcfd
Reviewed-on: https://go-review.googlesource.com/c/go/+/493275
Reviewed-by: Michael Knyszek
TryBot-Result: Gopher Robot
Run-TryBot: Michael Knyszek
Reviewed-by: Heschi Kreinick
---
 src/runtime/mgcsweep.go |   5 +
 src/runtime/mheap.go    |  12 +--
 src/runtime/pinner.go   | 197 ++++++++++++++++++++++++++++------------
 3 files changed, 144 insertions(+), 70 deletions(-)

diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 728a5bad7e..68f1aae600 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -668,6 +668,11 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 	s.allocBits = s.gcmarkBits
 	s.gcmarkBits = newMarkBits(s.nelems)
 
+	// refresh pinnerBits if they exist
+	if s.pinnerBits != nil {
+		s.refreshPinnerBits()
+	}
+
 	// Initialize alloc bits cache.
 	s.refillAllocCache(0)
 
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 6dccb4e33d..f836d91c6a 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -205,8 +205,7 @@ type mheap struct {
 	specialprofilealloc    fixalloc // allocator for specialprofile*
 	specialReachableAlloc  fixalloc // allocator for specialReachable
 	specialPinCounterAlloc fixalloc // allocator for specialPinCounter
-	pinnerBitsAlloc        fixalloc // allocator for *pBits
-	speciallock            mutex    // lock for special record and pinnerBits allocators.
+	speciallock            mutex    // lock for special record allocators.
 	arenaHintAlloc         fixalloc // allocator for arenaHints
 
 	// User arena state.
@@ -471,6 +470,7 @@ type mspan struct {
 	// out memory.
 	allocBits  *gcBits
 	gcmarkBits *gcBits
+	pinnerBits *gcBits // bitmap for pinned objects; accessed atomically
 
 	// sweep generation:
 	// if sweepgen == h->sweepgen - 2, the span needs sweeping
@@ -492,7 +492,6 @@ type mspan struct {
 	limit              uintptr   // end of data in span
 	speciallock        mutex     // guards specials list and changes to pinnerBits
 	specials           *special  // linked list of special records sorted by offset.
-	pinnerBits         *pinBits  // bitmap for pinned objects; accessed atomically
 	userArenaChunkFree addrRange // interval for managing chunk allocation
 
 	// freeIndexForScan is like freeindex, except that freeindex is
@@ -760,7 +759,6 @@ func (h *mheap) init() {
 	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
 	h.specialReachableAlloc.init(unsafe.Sizeof(specialReachable{}), nil, nil, &memstats.other_sys)
 	h.specialPinCounterAlloc.init(unsafe.Sizeof(specialPinCounter{}), nil, nil, &memstats.other_sys)
-	h.pinnerBitsAlloc.init(unsafe.Sizeof(pinBits{}), nil, nil, &memstats.other_sys)
 	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)
 
 	// Don't zero mspan allocations. Background sweeping can
@@ -1640,12 +1638,6 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
 	// Mark the space as free.
 	h.pages.free(s.base(), s.npages)
 
-	// Free pinnerBits if set.
-	if pinnerBits := s.getPinnerBits(); pinnerBits != nil {
-		s.setPinnerBits(nil)
-		h.freePinnerBits(pinnerBits)
-	}
-
 	// Free the span structure. We no longer have a use for it.
 	s.state.set(mSpanDead)
 	h.freeMSpanLocked(s)
diff --git a/src/runtime/pinner.go b/src/runtime/pinner.go
index 1c4209997a..a507a5a3cc 100644
--- a/src/runtime/pinner.go
+++ b/src/runtime/pinner.go
@@ -6,7 +6,6 @@ package runtime
 
 import (
 	"runtime/internal/atomic"
-	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -90,15 +89,16 @@ func isPinned(ptr unsafe.Pointer) bool {
 		return true
 	}
 	pinnerBits := span.getPinnerBits()
+	// these pinnerBits might get unlinked by a concurrently running sweep, but
+	// that's OK because gcBits don't get cleared until the following GC cycle
+	// (nextMarkBitArenaEpoch)
 	if pinnerBits == nil {
 		return false
 	}
 	objIndex := span.objIndex(uintptr(ptr))
-	bytep := &pinnerBits.x[objIndex/8]
-	mask := byte(1 << (objIndex % 8))
-	result := (bytep.Load() & mask) != 0
+	pinState := pinnerBits.ofObject(objIndex)
 	KeepAlive(ptr) // make sure ptr is alive until we are done so the span can't be freed
-	return result
+	return pinState.isPinned()
 }
 
 // setPinned marks or unmarks a Go pointer as pinned.
@@ -119,45 +119,50 @@ func setPinned(ptr unsafe.Pointer, pin bool) {
 	KeepAlive(ptr) // make sure ptr is still alive after span is swept
 
 	objIndex := span.objIndex(uintptr(ptr))
-	mask := byte(1 << (objIndex % 8))
 
 	lock(&span.speciallock) // guard against concurrent calls of setPinned on same span
 
 	pinnerBits := span.getPinnerBits()
 	if pinnerBits == nil {
-		pinnerBits = mheap_.newPinnerBits()
+		pinnerBits = span.newPinnerBits()
 		span.setPinnerBits(pinnerBits)
 	}
-	bytep := &pinnerBits.x[objIndex/8]
-	alreadySet := pin == ((bytep.Load() & mask) != 0)
+	pinState := pinnerBits.ofObject(objIndex)
 	if pin {
-		if alreadySet {
-			// multiple pin on same object, record it in counter
-			offset := objIndex * span.elemsize
+		if pinState.isPinned() {
+			// multiple pins on same object, set multipin bit
+			pinState.setMultiPinned(true)
+			// and increase the pin counter
 			// TODO(mknyszek): investigate if systemstack is necessary here
 			systemstack(func() {
+				offset := objIndex * span.elemsize
 				span.incPinCounter(offset)
 			})
 		} else {
-			bytep.Or(mask)
+			// set pin bit
+			pinState.setPinned(true)
 		}
 	} else {
-		if alreadySet {
-			// unpinning unpinned object, bail out
-			throw("runtime.Pinner: object already unpinned")
-		} else {
-			multipin := false
-			if pinnerBits.specialCnt.Load() != 0 {
+		// unpin
+		if pinState.isPinned() {
+			if pinState.isMultiPinned() {
+				var exists bool
 				// TODO(mknyszek): investigate if systemstack is necessary here
 				systemstack(func() {
 					offset := objIndex * span.elemsize
-					multipin = span.decPinCounter(offset)
+					exists = span.decPinCounter(offset)
 				})
+				if !exists {
+					// counter is 0, clear multipin bit
+					pinState.setMultiPinned(false)
+				}
+			} else {
+				// no multipins recorded. unpin object.
+				pinState.setPinned(false)
 			}
-			if !multipin {
-				// no multiple pins recoded. unpin object.
-				bytep.And(^mask)
-			}
+		} else {
+			// unpinning unpinned object, bail out
+			throw("runtime.Pinner: object already unpinned")
 		}
 	}
 	unlock(&span.speciallock)
@@ -165,42 +170,116 @@ func setPinned(ptr unsafe.Pointer, pin bool) {
 	return
 }
 
-// pinBits is a bitmap for pinned objects. This is always used as pinBits.x.
-type pinBits struct {
-	_          sys.NotInHeap
-	x          [(maxObjsPerSpan + 7) / 8]atomic.Uint8
-	specialCnt atomic.Int32
+type pinState struct {
+	bytep   *uint8
+	byteVal uint8
+	mask    uint8
+}
+
+// nosplit, because it's called by isPinned, which is nosplit
+//
+//go:nosplit
+func (v *pinState) isPinned() bool {
+	return (v.byteVal & v.mask) != 0
+}
+
+func (v *pinState) isMultiPinned() bool {
+	return (v.byteVal & (v.mask << 1)) != 0
+}
+
+func (v *pinState) setPinned(val bool) {
+	v.set(val, false)
+}
+
+func (v *pinState) setMultiPinned(val bool) {
+	v.set(val, true)
+}
+
+// set sets the pin bit of the pinState to val. If multipin is true, it
+// sets/unsets the multipin bit instead.
+func (v *pinState) set(val bool, multipin bool) {
+	mask := v.mask
+	if multipin {
+		mask <<= 1
+	}
+	if val {
+		atomic.Or8(v.bytep, mask)
+	} else {
+		atomic.And8(v.bytep, ^mask)
+	}
+}
+
+// pinnerBits is the same type as gcBits but has different methods.
+type pinnerBits gcBits
+
+// ofObject returns the pinState of the n'th object.
+// nosplit, because it's called by isPinned, which is nosplit
+//
+//go:nosplit
+func (p *pinnerBits) ofObject(n uintptr) pinState {
+	bytep, mask := (*gcBits)(p).bitp(n * 2)
+	byteVal := atomic.Load8(bytep)
+	return pinState{bytep, byteVal, mask}
 }
 
-func (h *mheap) newPinnerBits() *pinBits {
-	lock(&h.speciallock)
-	pinnerBits := (*pinBits)(h.pinnerBitsAlloc.alloc())
-	unlock(&h.speciallock)
-	return pinnerBits
+func (s *mspan) pinnerBitSize() uintptr {
+	return divRoundUp(s.nelems*2, 8)
 }
 
-func (h *mheap) freePinnerBits(p *pinBits) {
-	lock(&h.speciallock)
-	h.pinnerBitsAlloc.free(unsafe.Pointer(p))
-	unlock(&h.speciallock)
+// newPinnerBits returns a pointer to 8 byte aligned bytes to be used for this
+// span's pinner bits. newPinnerBits is used to mark objects that are pinned.
+// They are copied when the span is swept.
+func (s *mspan) newPinnerBits() *pinnerBits {
+	return (*pinnerBits)(newMarkBits(s.nelems * 2))
 }
 
 // nosplit, because it's called by isPinned, which is nosplit
 //
 //go:nosplit
-func (s *mspan) getPinnerBits() *pinBits {
-	return (*pinBits)(atomic.Loadp(unsafe.Pointer(&s.pinnerBits)))
+func (s *mspan) getPinnerBits() *pinnerBits {
+	return (*pinnerBits)(atomic.Loadp(unsafe.Pointer(&s.pinnerBits)))
 }
 
-func (s *mspan) setPinnerBits(p *pinBits) {
+func (s *mspan) setPinnerBits(p *pinnerBits) {
 	atomicstorep(unsafe.Pointer(&s.pinnerBits), unsafe.Pointer(p))
 }
 
+// refreshPinnerBits replaces pinnerBits with a fresh copy in the arenas for the
+// next GC cycle. If it does not contain any pinned objects, pinnerBits of the
+// span is set to nil.
+func (s *mspan) refreshPinnerBits() {
+	p := s.getPinnerBits()
+	if p == nil {
+		return
+	}
+
+	hasPins := false
+	bytes := alignUp(s.pinnerBitSize(), 8)
+
+	// Iterate over each 8-byte chunk and check for pins. Note that
+	// newPinnerBits guarantees that pinnerBits will be 8-byte aligned, so we
+	// don't have to worry about edge cases, irrelevant bits will simply be
+	// zero.
+	for _, x := range unsafe.Slice((*uint64)(unsafe.Pointer(&p.x)), bytes/8) {
+		if x != 0 {
+			hasPins = true
+			break
+		}
+	}
+
+	if hasPins {
+		newPinnerBits := s.newPinnerBits()
+		memmove(unsafe.Pointer(&newPinnerBits.x), unsafe.Pointer(&p.x), bytes)
+		s.setPinnerBits(newPinnerBits)
+	} else {
+		s.setPinnerBits(nil)
+	}
+}
+
 // incPinCounter is only called for multiple pins of the same object and records
 // the _additional_ pins.
 func (span *mspan) incPinCounter(offset uintptr) {
 	var rec *specialPinCounter
-
 	ref, exists := span.specialFindSplicePoint(offset, _KindSpecialPinCounter)
 	if !exists {
 		lock(&mheap_.speciallock)
@@ -212,34 +291,32 @@ func (span *mspan) incPinCounter(offset uintptr) {
 		rec.special.next = *ref
 		*ref = (*special)(unsafe.Pointer(rec))
 		spanHasSpecials(span)
-		span.pinnerBits.specialCnt.Add(1)
 	} else {
 		rec = (*specialPinCounter)(unsafe.Pointer(*ref))
 	}
 	rec.counter++
 }
 
-// decPinCounter is always called for unpins and returns false if no multiple
-// pins are recorded. If multiple pins are recorded, it decreases the counter
-// and returns true.
+// decPinCounter decreases the counter. If the counter reaches 0, the counter
+// special is deleted and false is returned. Otherwise true is returned.
 func (span *mspan) decPinCounter(offset uintptr) bool {
 	ref, exists := span.specialFindSplicePoint(offset, _KindSpecialPinCounter)
-	if exists {
-		counter := (*specialPinCounter)(unsafe.Pointer(*ref))
-		if counter.counter > 1 {
-			counter.counter--
-		} else {
-			span.pinnerBits.specialCnt.Add(-1)
-			*ref = counter.special.next
-			if span.specials == nil {
-				spanHasNoSpecials(span)
-			}
-			lock(&mheap_.speciallock)
-			mheap_.specialPinCounterAlloc.free(unsafe.Pointer(counter))
-			unlock(&mheap_.speciallock)
+	if !exists {
+		throw("runtime.Pinner: decreased non-existing pin counter")
+	}
+	counter := (*specialPinCounter)(unsafe.Pointer(*ref))
+	counter.counter--
+	if counter.counter == 0 {
+		*ref = counter.special.next
+		if span.specials == nil {
+			spanHasNoSpecials(span)
 		}
+		lock(&mheap_.speciallock)
+		mheap_.specialPinCounterAlloc.free(unsafe.Pointer(counter))
+		unlock(&mheap_.speciallock)
+		return false
 	}
-	return exists
+	return true
 }
 
 // only for tests
-- 
2.48.1
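
Illustrative usage (editor's sketch, not part of the patch): the example below shows how the multi-pin case handled by setPinned and decPinCounter arises from the public runtime.Pinner API. It assumes a Go toolchain that ships runtime.Pinner (introduced in the same release cycle; see CL 367296 referenced above). The object size and variable names are arbitrary, and the comments describe the bookkeeping added by this change (pin bit, multi-pin bit, pin-counter special).

package main

import "runtime"

func main() {
	// An ordinary heap object; size and names are arbitrary.
	obj := new([64]byte)

	var p1, p2 runtime.Pinner

	// First pin: sets the object's pin bit in its span's pinnerBits.
	p1.Pin(obj)
	// Second pin: sets the multi-pin bit and records the additional pin
	// in a specialPinCounter.
	p2.Pin(obj)

	// While pinned, the object's address may safely be passed to non-Go code.

	// First unpin: the counter drops to 0, the special is freed and the
	// multi-pin bit is cleared; the pin bit stays set, so the object
	// remains pinned.
	p1.Unpin()
	// Second unpin: the pin bit is cleared; the object is no longer pinned.
	p2.Unpin()
}

The order of the Unpin calls does not matter; only the number of pins recorded for the object is tracked, not which Pinner set them.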