From: Rhys Hiltner
Date: Fri, 1 Apr 2022 19:56:49 +0000 (-0700)
Subject: runtime: split mprof locks
X-Git-Tag: go1.19beta1~448
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=89c0dd829f49c91eb2636bc2b24df0b1cdc74a1c;p=gostls13.git

runtime: split mprof locks

The profiles for memory allocations, sync.Mutex contention, and general
blocking store their data in a shared hash table. The bookkeeping work
at the end of a garbage collection cycle involves maintenance on each
memory allocation record. Previously, a single lock guarded access to
the hash table and the contents of all records. When a program has
allocated memory at a large number of unique call stacks, the
maintenance following every garbage collection can hold that lock for
several milliseconds. That can prevent progress on all other goroutines
by delaying acquirep's call to mcache.prepareForSweep, which needs the
lock in mProf_Free to report when a profiled allocation is no longer in
use. With no user goroutines making progress, it is in effect a
multi-millisecond GC-related stop-the-world pause.

Split the lock so the call to mProf_Flush no longer delays each P's call
to mProf_Free: mProf_Free uses a lock on the memory records' N+1 cycle,
and mProf_Flush uses locks on the memory records' accumulator and their
N cycle. mProf_Malloc also no longer competes with mProf_Flush, as it
uses a lock on the memory records' N+2 cycle. The profiles for
sync.Mutex contention and general blocking now share a separate lock,
and another lock guards insertions to the shared hash table (uncommon
in the steady-state).

Consumers of each type of profile take the matching accumulator lock,
so they will observe consistent count and magnitude values for each
record.

For #45894

Change-Id: I615ff80618d10e71025423daa64b0b7f9dc57daa
Reviewed-on: https://go-review.googlesource.com/c/go/+/399956
Reviewed-by: Carlos Amedee
Run-TryBot: Rhys Hiltner
Reviewed-by: Michael Knyszek
TryBot-Result: Gopher Robot
---
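Editor's note (after the "---", so not part of the commit message): the cycle-indexed lock split described above is easier to see with the indices written out. The sketch below is purely illustrative; ordinary sync.Mutex values and invented names such as recordAlloc, recordFree, and flush stand in for the runtime's own mutex type and for mProf_Malloc, mProf_Free, and mProf_Flush. It is not code from this change, but it shows why the three paths no longer contend: each one locks a different slot of a record's future ring, and only the flush path also takes the accumulator lock.

package main

import (
	"fmt"
	"sync"
)

type cycleCounts struct{ allocs, frees int }

type record struct {
	active cycleCounts    // published profile, guarded by activeMu
	future [3]cycleCounts // staging slots; future[i] is guarded by futureMu[i]
}

var (
	cycle    uint32        // global heap profile cycle N (mProfCycleHolder in the patch)
	activeMu sync.Mutex    // stands in for profMemActiveLock
	futureMu [3]sync.Mutex // stands in for profMemFutureLock
	rec      record
)

// recordAlloc stages an allocation in cycle N+2, as mProf_Malloc does.
func recordAlloc() {
	i := (cycle + 2) % 3
	futureMu[i].Lock()
	rec.future[i].allocs++
	futureMu[i].Unlock()
}

// recordFree stages a free in cycle N+1, as mProf_Free does.
func recordFree() {
	i := (cycle + 1) % 3
	futureMu[i].Lock()
	rec.future[i].frees++
	futureMu[i].Unlock()
}

// flush publishes cycle N into the active profile, as mProf_Flush does.
// It never touches the N+1 or N+2 slots, so it does not block the
// allocator (N+2) or the sweep-time free path (N+1).
func flush() {
	i := cycle % 3
	activeMu.Lock()
	futureMu[i].Lock()
	rec.active.allocs += rec.future[i].allocs
	rec.active.frees += rec.future[i].frees
	rec.future[i] = cycleCounts{}
	futureMu[i].Unlock()
	activeMu.Unlock()
}

func main() {
	recordAlloc() // staged in slot (N+2)%3
	cycle += 2    // two mark terminations later that slot is the current cycle
	flush()       // ...and flush publishes it
	recordFree()
	fmt.Printf("published profile: %+v\n", rec.active)
}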
diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go
index 4a16bc0ddb..f6e7ea9880 100644
--- a/src/runtime/lockrank.go
+++ b/src/runtime/lockrank.go
@@ -56,7 +56,10 @@ const (
 	lockRankNotifyList
 	lockRankTraceStrings
 	lockRankMspanSpecial
-	lockRankProf
+	lockRankProfInsert
+	lockRankProfBlock
+	lockRankProfMemActive
+	lockRankProfMemFuture
 	lockRankGcBitsArenas
 	lockRankRoot
 	lockRankTrace
@@ -137,7 +140,10 @@ var lockNames = []string{
 	lockRankNotifyList: "notifyList",
 	lockRankTraceStrings: "traceStrings",
 	lockRankMspanSpecial: "mspanSpecial",
-	lockRankProf: "prof",
+	lockRankProfInsert: "profInsert",
+	lockRankProfBlock: "profBlock",
+	lockRankProfMemActive: "profMemActive",
+	lockRankProfMemFuture: "profMemFuture",
 	lockRankGcBitsArenas: "gcBitsArenas",
 	lockRankRoot: "root",
 	lockRankTrace: "trace",
@@ -215,7 +221,10 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
 	lockRankNotifyList: {},
 	lockRankTraceStrings: {lockRankTraceBuf},
 	lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
-	lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+	lockRankProfInsert: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+	lockRankProfBlock: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+	lockRankProfMemActive: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+	lockRankProfMemFuture: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings, lockRankProfMemActive},
 	lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
 	lockRankRoot: {},
 	lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot},
@@ -226,15 +235,15 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
 	lockRankRwmutexR: {lockRankSysmon, lockRankRwmutexW},
 	lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
-	lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
-	lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
-	lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
+	lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
+	lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
+	lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
 	lockRankDefer: {},
 	lockRankSudog: {lockRankHchan, lockRankNotifyList},
-	lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
-	lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
+	lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
+	lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
 	lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
-	lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
+	lockRankGlobalAlloc: {lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
 	lockRankPageAllocScav: {lockRankMheap},
 	lockRankGFree: {lockRankSched},
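Editor's note, not part of the patch: the only nesting the new partial order introduces among the profile locks is that profMemActiveLock may be held while an element of profMemFutureLock is acquired (lockRankProfMemFuture's entry above lists lockRankProfMemActive). The toy checker below is not runtime code; the rankedMutex type, the global held slice, and the rank values 1 and 2 are invented for this illustration of what the lockPartialOrder table enforces.

package main

import (
	"fmt"
	"sync"
)

// rankedMutex is a toy stand-in for the runtime's rank-checked mutex.
type rankedMutex struct {
	mu   sync.Mutex
	rank int
	name string
}

// held tracks locks owned by the caller. The runtime keeps this per-M;
// a single global slice is enough for a one-goroutine demonstration.
var held []*rankedMutex

func (m *rankedMutex) lock() {
	for _, h := range held {
		if h.rank >= m.rank {
			panic(fmt.Sprintf("lock ordering violation: acquiring %s (rank %d) while holding %s (rank %d)",
				m.name, m.rank, h.name, h.rank))
		}
	}
	m.mu.Lock()
	held = append(held, m)
}

func (m *rankedMutex) unlock() {
	held = held[:len(held)-1] // assumes LIFO unlock order, as in main below
	m.mu.Unlock()
}

var (
	profMemActive = &rankedMutex{rank: 1, name: "profMemActive"}
	profMemFuture = &rankedMutex{rank: 2, name: "profMemFuture"}
)

func main() {
	// The order used by mProf_Flush and MemProfile in the patch below:
	// the accumulator lock first, then the lock for one future slot.
	profMemActive.lock()
	profMemFuture.lock()
	profMemFuture.unlock()
	profMemActive.unlock()
	fmt.Println("ok: profMemActive -> profMemFuture is an allowed order")
}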
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index f65be2bc74..14bf9a583f 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -413,7 +413,12 @@ func mallocinit() {
 	mheap_.init()
 	mcache0 = allocmcache()
 	lockInit(&gcBitsArenas.lock, lockRankGcBitsArenas)
-	lockInit(&proflock, lockRankProf)
+	lockInit(&profInsertLock, lockRankProfInsert)
+	lockInit(&profBlockLock, lockRankProfBlock)
+	lockInit(&profMemActiveLock, lockRankProfMemActive)
+	for i := range profMemFutureLock {
+		lockInit(&profMemFutureLock[i], lockRankProfMemFuture)
+	}
 	lockInit(&globalAlloc.mutex, lockRankGlobalAlloc)

 	// Create initial arena growth hints.
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 5137db2015..cd63bafebb 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -14,7 +14,17 @@ import (
 )

 // NOTE(rsc): Everything here could use cas if contention became an issue.
-var proflock mutex
+var (
+	// profInsertLock protects changes to the start of all *bucket linked lists
+	profInsertLock mutex
+	// profBlockLock protects the contents of every blockRecord struct
+	profBlockLock mutex
+	// profMemActiveLock protects the active field of every memRecord struct
+	profMemActiveLock mutex
+	// profMemFutureLock is a set of locks that protect the respective elements
+	// of the future array of every memRecord struct
+	profMemFutureLock [len(memRecord{}.future)]mutex
+)

 // All memory allocations are local and do not escape outside of the profiler.
 // The profiler is forbidden from referring to garbage-collected memory.
@@ -43,6 +53,9 @@ type bucketType int
 // Per-call-stack profiling information.
 // Lookup by hashing call stack into a linked-list hash table.
 //
+// None of the fields in this bucket header are modified after
+// creation, including its next and allnext links.
+//
 // No heap pointers.
 //
 //go:notinheap
@@ -139,26 +152,64 @@ type blockRecord struct {
 }

 var (
-	mbuckets *bucket // memory profile buckets
-	bbuckets *bucket // blocking profile buckets
-	xbuckets *bucket // mutex profile buckets
-	buckhash *[buckHashSize]*bucket
-	bucketmem uintptr
-
-	mProf struct {
-		// All fields in mProf are protected by proflock.
-
-		// cycle is the global heap profile cycle. This wraps
-		// at mProfCycleWrap.
-		cycle uint32
-		// flushed indicates that future[cycle] in all buckets
-		// has been flushed to the active profile.
-		flushed bool
-	}
+	mbuckets atomic.UnsafePointer // *bucket, memory profile buckets
+	bbuckets atomic.UnsafePointer // *bucket, blocking profile buckets
+	xbuckets atomic.UnsafePointer // *bucket, mutex profile buckets
+	buckhash atomic.UnsafePointer // *buckhashArray
+
+	mProfCycle mProfCycleHolder
 )

+type buckhashArray [buckHashSize]atomic.UnsafePointer // *bucket
+
 const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)

+// mProfCycleHolder holds the global heap profile cycle number (wrapped at
+// mProfCycleWrap, stored starting at bit 1), and a flag (stored at bit 0) to
+// indicate whether future[cycle] in all buckets has been queued to flush into
+// the active profile.
+type mProfCycleHolder struct {
+	value atomic.Uint32
+}
+
+// read returns the current cycle count.
+func (c *mProfCycleHolder) read() (cycle uint32) {
+	v := c.value.Load()
+	cycle = v >> 1
+	return cycle
+}
+
+// setFlushed sets the flushed flag. It returns the current cycle count and the
+// previous value of the flushed flag.
+func (c *mProfCycleHolder) setFlushed() (cycle uint32, alreadyFlushed bool) {
+	for {
+		prev := c.value.Load()
+		cycle = prev >> 1
+		alreadyFlushed = (prev & 0x1) != 0
+		next := prev | 0x1
+		if c.value.CompareAndSwap(prev, next) {
+			return cycle, alreadyFlushed
+		}
+	}
+}
+
+// increment increases the cycle count by one, wrapping the value at
+// mProfCycleWrap. It clears the flushed flag.
+func (c *mProfCycleHolder) increment() {
+	// We explicitly wrap mProfCycle rather than depending on
+	// uint wraparound because the memRecord.future ring does not
+	// itself wrap at a power of two.
+	for {
+		prev := c.value.Load()
+		cycle := prev >> 1
+		cycle = (cycle + 1) % mProfCycleWrap
+		next := cycle << 1
+		if c.value.CompareAndSwap(prev, next) {
+			break
+		}
+	}
+}
+
 // newBucket allocates a bucket with the given type and number of stack entries.
 func newBucket(typ bucketType, nstk int) *bucket {
 	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
@@ -172,7 +223,6 @@ func newBucket(typ bucketType, nstk int) *bucket {
 	}

 	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
-	bucketmem += size
 	b.typ = typ
 	b.nstk = uintptr(nstk)
 	return b
@@ -204,11 +254,19 @@ func (b *bucket) bp() *blockRecord {

 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
 func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
-	if buckhash == nil {
-		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
-		if buckhash == nil {
-			throw("runtime: cannot allocate memory")
+	bh := (*buckhashArray)(buckhash.Load())
+	if bh == nil {
+		lock(&profInsertLock)
+		// check again under the lock
+		bh = (*buckhashArray)(buckhash.Load())
+		if bh == nil {
+			bh = (*buckhashArray)(sysAlloc(unsafe.Sizeof(buckhashArray{}), &memstats.buckhash_sys))
+			if bh == nil {
+				throw("runtime: cannot allocate memory")
+			}
+			buckhash.StoreNoWB(unsafe.Pointer(bh))
 		}
+		unlock(&profInsertLock)
 	}

 	// Hash stack.
@@ -227,7 +285,8 @@ func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket
 	h ^= h >> 11

 	i := int(h % buckHashSize)
-	for b := buckhash[i]; b != nil; b = b.next {
+	// first check optimistically, without the lock
+	for b := (*bucket)(bh[i].Load()); b != nil; b = b.next {
 		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
 			return b
 		}
@@ -237,23 +296,37 @@ func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket
 		return nil
 	}

+	lock(&profInsertLock)
+	// check again under the insertion lock
+	for b := (*bucket)(bh[i].Load()); b != nil; b = b.next {
+		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
+			unlock(&profInsertLock)
+			return b
+		}
+	}
+
 	// Create new bucket.
 	b := newBucket(typ, len(stk))
 	copy(b.stk(), stk)
 	b.hash = h
 	b.size = size
-	b.next = buckhash[i]
-	buckhash[i] = b
+
+	var allnext *atomic.UnsafePointer
 	if typ == memProfile {
-		b.allnext = mbuckets
-		mbuckets = b
+		allnext = &mbuckets
 	} else if typ == mutexProfile {
-		b.allnext = xbuckets
-		xbuckets = b
+		allnext = &xbuckets
 	} else {
-		b.allnext = bbuckets
-		bbuckets = b
+		allnext = &bbuckets
 	}
+
+	b.next = (*bucket)(bh[i].Load())
+	b.allnext = (*bucket)(allnext.Load())
+
+	bh[i].StoreNoWB(unsafe.Pointer(b))
+	allnext.StoreNoWB(unsafe.Pointer(b))
+
+	unlock(&profInsertLock)
 	return b
 }
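Editor's note, not part of the patch: the stkbucket change above is a double-checked lookup. Readers walk the hash chain with no lock at all, relying on bucket headers and their next links being immutable once published; writers re-check under profInsertLock before linking a new bucket in. Below is a self-contained model of that pattern with invented names, using atomic.Pointer in place of the runtime's not-in-heap pointers and StoreNoWB (assumes Go 1.19+ for the generic atomic types).

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type bucket struct {
	key  uint64
	next *bucket // immutable after the bucket is published
}

var (
	head       atomic.Pointer[bucket] // head of one hash chain
	insertLock sync.Mutex             // plays the role of profInsertLock
)

func lookupOrInsert(key uint64) *bucket {
	// First check optimistically, without the lock.
	for b := head.Load(); b != nil; b = b.next {
		if b.key == key {
			return b
		}
	}
	insertLock.Lock()
	defer insertLock.Unlock()
	// Check again under the insertion lock: another goroutine may have
	// inserted the same key between our scan and the Lock call.
	for b := head.Load(); b != nil; b = b.next {
		if b.key == key {
			return b
		}
	}
	b := &bucket{key: key, next: head.Load()}
	head.Store(b) // publish; concurrent readers see either the old or the new head
	return b
}

func main() {
	a := lookupOrInsert(42)
	b := lookupOrInsert(42)
	fmt.Println(a == b) // true: the second call found the existing bucket
}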
@@ -278,13 +351,7 @@ func eqslice(x, y []uintptr) bool {
 // frees after the world is started again count towards a new heap
 // profiling cycle.
 func mProf_NextCycle() {
-	lock(&proflock)
-	// We explicitly wrap mProf.cycle rather than depending on
-	// uint wraparound because the memRecord.future ring does not
-	// itself wrap at a power of two.
-	mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
-	mProf.flushed = false
-	unlock(&proflock)
+	mProfCycle.increment()
 }

 // mProf_Flush flushes the events from the current heap profiling
@@ -295,22 +362,33 @@ func mProf_NextCycle() {
 // contrast with mProf_NextCycle, this is somewhat expensive, but safe
 // to do concurrently.
 func mProf_Flush() {
-	lock(&proflock)
-	if !mProf.flushed {
-		mProf_FlushLocked()
-		mProf.flushed = true
+	cycle, alreadyFlushed := mProfCycle.setFlushed()
+	if alreadyFlushed {
+		return
 	}
-	unlock(&proflock)
+
+	index := cycle % uint32(len(memRecord{}.future))
+	lock(&profMemActiveLock)
+	lock(&profMemFutureLock[index])
+	mProf_FlushLocked(index)
+	unlock(&profMemFutureLock[index])
+	unlock(&profMemActiveLock)
 }

-func mProf_FlushLocked() {
-	c := mProf.cycle
-	for b := mbuckets; b != nil; b = b.allnext {
+// mProf_FlushLocked flushes the events from the heap profiling cycle at index
+// into the active profile. The caller must hold the lock for the active profile
+// (profMemActiveLock) and for the profiling cycle at index
+// (profMemFutureLock[index]).
+func mProf_FlushLocked(index uint32) {
+	assertLockHeld(&profMemActiveLock)
+	assertLockHeld(&profMemFutureLock[index])
+	head := (*bucket)(mbuckets.Load())
+	for b := head; b != nil; b = b.allnext {
 		mp := b.mp()

 		// Flush cycle C into the published profile and clear
 		// it for reuse.
-		mpc := &mp.future[c%uint32(len(mp.future))]
+		mpc := &mp.future[index]
 		mp.active.add(mpc)
 		*mpc = memRecordCycle{}
 	}
@@ -321,39 +399,41 @@
 // snapshot as of the last mark termination without advancing the heap
 // profile cycle.
 func mProf_PostSweep() {
-	lock(&proflock)
 	// Flush cycle C+1 to the active profile so everything as of
 	// the last mark termination becomes visible. *Don't* advance
 	// the cycle, since we're still accumulating allocs in cycle
 	// C+2, which have to become C+1 in the next mark termination
 	// and so on.
-	c := mProf.cycle
-	for b := mbuckets; b != nil; b = b.allnext {
-		mp := b.mp()
-		mpc := &mp.future[(c+1)%uint32(len(mp.future))]
-		mp.active.add(mpc)
-		*mpc = memRecordCycle{}
-	}
-	unlock(&proflock)
+	cycle := mProfCycle.read() + 1
+
+	index := cycle % uint32(len(memRecord{}.future))
+	lock(&profMemActiveLock)
+	lock(&profMemFutureLock[index])
+	mProf_FlushLocked(index)
+	unlock(&profMemFutureLock[index])
+	unlock(&profMemActiveLock)
 }

 // Called by malloc to record a profiled block.
 func mProf_Malloc(p unsafe.Pointer, size uintptr) {
 	var stk [maxStack]uintptr
 	nstk := callers(4, stk[:])
-	lock(&proflock)
+
+	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
+
 	b := stkbucket(memProfile, size, stk[:nstk], true)
-	c := mProf.cycle
 	mp := b.mp()
-	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
+	mpc := &mp.future[index]
+
+	lock(&profMemFutureLock[index])
 	mpc.allocs++
 	mpc.alloc_bytes += size
-	unlock(&proflock)
+	unlock(&profMemFutureLock[index])

-	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
-	// This reduces potential contention and chances of deadlocks.
-	// Since the object must be alive during call to mProf_Malloc,
-	// it's fine to do this non-atomically.
+	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of
+	// the profiler locks. This reduces potential contention and chances of
+	// deadlocks. Since the object must be alive during the call to
+	// mProf_Malloc, it's fine to do this non-atomically.
 	systemstack(func() {
 		setprofilebucket(p, b)
 	})
@@ -361,13 +441,15 @@

 // Called when freeing a profiled block.
 func mProf_Free(b *bucket, size uintptr) {
-	lock(&proflock)
-	c := mProf.cycle
+	index := (mProfCycle.read() + 1) % uint32(len(memRecord{}.future))
+
 	mp := b.mp()
-	mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+	mpc := &mp.future[index]
+
+	lock(&profMemFutureLock[index])
 	mpc.frees++
 	mpc.free_bytes += size
-	unlock(&proflock)
+	unlock(&profMemFutureLock[index])
 }

 var blockprofilerate uint64 // in CPU ticks
@@ -424,18 +506,19 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 	} else {
 		nstk = gcallers(gp.m.curg, skip, stk[:])
 	}
-	lock(&proflock)
 	b := stkbucket(which, 0, stk[:nstk], true)
+	bp := b.bp()
+
+	lock(&profBlockLock)
 	if which == blockProfile && cycles < rate {
 		// Remove sampling bias, see discussion on http://golang.org/cl/299991.
-		b.bp().count += float64(rate) / float64(cycles)
-		b.bp().cycles += rate
+		bp.count += float64(rate) / float64(cycles)
+		bp.cycles += rate
 	} else {
-		b.bp().count++
-		b.bp().cycles += cycles
+		bp.count++
+		bp.cycles += cycles
 	}
-	unlock(&proflock)
+	unlock(&profBlockLock)
 }

 var mutexprofilerate uint64 // fraction sampled
@@ -567,13 +650,18 @@ func (r *MemProfileRecord) Stack() []uintptr {
 // the testing package's -test.memprofile flag instead
 // of calling MemProfile directly.
 func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
-	lock(&proflock)
+	cycle := mProfCycle.read()
 	// If we're between mProf_NextCycle and mProf_Flush, take care
 	// of flushing to the active profile so we only have to look
 	// at the active profile below.
-	mProf_FlushLocked()
+	index := cycle % uint32(len(memRecord{}.future))
+	lock(&profMemActiveLock)
+	lock(&profMemFutureLock[index])
+	mProf_FlushLocked(index)
+	unlock(&profMemFutureLock[index])
 	clear := true
-	for b := mbuckets; b != nil; b = b.allnext {
+	head := (*bucket)(mbuckets.Load())
+	for b := head; b != nil; b = b.allnext {
 		mp := b.mp()
 		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 			n++
@@ -588,11 +676,13 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 		// garbage collection is disabled from the beginning of execution,
 		// accumulate all of the cycles, and recount buckets.
 		n = 0
-		for b := mbuckets; b != nil; b = b.allnext {
+		for b := head; b != nil; b = b.allnext {
 			mp := b.mp()
 			for c := range mp.future {
+				lock(&profMemFutureLock[c])
 				mp.active.add(&mp.future[c])
 				mp.future[c] = memRecordCycle{}
+				unlock(&profMemFutureLock[c])
 			}
 			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 				n++
@@ -602,7 +692,7 @@
 	if n <= len(p) {
 		ok = true
 		idx := 0
-		for b := mbuckets; b != nil; b = b.allnext {
+		for b := head; b != nil; b = b.allnext {
 			mp := b.mp()
 			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 				record(&p[idx], b)
@@ -610,7 +700,7 @@
 			}
 		}
 	}
-	unlock(&proflock)
+	unlock(&profMemActiveLock)
 	return
 }

@@ -637,12 +727,13 @@ func record(r *MemProfileRecord, b *bucket) {
 }

 func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
-	lock(&proflock)
-	for b := mbuckets; b != nil; b = b.allnext {
+	lock(&profMemActiveLock)
+	head := (*bucket)(mbuckets.Load())
+	for b := head; b != nil; b = b.allnext {
 		mp := b.mp()
 		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
 	}
-	unlock(&proflock)
+	unlock(&profMemActiveLock)
 }

 // BlockProfileRecord describes blocking events originated
@@ -661,13 +752,14 @@ type BlockProfileRecord struct {
 // the testing package's -test.blockprofile flag instead
 // of calling BlockProfile directly.
 func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
-	lock(&proflock)
-	for b := bbuckets; b != nil; b = b.allnext {
+	lock(&profBlockLock)
+	head := (*bucket)(bbuckets.Load())
+	for b := head; b != nil; b = b.allnext {
 		n++
 	}
 	if n <= len(p) {
 		ok = true
-		for b := bbuckets; b != nil; b = b.allnext {
+		for b := head; b != nil; b = b.allnext {
 			bp := b.bp()
 			r := &p[0]
 			r.Count = int64(bp.count)
@@ -693,7 +785,7 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
 			p = p[1:]
 		}
 	}
-	unlock(&proflock)
+	unlock(&profBlockLock)
 	return
 }

@@ -704,13 +796,14 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
 // Most clients should use the runtime/pprof package
 // instead of calling MutexProfile directly.
 func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
-	lock(&proflock)
-	for b := xbuckets; b != nil; b = b.allnext {
+	lock(&profBlockLock)
+	head := (*bucket)(xbuckets.Load())
+	for b := head; b != nil; b = b.allnext {
 		n++
 	}
 	if n <= len(p) {
 		ok = true
-		for b := xbuckets; b != nil; b = b.allnext {
+		for b := head; b != nil; b = b.allnext {
 			bp := b.bp()
 			r := &p[0]
 			r.Count = int64(bp.count)
@@ -722,7 +815,7 @@ func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
 			p = p[1:]
 		}
 	}
-	unlock(&proflock)
+	unlock(&profBlockLock)
 	return
 }
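Editor's note, not part of the patch: a small user-level example of the exported readers whose locking changes here. Per the diff above, MemProfile and iterate_memprof now take profMemActiveLock (MemProfile also briefly takes one profMemFutureLock slot while flushing), while BlockProfile and MutexProfile take the new profBlockLock. The program below only exercises those entry points through the public runtime API; the record counts it prints will vary from run to run.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.SetBlockProfileRate(1)     // sample every blocking event
	runtime.SetMutexProfileFraction(1) // sample every mutex contention event

	// Size the slices by calling each reader with a nil slice first, as the
	// documentation for these functions suggests.
	n, _ := runtime.MemProfile(nil, true)
	mem := make([]runtime.MemProfileRecord, n+10)
	n, ok := runtime.MemProfile(mem, true)
	fmt.Println("memory profile records:", n, "complete:", ok)

	n, _ = runtime.BlockProfile(nil)
	blk := make([]runtime.BlockProfileRecord, n+10)
	n, ok = runtime.BlockProfile(blk)
	fmt.Println("block profile records:", n, "complete:", ok)

	n, _ = runtime.MutexProfile(nil)
	mtx := make([]runtime.BlockProfileRecord, n+10)
	n, ok = runtime.MutexProfile(mtx)
	fmt.Println("mutex profile records:", n, "complete:", ok)
}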