Revert "runtime: split mutex profile clocks"
author Rhys Hiltner <rhys.hiltner@gmail.com>
Wed, 29 May 2024 16:36:36 +0000 (16:36 +0000)
committer Gopher Robot <gobot@golang.org>
Thu, 30 May 2024 17:49:07 +0000 (17:49 +0000)
This reverts commit 8ab131fb1256a4a795c610e145c022e22e2d1567 (CL 586796)

Reason for revert: This is part of a patch series that changed the
handling of contended lock2/unlock2 calls, reducing the maximum
throughput of contended runtime.mutex values, and causing a performance
regression on applications where that is (or became) the bottleneck.

Updates #66999
Updates #67585

Change-Id: I54711691e86e072081482102019d168292b5150a
Reviewed-on: https://go-review.googlesource.com/c/go/+/589095
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Rhys Hiltner <rhys.hiltner@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Than McIntosh <thanm@google.com>
src/runtime/lock_futex.go
src/runtime/lock_sema.go
src/runtime/mprof.go
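
The regression cited above concerns throughput of the runtime-internal mutex under heavy contention, which user code cannot exercise directly. As a rough illustration of the failure mode, here is a contended-lock throughput benchmark of the usual shape, with sync.Mutex as a user-level stand-in (an assumption; the reverted series changed the runtime's own lock2/unlock2 paths):

// contention_test.go: a rough proxy for contended-lock throughput.
// sync.Mutex is a stand-in; the actual regression was in the
// runtime-internal mutex, which user code cannot reach directly.
// Run with: go test -bench=. -cpu=1,4,16
package contention

import (
	"sync"
	"testing"
)

func BenchmarkContendedMutex(b *testing.B) {
	var mu sync.Mutex
	var counter int64
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			mu.Lock()
			counter++ // short critical section: lock handoff dominates
			mu.Unlock()
		}
	})
	_ = counter
}

A drop in ops/sec at high -cpu counts relative to a baseline toolchain is the kind of signal that motivated this revert.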

index ef9a800b566a0295daf523bc0fe1532f252397ca..9be231f2ea9208e9ef2da28b2ec0103607047c89 100644 (file)
@@ -80,14 +80,7 @@ func lock2(l *mutex) {
                gp.stackguard0, gp.throwsplit = stackPreempt, true
        }
 
-       var startNanos int64
-       const sampleRate = gTrackingPeriod
-       sample := cheaprandn(sampleRate) == 0
-       if sample {
-               startNanos = nanotime()
-       }
-       gp.m.mWaitList.acquireTicks = cputicks()
-
+       gp.m.mWaitList.acquireTimes = timePair{nanotime: nanotime(), cputicks: cputicks()}
        // On uniprocessors, no point spinning.
        // On multiprocessors, spin for ACTIVE_SPIN attempts.
        spin := 0
@@ -119,7 +112,7 @@ Loop:
 
                                        if v == old || atomic.Casuintptr(&l.key, old, v) {
                                                gp.m.mWaitList.clearLinks()
-                                               gp.m.mWaitList.acquireTicks = 0
+                                               gp.m.mWaitList.acquireTimes = timePair{}
                                                break
                                        }
                                        v = atomic.Loaduintptr(&l.key)
@@ -127,11 +120,6 @@ Loop:
                                if gp == gp.m.curg {
                                        gp.stackguard0, gp.throwsplit = stackguard0, throwsplit
                                }
-
-                               if sample {
-                                       endNanos := nanotime()
-                                       gp.m.mLockProfile.waitTime.Add((endNanos - startNanos) * sampleRate)
-                               }
                                return
                        }
                        i = 0
@@ -173,8 +161,7 @@ func unlock(l *mutex) {
 }
 
 func unlock2(l *mutex) {
-       var claimed bool
-       var cycles int64
+       now, dt := timePair{nanotime: nanotime(), cputicks: cputicks()}, timePair{}
        for {
                v := atomic.Loaduintptr(&l.key)
                if v == mutex_locked {
@@ -184,11 +171,10 @@ func unlock2(l *mutex) {
                } else if v&mutex_locked == 0 {
                        throw("unlock of unlocked lock")
                } else {
-                       if !claimed {
-                               claimed = true
-                               nowTicks := cputicks()
+                       if now != (timePair{}) {
                                head := muintptr(v &^ (mutex_sleeping | mutex_locked))
-                               cycles = claimMutexWaitTime(nowTicks, head)
+                               dt = claimMutexWaitTime(now, head)
+                               now = timePair{}
                        }
 
                        // Other M's are waiting for the lock.
@@ -200,7 +186,7 @@ func unlock2(l *mutex) {
        }
 
        gp := getg()
-       gp.m.mLockProfile.recordUnlock(cycles)
+       gp.m.mLockProfile.recordUnlock(dt)
        gp.m.locks--
        if gp.m.locks < 0 {
                throw("runtimeĀ·unlock: lock count")
index 6c941c65149b3b7f74f554cf239673cdf388de47..0d7bd5b9c9228297f4b596c7679640b0e8133c7e 100644 (file)
@@ -60,14 +60,7 @@ func lock2(l *mutex) {
                gp.stackguard0, gp.throwsplit = stackPreempt, true
        }
 
-       var startNanos int64
-       const sampleRate = gTrackingPeriod
-       sample := cheaprandn(sampleRate) == 0
-       if sample {
-               startNanos = nanotime()
-       }
-       gp.m.mWaitList.acquireTicks = cputicks()
-
+       gp.m.mWaitList.acquireTimes = timePair{nanotime: nanotime(), cputicks: cputicks()}
        // On uniprocessors, no point spinning.
        // On multiprocessors, spin for ACTIVE_SPIN attempts.
        spin := 0
@@ -95,7 +88,7 @@ Loop:
 
                                        if v == old || atomic.Casuintptr(&l.key, old, v) {
                                                gp.m.mWaitList.clearLinks()
-                                               gp.m.mWaitList.acquireTicks = 0
+                                               gp.m.mWaitList.acquireTimes = timePair{}
                                                break
                                        }
                                        v = atomic.Loaduintptr(&l.key)
@@ -103,11 +96,6 @@ Loop:
                                if gp == gp.m.curg {
                                        gp.stackguard0, gp.throwsplit = stackguard0, throwsplit
                                }
-
-                               if sample {
-                                       endNanos := nanotime()
-                                       gp.m.mLockProfile.waitTime.Add((endNanos - startNanos) * sampleRate)
-                               }
                                return
                        }
                        i = 0
@@ -157,8 +145,7 @@ func unlock(l *mutex) {
 //
 //go:nowritebarrier
 func unlock2(l *mutex) {
-       var claimed bool
-       var cycles int64
+       now, dt := timePair{nanotime: nanotime(), cputicks: cputicks()}, timePair{}
        gp := getg()
        var mp *m
        for {
@@ -168,11 +155,9 @@ func unlock2(l *mutex) {
                                break
                        }
                } else {
-                       if !claimed {
-                               claimed = true
-                               nowTicks := cputicks()
-                               head := muintptr(v &^ locked)
-                               cycles = claimMutexWaitTime(nowTicks, head)
+                       if now != (timePair{}) {
+                               dt = claimMutexWaitTime(now, muintptr(v&^locked))
+                               now = timePair{}
                        }
 
                        // Other M's are waiting for the lock.
@@ -186,7 +171,7 @@ func unlock2(l *mutex) {
                }
        }
 
-       gp.m.mLockProfile.recordUnlock(cycles)
+       gp.m.mLockProfile.recordUnlock(dt)
        gp.m.locks--
        if gp.m.locks < 0 {
                throw("runtimeĀ·unlock: lock count")
index 1d44164b842a25aac2e5203b84e41651a0af87c3..fd0a0187245432b6561b9c0cc33cf1f281258469 100644 (file)
@@ -590,7 +590,7 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 // mutex.
 //
 // Having found the head and tail nodes and a correct waiters count, the
-// unlocking M can read and update those two nodes' acquireTicks field and thus
+// unlocking M can read and update those two nodes' acquireTimes fields and thus
 // take responsibility for (an estimate of) the entire list's delay since the
 // last unlock call.
 //
@@ -603,16 +603,21 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 // runtime controls the order of thread wakeups (it's a LIFO stack), but with
 // lock_futex.go the OS can wake an arbitrary thread.
 type mWaitList struct {
-       acquireTicks int64    // start of current wait (set by us, updated by others during unlock)
+       acquireTimes timePair // start of current wait (set by us, updated by others during unlock)
        next         muintptr // next m waiting for lock (set by us, cleared by another during unlock)
        prev         muintptr // previous m waiting for lock (an amortized hint, set by another during unlock)
        tail         muintptr // final m waiting for lock (an amortized hint, set by others during unlock)
        waiters      int32    // length of waiting m list (an amortized hint, set by another during unlock)
 }
 
+type timePair struct {
+       nanotime int64
+       cputicks int64
+}
+
 // clearLinks resets the fields related to the M's position in the list of Ms
-// waiting for a mutex. It leaves acquireTicks intact, since this M may still be
-// waiting and may have had its acquireTicks updated by an unlock2 call.
+// waiting for a mutex. It leaves acquireTimes intact, since this M may still be
+// waiting and may have had its acquireTimes updated by an unlock2 call.
 //
 // In lock_sema.go, the previous owner of the mutex dequeues an M and then wakes
 // it; with semaphore-based sleep, it's important that each M receives only one
@@ -731,8 +736,8 @@ func removeMutexWaitList(head muintptr, mp *m) muintptr {
        hp := head.ptr()
        tail := hp.mWaitList.tail
        waiters := hp.mWaitList.waiters
-       headTicks := hp.mWaitList.acquireTicks
-       tailTicks := hp.mWaitList.tail.ptr().mWaitList.acquireTicks
+       headTimes := hp.mWaitList.acquireTimes
+       tailTimes := hp.mWaitList.tail.ptr().mWaitList.acquireTimes
 
        mp.mWaitList.tail = 0
 
@@ -768,40 +773,42 @@ func removeMutexWaitList(head muintptr, mp *m) muintptr {
                hp.mWaitList.prev = 0
                hp.mWaitList.tail = tail
                hp.mWaitList.waiters = waiters - 1
-               hp.mWaitList.acquireTicks = headTicks
+               hp.mWaitList.acquireTimes = headTimes
        }
        if tp := tail.ptr(); tp != nil {
-               tp.mWaitList.acquireTicks = tailTicks
+               tp.mWaitList.acquireTimes = tailTimes
        }
        return head
 }
 
-// claimMutexWaitTime advances the acquireTicks of the list of waiting Ms at
+// claimMutexWaitTime advances the acquireTimes of the list of waiting Ms at
 // head to now, returning an estimate of the total wait time claimed by that
 // action.
-func claimMutexWaitTime(nowTicks int64, head muintptr) int64 {
+func claimMutexWaitTime(now timePair, head muintptr) timePair {
        fixMutexWaitList(head)
        hp := head.ptr()
        if hp == nil {
-               return 0
+               return timePair{}
        }
        tp := hp.mWaitList.tail.ptr()
        waiters := hp.mWaitList.waiters
-       headTicks := hp.mWaitList.acquireTicks
-       tailTicks := tp.mWaitList.acquireTicks
+       headTimes := hp.mWaitList.acquireTimes
+       tailTimes := tp.mWaitList.acquireTimes
 
-       var cycles int64
-       cycles = nowTicks - headTicks
+       var dt timePair
+       dt.nanotime = now.nanotime - headTimes.nanotime
+       dt.cputicks = now.cputicks - headTimes.cputicks
        if waiters > 1 {
-               cycles = int64(waiters) * (cycles + nowTicks - tailTicks) / 2
+               dt.nanotime = int64(waiters) * (dt.nanotime + now.nanotime - tailTimes.nanotime) / 2
+               dt.cputicks = int64(waiters) * (dt.cputicks + now.cputicks - tailTimes.cputicks) / 2
        }
 
        // When removeMutexWaitList removes a head or tail node, it's responsible
        // for applying these changes to the new head or tail.
-       hp.mWaitList.acquireTicks = nowTicks
-       tp.mWaitList.acquireTicks = nowTicks
+       hp.mWaitList.acquireTimes = now
+       tp.mWaitList.acquireTimes = now
 
-       return cycles
+       return dt
 }
 
 // mLockProfile is part of the M struct to hold information relating to mutex
@@ -832,21 +839,26 @@ type mLockProfile struct {
 // From unlock2, we might not be holding a p in this code.
 //
 //go:nowritebarrierrec
-func (prof *mLockProfile) recordUnlock(cycles int64) {
-       if cycles != 0 {
+func (prof *mLockProfile) recordUnlock(dt timePair) {
+       if dt != (timePair{}) {
                // We could make a point of clearing out the local storage right before
                // this, to have a slightly better chance of being able to see the call
                // stack if the program has several (nested) contended locks. If apps
                // are seeing a lot of _LostContendedRuntimeLock samples, maybe that'll
                // be a worthwhile change.
-               prof.proposeUnlock(cycles)
+               prof.proposeUnlock(dt)
        }
        if getg().m.locks == 1 && prof.cycles != 0 {
                prof.store()
        }
 }
 
-func (prof *mLockProfile) proposeUnlock(cycles int64) {
+func (prof *mLockProfile) proposeUnlock(dt timePair) {
+       if nanos := dt.nanotime; nanos > 0 {
+               prof.waitTime.Add(nanos)
+       }
+
+       cycles := dt.cputicks
        if cycles <= 0 {
                return
        }
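
For reference, the estimate computed in claimMutexWaitTime above uses only the head and tail nodes' timestamps: with waiters > 1, the total delay on each clock is taken as waiters * (dHead + dTail) / 2, i.e. the waiter count times the average of the longest and shortest waits, which is exact when arrival times are evenly spaced. A small worked sketch with illustrative numbers:

// Worked sketch of the wait-time estimate in claimMutexWaitTime.
// With only head and tail timestamps available, the total delay of
// n waiters is estimated as n * (dHead + dTail) / 2 on each clock.
package main

import "fmt"

func estimate(waiters, dHead, dTail int64) int64 {
	dt := dHead // exact when there is a single waiter
	if waiters > 1 {
		dt = waiters * (dHead + dTail) / 2
	}
	return dt
}

func main() {
	// 4 waiters; the head has waited 80ns, the tail 20ns. If arrivals
	// were evenly spaced (80, 60, 40, 20), the true total is 200ns.
	fmt.Println(estimate(4, 80, 20)) // 200
}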