Cypherpunks repositories - gostls13.git/commitdiff
sync: add active spinning to Mutex
author Dmitry Vyukov <dvyukov@google.com>
Fri, 20 Feb 2015 08:50:56 +0000 (11:50 +0300)
committer Dmitry Vyukov <dvyukov@google.com>
Tue, 24 Feb 2015 10:53:48 +0000 (10:53 +0000)
Currently sync.Mutex is fully cooperative. That is, once contention is discovered,
the goroutine calls into scheduler. This is suboptimal as the resource can become
free soon after (especially if critical sections are short). Server software
usually runs at ~50% CPU utilization, that is, switching to other goroutines
is not necessarily profitable.

This change adds limited active spinning to sync.Mutex if:
1. running on a multicore machine and
2. GOMAXPROCS>1 and
3. there is at least one other running P and
4. local runq is empty.
As opposed to runtime mutex we don't do passive spinning,
because there can be work on the global runq or on other Ps.

benchmark                   old ns/op     new ns/op     delta
BenchmarkMutexNoSpin        1271          1272          +0.08%
BenchmarkMutexNoSpin-2      702           683           -2.71%
BenchmarkMutexNoSpin-4      377           372           -1.33%
BenchmarkMutexNoSpin-8      197           190           -3.55%
BenchmarkMutexNoSpin-16     131           122           -6.87%
BenchmarkMutexNoSpin-32     170           164           -3.53%
BenchmarkMutexSpin          4724          4728          +0.08%
BenchmarkMutexSpin-2        2501          2491          -0.40%
BenchmarkMutexSpin-4        1330          1325          -0.38%
BenchmarkMutexSpin-8        684           684           +0.00%
BenchmarkMutexSpin-16       414           372           -10.14%
BenchmarkMutexSpin-32       559           469           -16.10%

BenchmarkMutex                 19.1          19.1          +0.00%
BenchmarkMutex-2               81.6          54.3          -33.46%
BenchmarkMutex-4               143           100           -30.07%
BenchmarkMutex-8               154           156           +1.30%
BenchmarkMutex-16              140           159           +13.57%
BenchmarkMutex-32              141           163           +15.60%
BenchmarkMutexSlack            33.3          31.2          -6.31%
BenchmarkMutexSlack-2          122           97.7          -19.92%
BenchmarkMutexSlack-4          168           158           -5.95%
BenchmarkMutexSlack-8          152           158           +3.95%
BenchmarkMutexSlack-16         140           159           +13.57%
BenchmarkMutexSlack-32         146           162           +10.96%
BenchmarkMutexWork             154           154           +0.00%
BenchmarkMutexWork-2           89.2          89.9          +0.78%
BenchmarkMutexWork-4           139           86.1          -38.06%
BenchmarkMutexWork-8           177           162           -8.47%
BenchmarkMutexWork-16          170           173           +1.76%
BenchmarkMutexWork-32          176           176           +0.00%
BenchmarkMutexWorkSlack        160           160           +0.00%
BenchmarkMutexWorkSlack-2      103           99.1          -3.79%
BenchmarkMutexWorkSlack-4      155           148           -4.52%
BenchmarkMutexWorkSlack-8      176           170           -3.41%
BenchmarkMutexWorkSlack-16     170           173           +1.76%
BenchmarkMutexWorkSlack-32     175           176           +0.57%

"No work" benchmarks are not very interesting (BenchmarkMutex and
BenchmarkMutexSlack), as they are absolutely not realistic.

Fixes #8889

Change-Id: I6f14f42af1fa48f73a776fdd11f0af6dd2bb428b
Reviewed-on: https://go-review.googlesource.com/5430
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>

src/runtime/proc1.go
src/sync/mutex.go
src/sync/mutex_test.go
src/sync/runtime.go

index a47df13691e320434ee5b7cd833aeb0d3d1f0d5c..1eef1b8dcd4ee0b4959bcb5b4bb0aecb58d35498 100644 (file)
@@ -3332,3 +3332,27 @@ func sync_atomic_runtime_procPin() int {
 func sync_atomic_runtime_procUnpin() {
        procUnpin()
 }
+
+// Active spinning for sync.Mutex.
+//go:linkname sync_runtime_canSpin sync.runtime_canSpin
+//go:nosplit
+func sync_runtime_canSpin(i int) bool {
+       // sync.Mutex is cooperative, so we are conservative with spinning.
+       // Spin only few times and only if running on a multicore machine and
+       // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
+       // As opposed to runtime mutex we don't do passive spinning here,
+       // because there can be work on the global runq or on other Ps.
+       if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
+               return false
+       }
+       if p := getg().m.p; p.runqhead != p.runqtail {
+               return false
+       }
+       return true
+}
+
+//go:linkname sync_runtime_doSpin sync.runtime_doSpin
+//go:nosplit
+func sync_runtime_doSpin() {
+       procyield(active_spin_cnt)
+}
index 73b33770222d103b0d64274721fe1c766d938ad4..3f280ad719d577c72d5df97cdb1cc7485e305be0 100644 (file)
@@ -48,15 +48,31 @@ func (m *Mutex) Lock() {
        }
 
        awoke := false
+       iter := 0
        for {
                old := m.state
                new := old | mutexLocked
                if old&mutexLocked != 0 {
+                       if runtime_canSpin(iter) {
+                               // Active spinning makes sense.
+                               // Try to set mutexWoken flag to inform Unlock
+                               // to not wake other blocked goroutines.
+                               if !awoke && old&mutexWoken == 0 && old>>mutexWaiterShift != 0 &&
+                                       atomic.CompareAndSwapInt32(&m.state, old, old|mutexWoken) {
+                                       awoke = true
+                               }
+                               runtime_doSpin()
+                               iter++
+                               continue
+                       }
                        new = old + 1<<mutexWaiterShift
                }
                if awoke {
                        // The goroutine has been woken from sleep,
                        // so we need to reset the flag in either case.
+                       if new&mutexWoken == 0 {
+                               panic("sync: inconsistent mutex state")
+                       }
                        new &^= mutexWoken
                }
                if atomic.CompareAndSwapInt32(&m.state, old, new) {
@@ -65,6 +81,7 @@ func (m *Mutex) Lock() {
                        }
                        runtime_Semacquire(&m.sema)
                        awoke = true
+                       iter = 0
                }
        }
 
index 151b25c10fce9c9eacf076710145676a81c1f802..91a4855cb1f5862702e815679812433ed3b9a8d8 100644 (file)
@@ -134,3 +134,58 @@ func BenchmarkMutexWork(b *testing.B) {
 func BenchmarkMutexWorkSlack(b *testing.B) {
        benchmarkMutex(b, true, true)
 }
+
+func BenchmarkMutexNoSpin(b *testing.B) {
+       // This benchmark models a situation where spinning in the mutex should be
+       // non-profitable and allows to confirm that spinning does not do harm.
+       // To achieve this we create excess of goroutines most of which do local work.
+       // These goroutines yield during local work, so that switching from
+       // a blocked goroutine to other goroutines is profitable.
+       // As a matter of fact, this benchmark still triggers some spinning in the mutex.
+       var m Mutex
+       var acc0, acc1 uint64
+       b.SetParallelism(4)
+       b.RunParallel(func(pb *testing.PB) {
+               c := make(chan bool)
+               var data [4 << 10]uint64
+               for i := 0; pb.Next(); i++ {
+                       if i%4 == 0 {
+                               m.Lock()
+                               acc0 -= 100
+                               acc1 += 100
+                               m.Unlock()
+                       } else {
+                               for i := 0; i < len(data); i += 4 {
+                                       data[i]++
+                               }
+                               // Elaborate way to say runtime.Gosched
+                               // that does not put the goroutine onto global runq.
+                               go func() {
+                                       c <- true
+                               }()
+                               <-c
+                       }
+               }
+       })
+}
+
+func BenchmarkMutexSpin(b *testing.B) {
+       // This benchmark models a situation where spinning in the mutex should be
+       // profitable. To achieve this we create a goroutine per-proc.
+       // These goroutines access considerable amount of local data so that
+       // unnecessary rescheduling is penalized by cache misses.
+       var m Mutex
+       var acc0, acc1 uint64
+       b.RunParallel(func(pb *testing.PB) {
+               var data [16 << 10]uint64
+               for i := 0; pb.Next(); i++ {
+                       m.Lock()
+                       acc0 -= 100
+                       acc1 += 100
+                       m.Unlock()
+                       for i := 0; i < len(data); i += 4 {
+                               data[i]++
+                       }
+               }
+       })
+}
index 3b866303a96109fabf6140444094a0a9db0466ce..c66d2deb4c847891e68cd321cd32a659294ca15d 100644 (file)
@@ -38,3 +38,10 @@ func init() {
        var s syncSema
        runtime_Syncsemcheck(unsafe.Sizeof(s))
 }
+
+// Active spinning runtime support.
+// runtime_canSpin returns true if spinning makes sense at the moment.
+func runtime_canSpin(i int) bool
+
+// runtime_doSpin does active spinning.
+func runtime_doSpin()