From daaf29cf9320011af9b5feee36f75cb2ac175718 Mon Sep 17 00:00:00 2001 From: Dmitriy Vyukov Date: Tue, 12 Jul 2011 09:24:21 -0700 Subject: [PATCH] sync: improve RWMutex performance The new implementation features wait-free fast path for readers which significantly improves performance/scalability on read-mostly workloads. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows: benchmark old ns/op new ns/op delta BenchmarkRWMutexUncontended 179.00 96.60 -46.03% BenchmarkRWMutexUncontended-2 89.10 49.10 -44.89% BenchmarkRWMutexUncontended-4 44.70 24.70 -44.74% BenchmarkRWMutexUncontended-8 23.30 12.90 -44.64% BenchmarkRWMutexUncontended-16 16.80 8.75 -47.92% BenchmarkRWMutexWrite100 79.60 26.80 -66.33% BenchmarkRWMutexWrite100-2 305.00 33.00 -89.18% BenchmarkRWMutexWrite100-4 245.00 113.00 -53.88% BenchmarkRWMutexWrite100-8 330.00 147.00 -55.45% BenchmarkRWMutexWrite100-16 371.00 152.00 -59.03% BenchmarkRWMutexWrite10 78.30 29.80 -61.94% BenchmarkRWMutexWrite10-2 348.00 165.00 -52.59% BenchmarkRWMutexWrite10-4 447.00 199.00 -55.48% BenchmarkRWMutexWrite10-8 564.00 180.00 -68.09% BenchmarkRWMutexWrite10-16 492.00 192.00 -60.98% BenchmarkRWMutexWorkWrite100 1077.00 1037.00 -3.71% BenchmarkRWMutexWorkWrite100-2 659.00 596.00 -9.56% BenchmarkRWMutexWorkWrite100-4 509.00 361.00 -29.08% BenchmarkRWMutexWorkWrite100-8 603.00 351.00 -41.79% BenchmarkRWMutexWorkWrite100-16 750.00 607.00 -19.07% BenchmarkRWMutexWorkWrite10 990.00 951.00 -3.94% BenchmarkRWMutexWorkWrite10-2 1119.00 1070.00 -4.38% BenchmarkRWMutexWorkWrite10-4 1300.00 1199.00 -7.77% BenchmarkRWMutexWorkWrite10-8 1424.00 1291.00 -9.34% BenchmarkRWMutexWorkWrite10-16 1981.00 1786.00 -9.84% R=rsc CC=golang-dev https://golang.org/cl/4671051 --- src/pkg/sync/rwmutex.go | 67 ++++++++++++++++------------- src/pkg/sync/rwmutex_test.go | 81 ++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 30 deletions(-) diff --git a/src/pkg/sync/rwmutex.go b/src/pkg/sync/rwmutex.go index 9248b4b037..cb1a47720b 100644 --- a/src/pkg/sync/rwmutex.go +++ b/src/pkg/sync/rwmutex.go @@ -4,7 +4,10 @@ package sync -import "sync/atomic" +import ( + "runtime" + "sync/atomic" +) // An RWMutex is a reader/writer mutual exclusion lock. // The lock can be held by an arbitrary number of readers @@ -12,35 +15,22 @@ import "sync/atomic" // RWMutexes can be created as part of other // structures; the zero value for a RWMutex is // an unlocked mutex. -// -// Writers take priority over Readers: no new RLocks -// are granted while a blocked Lock call is waiting. type RWMutex struct { - w Mutex // held if there are pending readers or writers - r Mutex // held if the w is being rd - readerCount int32 // number of pending readers + w Mutex // held if there are pending writers + writerSem uint32 // semaphore for writers to wait for completing readers + readerSem uint32 // semaphore for readers to wait for completing writers + readerCount int32 // number of pending readers + readerWait int32 // number of departing readers } +const rwmutexMaxReaders = 1 << 30 + // RLock locks rw for reading. -// If the lock is already locked for writing or there is a writer already waiting -// to release the lock, RLock blocks until the writer has released the lock. func (rw *RWMutex) RLock() { - // Use rw.r.Lock() to block granting the RLock if a goroutine - // is waiting for its Lock. This is the prevent starvation of W in - // this situation: - // A: rw.RLock() // granted - // W: rw.Lock() // waiting for rw.w().Lock() - // B: rw.RLock() // granted - // C: rw.RLock() // granted - // B: rw.RUnlock() - // ... (new readers come and go indefinitely, W is starving) - rw.r.Lock() - if atomic.AddInt32(&rw.readerCount, 1) == 1 { - // The first reader locks rw.w, so writers will be blocked - // while the readers have the RLock. - rw.w.Lock() + if atomic.AddInt32(&rw.readerCount, 1) < 0 { + // A writer is pending, wait for it. + runtime.Semacquire(&rw.readerSem) } - rw.r.Unlock() } // RUnlock undoes a single RLock call; @@ -48,9 +38,12 @@ func (rw *RWMutex) RLock() { // It is a run-time error if rw is not locked for reading // on entry to RUnlock. func (rw *RWMutex) RUnlock() { - if atomic.AddInt32(&rw.readerCount, -1) == 0 { - // last reader finished, enable writers - rw.w.Unlock() + if atomic.AddInt32(&rw.readerCount, -1) < 0 { + // A writer is pending. + if atomic.AddInt32(&rw.readerWait, -1) == 0 { + // The last reader unblocks the writer. + runtime.Semrelease(&rw.writerSem) + } } } @@ -61,9 +54,14 @@ func (rw *RWMutex) RUnlock() { // a blocked Lock call excludes new readers from acquiring // the lock. func (rw *RWMutex) Lock() { - rw.r.Lock() + // First, resolve competition with other writers. rw.w.Lock() - rw.r.Unlock() + // Announce to readers there is a pending writer. + r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders + // Wait for active readers. + if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { + runtime.Semacquire(&rw.writerSem) + } } // Unlock unlocks rw for writing. It is a run-time error if rw is @@ -72,7 +70,16 @@ func (rw *RWMutex) Lock() { // As with Mutexes, a locked RWMutex is not associated with a particular // goroutine. One goroutine may RLock (Lock) an RWMutex and then // arrange for another goroutine to RUnlock (Unlock) it. -func (rw *RWMutex) Unlock() { rw.w.Unlock() } +func (rw *RWMutex) Unlock() { + // Announce to readers there is no active writer. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) + // Unblock blocked readers, if any. + for i := 0; i < int(r); i++ { + runtime.Semrelease(&rw.readerSem) + } + // Allow other writers to proceed. + rw.w.Unlock() +} // RLocker returns a Locker interface that implements // the Lock and Unlock methods by calling rw.RLock and rw.RUnlock. diff --git a/src/pkg/sync/rwmutex_test.go b/src/pkg/sync/rwmutex_test.go index 0480a66018..dc8ce9653c 100644 --- a/src/pkg/sync/rwmutex_test.go +++ b/src/pkg/sync/rwmutex_test.go @@ -154,3 +154,84 @@ func TestRLocker(t *testing.T) { wl.Unlock() } } + +func BenchmarkRWMutexUncontended(b *testing.B) { + type PaddedRWMutex struct { + RWMutex + pad [32]uint32 + } + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + var rwm PaddedRWMutex + for atomic.AddInt32(&N, -1) >= 0 { + runtime.Gosched() + for g := 0; g < CallsPerSched; g++ { + rwm.RLock() + rwm.RLock() + rwm.RUnlock() + rwm.RUnlock() + rwm.Lock() + rwm.Unlock() + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func benchmarkRWMutex(b *testing.B, localWork, writeRatio int) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + var rwm RWMutex + for p := 0; p < procs; p++ { + go func() { + foo := 0 + for atomic.AddInt32(&N, -1) >= 0 { + runtime.Gosched() + for g := 0; g < CallsPerSched; g++ { + foo++ + if foo%writeRatio == 0 { + rwm.Lock() + rwm.Unlock() + } else { + rwm.RLock() + for i := 0; i != localWork; i += 1 { + foo *= 2 + foo /= 2 + } + rwm.RUnlock() + } + } + } + c <- foo == 42 + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkRWMutexWrite100(b *testing.B) { + benchmarkRWMutex(b, 0, 100) +} + +func BenchmarkRWMutexWrite10(b *testing.B) { + benchmarkRWMutex(b, 0, 10) +} + +func BenchmarkRWMutexWorkWrite100(b *testing.B) { + benchmarkRWMutex(b, 100, 100) +} + +func BenchmarkRWMutexWorkWrite10(b *testing.B) { + benchmarkRWMutex(b, 100, 10) +} -- 2.50.0