runtime: use balanced tree for addr lookup in semaphore implementation

author Russ Cox <rsc@golang.org>

Sun, 12 Feb 2017 18:19:02 +0000 (13:19 -0500)

committer Russ Cox <rsc@golang.org>

Thu, 16 Feb 2017 17:52:15 +0000 (17:52 +0000)
author Russ Cox <rsc@golang.org>
Sun, 12 Feb 2017 18:19:02 +0000 (13:19 -0500)
committer Russ Cox <rsc@golang.org>
Thu, 16 Feb 2017 17:52:15 +0000 (17:52 +0000)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go

index f0b919453a360f634e3b68d7aa8795e6614f246a..ab45a5c7b9523d536389c809983bc2e8fc68229b 100644 (file)
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -286,6 +286,7 @@ type sudog struct {
         acquiretime int64
         releasetime int64
         ticket      uint32
+       parent      *sudog // semaRoot binary tree
         waitlink    *sudog // g.waiting list or semaRoot
         waittail    *sudog // semaRoot
         c           *hchan // channel
diff --git a/src/runtime/sema.go b/src/runtime/sema.go

index 9d4cc3c036e5f4b1f0c31218223dfca2ec6a8b61..d8d8710501af67a167ad2449417a4aa16d88b3ca 100644 (file)
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -27,24 +27,19 @@ import (
  
  // Asynchronous semaphore for sync.Mutex.
  
-// A semaRoot holds a linked list of sudog with distinct addresses (s.elem).
+// A semaRoot holds a balanced tree of sudog with distinct addresses (s.elem).
  // Each of those sudog may in turn point (through s.waitlink) to a list
  // of other sudogs waiting on the same address.
  // The operations on the inner lists of sudogs with the same address
-// are all O(1). Only the scanning of the top-level semaRoot list is O(n),
+// are all O(1). The scanning of the top-level semaRoot list is O(log n),
  // where n is the number of distinct addresses with goroutines blocked
  // on them that hash to the given semaRoot.
-// In systems with many goroutines, most queue up on a few addresses,
-// so the linear search across unique addresses is probably OK.
-// At least, we'll use this until it's not.
-// The next step is probably to make the top-level list a treap instead
-// of a linked list.
  // See golang.org/issue/17953 for a program that worked badly
-// before we introduced the second level of list.
+// before we introduced the second level of list, and test/locklinear.go
+// for a test that exercises this.
  type semaRoot struct {
         lock  mutex
-       head  *sudog
-       tail  *sudog
+       treap *sudog // root of balanced tree of unique waiters.
         nwait uint32 // Number of waiters. Read w/o the lock.
  }
  
@@ -205,8 +200,12 @@ func cansemacquire(addr *uint32) bool {
  func (root *semaRoot) queue(addr *uint32, s *sudog) {
         s.g = getg()
         s.elem = unsafe.Pointer(addr)
+       s.next = nil
+       s.prev = nil
  
-       for t := root.head; t != nil; t = t.next {
+       var last *sudog
+       pt := &root.treap
+       for t := *pt; t != nil; t = *pt {
                 if t.elem == unsafe.Pointer(addr) {
                         // Already have addr in list; add s to end of per-addr list.
                         if t.waittail == nil {
@@ -218,17 +217,37 @@ func (root *semaRoot) queue(addr *uint32, s *sudog) {
                         s.waitlink = nil
                         return
                 }
+               last = t
+               if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) {
+                       pt = &t.prev
+               } else {
+                       pt = &t.next
+               }
         }
  
-       // Add s as new entry in list of unique addrs.
-       s.next = nil
-       s.prev = root.tail
-       if root.tail != nil {
-               root.tail.next = s
-       } else {
-               root.head = s
+       // Add s as new leaf in tree of unique addrs.
+       // The balanced tree is a treap using ticket as the random heap priority.
+       // That is, it is a binary tree ordered according to the elem addresses,
+       // but then among the space of possible binary trees respecting those
+       // addresses, it is kept balanced on average by maintaining a heap ordering
+       // on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket.
+       // https://en.wikipedia.org/wiki/Treap
+       // http://faculty.washington.edu/aragon/pubs/rst89.pdf
+       s.ticket = fastrand()
+       s.parent = last
+       *pt = s
+
+       // Rotate up into tree according to ticket (priority).
+       for s.parent != nil && s.parent.ticket > s.ticket {
+               if s.parent.prev == s {
+                       root.rotateRight(s.parent)
+               } else {
+                       if s.parent.next != s {
+                               panic("semaRoot queue")
+                       }
+                       root.rotateLeft(s.parent)
+               }
         }
-       root.tail = s
  }
  
  // dequeue searches for and finds the first goroutine
@@ -236,11 +255,17 @@ func (root *semaRoot) queue(addr *uint32, s *sudog) {
  // If the sudog was being profiled, dequeue returns the time
  // at which it was woken up as now. Otherwise now is 0.
  func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
-       s := root.head
-       for ; s != nil; s = s.next {
+       ps := &root.treap
+       s := *ps
+       for ; s != nil; s = *ps {
                 if s.elem == unsafe.Pointer(addr) {
                         goto Found
                 }
+               if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) {
+                       ps = &s.prev
+               } else {
+                       ps = &s.next
+               }
         }
         return nil, 0
  
@@ -250,18 +275,17 @@ Found:
                 now = cputicks()
         }
         if t := s.waitlink; t != nil {
-               // Substitute t, also waiting on addr, for s in root list of unique addrs.
+               // Substitute t, also waiting on addr, for s in root tree of unique addrs.
+               *ps = t
+               t.ticket = s.ticket
+               t.parent = s.parent
                 t.prev = s.prev
-               t.next = s.next
                 if t.prev != nil {
-                       t.prev.next = t
-               } else {
-                       root.head = t
+                       t.prev.parent = t
                 }
+               t.next = s.next
                 if t.next != nil {
-                       t.next.prev = t
-               } else {
-                       root.tail = t
+                       t.next.parent = t
                 }
                 if t.waitlink != nil {
                         t.waittail = s.waittail
@@ -272,24 +296,104 @@ Found:
                 s.waitlink = nil
                 s.waittail = nil
         } else {
-               // Remove s from list.
-               if s.next != nil {
-                       s.next.prev = s.prev
-               } else {
-                       root.tail = s.prev
+               // Rotate s down to be leaf of tree for removal, respecting priorities.
+               for s.next != nil || s.prev != nil {
+                       if s.next == nil || s.prev != nil && s.prev.ticket < s.next.ticket {
+                               root.rotateRight(s)
+                       } else {
+                               root.rotateLeft(s)
+                       }
                 }
-               if s.prev != nil {
-                       s.prev.next = s.next
+               // Remove s, now a leaf.
+               if s.parent != nil {
+                       if s.parent.prev == s {
+                               s.parent.prev = nil
+                       } else {
+                               s.parent.next = nil
+                       }
                 } else {
-                       root.head = s.next
+                       root.treap = nil
                 }
         }
+       s.parent = nil
         s.elem = nil
         s.next = nil
         s.prev = nil
         return s, now
  }
  
+// rotateLeft rotates the tree rooted at node x.
+// turning (x a (y b c)) into (y (x a b) c).
+func (root *semaRoot) rotateLeft(x *sudog) {
+       // p -> (x a (y b c))
+       p := x.parent
+       a, y := x.prev, x.next
+       b, c := y.prev, y.next
+
+       y.prev = x
+       x.parent = y
+       y.next = c
+       if c != nil {
+               c.parent = y
+       }
+       x.prev = a
+       if a != nil {
+               a.parent = x
+       }
+       x.next = b
+       if b != nil {
+               b.parent = x
+       }
+
+       y.parent = p
+       if p == nil {
+               root.treap = y
+       } else if p.prev == x {
+               p.prev = y
+       } else {
+               if p.next != x {
+                       throw("semaRoot rotateLeft")
+               }
+               p.next = y
+       }
+}
+
+// rotateRight rotates the tree rooted at node y.
+// turning (y (x a b) c) into (x a (y b c)).
+func (root *semaRoot) rotateRight(y *sudog) {
+       // p -> (y (x a b) c)
+       p := y.parent
+       x, c := y.prev, y.next
+       a, b := x.prev, x.next
+
+       x.prev = a
+       if a != nil {
+               a.parent = x
+       }
+       x.next = y
+       y.parent = x
+       y.prev = b
+       if b != nil {
+               b.parent = y
+       }
+       y.next = c
+       if c != nil {
+               c.parent = y
+       }
+
+       x.parent = p
+       if p == nil {
+               root.treap = x
+       } else if p.prev == y {
+               p.prev = x
+       } else {
+               if p.next != y {
+                       throw("semaRoot rotateRight")
+               }
+               p.next = x
+       }
+}
+
  // notifyList is a ticket-based notification list used to implement sync.Cond.
  //
  // It must be kept in sync with the sync package.
@@ -414,10 +518,22 @@ func notifyListNotifyOne(l *notifyList) {
                 return
         }
  
-       // Update the next notify ticket number, and try to find the G that
-       // needs to be notified. If it hasn't made it to the list yet we won't
-       // find it, but it won't park itself once it sees the new notify number.
+       // Update the next notify ticket number.
         atomic.Store(&l.notify, t+1)
+
+       // Try to find the g that needs to be notified.
+       // If it hasn't made it to the list yet we won't find it,
+       // but it won't park itself once it sees the new notify number.
+       //
+       // This scan looks linear but essentially always stops quickly.
+       // Because g's queue separately from taking numbers,
+       // there may be minor reorderings in the list, but we
+       // expect the g we're looking for to be near the front.
+       // The g has others in front of it on the list only to the
+       // extent that it lost the race, so the iteration will not
+       // be too long. This applies even when the g is missing:
+       // it hasn't yet gotten to sleep and has lost the race to
+       // the (few) other g's that we find on the list.
         for p, s := (*sudog)(nil), l.head; s != nil; p, s = s, s.next {
                 if s.ticket == t {
                         n := s.next
diff --git a/test/locklinear.go b/test/locklinear.go

new file mode 100644 (file)

index 0000000..f1fe213
--- /dev/null
+++ b/test/locklinear.go
@@ -0,0 +1,139 @@
+// run
+
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that locks don't go quadratic due to runtime hash table collisions.
+
+package main
+
+import (
+       "fmt"
+       "runtime"
+       "sync"
+       "time"
+)
+
+const debug = false
+
+// checkLinear asserts that the running time of f(n) is at least linear but sub-quadratic.
+// tries is the initial number of iterations.
+func checkLinear(typ string, tries int, f func(n int)) {
+       // Depending on the machine and OS, this test might be too fast
+       // to measure with accurate enough granularity. On failure,
+       // make it run longer, hoping that the timing granularity
+       // is eventually sufficient.
+
+       timeF := func(n int) time.Duration {
+               t1 := time.Now()
+               f(n)
+               return time.Since(t1)
+       }
+
+       n := tries
+       fails := 0
+       for {
+               t1 := timeF(n)
+               t2 := timeF(2 * n)
+               if debug {
+                       println(n, t1.String(), 2*n, t2.String())
+               }
+               // should be 2x (linear); allow up to 2.5x
+               if t1*3/2 < t2 && t2 < t1*5/2 {
+                       return
+               }
+               // If 2n ops run in under a second and the ratio
+               // doesn't work out, make n bigger, trying to reduce
+               // the effect that a constant amount of overhead has
+               // on the computed ratio.
+               if t2 < 1*time.Second {
+                       n *= 2
+                       continue
+               }
+               // Once the test runs long enough for n ops,
+               // try to get the right ratio at least once.
+               // If five in a row all fail, give up.
+               if fails++; fails >= 5 {
+                       panic(fmt.Sprintf("%s: too slow: %d ops: %v; %d ops: %v\n",
+                               typ, n, t1, 2*n, t2))
+               }
+       }
+}
+
+const offset = 251 // known size of runtime hash table
+
+func main() {
+       checkLinear("lockone", 1000, func(n int) {
+               ch := make(chan int)
+               locks := make([]sync.RWMutex, offset+1)
+               for i := 0; i < n; i++ {
+                       go func() {
+                               locks[0].Lock()
+                               ch <- 1
+                       }()
+               }
+               time.Sleep(1 * time.Millisecond)
+
+               go func() {
+                       for j := 0; j < n; j++ {
+                               locks[1].Lock()
+                               locks[offset].Lock()
+                               locks[1].Unlock()
+                               runtime.Gosched()
+                               locks[offset].Unlock()
+                       }
+               }()
+
+               for j := 0; j < n; j++ {
+                       locks[1].Lock()
+                       locks[offset].Lock()
+                       locks[1].Unlock()
+                       runtime.Gosched()
+                       locks[offset].Unlock()
+               }
+
+               for i := 0; i < n; i++ {
+                       <-ch
+                       locks[0].Unlock()
+               }
+       })
+
+       checkLinear("lockmany", 1000, func(n int) {
+               locks := make([]sync.RWMutex, n*offset+1)
+
+               var wg sync.WaitGroup
+               for i := 0; i < n; i++ {
+                       wg.Add(1)
+                       go func(i int) {
+                               locks[(i+1)*offset].Lock()
+                               wg.Done()
+                               locks[(i+1)*offset].Lock()
+                               locks[(i+1)*offset].Unlock()
+                       }(i)
+               }
+               wg.Wait()
+
+               go func() {
+                       for j := 0; j < n; j++ {
+                               locks[1].Lock()
+                               locks[0].Lock()
+                               locks[1].Unlock()
+                               runtime.Gosched()
+                               locks[0].Unlock()
+                       }
+               }()
+
+               for j := 0; j < n; j++ {
+                       locks[1].Lock()
+                       locks[0].Lock()
+                       locks[1].Unlock()
+                       runtime.Gosched()
+                       locks[0].Unlock()
+               }
+
+               for i := 0; i < n; i++ {
+                       locks[(i+1)*offset].Unlock()
+               }
+       })
+}
author	Russ Cox <rsc@golang.org>
	Sun, 12 Feb 2017 18:19:02 +0000 (13:19 -0500)
committer	Russ Cox <rsc@golang.org>
	Thu, 16 Feb 2017 17:52:15 +0000 (17:52 +0000)
src/runtime/runtime2.go		patch \| blob \| history
src/runtime/sema.go		patch \| blob \| history
test/locklinear.go	[new file with mode: 0644]	patch \| blob