runtime: do hashmap grow work during reads.

author Keith Randall <khr@golang.org>

Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)

committer Keith Randall <khr@golang.org>

Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
author Keith Randall <khr@golang.org>
Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
committer Keith Randall <khr@golang.org>
Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c

index 2e61bcfe8fad8ae707272162ede4861b7370a9d2..db8cfd20e95c57b5556146bcd872e2855afd9ecf 100644 (file)
--- a/src/pkg/runtime/hashmap.c
+++ b/src/pkg/runtime/hashmap.c
@@ -107,6 +107,9 @@ struct Hmap
         uintptr nevacuate;    // progress counter for evacuation (buckets less than this have been evacuated)
  };
  
+// token to store in nevacuate field when locking the table to evacuate a bucket.
+#define EVAC_LOCK ((uintptr)-1)
+
  // possible flags
  enum
  {
@@ -114,7 +117,7 @@ enum
         IndirectValue = 2,  // storing pointers to values
         Iterator = 4,       // there may be an iterator using buckets
         OldIterator = 8,    // there may be an iterator using oldbuckets
-       CanFreeBucket = 16, // ok to free buckets
+       CanFreeBucket = 16, // ok to free buckets TODO: remove - unused
         CanFreeKey = 32,    // keys are indirect and ok to free keys
  };
  
@@ -285,12 +288,14 @@ hash_init(MapType *t, Hmap *h, uint32 hint)
  
  // Moves entries in oldbuckets[i] to buckets[i] and buckets[i+2^k].
  // We leave the original bucket intact, except for the evacuated marks, so that
-// iterators can still iterate through the old buckets.
+// lookup and iterators can still iterate through the old buckets.
+// Multiple threads must not be evacuating the same bucket at the same time.
  static void
  evacuate(MapType *t, Hmap *h, uintptr oldbucket)
  {
         Bucket *b;
         Bucket *nextb;
+       Bucket *mainb;
         Bucket *x, *y;
         Bucket *newx, *newy;
         uintptr xi, yi;
@@ -299,143 +304,154 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
         uintptr i;
         byte *k, *v;
         byte *xk, *yk, *xv, *yv;
-       byte *ob;
  
-       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
+       mainb = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
         newbit = (uintptr)1 << (h->B - 1);
  
-       if(!evacuated(b)) {
-               // TODO: reuse overflow buckets instead of using new ones, if there
-               // is no iterator using the old buckets.  (If CanFreeBuckets and !OldIterator.)
-
-               x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
-               y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
-               clearbucket(x);
-               clearbucket(y);
-               xi = 0;
-               yi = 0;
-               xk = x->data;
-               yk = y->data;
-               xv = xk + h->keysize * BUCKETSIZE;
-               yv = yk + h->keysize * BUCKETSIZE;
-               do {
-                       for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
-                               if(b->tophash[i] == 0)
-                                       continue;
-                               hash = h->hash0;
-                               t->key->alg->hash(&hash, t->key->size, IK(h, k));
-                               // NOTE: if key != key, then this hash could be (and probably will be)
-                               // entirely different from the old hash.  We effectively only update
-                               // the B'th bit of the hash in this case.
-                               if((hash & newbit) == 0) {
-                                       if(xi == BUCKETSIZE) {
-                                               if(checkgc) mstats.next_gc = mstats.heap_alloc;
-                                               newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
-                                               clearbucket(newx);
-                                               x->overflow = newx;
-                                               x = newx;
-                                               xi = 0;
-                                               xk = x->data;
-                                               xv = xk + h->keysize * BUCKETSIZE;
-                                       }
-                                       x->tophash[xi] = b->tophash[i];
-                                       if((h->flags & IndirectKey) != 0) {
-                                               *(byte**)xk = *(byte**)k;               // copy pointer
-                                       } else {
-                                               t->key->alg->copy(t->key->size, xk, k); // copy value
-                                       }
-                                       if((h->flags & IndirectValue) != 0) {
-                                               *(byte**)xv = *(byte**)v;
-                                       } else {
-                                               t->elem->alg->copy(t->elem->size, xv, v);
-                                       }
-                                       xi++;
-                                       xk += h->keysize;
-                                       xv += h->valuesize;
+       if(evacuated(mainb))            // someone else already evacuated this bucket.
+               return;
+
+       b = mainb;
+       x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
+       y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
+       clearbucket(x);
+       clearbucket(y);
+       xi = 0;
+       yi = 0;
+       xk = x->data;
+       yk = y->data;
+       xv = xk + h->keysize * BUCKETSIZE;
+       yv = yk + h->keysize * BUCKETSIZE;
+       do {
+               for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+                       if(b->tophash[i] == 0)
+                               continue;
+                       hash = h->hash0;
+                       t->key->alg->hash(&hash, t->key->size, IK(h, k));
+                       // NOTE: if key != key, then this hash could be (and probably will be)
+                       // entirely different from the old hash.  We effectively only update
+                       // the B'th bit of the hash in this case.
+                       if((hash & newbit) == 0) {
+                               if(xi == BUCKETSIZE) {
+                                       if(checkgc) mstats.next_gc = mstats.heap_alloc;
+                                       newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+                                       clearbucket(newx);
+                                       x->overflow = newx;
+                                       x = newx;
+                                       xi = 0;
+                                       xk = x->data;
+                                       xv = xk + h->keysize * BUCKETSIZE;
+                               }
+                               x->tophash[xi] = b->tophash[i];
+                               if((h->flags & IndirectKey) != 0) {
+                                       *(byte**)xk = *(byte**)k;               // copy pointer
                                 } else {
-                                       if(yi == BUCKETSIZE) {
-                                               if(checkgc) mstats.next_gc = mstats.heap_alloc;
-                                               newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
-                                               clearbucket(newy);
-                                               y->overflow = newy;
-                                               y = newy;
-                                               yi = 0;
-                                               yk = y->data;
-                                               yv = yk + h->keysize * BUCKETSIZE;
-                                       }
-                                       y->tophash[yi] = b->tophash[i];
-                                       if((h->flags & IndirectKey) != 0) {
-                                               *(byte**)yk = *(byte**)k;
-                                       } else {
-                                               t->key->alg->copy(t->key->size, yk, k);
-                                       }
-                                       if((h->flags & IndirectValue) != 0) {
-                                               *(byte**)yv = *(byte**)v;
-                                       } else {
-                                               t->elem->alg->copy(t->elem->size, yv, v);
-                                       }
-                                       yi++;
-                                       yk += h->keysize;
-                                       yv += h->valuesize;
+                                       t->key->alg->copy(t->key->size, xk, k); // copy value
                                 }
-                       }
-
-                       // mark as evacuated so we don't do it again.
-                       // this also tells any iterators that this data isn't golden anymore.
-                       nextb = b->overflow;
-                       b->overflow = (Bucket*)((uintptr)nextb + 1);
-
-                       b = nextb;
-               } while(b != nil);
-
-               // Free old overflow buckets as much as we can.
-               if((h->flags & OldIterator) == 0) {
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if((h->flags & CanFreeBucket) != 0) {
-                               while((nextb = overflowptr(b)) != nil) {
-                                       b->overflow = nextb->overflow;
-                                       runtime·free(nextb);
+                               if((h->flags & IndirectValue) != 0) {
+                                       *(byte**)xv = *(byte**)v;
+                               } else {
+                                       t->elem->alg->copy(t->elem->size, xv, v);
                                 }
+                               xi++;
+                               xk += h->keysize;
+                               xv += h->valuesize;
                         } else {
-                               // can't explicitly free overflow buckets, but at least
-                               // we can unlink them.
-                               b->overflow = (Bucket*)1;
+                               if(yi == BUCKETSIZE) {
+                                       if(checkgc) mstats.next_gc = mstats.heap_alloc;
+                                       newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+                                       clearbucket(newy);
+                                       y->overflow = newy;
+                                       y = newy;
+                                       yi = 0;
+                                       yk = y->data;
+                                       yv = yk + h->keysize * BUCKETSIZE;
+                               }
+                               y->tophash[yi] = b->tophash[i];
+                               if((h->flags & IndirectKey) != 0) {
+                                       *(byte**)yk = *(byte**)k;
+                               } else {
+                                       t->key->alg->copy(t->key->size, yk, k);
+                               }
+                               if((h->flags & IndirectValue) != 0) {
+                                       *(byte**)yv = *(byte**)v;
+                               } else {
+                                       t->elem->alg->copy(t->elem->size, yv, v);
+                               }
+                               yi++;
+                               yk += h->keysize;
+                               yv += h->valuesize;
                         }
                 }
-       }
+               b = b->overflow;
+       } while(b != nil);
  
-       // advance evacuation mark
-       if(oldbucket == h->nevacuate) {
-               h->nevacuate = oldbucket + 1;
-               if(oldbucket + 1 == newbit) { // newbit == # of oldbuckets
-                       // free main bucket array
-                       if((h->flags & (OldIterator | CanFreeBucket)) == CanFreeBucket) {
-                               ob = h->oldbuckets;
-                               h->oldbuckets = nil;
-                               runtime·free(ob);
-                       } else {
-                               h->oldbuckets = nil;
-                       }
-               }
+       // Mark main bucket as evacuated.  This write commits the
+       // bucket evacuation (readers can start using the new buckets).
+       b = mainb->overflow;
+       runtime·atomicstorep(&mainb->overflow, (Bucket*)((uintptr)b + 1));
+
+       // Mark overflow buckets for any iterators.
+       // These writes don't need to reach anyone until the next hashtable
+       // modification, so they don't need to be synchronized.
+       while(b != nil) {
+               nextb = b->overflow;
+               b->overflow = (Bucket*)((uintptr)nextb + 1);
+               b = nextb;
         }
+
         if(docheck)
                 check(t, h);
  }
  
+// Ensure that bucket has been evacuated from oldbuckets so that we can modify it.
+// Not multithreaded safe - you must not call this from anywhere except hash table
+// modifications (where we're guaranteed external synchronization).
  static void
  grow_work(MapType *t, Hmap *h, uintptr bucket)
  {
         uintptr noldbuckets;
+       intptr n;
  
+       // evac the bucket we're going to need
         noldbuckets = (uintptr)1 << (h->B - 1);
-
-       // make sure we evacuate the oldbucket corresponding
-       // to the bucket we're about to use
         evacuate(t, h, bucket & (noldbuckets - 1));
+       // evac another bucket to make progress
+       n = h->nevacuate;
+       evacuate(t, h, n);
+       // record what we've done
+       h->nevacuate = n + 1;
+       if(n + 1 == noldbuckets)
+               h->oldbuckets = nil;
+}
  
-       // evacuate one more oldbucket to make progress on growing
-       if(h->oldbuckets != nil)
-               evacuate(t, h, h->nevacuate);
+// Do some work for growing the table.
+// Multithreaded-safe.
+static void
+grow_work_read(MapType *t, Hmap *h) {
+       uintptr noldbuckets;
+       intptr n;
+
+       noldbuckets = (uintptr)1 << (h->B - 1);
+
+       // Get evacuation lock.  If we can't get it, fine, that means
+       // someone else is making progress which is good enough.
+       n = h->nevacuate;
+       if(n != EVAC_LOCK &&    // no one has evac lock
+          n != noldbuckets &&  // there's still work to do
+          runtime·casp((void**)&h->nevacuate, (void*)n, (void*)EVAC_LOCK)) { // we acquired lock
+               // We're now the exclusive evacuator.
+               evacuate(t, h, n);
+
+               // record that we're done.
+               runtime·atomicstorep((void**)&h->nevacuate, (void*)(n + 1));
+               if(n + 1 == noldbuckets) {
+                       // commit finishing of grow.
+                       runtime·atomicstorep(&h->oldbuckets, nil);
+                       // note: can't free oldbuckets, someone might be using it.
+                       // it will have to get GCed.
+               }
+       }
  }
  
  static void
@@ -480,7 +496,7 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
  {
         void *key;
         uintptr hash;
-       uintptr bucket, oldbucket;
+       uintptr bucket;
         Bucket *b;
         uint8 top;
         uintptr i;
@@ -495,13 +511,14 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
         hash = h->hash0;
         t->key->alg->hash(&hash, t->key->size, key);
         bucket = hash & (((uintptr)1 << h->B) - 1);
-       if(h->oldbuckets != nil) {
-               oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
-               b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-               if(evacuated(b)) {
-                       b = (Bucket*)(h->buckets + bucket * h->bucketsize);
-               }
+       b = runtime·atomicloadp(&h->oldbuckets);
+       if(b != nil) {
+               grow_work_read(t, h);
+               b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
+               if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
+                       goto newbucket;
         } else {
+       newbucket:
                 b = (Bucket*)(h->buckets + bucket * h->bucketsize);
         }
         top = hash >> (sizeof(uintptr)*8 - 8);
@@ -518,7 +535,7 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
                                 }
                         }
                 }
-               b = b->overflow;
+               b = overflowptr(b);
         } while(b != nil);
         return nil;
  }
@@ -812,6 +829,7 @@ hash_next(struct hash_iter *it)
         uintptr bucket, oldbucket;
         uintptr hash;
         Bucket *b;
+       byte *oldbuckets;
         uintptr i;
         intptr check_bucket;
         bool eq;
@@ -833,14 +851,15 @@ next:
                         it->value = nil;
                         return;
                 }
-               if(h->oldbuckets != nil && it->B == h->B) {
+               if(it->B == h->B && (oldbuckets = runtime·atomicloadp(&h->oldbuckets)) != nil) {
                         // Iterator was started in the middle of a grow, and the grow isn't done yet.
                         // If the bucket we're looking at hasn't been filled in yet (i.e. the old
                         // bucket hasn't been evacuated) then we need to iterate through the old
                         // bucket and only return the ones that will be migrated to this bucket.
+                       grow_work_read(t, h);
                         oldbucket = bucket & (((uintptr)1 << (it->B - 1)) - 1);
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if(!evacuated(b)) {
+                       b = (Bucket*)(oldbuckets + oldbucket * h->bucketsize);
+                       if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) == 0) {
                                 check_bucket = bucket;
                         } else {
                                 b = (Bucket*)(it->buckets + bucket * h->bucketsize);
diff --git a/src/pkg/runtime/hashmap_fast.c b/src/pkg/runtime/hashmap_fast.c

index afff7b1aad0f34728877f6535610046cd3fdf97e..1136ed6db620d9c5d4c6b51800265133353a6727 100644 (file)
--- a/src/pkg/runtime/hashmap_fast.c
+++ b/src/pkg/runtime/hashmap_fast.c
@@ -17,7 +17,7 @@ void
  HASH_LOOKUP1(MapType *t, Hmap *h, KEYTYPE key, byte *value)
  {
         uintptr hash;
-       uintptr bucket, oldbucket;
+       uintptr bucket;
         Bucket *b;
         uintptr i;
         KEYTYPE *k;
@@ -83,13 +83,14 @@ dohash:
                 hash = h->hash0;
                 HASHFUNC(&hash, sizeof(KEYTYPE), &key);
                 bucket = hash & (((uintptr)1 << h->B) - 1);
-               if(h->oldbuckets != nil) {
-                       oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if(evacuated(b)) {
-                               b = (Bucket*)(h->buckets + bucket * h->bucketsize);
-                       }
+               b = runtime·atomicloadp(&h->oldbuckets);
+               if(b != nil) {
+                       grow_work_read(t, h);
+                       b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
+                       if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
+                               goto newbucket;
                 } else {
+               newbucket:
                         b = (Bucket*)(h->buckets + bucket * h->bucketsize);
                 }
                 top = hash >> (sizeof(uintptr)*8 - 8);
@@ -103,7 +104,7 @@ dohash:
                                         return;
                                 }
                         }
-                       b = b->overflow;
+                       b = overflowptr(b);
                 } while(b != nil);
         }
         value = empty_value;
@@ -115,7 +116,7 @@ void
  HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res)
  {
         uintptr hash;
-       uintptr bucket, oldbucket;
+       uintptr bucket;
         Bucket *b;
         uintptr i;
         KEYTYPE *k;
@@ -187,13 +188,14 @@ dohash:
                 hash = h->hash0;
                 HASHFUNC(&hash, sizeof(KEYTYPE), &key);
                 bucket = hash & (((uintptr)1 << h->B) - 1);
-               if(h->oldbuckets != nil) {
-                       oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if(evacuated(b)) {
-                               b = (Bucket*)(h->buckets + bucket * h->bucketsize);
-                       }
+               b = runtime·atomicloadp(&h->oldbuckets);
+               if(b != nil) {
+                       grow_work_read(t, h);
+                       b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
+                       if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
+                               goto newbucket;
                 } else {
+               newbucket:
                         b = (Bucket*)(h->buckets + bucket * h->bucketsize);
                 }
                 top = hash >> (sizeof(uintptr)*8 - 8);
@@ -209,7 +211,7 @@ dohash:
                                         return;
                                 }
                         }
-                       b = b->overflow;
+                       b = overflowptr(b);
                 } while(b != nil);
         }
         value = empty_value;
author	Keith Randall <khr@golang.org>
	Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
committer	Keith Randall <khr@golang.org>
	Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
src/pkg/runtime/hashmap.c		patch \| blob \| history
src/pkg/runtime/hashmap_fast.c		patch \| blob \| history