]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: do hashmap grow work during reads.
authorKeith Randall <khr@golang.org>
Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
committerKeith Randall <khr@golang.org>
Sat, 1 Jun 2013 03:58:31 +0000 (20:58 -0700)
Before this change, grow work was done only
during map writes to ensure multithreaded safety.
This can lead to maps remaining in a partially
grown state for a long time, potentially forever.
This change allows grow work to happen during reads,
which will lead to grow work finishing sooner, making
the resulting map smaller and faster.

Grow work is not done in parallel.  Reads can
happen in parallel while grow work is happening.

R=golang-dev, dvyukov, khr, iant
CC=golang-dev
https://golang.org/cl/8852047

src/pkg/runtime/hashmap.c
src/pkg/runtime/hashmap_fast.c

index 2e61bcfe8fad8ae707272162ede4861b7370a9d2..db8cfd20e95c57b5556146bcd872e2855afd9ecf 100644 (file)
@@ -107,6 +107,9 @@ struct Hmap
        uintptr nevacuate;    // progress counter for evacuation (buckets less than this have been evacuated)
 };
 
+// token to store in nevacuate field when locking the table to evacuate a bucket.
+#define EVAC_LOCK ((uintptr)-1)
+
 // possible flags
 enum
 {
@@ -114,7 +117,7 @@ enum
        IndirectValue = 2,  // storing pointers to values
        Iterator = 4,       // there may be an iterator using buckets
        OldIterator = 8,    // there may be an iterator using oldbuckets
-       CanFreeBucket = 16, // ok to free buckets
+       CanFreeBucket = 16, // ok to free buckets TODO: remove - unused
        CanFreeKey = 32,    // keys are indirect and ok to free keys
 };
 
@@ -285,12 +288,14 @@ hash_init(MapType *t, Hmap *h, uint32 hint)
 
 // Moves entries in oldbuckets[i] to buckets[i] and buckets[i+2^k].
 // We leave the original bucket intact, except for the evacuated marks, so that
-// iterators can still iterate through the old buckets.
+// lookup and iterators can still iterate through the old buckets.
+// Multiple threads must not be evacuating the same bucket at the same time.
 static void
 evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 {
        Bucket *b;
        Bucket *nextb;
+       Bucket *mainb;
        Bucket *x, *y;
        Bucket *newx, *newy;
        uintptr xi, yi;
@@ -299,143 +304,154 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
        uintptr i;
        byte *k, *v;
        byte *xk, *yk, *xv, *yv;
-       byte *ob;
 
-       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
+       mainb = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
        newbit = (uintptr)1 << (h->B - 1);
 
-       if(!evacuated(b)) {
-               // TODO: reuse overflow buckets instead of using new ones, if there
-               // is no iterator using the old buckets.  (If CanFreeBuckets and !OldIterator.)
-
-               x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
-               y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
-               clearbucket(x);
-               clearbucket(y);
-               xi = 0;
-               yi = 0;
-               xk = x->data;
-               yk = y->data;
-               xv = xk + h->keysize * BUCKETSIZE;
-               yv = yk + h->keysize * BUCKETSIZE;
-               do {
-                       for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
-                               if(b->tophash[i] == 0)
-                                       continue;
-                               hash = h->hash0;
-                               t->key->alg->hash(&hash, t->key->size, IK(h, k));
-                               // NOTE: if key != key, then this hash could be (and probably will be)
-                               // entirely different from the old hash.  We effectively only update
-                               // the B'th bit of the hash in this case.
-                               if((hash & newbit) == 0) {
-                                       if(xi == BUCKETSIZE) {
-                                               if(checkgc) mstats.next_gc = mstats.heap_alloc;
-                                               newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
-                                               clearbucket(newx);
-                                               x->overflow = newx;
-                                               x = newx;
-                                               xi = 0;
-                                               xk = x->data;
-                                               xv = xk + h->keysize * BUCKETSIZE;
-                                       }
-                                       x->tophash[xi] = b->tophash[i];
-                                       if((h->flags & IndirectKey) != 0) {
-                                               *(byte**)xk = *(byte**)k;               // copy pointer
-                                       } else {
-                                               t->key->alg->copy(t->key->size, xk, k); // copy value
-                                       }
-                                       if((h->flags & IndirectValue) != 0) {
-                                               *(byte**)xv = *(byte**)v;
-                                       } else {
-                                               t->elem->alg->copy(t->elem->size, xv, v);
-                                       }
-                                       xi++;
-                                       xk += h->keysize;
-                                       xv += h->valuesize;
+       if(evacuated(mainb))            // someone else already evacuated this bucket.
+               return;
+
+       b = mainb;
+       x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
+       y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
+       clearbucket(x);
+       clearbucket(y);
+       xi = 0;
+       yi = 0;
+       xk = x->data;
+       yk = y->data;
+       xv = xk + h->keysize * BUCKETSIZE;
+       yv = yk + h->keysize * BUCKETSIZE;
+       do {
+               for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+                       if(b->tophash[i] == 0)
+                               continue;
+                       hash = h->hash0;
+                       t->key->alg->hash(&hash, t->key->size, IK(h, k));
+                       // NOTE: if key != key, then this hash could be (and probably will be)
+                       // entirely different from the old hash.  We effectively only update
+                       // the B'th bit of the hash in this case.
+                       if((hash & newbit) == 0) {
+                               if(xi == BUCKETSIZE) {
+                                       if(checkgc) mstats.next_gc = mstats.heap_alloc;
+                                       newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+                                       clearbucket(newx);
+                                       x->overflow = newx;
+                                       x = newx;
+                                       xi = 0;
+                                       xk = x->data;
+                                       xv = xk + h->keysize * BUCKETSIZE;
+                               }
+                               x->tophash[xi] = b->tophash[i];
+                               if((h->flags & IndirectKey) != 0) {
+                                       *(byte**)xk = *(byte**)k;               // copy pointer
                                } else {
-                                       if(yi == BUCKETSIZE) {
-                                               if(checkgc) mstats.next_gc = mstats.heap_alloc;
-                                               newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
-                                               clearbucket(newy);
-                                               y->overflow = newy;
-                                               y = newy;
-                                               yi = 0;
-                                               yk = y->data;
-                                               yv = yk + h->keysize * BUCKETSIZE;
-                                       }
-                                       y->tophash[yi] = b->tophash[i];
-                                       if((h->flags & IndirectKey) != 0) {
-                                               *(byte**)yk = *(byte**)k;
-                                       } else {
-                                               t->key->alg->copy(t->key->size, yk, k);
-                                       }
-                                       if((h->flags & IndirectValue) != 0) {
-                                               *(byte**)yv = *(byte**)v;
-                                       } else {
-                                               t->elem->alg->copy(t->elem->size, yv, v);
-                                       }
-                                       yi++;
-                                       yk += h->keysize;
-                                       yv += h->valuesize;
+                                       t->key->alg->copy(t->key->size, xk, k); // copy value
                                }
-                       }
-
-                       // mark as evacuated so we don't do it again.
-                       // this also tells any iterators that this data isn't golden anymore.
-                       nextb = b->overflow;
-                       b->overflow = (Bucket*)((uintptr)nextb + 1);
-
-                       b = nextb;
-               } while(b != nil);
-
-               // Free old overflow buckets as much as we can.
-               if((h->flags & OldIterator) == 0) {
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if((h->flags & CanFreeBucket) != 0) {
-                               while((nextb = overflowptr(b)) != nil) {
-                                       b->overflow = nextb->overflow;
-                                       runtime·free(nextb);
+                               if((h->flags & IndirectValue) != 0) {
+                                       *(byte**)xv = *(byte**)v;
+                               } else {
+                                       t->elem->alg->copy(t->elem->size, xv, v);
                                }
+                               xi++;
+                               xk += h->keysize;
+                               xv += h->valuesize;
                        } else {
-                               // can't explicitly free overflow buckets, but at least
-                               // we can unlink them.
-                               b->overflow = (Bucket*)1;
+                               if(yi == BUCKETSIZE) {
+                                       if(checkgc) mstats.next_gc = mstats.heap_alloc;
+                                       newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+                                       clearbucket(newy);
+                                       y->overflow = newy;
+                                       y = newy;
+                                       yi = 0;
+                                       yk = y->data;
+                                       yv = yk + h->keysize * BUCKETSIZE;
+                               }
+                               y->tophash[yi] = b->tophash[i];
+                               if((h->flags & IndirectKey) != 0) {
+                                       *(byte**)yk = *(byte**)k;
+                               } else {
+                                       t->key->alg->copy(t->key->size, yk, k);
+                               }
+                               if((h->flags & IndirectValue) != 0) {
+                                       *(byte**)yv = *(byte**)v;
+                               } else {
+                                       t->elem->alg->copy(t->elem->size, yv, v);
+                               }
+                               yi++;
+                               yk += h->keysize;
+                               yv += h->valuesize;
                        }
                }
-       }
+               b = b->overflow;
+       } while(b != nil);
 
-       // advance evacuation mark
-       if(oldbucket == h->nevacuate) {
-               h->nevacuate = oldbucket + 1;
-               if(oldbucket + 1 == newbit) { // newbit == # of oldbuckets
-                       // free main bucket array
-                       if((h->flags & (OldIterator | CanFreeBucket)) == CanFreeBucket) {
-                               ob = h->oldbuckets;
-                               h->oldbuckets = nil;
-                               runtime·free(ob);
-                       } else {
-                               h->oldbuckets = nil;
-                       }
-               }
+       // Mark main bucket as evacuated.  This write commits the
+       // bucket evacuation (readers can start using the new buckets).
+       b = mainb->overflow;
+       runtime·atomicstorep(&mainb->overflow, (Bucket*)((uintptr)b + 1));
+
+       // Mark overflow buckets for any iterators.
+       // These writes don't need to reach anyone until the next hashtable
+       // modification, so they don't need to be synchronized.
+       while(b != nil) {
+               nextb = b->overflow;
+               b->overflow = (Bucket*)((uintptr)nextb + 1);
+               b = nextb;
        }
+
        if(docheck)
                check(t, h);
 }
 
+// Ensure that bucket has been evacuated from oldbuckets so that we can modify it.
+// Not multithreaded safe - you must not call this from anywhere except hash table
+// modifications (where we're guaranteed external synchronization).
 static void
 grow_work(MapType *t, Hmap *h, uintptr bucket)
 {
        uintptr noldbuckets;
+       intptr n;
 
+       // evac the bucket we're going to need
        noldbuckets = (uintptr)1 << (h->B - 1);
-
-       // make sure we evacuate the oldbucket corresponding
-       // to the bucket we're about to use
        evacuate(t, h, bucket & (noldbuckets - 1));
+       // evac another bucket to make progress
+       n = h->nevacuate;
+       evacuate(t, h, n);
+       // record what we've done
+       h->nevacuate = n + 1;
+       if(n + 1 == noldbuckets)
+               h->oldbuckets = nil;
+}
 
-       // evacuate one more oldbucket to make progress on growing
-       if(h->oldbuckets != nil)
-               evacuate(t, h, h->nevacuate);
+// Do some work for growing the table.
+// Multithreaded-safe.
+static void
+grow_work_read(MapType *t, Hmap *h) {
+       uintptr noldbuckets;
+       intptr n;
+
+       noldbuckets = (uintptr)1 << (h->B - 1);
+
+       // Get evacuation lock.  If we can't get it, fine, that means
+       // someone else is making progress which is good enough.
+       n = h->nevacuate;
+       if(n != EVAC_LOCK &&    // no one has evac lock
+          n != noldbuckets &&  // there's still work to do
+          runtime·casp((void**)&h->nevacuate, (void*)n, (void*)EVAC_LOCK)) { // we acquired lock
+               // We're now the exclusive evacuator.
+               evacuate(t, h, n);
+
+               // record that we're done.
+               runtime·atomicstorep((void**)&h->nevacuate, (void*)(n + 1));
+               if(n + 1 == noldbuckets) {
+                       // commit finishing of grow.
+                       runtime·atomicstorep(&h->oldbuckets, nil);
+                       // note: can't free oldbuckets, someone might be using it.
+                       // it will have to get GCed.
+               }
+       }
 }
 
 static void
@@ -480,7 +496,7 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
 {
        void *key;
        uintptr hash;
-       uintptr bucket, oldbucket;
+       uintptr bucket;
        Bucket *b;
        uint8 top;
        uintptr i;
@@ -495,13 +511,14 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
        hash = h->hash0;
        t->key->alg->hash(&hash, t->key->size, key);
        bucket = hash & (((uintptr)1 << h->B) - 1);
-       if(h->oldbuckets != nil) {
-               oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
-               b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-               if(evacuated(b)) {
-                       b = (Bucket*)(h->buckets + bucket * h->bucketsize);
-               }
+       b = runtime·atomicloadp(&h->oldbuckets);
+       if(b != nil) {
+               grow_work_read(t, h);
+               b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
+               if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
+                       goto newbucket;
        } else {
+       newbucket:
                b = (Bucket*)(h->buckets + bucket * h->bucketsize);
        }
        top = hash >> (sizeof(uintptr)*8 - 8);
@@ -518,7 +535,7 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
                                }
                        }
                }
-               b = b->overflow;
+               b = overflowptr(b);
        } while(b != nil);
        return nil;
 }
@@ -812,6 +829,7 @@ hash_next(struct hash_iter *it)
        uintptr bucket, oldbucket;
        uintptr hash;
        Bucket *b;
+       byte *oldbuckets;
        uintptr i;
        intptr check_bucket;
        bool eq;
@@ -833,14 +851,15 @@ next:
                        it->value = nil;
                        return;
                }
-               if(h->oldbuckets != nil && it->B == h->B) {
+               if(it->B == h->B && (oldbuckets = runtime·atomicloadp(&h->oldbuckets)) != nil) {
                        // Iterator was started in the middle of a grow, and the grow isn't done yet.
                        // If the bucket we're looking at hasn't been filled in yet (i.e. the old
                        // bucket hasn't been evacuated) then we need to iterate through the old
                        // bucket and only return the ones that will be migrated to this bucket.
+                       grow_work_read(t, h);
                        oldbucket = bucket & (((uintptr)1 << (it->B - 1)) - 1);
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if(!evacuated(b)) {
+                       b = (Bucket*)(oldbuckets + oldbucket * h->bucketsize);
+                       if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) == 0) {
                                check_bucket = bucket;
                        } else {
                                b = (Bucket*)(it->buckets + bucket * h->bucketsize);
index afff7b1aad0f34728877f6535610046cd3fdf97e..1136ed6db620d9c5d4c6b51800265133353a6727 100644 (file)
@@ -17,7 +17,7 @@ void
 HASH_LOOKUP1(MapType *t, Hmap *h, KEYTYPE key, byte *value)
 {
        uintptr hash;
-       uintptr bucket, oldbucket;
+       uintptr bucket;
        Bucket *b;
        uintptr i;
        KEYTYPE *k;
@@ -83,13 +83,14 @@ dohash:
                hash = h->hash0;
                HASHFUNC(&hash, sizeof(KEYTYPE), &key);
                bucket = hash & (((uintptr)1 << h->B) - 1);
-               if(h->oldbuckets != nil) {
-                       oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if(evacuated(b)) {
-                               b = (Bucket*)(h->buckets + bucket * h->bucketsize);
-                       }
+               b = runtime·atomicloadp(&h->oldbuckets);
+               if(b != nil) {
+                       grow_work_read(t, h);
+                       b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
+                       if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
+                               goto newbucket;
                } else {
+               newbucket:
                        b = (Bucket*)(h->buckets + bucket * h->bucketsize);
                }
                top = hash >> (sizeof(uintptr)*8 - 8);
@@ -103,7 +104,7 @@ dohash:
                                        return;
                                }
                        }
-                       b = b->overflow;
+                       b = overflowptr(b);
                } while(b != nil);
        }
        value = empty_value;
@@ -115,7 +116,7 @@ void
 HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res)
 {
        uintptr hash;
-       uintptr bucket, oldbucket;
+       uintptr bucket;
        Bucket *b;
        uintptr i;
        KEYTYPE *k;
@@ -187,13 +188,14 @@ dohash:
                hash = h->hash0;
                HASHFUNC(&hash, sizeof(KEYTYPE), &key);
                bucket = hash & (((uintptr)1 << h->B) - 1);
-               if(h->oldbuckets != nil) {
-                       oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
-                       b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
-                       if(evacuated(b)) {
-                               b = (Bucket*)(h->buckets + bucket * h->bucketsize);
-                       }
+               b = runtime·atomicloadp(&h->oldbuckets);
+               if(b != nil) {
+                       grow_work_read(t, h);
+                       b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
+                       if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
+                               goto newbucket;
                } else {
+               newbucket:
                        b = (Bucket*)(h->buckets + bucket * h->bucketsize);
                }
                top = hash >> (sizeof(uintptr)*8 - 8);
@@ -209,7 +211,7 @@ dohash:
                                        return;
                                }
                        }
-                       b = b->overflow;
+                       b = overflowptr(b);
                } while(b != nil);
        }
        value = empty_value;