runtime: fix bug in maps at the intersection of iterators, growing, and NaN keys

author Keith Randall <khr@golang.org>

Fri, 4 Oct 2013 20:54:03 +0000 (13:54 -0700)

committer Keith Randall <khr@golang.org>

Fri, 4 Oct 2013 20:54:03 +0000 (13:54 -0700)
author Keith Randall <khr@golang.org>
Fri, 4 Oct 2013 20:54:03 +0000 (13:54 -0700)
committer Keith Randall <khr@golang.org>
Fri, 4 Oct 2013 20:54:03 +0000 (13:54 -0700)
diff --git a/src/pkg/runtime/export_test.go b/src/pkg/runtime/export_test.go

index 01d0ed667f40da07c284cf9580c6f1f725d36e1b..d170fa72aee47238860a12cb01544ce17a3826fb 100644 (file)
--- a/src/pkg/runtime/export_test.go
+++ b/src/pkg/runtime/export_test.go
@@ -81,3 +81,6 @@ var Int32Hash = int32Hash
  var Int64Hash = int64Hash
  
  func GogoBytes() int32
+
+var hashLoad float64 // declared in hashmap.c
+var HashLoad = &hashLoad
diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c

index 6ff0c32aa4308c1edc5784de225eacb7a4b10796..6d2ab2168912cdd376e8c179f185dd800cd97085 100644 (file)
--- a/src/pkg/runtime/hashmap.c
+++ b/src/pkg/runtime/hashmap.c
@@ -288,6 +288,8 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
         uintptr i;
         byte *k, *v;
         byte *xk, *yk, *xv, *yv;
+       uint8 top;
+       bool eq;
  
         b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
         newbit = (uintptr)1 << (h->B - 1);
@@ -306,13 +308,38 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
                 yv = yk + h->keysize * BUCKETSIZE;
                 do {
                         for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
-                               if(b->tophash[i] == 0)
+                               top = b->tophash[i];
+                               if(top == 0)
                                         continue;
+
+                               // Compute hash to make our evacuation decision (whether we need
+                               // to send this key/value to bucket x or bucket y).
                                 hash = h->hash0;
                                 t->key->alg->hash(&hash, t->key->size, IK(h, k));
-                               // NOTE: if key != key, then this hash could be (and probably will be)
-                               // entirely different from the old hash.  We effectively only update
-                               // the B'th bit of the hash in this case.
+                               if((h->flags & Iterator) != 0) {
+                                       t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
+                                       if(!eq) {
+                                               // If key != key (NaNs), then the hash could be (and probably
+                                               // will be) entirely different from the old hash.  Moreover,
+                                               // it isn't reproducible.  Reproducibility is required in the
+                                               // presence of iterators, as our evacuation decision must
+                                               // match whatever decision the iterator made.
+                                               // Fortunately, we have the freedom to send these keys either
+                                               // way.  Also, tophash is meaningless for these kinds of keys.
+                                               // We let the low bit of tophash drive the evacuation decision.
+                                               // We recompute a new random tophash for the next level so
+                                               // these keys will get evenly distributed across all buckets
+                                               // after multiple grows.
+                                               if((top & 1) != 0)
+                                                       hash |= newbit;
+                                               else
+                                                       hash &= ~newbit;
+                                               top = hash >> (8*sizeof(uintptr)-8);
+                                               if(top == 0)
+                                                       top = 1;
+                                       }
+                               }
+
                                 if((hash & newbit) == 0) {
                                         if(xi == BUCKETSIZE) {
                                                 if(checkgc) mstats.next_gc = mstats.heap_alloc;
@@ -323,7 +350,7 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
                                                 xk = x->data;
                                                 xv = xk + h->keysize * BUCKETSIZE;
                                         }
-                                       x->tophash[xi] = b->tophash[i];
+                                       x->tophash[xi] = top;
                                         if((h->flags & IndirectKey) != 0) {
                                                 *(byte**)xk = *(byte**)k;               // copy pointer
                                         } else {
@@ -347,7 +374,7 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
                                                 yk = y->data;
                                                 yv = yk + h->keysize * BUCKETSIZE;
                                         }
-                                       y->tophash[yi] = b->tophash[i];
+                                       y->tophash[yi] = top;
                                         if((h->flags & IndirectKey) != 0) {
                                                 *(byte**)yk = *(byte**)k;
                                         } else {
@@ -838,18 +865,12 @@ next:
                         if(check_bucket >= 0) {
                                 // Special case: iterator was started during a grow and the
                                 // grow is not done yet.  We're working on a bucket whose
-                               // oldbucket has not been evacuated yet.  So we iterate
+                               // oldbucket has not been evacuated yet.  So we're iterating
                                 // through the oldbucket, skipping any keys that will go
                                 // to the other new bucket (each oldbucket expands to two
                                 // buckets during a grow).
                                 t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
-                               if(!eq) {
-                                       // Hash is meaningless if k != k (NaNs).  Return all
-                                       // NaNs during the first of the two new buckets.
-                                       if(bucket >= ((uintptr)1 << (it->B - 1))) {
-                                               continue;
-                                       }
-                               } else {
+                               if(eq) {
                                         // If the item in the oldbucket is not destined for
                                         // the current new bucket in the iteration, skip it.
                                         hash = h->hash0;
@@ -857,6 +878,14 @@ next:
                                         if((hash & (((uintptr)1 << it->B) - 1)) != check_bucket) {
                                                 continue;
                                         }
+                               } else {
+                                       // Hash isn't repeatable if k != k (NaNs).  We need a
+                                       // repeatable and randomish choice of which direction
+                                       // to send NaNs during evacuation.  We'll use the low
+                                       // bit of tophash to decide which way NaNs go.
+                                       if(check_bucket >> (it->B - 1) != (b->tophash[i] & 1)) {
+                                               continue;
+                                       }
                                 }
                         }
                         if(!evacuated(b)) {
@@ -1091,7 +1120,10 @@ runtime·mapassign(MapType *t, Hmap *h, byte *ak, byte *av)
                 runtime·prints("; key=");
                 t->key->alg->print(t->key->size, ak);
                 runtime·prints("; val=");
-               t->elem->alg->print(t->elem->size, av);
+               if(av)
+                       t->elem->alg->print(t->elem->size, av);
+               else
+                       runtime·prints("nil");
                 runtime·prints("\n");
         }
  }
@@ -1330,3 +1362,6 @@ runtime·mapiter2(struct hash_iter *it, ...)
                 runtime·prints("\n");
         }
  }
+
+// exported value for testing
+float64 runtime·hashLoad = LOAD;
diff --git a/src/pkg/runtime/map_test.go b/src/pkg/runtime/map_test.go

index 9f9c40d156335fe06b905a38f37c330cc252527a..a221cb28cf20dc0a8540cb2f3a09dd851f8c984b 100644 (file)
--- a/src/pkg/runtime/map_test.go
+++ b/src/pkg/runtime/map_test.go
@@ -371,3 +371,41 @@ func testMapLookups(t *testing.T, m map[string]string) {
                 }
         }
  }
+
+// Tests whether the iterator returns the right elements when
+// started in the middle of a grow, when the keys are NaNs.
+func TestMapNanGrowIterator(t *testing.T) {
+       m := make(map[float64]int)
+       nan := math.NaN()
+       const nBuckets = 16
+       // To fill nBuckets buckets takes LOAD * nBuckets keys.
+       nKeys := int(nBuckets * *runtime.HashLoad)
+
+       // Get map to full point with nan keys.
+       for i := 0; i < nKeys; i++ {
+               m[nan] = i
+       }
+       // Trigger grow
+       m[1.0] = 1
+       delete(m, 1.0)
+
+       // Run iterator
+       found := make(map[int]struct{})
+       for _, v := range m {
+               if v != -1 {
+                       if _, repeat := found[v]; repeat {
+                               t.Fatalf("repeat of value %d", v)
+                       }
+                       found[v] = struct{}{}
+               }
+               if len(found) == nKeys/2 {
+                       // Halfway through iteration, finish grow.
+                       for i := 0; i < nBuckets; i++ {
+                               delete(m, 1.0)
+                       }
+               }
+       }
+       if len(found) != nKeys {
+               t.Fatalf("missing value")
+       }
+}
author	Keith Randall <khr@golang.org>
	Fri, 4 Oct 2013 20:54:03 +0000 (13:54 -0700)
committer	Keith Randall <khr@golang.org>
	Fri, 4 Oct 2013 20:54:03 +0000 (13:54 -0700)
src/pkg/runtime/export_test.go		patch \| blob \| history
src/pkg/runtime/hashmap.c		patch \| blob \| history
src/pkg/runtime/map_test.go		patch \| blob \| history