runtime: fix bug in maps at the intersection of iterators, growing, and NaN keys

If an iterator is started while a map is in the middle of a grow, and the map has NaN keys, then those keys might get returned by the iterator more than once. This fix makes the evacuation decision deterministic and repeatable for NaN keys so each one gets returned only once. R=golang-dev, r, khr, iant CC=golang-dev https://codereview.appspot.com/14367043
author: Keith Randall <khr@golang.org> 2013-10-04 13:54:03 -0700
committer: Keith Randall <khr@golang.org> 2013-10-04 13:54:03 -0700
commit: c7b55db4842447f764b5264eeead343986d6c5b1 (patch)
tree: 7ecfe057001b660449174b09ac520c1874e61d26
parent: 99bc26f342b95be51da28015adfb5521209639be (diff)
download: go-c7b55db4842447f764b5264eeead343986d6c5b1.tar.gz
3 files changed, 91 insertions, 15 deletions
diff --git a/src/pkg/runtime/export_test.go b/src/pkg/runtime/export_test.go
index 01d0ed667..d170fa72a 100644
--- a/src/pkg/runtime/export_test.go
+++ b/src/pkg/runtime/export_test.go
@@ -81,3 +81,6 @@ var Int32Hash = int32Hash
 var Int64Hash = int64Hash
 
 func GogoBytes() int32
+
+var hashLoad float64 // declared in hashmap.c
+var HashLoad = &hashLoad
diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c
index 6ff0c32aa..6d2ab2168 100644
--- a/src/pkg/runtime/hashmap.c
+++ b/src/pkg/runtime/hashmap.c
@@ -288,6 +288,8 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 	uintptr i;
 	byte *k, *v;
 	byte *xk, *yk, *xv, *yv;
+	uint8 top;
+	bool eq;
 
 	b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
 	newbit = (uintptr)1 << (h->B - 1);
@@ -306,13 +308,38 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 		yv = yk + h->keysize * BUCKETSIZE;
 		do {
 			for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
-				if(b->tophash[i] == 0)
+				top = b->tophash[i];
+				if(top == 0)
 					continue;
+
+				// Compute hash to make our evacuation decision (whether we need
+				// to send this key/value to bucket x or bucket y).
 				hash = h->hash0;
 				t->key->alg->hash(&hash, t->key->size, IK(h, k));
-				// NOTE: if key != key, then this hash could be (and probably will be)
-				// entirely different from the old hash.  We effectively only update
-				// the B'th bit of the hash in this case.
+				if((h->flags & Iterator) != 0) {
+					t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
+					if(!eq) {
+						// If key != key (NaNs), then the hash could be (and probably
+						// will be) entirely different from the old hash.  Moreover,
+						// it isn't reproducible.  Reproducibility is required in the
+						// presence of iterators, as our evacuation decision must
+						// match whatever decision the iterator made.
+						// Fortunately, we have the freedom to send these keys either
+						// way.  Also, tophash is meaningless for these kinds of keys.
+						// We let the low bit of tophash drive the evacuation decision.
+						// We recompute a new random tophash for the next level so
+						// these keys will get evenly distributed across all buckets
+						// after multiple grows.
+						if((top & 1) != 0)
+							hash |= newbit;
+						else
+							hash &= ~newbit;
+						top = hash >> (8*sizeof(uintptr)-8);
+						if(top == 0)
+							top = 1;
+					}
+				}
+
 				if((hash & newbit) == 0) {
 					if(xi == BUCKETSIZE) {
 						if(checkgc) mstats.next_gc = mstats.heap_alloc;
@@ -323,7 +350,7 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 						xk = x->data;
 						xv = xk + h->keysize * BUCKETSIZE;
 					}
-					x->tophash[xi] = b->tophash[i];
+					x->tophash[xi] = top;
 					if((h->flags & IndirectKey) != 0) {
 						*(byte**)xk = *(byte**)k;               // copy pointer
 					} else {
@@ -347,7 +374,7 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 						yk = y->data;
 						yv = yk + h->keysize * BUCKETSIZE;
 					}
-					y->tophash[yi] = b->tophash[i];
+					y->tophash[yi] = top;
 					if((h->flags & IndirectKey) != 0) {
 						*(byte**)yk = *(byte**)k;
 					} else {
@@ -838,18 +865,12 @@ next:
 			if(check_bucket >= 0) {
 				// Special case: iterator was started during a grow and the
 				// grow is not done yet.  We're working on a bucket whose
-				// oldbucket has not been evacuated yet.  So we iterate
+				// oldbucket has not been evacuated yet.  So we're iterating
 				// through the oldbucket, skipping any keys that will go
 				// to the other new bucket (each oldbucket expands to two
 				// buckets during a grow).
 				t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
-				if(!eq) {
-					// Hash is meaningless if k != k (NaNs).  Return all
-					// NaNs during the first of the two new buckets.
-					if(bucket >= ((uintptr)1 << (it->B - 1))) {
-						continue;
-					}
-				} else {
+				if(eq) {
 					// If the item in the oldbucket is not destined for
 					// the current new bucket in the iteration, skip it.
 					hash = h->hash0;
@@ -857,6 +878,14 @@ next:
 					if((hash & (((uintptr)1 << it->B) - 1)) != check_bucket) {
 						continue;
 					}
+				} else {
+					// Hash isn't repeatable if k != k (NaNs).  We need a
+					// repeatable and randomish choice of which direction
+					// to send NaNs during evacuation.  We'll use the low
+					// bit of tophash to decide which way NaNs go.
+					if(check_bucket >> (it->B - 1) != (b->tophash[i] & 1)) {
+						continue;
+					}
 				}
 			}
 			if(!evacuated(b)) {
@@ -1091,7 +1120,10 @@ runtime·mapassign(MapType *t, Hmap *h, byte *ak, byte *av)
 		runtime·prints("; key=");
 		t->key->alg->print(t->key->size, ak);
 		runtime·prints("; val=");
-		t->elem->alg->print(t->elem->size, av);
+		if(av)
+			t->elem->alg->print(t->elem->size, av);
+		else
+			runtime·prints("nil");
 		runtime·prints("\n");
 	}
 }
@@ -1330,3 +1362,6 @@ runtime·mapiter2(struct hash_iter *it, ...)
 		runtime·prints("\n");
 	}
 }
+
+// exported value for testing
+float64 runtime·hashLoad = LOAD;
diff --git a/src/pkg/runtime/map_test.go b/src/pkg/runtime/map_test.go
index 9f9c40d15..a221cb28c 100644
--- a/src/pkg/runtime/map_test.go
+++ b/src/pkg/runtime/map_test.go
@@ -371,3 +371,41 @@ func testMapLookups(t *testing.T, m map[string]string) {
 		}
 	}
 }
+
+// Tests whether the iterator returns the right elements when
+// started in the middle of a grow, when the keys are NaNs.
+func TestMapNanGrowIterator(t *testing.T) {
+	m := make(map[float64]int)
+	nan := math.NaN()
+	const nBuckets = 16
+	// To fill nBuckets buckets takes LOAD * nBuckets keys.
+	nKeys := int(nBuckets * *runtime.HashLoad)
+
+	// Get map to full point with nan keys.
+	for i := 0; i < nKeys; i++ {
+		m[nan] = i
+	}
+	// Trigger grow
+	m[1.0] = 1
+	delete(m, 1.0)
+
+	// Run iterator
+	found := make(map[int]struct{})
+	for _, v := range m {
+		if v != -1 {
+			if _, repeat := found[v]; repeat {
+				t.Fatalf("repeat of value %d", v)
+			}
+			found[v] = struct{}{}
+		}
+		if len(found) == nKeys/2 {
+			// Halfway through iteration, finish grow.
+			for i := 0; i < nBuckets; i++ {
+				delete(m, 1.0)
+			}
+		}
+	}
+	if len(found) != nKeys {
+		t.Fatalf("missing value")
+	}
+}
author	Keith Randall <khr@golang.org>	2013-10-04 13:54:03 -0700
committer	Keith Randall <khr@golang.org>	2013-10-04 13:54:03 -0700
commit	c7b55db4842447f764b5264eeead343986d6c5b1 (patch)
tree	7ecfe057001b660449174b09ac520c1874e61d26
parent	99bc26f342b95be51da28015adfb5521209639be (diff)
download	go-c7b55db4842447f764b5264eeead343986d6c5b1.tar.gz