From 45b54ee7fb3dc7b8444733288a557f1c62572c43 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 2 Apr 2013 20:58:25 -0700 Subject: [PATCH] runtime: avoid hashing strings until needed in single-bucket maps This changes the map lookup behavior for string maps with 2-8 keys. There was already previously a fastpath for 0 items and 1 item. Now, if a string-keyed map has <= 8 items, first check all the keys for length first. If only one has the right length, then just check it for equality and avoid hashing altogether. Once the map has more than 8 items, always hash like normal. I don't know why some of the other non-string map benchmarks got faster. This was with benchtime=2s, multiple times. I haven't anything else getting slower, though. benchmark old ns/op new ns/op delta BenchmarkHashStringSpeed 37 34 -8.20% BenchmarkHashInt32Speed 32 29 -10.67% BenchmarkHashInt64Speed 31 27 -12.82% BenchmarkHashStringArraySpeed 105 99 -5.43% BenchmarkMegMap 274206 255153 -6.95% BenchmarkMegOneMap 27 23 -14.80% BenchmarkMegEqMap 148332 116089 -21.74% BenchmarkMegEmptyMap 4 3 -12.72% BenchmarkSmallStrMap 22 22 -0.89% BenchmarkMapStringKeysEight_32 42 23 -43.71% BenchmarkMapStringKeysEight_64 55 23 -56.96% BenchmarkMapStringKeysEight_1M 279688 24 -99.99% BenchmarkIntMap 16 15 -10.18% BenchmarkRepeatedLookupStrMapKey32 40 37 -8.15% BenchmarkRepeatedLookupStrMapKey1M 287918 272980 -5.19% BenchmarkNewEmptyMap 156 130 -16.67% R=golang-dev, khr CC=golang-dev https://golang.org/cl/7641057 --- src/pkg/runtime/hashmap.c | 18 ++++++-- src/pkg/runtime/hashmap_fast.c | 76 ++++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 13 deletions(-) diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c index d639be3c3d..4ebfffd6c4 100644 --- a/src/pkg/runtime/hashmap.c +++ b/src/pkg/runtime/hashmap.c @@ -74,9 +74,9 @@ typedef struct Bucket Bucket; struct Bucket { - uint8 tophash[BUCKETSIZE]; // top 8 bits of hash of each entry (0 = empty) + uint8 tophash[BUCKETSIZE]; // top 8 bits of hash of each entry (0 = empty) Bucket *overflow; // overflow bucket, if any - byte data[1]; // BUCKETSIZE keys followed by BUCKETSIZE values + byte data[1]; // BUCKETSIZE keys followed by BUCKETSIZE values }; // NOTE: packing all the keys together and then all the values together makes the // code a bit more complicated than alternating key/value/key/value/... but it allows @@ -102,7 +102,7 @@ struct Hmap uint16 bucketsize; // bucket size in bytes uintptr hash0; // hash seed - byte *buckets; // array of 2^B Buckets + byte *buckets; // array of 2^B Buckets. may be nil if count==0. byte *oldbuckets; // previous bucket array of half the size, non-nil only when growing uintptr nevacuate; // progress counter for evacuation (buckets less than this have been evacuated) }; @@ -527,12 +527,14 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp) static uint8 empty_value[MAXVALUESIZE]; // Specialized versions of mapaccess1 for specific types. -// See ./hashmap_fast and ../../cmd/gc/walk.c. +// See ./hashmap_fast.c and ../../cmd/gc/walk.c. #define HASH_LOOKUP1 runtime·mapaccess1_fast32 #define HASH_LOOKUP2 runtime·mapaccess2_fast32 #define KEYTYPE uint32 #define HASHFUNC runtime·algarray[AMEM32].hash #define EQFUNC(x,y) ((x) == (y)) +#define EQMAYBE(x,y) ((x) == (y)) +#define HASMAYBE false #define QUICKEQ(x) true #include "hashmap_fast.c" @@ -541,6 +543,8 @@ static uint8 empty_value[MAXVALUESIZE]; #undef KEYTYPE #undef HASHFUNC #undef EQFUNC +#undef EQMAYBE +#undef HASMAYBE #undef QUICKEQ #define HASH_LOOKUP1 runtime·mapaccess1_fast64 @@ -548,6 +552,8 @@ static uint8 empty_value[MAXVALUESIZE]; #define KEYTYPE uint64 #define HASHFUNC runtime·algarray[AMEM64].hash #define EQFUNC(x,y) ((x) == (y)) +#define EQMAYBE(x,y) ((x) == (y)) +#define HASMAYBE false #define QUICKEQ(x) true #include "hashmap_fast.c" @@ -556,6 +562,8 @@ static uint8 empty_value[MAXVALUESIZE]; #undef KEYTYPE #undef HASHFUNC #undef EQFUNC +#undef EQMAYBE +#undef HASMAYBE #undef QUICKEQ #define HASH_LOOKUP1 runtime·mapaccess1_faststr @@ -563,6 +571,8 @@ static uint8 empty_value[MAXVALUESIZE]; #define KEYTYPE String #define HASHFUNC runtime·algarray[ASTRING].hash #define EQFUNC(x,y) ((x).len == (y).len && ((x).str == (y).str || runtime·memeq((x).str, (y).str, (x).len))) +#define EQMAYBE(x,y) ((x).len == (y).len) +#define HASMAYBE true #define QUICKEQ(x) ((x).len < 32) #include "hashmap_fast.c" diff --git a/src/pkg/runtime/hashmap_fast.c b/src/pkg/runtime/hashmap_fast.c index 55914ca7f0..afff7b1aad 100644 --- a/src/pkg/runtime/hashmap_fast.c +++ b/src/pkg/runtime/hashmap_fast.c @@ -19,10 +19,12 @@ HASH_LOOKUP1(MapType *t, Hmap *h, KEYTYPE key, byte *value) uintptr hash; uintptr bucket, oldbucket; Bucket *b; - uint8 top; uintptr i; KEYTYPE *k; byte *v; + uint8 top; + int8 keymaybe; + bool quickkey; if(debug) { runtime·prints("runtime.mapaccess1_fastXXX: map="); @@ -41,17 +43,43 @@ HASH_LOOKUP1(MapType *t, Hmap *h, KEYTYPE key, byte *value) if(docheck) check(t, h); - if(h->B == 0 && (h->count == 1 || QUICKEQ(key))) { - // One-bucket table. Don't hash, just check each bucket entry. + if(h->B == 0) { + // One-bucket table. Don't hash, just check each bucket entry. + if(HASMAYBE) { + keymaybe = -1; + } + quickkey = QUICKEQ(key); b = (Bucket*)h->buckets; for(i = 0, k = (KEYTYPE*)b->data, v = (byte*)(k + BUCKETSIZE); i < BUCKETSIZE; i++, k++, v += h->valuesize) { - if(b->tophash[i] != 0 && EQFUNC(key, *k)) { - value = v; + if(b->tophash[i] != 0) { + if(quickkey && EQFUNC(key, *k)) { + value = v; + FLUSH(&value); + return; + } + if(HASMAYBE && EQMAYBE(key, *k)) { + // TODO: check if key.str matches. Add EQFUNCFAST? + if(keymaybe >= 0) { + // Two same-length strings in this bucket. + // use slow path. + // TODO: keep track of more than just 1. Especially + // if doing the TODO above. + goto dohash; + } + keymaybe = i; + } + } + } + if(HASMAYBE && keymaybe >= 0) { + k = (KEYTYPE*)b->data + keymaybe; + if(EQFUNC(key, *k)) { + value = (byte*)((KEYTYPE*)b->data + BUCKETSIZE) + keymaybe * h->valuesize; FLUSH(&value); return; } } } else { +dohash: hash = h->hash0; HASHFUNC(&hash, sizeof(KEYTYPE), &key); bucket = hash & (((uintptr)1 << h->B) - 1); @@ -89,10 +117,12 @@ HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res) uintptr hash; uintptr bucket, oldbucket; Bucket *b; - uint8 top; uintptr i; KEYTYPE *k; byte *v; + uint8 top; + int8 keymaybe; + bool quickkey; if(debug) { runtime·prints("runtime.mapaccess2_fastXXX: map="); @@ -113,12 +143,39 @@ HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res) if(docheck) check(t, h); - if(h->B == 0 && (h->count == 1 || QUICKEQ(key))) { + if(h->B == 0) { // One-bucket table. Don't hash, just check each bucket entry. + if(HASMAYBE) { + keymaybe = -1; + } + quickkey = QUICKEQ(key); b = (Bucket*)h->buckets; for(i = 0, k = (KEYTYPE*)b->data, v = (byte*)(k + BUCKETSIZE); i < BUCKETSIZE; i++, k++, v += h->valuesize) { - if(b->tophash[i] != 0 && EQFUNC(key, *k)) { - value = v; + if(b->tophash[i] != 0) { + if(quickkey && EQFUNC(key, *k)) { + value = v; + res = true; + FLUSH(&value); + FLUSH(&res); + return; + } + if(HASMAYBE && EQMAYBE(key, *k)) { + // TODO: check if key.str matches. Add EQFUNCFAST? + if(keymaybe >= 0) { + // Two same-length strings in this bucket. + // use slow path. + // TODO: keep track of more than just 1. Especially + // if doing the TODO above. + goto dohash; + } + keymaybe = i; + } + } + } + if(HASMAYBE && keymaybe >= 0) { + k = (KEYTYPE*)b->data + keymaybe; + if(EQFUNC(key, *k)) { + value = (byte*)((KEYTYPE*)b->data + BUCKETSIZE) + keymaybe * h->valuesize; res = true; FLUSH(&value); FLUSH(&res); @@ -126,6 +183,7 @@ HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res) } } } else { +dohash: hash = h->hash0; HASHFUNC(&hash, sizeof(KEYTYPE), &key); bucket = hash & (((uintptr)1 << h->B) - 1); -- 2.50.0