summaryrefslogtreecommitdiff
path: root/rts/Hash.c
diff options
context:
space:
mode:
authorTamar Christina <tamar@zhox.com>2017-09-04 08:02:15 -0400
committerBen Gamari <ben@smart-cactus.org>2017-09-05 07:20:00 -0400
commit542f89ff23e4deb66debca0b5de3ac3047befb28 (patch)
tree2365fa80106a4cac21911660507a64d18d803a28 /rts/Hash.c
parenta4c2ac2cef3d0ad3c968b8195daf651e672d3435 (diff)
downloadhaskell-542f89ff23e4deb66debca0b5de3ac3047befb28.tar.gz
Replace hashing function for string keys implementation with xxhash
When doing profiling on startup time of ghci on Windows, both cold and startup loading static LLVM libs, the profiler is showing a glaring red spot on the division operation of the the hashStr function. In fact profiling shows 14% of the time is spent hashing the keys. So I am replacing the hash function with xxHash which is a very fast non-crypto hash. It's faster than MurMurHash which node etc use. It also passes SMHasher. I can provide if required the collected raw data. But from analysis done on the keys, xxHash does not introduce more collisions than before, the amount splits seem about the same and the distributions among the buckets are slightly more uniform than before. However the runtime dropped enough to remove the function completely from the profiler's report. There's also a noticeable improvement in responsiveness. xxHash is BSD licensed and can be found https://github.com/Cyan4973/xxHash Test Plan: ./validate Reviewers: austin, bgamari, erikd, simonmar Reviewed By: bgamari Subscribers: rwbarton, thomie GHC Trac Issues: #13165 Differential Revision: https://phabricator.haskell.org/D3909
Diffstat (limited to 'rts/Hash.c')
-rw-r--r--rts/Hash.c17
1 files changed, 7 insertions, 10 deletions
diff --git a/rts/Hash.c b/rts/Hash.c
index 8f32ac3076..702e5a20a7 100644
--- a/rts/Hash.c
+++ b/rts/Hash.c
@@ -13,6 +13,7 @@
#include "Hash.h"
#include "RtsUtils.h"
+#include "xxhash.h"
#include <string.h>
@@ -78,18 +79,14 @@ hashWord(const HashTable *table, StgWord key)
int
hashStr(const HashTable *table, char *key)
{
- int h, bucket;
- char *s;
-
- s = key;
- for (h=0; *s; s++) {
- h *= 128;
- h += *s;
- h = h % 1048583; /* some random large prime */
- }
+#if x86_64_HOST_ARCH
+ StgWord h = XXH64 (key, strlen(key), 1048583);
+#else
+ StgWord h = XXH32 (key, strlen(key), 1048583);
+#endif
/* Mod the size of the hash table (a power of 2) */
- bucket = h & table->mask1;
+ int bucket = h & table->mask1;
if (bucket < table->split) {
/* Mod the size of the expanded hash table (also a power of 2) */