summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-04-04 09:33:35 +0200
committerantirez <antirez@gmail.com>2014-04-16 15:26:09 +0200
commitee764b0f8dfbd6e2c976aae0d391b96e0bf1bf04 (patch)
treed0d58df40da5f09c17d1a81a5a869bf7ddfcc6a5
parent091f5677a00ca540aef408f3d6a8cf0dcfe2cdf0 (diff)
downloadredis-ee764b0f8dfbd6e2c976aae0d391b96e0bf1bf04.tar.gz
Changed HyperLogLog hash seed to a non-zero value.
Using a seed of zero has the side effect of having the empty string hashing to what is a very special case in the context of HyperLogLog: a very long run of zeroes. This did not influenced the correctness of the result with 16k registers because of the harmonic mean, but still it is inconvenient that a so obvious value maps to a so special hash. The seed 0xadc83b19 is used instead, which is the first 64 bits of the SHA1 of the empty string. Reference: issue #1657.
-rw-r--r--src/hyperloglog.c2
1 files changed, 1 insertions, 1 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 02fd2728a..0ae66a0fc 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -316,7 +316,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
*
* This may sound like inefficient, but actually in the average case
* there are high probabilities to find a 1 after a few iterations. */
- hash = MurmurHash64A(ele,elesize,0);
+ hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */
bit = REDIS_HLL_REGISTERS; /* First bit not used to address the register. */
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */