summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-03-30 00:54:08 +0100
committerantirez <antirez@gmail.com>2014-03-30 00:55:49 +0100
commitaaf6db459b34dc2182fe1d84596733a9b9e97ec4 (patch)
tree8c95a9bac16d5b7baf8ff639e18478a8b7dee742
parent4628ac00650591777d34427fa7ee6d5f1f6c484e (diff)
downloadredis-aaf6db459b34dc2182fe1d84596733a9b9e97ec4.tar.gz
Use endian neutral hash function for HyperLogLog.
We need to be sure that you can save a dataset in a Redis instance, reload it in a different architecture, and continue to count in the same HyperLogLog structure. So 32 and 64 bit, little or bit endian, must all guarantee to output the same hash for the same element.
-rw-r--r--src/hyperloglog.c42
1 files changed, 28 insertions, 14 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index faea9bd21..b94f325f4 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -213,33 +213,48 @@
/* ========================= HyperLogLog algorithm ========================= */
-/* Our hahs function is MurmurHash2, 64 bit version. */
+/* Our hash function is MurmurHash2, 64 bit version.
+ * It was modified for Redis in order to provide the same result in
+ * big and little endian archs (endian neutral). */
uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
const uint64_t m = 0xc6a4a7935bd1e995;
const int r = 47;
uint64_t h = seed ^ (len * m);
- const uint64_t *data = (const uint64_t *)key;
- const uint64_t *end = data + (len/8);
+ const uint8_t *data = (const uint8_t *)key;
+ const uint8_t *end = data + (len-(len&7));
while(data != end) {
- uint64_t k = *data++;
+ uint64_t k;
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+ k = *((uint64_t*)data);
+#else
+ k = (uint64_t) data[0];
+ k |= (uint64_t) data[1] << 8;
+ k |= (uint64_t) data[2] << 16;
+ k |= (uint64_t) data[3] << 24;
+ k |= (uint64_t) data[4] << 32;
+ k |= (uint64_t) data[5] << 40;
+ k |= (uint64_t) data[6] << 48;
+ k |= (uint64_t) data[7] << 56;
+#endif
+
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
+ data += 8;
}
- const unsigned char *data2 = (const unsigned char*)data;
-
switch(len & 7) {
- case 7: h ^= (uint64_t)data2[6] << 48;
- case 6: h ^= (uint64_t)data2[5] << 40;
- case 5: h ^= (uint64_t)data2[4] << 32;
- case 4: h ^= (uint64_t)data2[3] << 24;
- case 3: h ^= (uint64_t)data2[2] << 16;
- case 2: h ^= (uint64_t)data2[1] << 8;
- case 1: h ^= (uint64_t)data2[0];
+ case 7: h ^= (uint64_t)data[6] << 48;
+ case 6: h ^= (uint64_t)data[5] << 40;
+ case 5: h ^= (uint64_t)data[4] << 32;
+ case 4: h ^= (uint64_t)data[3] << 24;
+ case 3: h ^= (uint64_t)data[2] << 16;
+ case 2: h ^= (uint64_t)data[1] << 8;
+ case 1: h ^= (uint64_t)data[0];
h *= m;
};
@@ -462,7 +477,6 @@ void hllCountCommand(redisClient *c) {
}
}
-
/* This command performs a self-test of the HLL registers implementation.
* Something that is not easy to test from within the outside.
*