path: root/src/dict.c
author     antirez <antirez@gmail.com>    2017-02-20 16:09:54 +0100
committer  antirez <antirez@gmail.com>    2017-02-20 17:29:17 +0100
commit     adeed29a99dcd0efdbfe4dbd5da74e7b01966c67 (patch)
tree       73feb624357ce5f8bbf82028e75968b67358e3f1 /src/dict.c
parent     9b05aafb50348838f45bfddcd689e7d8d1d3c950 (diff)
download   redis-adeed29a99dcd0efdbfe4dbd5da74e7b01966c67.tar.gz
Use SipHash hash function to mitigate HashDos attempts.
This change attempts to switch to a hash function which mitigates the effects of the HashDoS attack (a denial of service attack that tries to force data structures into their worst case behavior), while at the same time providing Redis with a hash function that does not expect the input data to be word aligned, a condition no longer true now that sds.c strings have a variable length header.

Note that even when using a hash function for which collisions cannot be generated without knowing the seed, implementation details or the exposure of the seed in an indirect way (for example the ability to add elements to a Set and check the order in which Redis returns them with SMEMBERS) may make the attacker's life simpler when trying to guess the correct seed. However the next step would be to switch to a log(N) data structure when too many items are detected in a single bucket, and this seems like overkill in the case of Redis.

SPEED REGRESSION TESTS:

In order to verify that switching from MurmurHash to SipHash had no impact on speed, a set of benchmarks involving fast insertion of 5 million keys was performed.

The results show Redis with SipHash, in high pipelining conditions, to be about 4% slower compared to the previous hash function. However this could partially be related to the fact that the current implementation does not attempt to hash whole words at a time but reads single bytes, in order to produce an output which is endian-neutral and at the same time works on systems where unaligned memory accesses are a problem.

Further x86 specific optimizations should be tested; the function may easily reach the same level as MurmurHash2 if a few optimizations are performed.
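As a side note, here is a minimal sketch of how a caller might use the new seeding API after this change: the seed is now 16 raw bytes rather than a single 32-bit integer, and should be set (ideally with random bytes) before any key is hashed. The /dev/urandom read and the example_seed_and_hash() wrapper below are illustrative assumptions, not part of this commit; only dictSetHashFunctionSeed() and dictGenHashFunction() come from the patch.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "dict.h"

void example_seed_and_hash(void) {
    uint8_t seed[16];
    FILE *fp = fopen("/dev/urandom","rb");
    if (fp == NULL || fread(seed,sizeof(seed),1,fp) != 1) {
        /* Fallback for the sketch only: a fixed seed defeats the
         * HashDoS protection and must not be used in production. */
        memset(seed,0,sizeof(seed));
    }
    if (fp) fclose(fp);

    /* Install the 16-byte seed, then hash a key with the new
     * SipHash-backed function, which now returns 64 bits. */
    dictSetHashFunctionSeed(seed);

    const char *key = "user:1000";
    uint64_t hash = dictGenHashFunction(key,(int)strlen(key));
    printf("SipHash of '%s' = %llu\n", key, (unsigned long long)hash);
}

In Redis itself the seed is expected to be generated once at server startup; the sketch only shows the shape of the calls.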
Diffstat (limited to 'src/dict.c')
-rw-r--r--  src/dict.c  |  75
1 file changed, 13 insertions(+), 62 deletions(-)
diff --git a/src/dict.c b/src/dict.c
index 59aef7724..8ce735961 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -37,11 +37,11 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
-#include <ctype.h>
#include "dict.h"
#include "zmalloc.h"
@@ -71,77 +71,28 @@ static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
/* -------------------------- hash functions -------------------------------- */
-static uint32_t dict_hash_function_seed = 5381;
+static uint8_t dict_hash_function_seed[16];
-void dictSetHashFunctionSeed(uint32_t seed) {
- dict_hash_function_seed = seed;
+void dictSetHashFunctionSeed(uint8_t *seed) {
+ memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}
-uint32_t dictGetHashFunctionSeed(void) {
+uint8_t *dictGetHashFunctionSeed(void) {
return dict_hash_function_seed;
}
-/* MurmurHash2, by Austin Appleby
- * Note - This code makes a few assumptions about how your machine behaves -
- * 1. We can read a 4-byte value from any address without crashing
- * 2. sizeof(int) == 4
- *
- * And it has a few limitations -
- *
- * 1. It will not work incrementally.
- * 2. It will not produce the same results on little-endian and big-endian
- * machines.
- */
-unsigned int dictGenHashFunction(const void *key, int len) {
- /* 'm' and 'r' are mixing constants generated offline.
- They're not really 'magic', they just happen to work well. */
- uint32_t seed = dict_hash_function_seed;
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- /* Initialize the hash to a 'random' value */
- uint32_t h = seed ^ len;
-
- /* Mix 4 bytes at a time into the hash */
- const unsigned char *data = (const unsigned char *)key;
-
- while(len >= 4) {
- uint32_t k = *(uint32_t*)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
+/* The default hashing function uses SipHash implementation
+ * in siphash.c. */
- h *= m;
- h ^= k;
+uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
+uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);
- data += 4;
- len -= 4;
- }
-
- /* Handle the last few bytes of the input array */
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
-
- /* Do a few final mixes of the hash to ensure the last few
- * bytes are well-incorporated. */
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return (unsigned int)h;
+uint64_t dictGenHashFunction(const void *key, int len) {
+ return siphash(key,len,dict_hash_function_seed);
}
-/* And a case insensitive hash function (based on djb hash) */
-unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
- unsigned int hash = (unsigned int)dict_hash_function_seed;
-
- while (len--)
- hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
- return hash;
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
+ return siphash_nocase(buf,len,dict_hash_function_seed);
}
/* ----------------------------- API implementation ------------------------- */
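As an aside on the new case-insensitive variant: conceptually it amounts to hashing the lowercased input with the same keyed SipHash, replacing the seed-independent djb-based hash removed above. The sketch below is an assumption made for illustration (the helper name siphash_nocase_sketch and the temporary-buffer approach are not from the patch); the real siphash_nocase() in siphash.c presumably folds the case conversion into the byte-loading step instead of allocating a copy.

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>

/* Prototype as declared in the patch above. */
uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);

/* Illustrative equivalent of a case-insensitive keyed hash:
 * lowercase the input, then feed it to the same seeded SipHash. */
uint64_t siphash_nocase_sketch(const uint8_t *in, size_t inlen,
                               const uint8_t *k)
{
    uint8_t *lower = malloc(inlen);
    if (lower == NULL) abort(); /* keep the sketch short */
    for (size_t i = 0; i < inlen; i++)
        lower[i] = (uint8_t)tolower(in[i]);
    uint64_t hash = siphash(lower,inlen,k);
    free(lower);
    return hash;
}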