summaryrefslogtreecommitdiff
path: root/tests/test-hash.c
diff options
context:
space:
mode:
authorBen Pfaff <blp@nicira.com>2013-01-16 16:14:42 -0800
committerBen Pfaff <blp@nicira.com>2013-01-22 13:40:53 -0800
commitc49d1dd1686277277cb742ec0fc93749214de391 (patch)
tree2e41361103aafd384d2eb82306254a22ef07e68d /tests/test-hash.c
parentcb8ca8156749d07c94b92249a49c1c6aa7c74934 (diff)
downloadopenvswitch-c49d1dd1686277277cb742ec0fc93749214de391.tar.gz
hash: Replace primary hash functions by murmurhash.
murmurhash is faster than Jenkins and slightly higher quality, so switch to it for hashing words. The best timings I got for hashing for data lengths of the following numbers of 32-bit words, in seconds per 1,000,000,000 hashes, were: words murmurhash Jenkins hash ----- ---------- ------------ 1 8.4 10.4 2 10.3 10.3 3 11.2 10.7 4 12.6 18.0 5 13.9 18.3 6 15.2 18.7 In other words, murmurhash outperforms Jenkins for all input lengths other than exactly 3 32-bit words (12 bytes). (It's understandable that Jenkins would have a best case at 12 bytes, because Jenkins works in 12-byte chunks.) Even in the case where Jenkins is faster, it's only by 5%. On average within this data set, murmurhash is 15% faster, and for 4-word input it is 30% faster. We retain Jenkins for flow_hash_symmetric_l4() and flow_hash_fields(), which are cases where the hash value is exposed externally. This commit appears to improve "ovs-benchmark rate" results slightly by a few hundred connections per second (under 1%), when used with an NVP controller. Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Ethan Jackson <ethan@nicira.com>
Diffstat (limited to 'tests/test-hash.c')
-rw-r--r--tests/test-hash.c27
1 files changed, 15 insertions, 12 deletions
diff --git a/tests/test-hash.c b/tests/test-hash.c
index d53ba2ead..0b7b87a20 100644
--- a/tests/test-hash.c
+++ b/tests/test-hash.c
@@ -20,6 +20,7 @@
#include <stdlib.h>
#include <string.h>
#include "hash.h"
+#include "jhash.h"
#undef NDEBUG
#include <assert.h>
@@ -41,9 +42,9 @@ hash_words_cb(uint32_t input)
}
static uint32_t
-mhash_words_cb(uint32_t input)
+jhash_words_cb(uint32_t input)
{
- return mhash_words(&input, 1, 0);
+ return jhash_words(&input, 1, 0);
}
static uint32_t
@@ -130,7 +131,7 @@ main(void)
* independence must be a bad assumption :-)
*/
check_word_hash(hash_words_cb, "hash_words", 11);
- check_word_hash(mhash_words_cb, "mhash_words", 11);
+ check_word_hash(jhash_words_cb, "jhash_words", 11);
/* Check that all hash functions of with one 1-bit (or no 1-bits) set
* within three 32-bit words have different values in their lowest 12
@@ -146,24 +147,26 @@ main(void)
* so we are doing pretty well to not have any collisions in 12 bits.
*/
check_3word_hash(hash_words, "hash_words");
- check_3word_hash(mhash_words, "mhash_words");
+ check_3word_hash(jhash_words, "jhash_words");
/* Check that all hashes computed with hash_int with one 1-bit (or no
* 1-bits) set within a single 32-bit word have different values in all
- * 14-bit consecutive runs.
+ * 12-bit consecutive runs.
*
* Given a random distribution, the probability of at least one collision
- * in any set of 14 bits is approximately
+ * in any set of 12 bits is approximately
*
- * 1 - ((2**14 - 1)/2**14)**C(33,2)
- * == 1 - (16,383/16,834)**528
- * =~ 0.031
+ * 1 - ((2**12 - 1)/2**12)**C(33,2)
+ * == 1 - (4,095/4,096)**528
+ * =~ 0.12
*
- * There are 18 ways to pick 14 consecutive bits in a 32-bit word, so if we
+ * There are 20 ways to pick 12 consecutive bits in a 32-bit word, so if we
* assumed independence then the chance of having no collisions in any of
- * those 14-bit runs would be (1-0.03)**18 =~ 0.56. This seems reasonable.
+ * those 12-bit runs would be (1-0.12)**20 =~ 0.078. This refutes our
+ * assumption of independence, which makes it seem like a good hash
+ * function.
*/
- check_word_hash(hash_int_cb, "hash_int", 14);
+ check_word_hash(hash_int_cb, "hash_int", 12);
return 0;
}