summaryrefslogtreecommitdiff
path: root/base
diff options
context:
space:
mode:
authorZdenek Kabelac <zkabelac@redhat.com>2021-03-08 15:18:04 +0100
committerZdenek Kabelac <zkabelac@redhat.com>2021-03-08 15:33:15 +0100
commitff21723512cb633c741e516381899f70e0d9ef2a (patch)
tree90dbce439b8cf087cdf6661e4e635ed2a17a566f /base
parentd602837b91ceb2a47e6d4d10c34ef90e6f26b17a (diff)
downloadlvm2-ff21723512cb633c741e516381899f70e0d9ef2a.tar.gz
hash: replace hash with better function
Add Bob Jenkins hash function to get better working hash function, which does genarate way less colisions (especially with similar strings). For a comparision also a kernel function used in DM kernel is include. While it's better then our existing one, it's still far worse, then Bob Jenkins hash.
Diffstat (limited to 'base')
-rw-r--r--base/data-struct/hash.c92
1 files changed, 74 insertions, 18 deletions
diff --git a/base/data-struct/hash.c b/base/data-struct/hash.c
index 54f75a5a6..5fee9cf85 100644
--- a/base/data-struct/hash.c
+++ b/base/data-struct/hash.c
@@ -37,8 +37,11 @@ struct dm_hash_table {
struct dm_hash_node **slots;
};
-/* Permutation of the Integers 0 through 255 */
-static unsigned char _nums[] = {
+#if 0 /* TO BE REMOVED */
+static unsigned _hash(const void *key, unsigned len)
+{
+ /* Permutation of the Integers 0 through 255 */
+ static unsigned char _nums[] = {
1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
@@ -63,23 +66,9 @@ static unsigned char _nums[] = {
44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120,
209
-};
-
-static struct dm_hash_node *_create_node(const void *key, unsigned len)
-{
- struct dm_hash_node *n = malloc(sizeof(*n) + len);
-
- if (n) {
- memcpy(n->key, key, len);
- n->keylen = len;
- }
-
- return n;
-}
+ };
-static unsigned _hash(const void *key, unsigned len)
-{
- const unsigned char *str = key;
+ const uint8_t *str = key;
unsigned h = 0, g;
unsigned i;
@@ -96,6 +85,73 @@ static unsigned _hash(const void *key, unsigned len)
return h;
}
+/* In-kernel DM hashing, still lots of collisions */
+static unsigned _hash_in_kernel(const char *key, unsigned len)
+{
+ const unsigned char *str = (unsigned char *)key;
+ const unsigned hash_mult = 2654435387U;
+ unsigned hash = 0, i;
+
+ for (i = 0; i < len; ++i)
+ hash = (hash + str[i]) * hash_mult;
+
+ return hash;
+}
+#endif
+
+#undef get16bits
+#if (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+ +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+
+/*
+ * Adapted Bob Jenkins hash to read by 2 bytes if possible.
+ * https://secure.wikimedia.org/wikipedia/en/wiki/Jenkins_hash_function
+ *
+ * Reduces amount of hash collisions
+ */
+static unsigned _hash(const void *key, unsigned len)
+{
+ const uint8_t *str = (uint8_t*) key;
+ unsigned hash = 0, i;
+ unsigned sz = len / 2;
+
+ for(i = 0; i < sz; ++i) {
+ hash += get16bits(str + 2 * i);
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ if (len & 1) {
+ hash += str[len - 1];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+
+static struct dm_hash_node *_create_node(const void *key, unsigned len)
+{
+ struct dm_hash_node *n = malloc(sizeof(*n) + len);
+
+ if (n) {
+ memcpy(n->key, key, len);
+ n->keylen = len;
+ }
+
+ return n;
+}
+
struct dm_hash_table *dm_hash_create(unsigned size_hint)
{
size_t len;