summaryrefslogtreecommitdiff
path: root/base
diff options
context:
space:
mode:
authorZdenek Kabelac <zkabelac@redhat.com>2021-03-07 15:33:04 +0100
committerZdenek Kabelac <zkabelac@redhat.com>2021-03-08 15:33:15 +0100
commitd602837b91ceb2a47e6d4d10c34ef90e6f26b17a (patch)
treec43d3cd40ba9850d544f6b5c2fc36f9a8dce5c36 /base
parent84679d254ffdeab24cc2994d1ef3bc800ca736d7 (diff)
downloadlvm2-d602837b91ceb2a47e6d4d10c34ef90e6f26b17a.tar.gz
hash: speed up hash tables
Enhance hash perfomance by remembering the computed hash value for the hashed node - this allows to speedup lookup of nodes with the hash collision when different keys map to the same masked hash value. For the easier use 'num_slots' become 'mask_slots', so we only add '1' in rare case of need of the original num_slots value. Also add statistic counters for hashes and print stats in debug build (-DDEBUG) on hash wiping. (so badly performing hash can be optimized.)
Diffstat (limited to 'base')
-rw-r--r--base/data-struct/hash.c90
1 files changed, 58 insertions, 32 deletions
diff --git a/base/data-struct/hash.c b/base/data-struct/hash.c
index 9f6322733..54f75a5a6 100644
--- a/base/data-struct/hash.c
+++ b/base/data-struct/hash.c
@@ -22,12 +22,18 @@ struct dm_hash_node {
void *data;
unsigned data_len;
unsigned keylen;
+ unsigned hash;
char key[];
};
struct dm_hash_table {
unsigned num_nodes;
- unsigned num_slots;
+ unsigned num_hint;
+ unsigned mask_slots; /* (slots - 1) -> used as hash mask */
+ unsigned collisions; /* Collissions of hash keys */
+ unsigned search; /* How many keys were searched */
+ unsigned found; /* How many nodes were found */
+ unsigned same_hash; /* Was there a colision with same masked hash and len ? */
struct dm_hash_node **slots;
};
@@ -96,24 +102,26 @@ struct dm_hash_table *dm_hash_create(unsigned size_hint)
unsigned new_size = 16u;
struct dm_hash_table *hc = zalloc(sizeof(*hc));
- if (!hc)
- return_0;
+ if (!hc) {
+ log_error("Failed to allocate memory for hash.");
+ return 0;
+ }
+
+ hc->num_hint = size_hint;
/* round size hint up to a power of two */
while (new_size < size_hint)
new_size = new_size << 1;
- hc->num_slots = new_size;
+ hc->mask_slots = new_size - 1;
len = sizeof(*(hc->slots)) * new_size;
- if (!(hc->slots = zalloc(len)))
- goto_bad;
+ if (!(hc->slots = zalloc(len))) {
+ free(hc);
+ log_error("Failed to allocate slots for hash.");
+ return 0;
+ }
return hc;
-
- bad:
- free(hc->slots);
- free(hc);
- return 0;
}
static void _free_nodes(struct dm_hash_table *t)
@@ -121,7 +129,16 @@ static void _free_nodes(struct dm_hash_table *t)
struct dm_hash_node *c, *n;
unsigned i;
- for (i = 0; i < t->num_slots; i++)
+#ifdef DEBUG
+ log_debug("Free hash hint:%d slots:%d nodes:%d (s:%d f:%d c:%d h:%d)",
+ t->num_hint, t->mask_slots + 1, t->num_nodes,
+ t->search, t->found, t->collisions, t->same_hash);
+#endif
+
+ if (!t->num_nodes)
+ return;
+
+ for (i = 0; i <= t->mask_slots; i++)
for (c = t->slots[i]; c; c = n) {
n = c->next;
free(c);
@@ -135,23 +152,32 @@ void dm_hash_destroy(struct dm_hash_table *t)
free(t);
}
-static struct dm_hash_node **_find(struct dm_hash_table *t, const void *key,
- uint32_t len)
+static struct dm_hash_node **_findh(struct dm_hash_table *t, const void *key,
+ uint32_t len, unsigned hash)
{
- unsigned h = _hash(key, len) & (t->num_slots - 1);
struct dm_hash_node **c;
- for (c = &t->slots[h]; *c; c = &((*c)->next)) {
- if ((*c)->keylen != len)
- continue;
-
- if (!memcmp(key, (*c)->key, len))
- break;
+ ++t->search;
+ for (c = &t->slots[hash & t->mask_slots]; *c; c = &((*c)->next)) {
+ if ((*c)->keylen == len && (*c)->hash == hash) {
+ if (!memcmp(key, (*c)->key, len)) {
+ ++t->found;
+ break;
+ }
+ ++t->same_hash;
+ }
+ ++t->collisions;
}
return c;
}
+static struct dm_hash_node **_find(struct dm_hash_table *t, const void *key,
+ uint32_t len)
+{
+ return _findh(t, key, len, _hash(key, len));
+}
+
void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key,
uint32_t len)
{
@@ -163,7 +189,8 @@ void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key,
int dm_hash_insert_binary(struct dm_hash_table *t, const void *key,
uint32_t len, void *data)
{
- struct dm_hash_node **c = _find(t, key, len);
+ unsigned hash = _hash(key, len);
+ struct dm_hash_node **c = _findh(t, key, len, hash);
if (*c)
(*c)->data = data;
@@ -174,6 +201,7 @@ int dm_hash_insert_binary(struct dm_hash_table *t, const void *key,
return 0;
n->data = data;
+ n->hash = hash;
n->next = 0;
*c = n;
t->num_nodes++;
@@ -217,7 +245,7 @@ static struct dm_hash_node **_find_str_with_val(struct dm_hash_table *t,
struct dm_hash_node **c;
unsigned h;
- h = _hash(key, len) & (t->num_slots - 1);
+ h = _hash(key, len) & t->mask_slots;
for (c = &t->slots[h]; *c; c = &((*c)->next)) {
if ((*c)->keylen != len)
@@ -248,7 +276,7 @@ int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
n->data = (void *)val;
n->data_len = val_len;
- h = _hash(key, len) & (t->num_slots - 1);
+ h = _hash(key, len) & t->mask_slots;
first = t->slots[h];
@@ -316,7 +344,7 @@ void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *c
*count = 0;
- h = _hash(key, len) & (t->num_slots - 1);
+ h = _hash(key, len) & t->mask_slots;
for (c = &t->slots[h]; *c; c = &((*c)->next)) {
if ((*c)->keylen != len)
@@ -345,7 +373,7 @@ void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f)
struct dm_hash_node *c, *n;
unsigned i;
- for (i = 0; i < t->num_slots; i++)
+ for (i = 0; i <= t->mask_slots; i++)
for (c = t->slots[i]; c; c = n) {
n = c->next;
f(c->data);
@@ -355,8 +383,8 @@ void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f)
void dm_hash_wipe(struct dm_hash_table *t)
{
_free_nodes(t);
- memset(t->slots, 0, sizeof(struct dm_hash_node *) * t->num_slots);
- t->num_nodes = 0u;
+ memset(t->slots, 0, sizeof(struct dm_hash_node *) * (t->mask_slots + 1));
+ t->num_nodes = t->collisions = t->search = t->same_hash = 0u;
}
char *dm_hash_get_key(struct dm_hash_table *t __attribute__((unused)),
@@ -376,7 +404,7 @@ static struct dm_hash_node *_next_slot(struct dm_hash_table *t, unsigned s)
struct dm_hash_node *c = NULL;
unsigned i;
- for (i = s; i < t->num_slots && !c; i++)
+ for (i = s; i <= t->mask_slots && !c; i++)
c = t->slots[i];
return c;
@@ -389,7 +417,5 @@ struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t)
struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n)
{
- unsigned h = _hash(n->key, n->keylen) & (t->num_slots - 1);
-
- return n->next ? n->next : _next_slot(t, h + 1);
+ return n->next ? n->next : _next_slot(t, (n->hash & t->mask_slots) + 1);
}