summaryrefslogtreecommitdiff
path: root/src/hashtable.c
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2011-02-22 21:59:36 +0200
committerVicent Marti <tanoku@gmail.com>2011-02-22 21:59:36 +0200
commitfc658755bf980170cf5a497870155a9fc97151cb (patch)
tree1e04116b1c9ca8234c4aacbba90d182c470081b7 /src/hashtable.c
parent4378e8d470abae5e9e8f32f98869516c8b86b191 (diff)
downloadlibgit2-fc658755bf980170cf5a497870155a9fc97151cb.tar.gz
Rewrite git_hashtable internals
The old hash table with chained buckets has been replaced by a new one using Cuckoo hashing, which offers guaranteed constant lookup times. This should improve speeds on most use cases, since hash tables in libgit2 are usually used as caches where the objects are stored once and queried several times. The Cuckoo hash implementation is based off the one in the Basekit library [1] for the IO language, but rewritten to support an arbritrary number of hashes. We currently use 3 to maximize the usage of the nodes pool. [1]: https://github.com/stevedekorte/basekit/blob/master/source/CHash.c Signed-off-by: Vicent Marti <tanoku@gmail.com>
Diffstat (limited to 'src/hashtable.c')
-rw-r--r--src/hashtable.c287
1 files changed, 141 insertions, 146 deletions
diff --git a/src/hashtable.c b/src/hashtable.c
index 67fd49a46..e226c8191 100644
--- a/src/hashtable.c
+++ b/src/hashtable.c
@@ -27,46 +27,113 @@
#include "repository.h"
#include "commit.h"
+#define MAX_LOOPS 5
static const double max_load_factor = 0.65;
-static void hashtable_resize(git_hashtable *table)
+static int resize_to(git_hashtable *self, size_t new_size);
+static int set_size(git_hashtable *self, size_t new_size);
+static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id);
+static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other);
+static int node_insert(git_hashtable *self, git_hashtable_node *new_node);
+static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size);
+
+static int resize_to(git_hashtable *self, size_t new_size)
{
- git_hashtable_node **new_nodes;
- unsigned int new_size, i;
+ git_hashtable_node *old_nodes = self->nodes;
+ size_t old_size = self->size;
+
+ self->is_resizing = 1;
- assert(table);
+ do {
+ self->size = new_size;
+ self->size_mask = new_size - 1;
+ self->key_count = 0;
+ self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size);
- new_size = (table->size_mask + 1) * 2;
+ if (self->nodes == NULL)
+ return GIT_ENOMEM;
- new_nodes = git__malloc(new_size * sizeof(git_hashtable_node *));
- if (new_nodes == NULL)
- return;
+ if (insert_nodes(self, old_nodes, old_size) == 0)
+ self->is_resizing = 0;
+ else {
+ new_size *= 2;
+ free(self->nodes);
+ }
+ } while(self->is_resizing);
+
+ free(old_nodes);
+ return GIT_SUCCESS;
+}
- memset(new_nodes, 0x0, new_size * sizeof(git_hashtable_node *));
+static int set_size(git_hashtable *self, size_t new_size)
+{
+ self->nodes = realloc(self->nodes, new_size * sizeof(git_hashtable_node));
+ if (self->nodes == NULL)
+ return GIT_ENOMEM;
- for (i = 0; i <= table->size_mask; ++i) {
- git_hashtable_node *n;
- unsigned int index;
+ if (new_size > self->size) {
+ memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(git_hashtable_node));
+ }
- while ((n = table->nodes[i]) != NULL) {
- table->nodes[i] = n->next;
- index = n->hash & (new_size - 1);
- n->next = new_nodes[index];
- new_nodes[index] = n;
+ self->size = new_size;
+ self->size_mask = new_size - 1;
+ return GIT_SUCCESS;
+}
+
+static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id)
+{
+ size_t pos = self->hash(key, hash_id) & self->size_mask;
+ return git_hashtable_node_at(self->nodes, pos);
+}
+
+static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
+{
+ git_hashtable_node tmp = *self;
+ *self = *other;
+ *other = tmp;
+}
+
+static int node_insert(git_hashtable *self, git_hashtable_node *new_node)
+{
+ int iteration, hash_id;
+
+ for (iteration = 0; iteration < MAX_LOOPS; iteration++) {
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ git_hashtable_node *node;
+ node = node_with_hash(self, new_node->key, hash_id);
+ node_swap_with(new_node, node);
+ if(new_node->key == 0x0){
+ self->key_count++;
+ return GIT_SUCCESS;
+ }
}
}
- free(table->nodes);
- table->nodes = new_nodes;
- table->size_mask = (new_size - 1);
- table->max_count = (unsigned int)(new_size * max_load_factor);
+ if (self->is_resizing)
+ return GIT_EBUSY;
+
+ resize_to(self, self->size * 2);
+ git_hashtable_insert(self, new_node->key, new_node->value);
+ return GIT_SUCCESS;
+}
+
+static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size)
+{
+ size_t i;
+
+ for (i = 0; i < old_size; ++i) {
+ git_hashtable_node *node = git_hashtable_node_at(old_nodes, i);
+ if (node->key && git_hashtable_insert(self, node->key, node->value) < GIT_SUCCESS)
+ return GIT_ENOMEM;
+ }
+
+ return GIT_SUCCESS;
}
-git_hashtable *git_hashtable_alloc(unsigned int min_size,
+git_hashtable *git_hashtable_alloc(size_t min_size,
git_hash_ptr hash,
git_hash_keyeq_ptr key_eq)
{
- unsigned int i;
git_hashtable *table;
assert(hash && key_eq);
@@ -74,6 +141,11 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size,
if ((table = git__malloc(sizeof(git_hashtable))) == NULL)
return NULL;
+ memset(table, 0x0, sizeof(git_hashtable));
+
+ if (min_size < 8)
+ min_size = 8;
+
/* round up size to closest power of 2 */
min_size--;
min_size |= min_size >> 1;
@@ -82,167 +154,90 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size,
min_size |= min_size >> 8;
min_size |= min_size >> 16;
- table->size_mask = min_size;
- table->count = 0;
- table->max_count = (unsigned int)((min_size + 1) * max_load_factor);
-
table->hash = hash;
table->key_equal = key_eq;
- table->nodes = git__malloc((min_size + 1) * sizeof(git_hashtable_node *));
-
- if (table->nodes == NULL) {
- free(table);
- return NULL;
- }
-
- for (i = 0; i <= min_size; ++i)
- table->nodes[i] = NULL;
+ set_size(table, min_size + 1);
return table;
}
-void git_hashtable_clear(git_hashtable *table)
+void git_hashtable_clear(git_hashtable *self)
{
- unsigned int index;
-
- assert(table);
-
- for (index = 0; index <= table->size_mask; ++index) {
- git_hashtable_node *node, *next_node;
-
- node = table->nodes[index];
- while (node != NULL) {
- next_node = node->next;
- free(node);
- node = next_node;
- }
+ assert(self);
- table->nodes[index] = NULL;
- }
-
- table->count = 0;
+ memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size);
+ self->key_count = 0;
}
-void git_hashtable_free(git_hashtable *table)
+void git_hashtable_free(git_hashtable *self)
{
- assert(table);
+ assert(self);
- git_hashtable_clear(table);
- free(table->nodes);
- free(table);
+ free(self->nodes);
+ free(self);
}
-int git_hashtable_insert(git_hashtable *table, const void *key, void *value)
+int git_hashtable_insert(git_hashtable *self, const void *key, void *value)
{
+ int hash_id;
git_hashtable_node *node;
- uint32_t index, hash;
-
- assert(table);
-
- if (table->count + 1 > table->max_count)
- hashtable_resize(table);
-
- node = git__malloc(sizeof(git_hashtable_node));
- if (node == NULL)
- return GIT_ENOMEM;
- hash = table->hash(key);
- index = (hash & table->size_mask);
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ node = node_with_hash(self, key, hash_id);
- node->object = value;
- node->hash = hash;
- node->next = table->nodes[index];
+ if (!node->key) {
+ node->key = key;
+ node->value = value;
+ self->key_count++;
+ return GIT_SUCCESS;
+ }
- table->nodes[index] = node;
- table->count++;
+ if (key == node->key || self->key_equal(key, node->key) == 0) {
+ node->value = value;
+ return GIT_SUCCESS;
+ }
+ }
- return GIT_SUCCESS;
+ /* no space in table; must do cuckoo dance */
+ {
+ git_hashtable_node x;
+ x.key = key;
+ x.value = value;
+ return node_insert(self, &x);
+ }
}
-void *git_hashtable_lookup(git_hashtable *table, const void *key)
+void *git_hashtable_lookup(git_hashtable *self, const void *key)
{
+ int hash_id;
git_hashtable_node *node;
- uint32_t index, hash;
-
- assert(table);
-
- hash = table->hash(key);
- index = (hash & table->size_mask);
- node = table->nodes[index];
- while (node != NULL) {
- if (node->hash == hash && table->key_equal(node->object, key))
- return node->object;
-
- node = node->next;
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ node = node_with_hash(self, key, hash_id);
+ if (node->key && self->key_equal(key, node->key) == 0)
+ return node->value;
}
return NULL;
}
-int git_hashtable_remove(git_hashtable *table, const void *key)
+int git_hashtable_remove(git_hashtable *self, const void *key)
{
- git_hashtable_node *node, *prev_node;
- uint32_t index, hash;
-
- assert(table);
-
- hash = table->hash(key);
- index = (hash & table->size_mask);
- node = table->nodes[index];
-
- prev_node = NULL;
-
- while (node != NULL) {
- if (node->hash == hash && table->key_equal(node->object, key)) {
- if (prev_node == NULL)
- table->nodes[index] = node->next;
- else
- prev_node->next = node->next;
+ int hash_id;
+ git_hashtable_node *node;
- free(node);
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ node = node_with_hash(self, key, hash_id);
+ if (node->key && self->key_equal(key, node->key) == 0) {
+ node->key = NULL;
+ node->value = NULL;
+ self->key_count--;
return GIT_SUCCESS;
}
-
- prev_node = node;
- node = node->next;
}
return GIT_ENOTFOUND;
}
-
-
-void git_hashtable_iterator_init(git_hashtable *table, git_hashtable_iterator *it)
-{
- assert(table && it);
-
- memset(it, 0x0, sizeof(git_hashtable_iterator));
-
- it->nodes = table->nodes;
- it->current_node = NULL;
- it->current_pos = 0;
- it->size = table->size_mask + 1;
-}
-
-void *git_hashtable_iterator_next(git_hashtable_iterator *it)
-{
- git_hashtable_node *next = NULL;
-
- assert(it);
-
- while (it->current_node == NULL) {
- if (it->current_pos >= it->size)
- return NULL;
-
- it->current_node = it->nodes[it->current_pos++];
- }
-
- next = it->current_node;
- it->current_node = it->current_node->next;
-
- return next->object;
-}
-