diff options
| -rw-r--r-- | src/hashtable.c | 287 | ||||
| -rw-r--r-- | src/hashtable.h | 49 | ||||
| -rw-r--r-- | src/refs.c | 34 | ||||
| -rw-r--r-- | src/repository.c | 28 | ||||
| -rw-r--r-- | src/revwalk.c | 28 | ||||
| -rw-r--r-- | tests/t07-hashtable.c | 31 | 
6 files changed, 210 insertions, 247 deletions
| diff --git a/src/hashtable.c b/src/hashtable.c index 67fd49a46..e226c8191 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -27,46 +27,113 @@  #include "repository.h"  #include "commit.h" +#define MAX_LOOPS 5  static const double max_load_factor = 0.65; -static void hashtable_resize(git_hashtable *table) +static int resize_to(git_hashtable *self, size_t new_size); +static int set_size(git_hashtable *self, size_t new_size); +static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id); +static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other); +static int node_insert(git_hashtable *self, git_hashtable_node *new_node); +static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size); + +static int resize_to(git_hashtable *self, size_t new_size)  { -	git_hashtable_node **new_nodes; -	unsigned int new_size, i; +	git_hashtable_node *old_nodes = self->nodes; +	size_t old_size = self->size; + +	self->is_resizing = 1; -	assert(table); +	do { +		self->size = new_size; +		self->size_mask = new_size - 1; +		self->key_count = 0; +		self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size); -	new_size = (table->size_mask + 1) * 2; +		if (self->nodes == NULL) +			return GIT_ENOMEM; -	new_nodes = git__malloc(new_size * sizeof(git_hashtable_node *)); -	if (new_nodes == NULL) -		return; +		if (insert_nodes(self, old_nodes, old_size) == 0) +			self->is_resizing = 0; +		else { +			new_size *= 2; +			free(self->nodes); +		} +	} while(self->is_resizing); + +	free(old_nodes); +	return GIT_SUCCESS; +} -	memset(new_nodes, 0x0, new_size * sizeof(git_hashtable_node *)); +static int set_size(git_hashtable *self, size_t new_size) +{ +	self->nodes = realloc(self->nodes, new_size * sizeof(git_hashtable_node)); +	if (self->nodes == NULL) +		return GIT_ENOMEM; -	for (i = 0; i <= table->size_mask; ++i) { -		git_hashtable_node *n; -		unsigned int index; +	if (new_size > self->size) { +		memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(git_hashtable_node)); +	} -		while ((n = table->nodes[i]) != NULL) { -			table->nodes[i] = n->next; -			index = n->hash & (new_size - 1); -			n->next = new_nodes[index]; -			new_nodes[index] = n; +	self->size = new_size; +	self->size_mask = new_size - 1; +	return GIT_SUCCESS; +} + +static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id) +{ +	size_t pos = self->hash(key, hash_id) & self->size_mask; +	return git_hashtable_node_at(self->nodes, pos); +} + +static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other) +{ +	git_hashtable_node tmp = *self; +	*self = *other; +	*other = tmp; +} + +static int node_insert(git_hashtable *self, git_hashtable_node *new_node) +{	 +	int iteration, hash_id; + +	for (iteration = 0; iteration < MAX_LOOPS; iteration++) {  +		for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { +			git_hashtable_node *node; +			node = node_with_hash(self, new_node->key, hash_id); +			node_swap_with(new_node, node); +			if(new_node->key == 0x0){ +				self->key_count++; +				return GIT_SUCCESS;  +			}  		}  	} -	free(table->nodes); -	table->nodes = new_nodes; -	table->size_mask = (new_size - 1); -	table->max_count = (unsigned int)(new_size * max_load_factor); +	if (self->is_resizing)  +		return GIT_EBUSY; + +	resize_to(self, self->size * 2); +	git_hashtable_insert(self, new_node->key, new_node->value); +	return GIT_SUCCESS; +} + +static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size) +{ +	size_t i; + +	for (i = 0; i < old_size; ++i) { +		git_hashtable_node *node = git_hashtable_node_at(old_nodes, i); +		if (node->key && git_hashtable_insert(self, node->key, node->value) < GIT_SUCCESS) +			return GIT_ENOMEM; +	} + +	return GIT_SUCCESS;  } -git_hashtable *git_hashtable_alloc(unsigned int min_size,  +git_hashtable *git_hashtable_alloc(size_t min_size,   		git_hash_ptr hash,  		git_hash_keyeq_ptr key_eq)  { -	unsigned int i;  	git_hashtable *table;  	assert(hash && key_eq); @@ -74,6 +141,11 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size,  	if ((table = git__malloc(sizeof(git_hashtable))) == NULL)  		return NULL; +	memset(table, 0x0, sizeof(git_hashtable)); + +	if (min_size < 8) +		min_size = 8; +  	/* round up size to closest power of 2 */  	min_size--;  	min_size |= min_size >> 1; @@ -82,167 +154,90 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size,  	min_size |= min_size >> 8;  	min_size |= min_size >> 16; -	table->size_mask = min_size; -	table->count = 0; -	table->max_count = (unsigned int)((min_size + 1) * max_load_factor); -  	table->hash = hash;  	table->key_equal = key_eq; -	table->nodes = git__malloc((min_size + 1) * sizeof(git_hashtable_node *)); - -	if (table->nodes == NULL) { -		free(table); -		return NULL; -	} - -	for (i = 0; i <= min_size; ++i) -		table->nodes[i] = NULL; +	set_size(table, min_size + 1);  	return table;  } -void git_hashtable_clear(git_hashtable *table) +void git_hashtable_clear(git_hashtable *self)  { -	unsigned int index; - -	assert(table); - -	for (index = 0; index <= table->size_mask; ++index) { -		git_hashtable_node *node, *next_node; - -		node = table->nodes[index]; -		while (node != NULL) { -			next_node = node->next; -			free(node); -			node = next_node; -		} +	assert(self); -		table->nodes[index] = NULL; -	} - -	table->count = 0; +	memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size); +	self->key_count = 0;  } -void git_hashtable_free(git_hashtable *table) +void git_hashtable_free(git_hashtable *self)  { -	assert(table); +	assert(self); -	git_hashtable_clear(table); -	free(table->nodes); -	free(table); +	free(self->nodes); +	free(self);  } -int git_hashtable_insert(git_hashtable *table, const void *key, void *value) +int git_hashtable_insert(git_hashtable *self, const void *key, void *value)  { +	int hash_id;  	git_hashtable_node *node; -	uint32_t index, hash; - -	assert(table); - -	if (table->count + 1 > table->max_count) -		hashtable_resize(table); - -	node = git__malloc(sizeof(git_hashtable_node)); -	if (node == NULL) -		return GIT_ENOMEM; -	hash = table->hash(key); -	index = (hash & table->size_mask); +	for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { +		node = node_with_hash(self, key, hash_id); -	node->object = value; -	node->hash = hash; -	node->next = table->nodes[index]; +		if (!node->key) { +			node->key = key; +			node->value = value; +			self->key_count++; +			return GIT_SUCCESS; +		} -	table->nodes[index] = node; -	table->count++; +		if (key == node->key || self->key_equal(key, node->key) == 0) { +			node->value = value; +			return GIT_SUCCESS; +		} +	} -	return GIT_SUCCESS; +	/* no space in table; must do cuckoo dance */ +	{ +		git_hashtable_node x; +		x.key = key; +		x.value = value; +		return node_insert(self, &x); +	}  } -void *git_hashtable_lookup(git_hashtable *table, const void *key) +void *git_hashtable_lookup(git_hashtable *self, const void *key)  { +	int hash_id;  	git_hashtable_node *node; -	uint32_t index, hash; - -	assert(table); - -	hash = table->hash(key); -	index = (hash & table->size_mask); -	node = table->nodes[index]; -	while (node != NULL) { -		if (node->hash == hash && table->key_equal(node->object, key)) -			return node->object; - -		node = node->next; +	for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { +		node = node_with_hash(self, key, hash_id); +		if (node->key && self->key_equal(key, node->key) == 0) +			return node->value;  	}  	return NULL;  } -int git_hashtable_remove(git_hashtable *table, const void *key) +int git_hashtable_remove(git_hashtable *self, const void *key)  { -	git_hashtable_node *node, *prev_node; -	uint32_t index, hash; - -	assert(table); - -	hash = table->hash(key); -	index = (hash & table->size_mask); -	node = table->nodes[index]; - -	prev_node = NULL; - -	while (node != NULL) { -		if (node->hash == hash && table->key_equal(node->object, key)) { -			if (prev_node == NULL) -				table->nodes[index] = node->next; -			else -				prev_node->next = node->next; +	int hash_id; +	git_hashtable_node *node; -			free(node); +	for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { +		node = node_with_hash(self, key, hash_id); +		if (node->key && self->key_equal(key, node->key) == 0) { +			node->key = NULL; +			node->value = NULL; +			self->key_count--;  			return GIT_SUCCESS;  		} - -		prev_node = node; -		node = node->next;  	}  	return GIT_ENOTFOUND;  } - - -void git_hashtable_iterator_init(git_hashtable *table, git_hashtable_iterator *it) -{ -	assert(table && it); - -	memset(it, 0x0, sizeof(git_hashtable_iterator)); - -	it->nodes = table->nodes; -	it->current_node = NULL; -	it->current_pos = 0; -	it->size = table->size_mask + 1; -} - -void *git_hashtable_iterator_next(git_hashtable_iterator *it) -{ -	git_hashtable_node *next = NULL; - -	assert(it); - -	while (it->current_node == NULL) { -		if (it->current_pos >= it->size) -			return NULL; - -		it->current_node = it->nodes[it->current_pos++]; -	} - -	next = it->current_node; -	it->current_node = it->current_node->next; - -	return next->object; -} - diff --git a/src/hashtable.h b/src/hashtable.h index 69535040d..74da580ef 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -5,39 +5,33 @@  #include "git2/oid.h"  #include "git2/odb.h" +#define GIT_HASHTABLE_HASHES 3 -typedef uint32_t (*git_hash_ptr)(const void *); -typedef int (*git_hash_keyeq_ptr)(void *obj, const void *obj_key); +typedef uint32_t (*git_hash_ptr)(const void *, int hash_id); +typedef int (*git_hash_keyeq_ptr)(const void *key_a, const void *key_b);  struct git_hashtable_node { -	void *object; -	uint32_t hash; -	struct git_hashtable_node *next; +	const void *key; +	void *value;  };  struct git_hashtable { -	struct git_hashtable_node **nodes; +	struct git_hashtable_node *nodes; -	unsigned int size_mask; -	unsigned int count; -	unsigned int max_count; +	size_t size_mask; +	size_t size; +	size_t key_count; + +	int is_resizing;  	git_hash_ptr hash;  	git_hash_keyeq_ptr key_equal;  }; -struct git_hashtable_iterator { -	struct git_hashtable_node **nodes; -	struct git_hashtable_node *current_node; -	unsigned int current_pos; -	unsigned int size; -}; -  typedef struct git_hashtable_node git_hashtable_node;  typedef struct git_hashtable git_hashtable; -typedef struct git_hashtable_iterator git_hashtable_iterator; -git_hashtable *git_hashtable_alloc(unsigned int min_size,  +git_hashtable *git_hashtable_alloc(size_t min_size,   		git_hash_ptr hash,  		git_hash_keyeq_ptr key_eq);  int git_hashtable_insert(git_hashtable *h, const void *key, void *value); @@ -46,7 +40,22 @@ int git_hashtable_remove(git_hashtable *table, const void *key);  void git_hashtable_free(git_hashtable *h);  void git_hashtable_clear(git_hashtable *h); -void *git_hashtable_iterator_next(git_hashtable_iterator *it); -void git_hashtable_iterator_init(git_hashtable *h, git_hashtable_iterator *it); +#define git_hashtable_node_at(nodes, pos) ((git_hashtable_node *)(&nodes[pos])) + +#define GIT_HASHTABLE_FOREACH(self, pkey, pvalue, code) {\ +	git_hashtable *_self = (self);\ +	git_hashtable_node *_nodes = _self->nodes;\ +	unsigned int _i, _size = _self->size;\ +	for (_i = 0; _i < _size; _i ++) {\ +		git_hashtable_node *_node = git_hashtable_node_at(_nodes, _i);\ +		if (_node->key)\ +		{\ +			pkey = _node->key;\ +			pvalue = _node->value;\ +			code;\ +		}\ +	}\ +} +  #endif diff --git a/src/refs.c b/src/refs.c index 1f434ea03..b95ec70cf 100644 --- a/src/refs.c +++ b/src/refs.c @@ -28,28 +28,21 @@  #include "repository.h"  #include "fileops.h" -#define HASH_SEED 2147483647  #define MAX_NESTING_LEVEL 5  static const int default_table_size = 32; -static uint32_t reftable_hash(const void *key) +static uint32_t reftable_hash(const void *key, int hash_id)  { -	return git__hash(key, strlen((const char *)key), HASH_SEED); -} - -static int reftable_haskey(void *reference, const void *key) -{ -	git_reference *ref; -	char *name; +	static uint32_t hash_seeds[GIT_HASHTABLE_HASHES] = { +		2147483647, +		0x5d20bb23, +		0x7daaab3c  +	}; -	ref = (git_reference *)reference; -	name = (char *)key; - -	return strcmp(name, ref->name) == 0; +	return git__hash(key, strlen((const char *)key), hash_seeds[hash_id]);  } -  static int check_refname(const char *name)   {  	/* @@ -641,24 +634,21 @@ int git_repository__refcache_init(git_refcache *refs)  	refs->cache = git_hashtable_alloc(  		default_table_size,   		reftable_hash, -		reftable_haskey); +		(git_hash_keyeq_ptr)strcmp);  	return refs->cache ? GIT_SUCCESS : GIT_ENOMEM;   }  void git_repository__refcache_free(git_refcache *refs)  { -	git_hashtable_iterator it; +	const char *ref_name;  	git_reference *reference;  	assert(refs); -	git_hashtable_iterator_init(refs->cache, &it); - -	while ((reference = (git_reference *)git_hashtable_iterator_next(&it)) != NULL) { -		git_hashtable_remove(refs->cache, reference->name); -		reference_free(reference); -	} +	GIT_HASHTABLE_FOREACH(refs->cache, ref_name, reference, +		reference_free(reference) +	);  	git_hashtable_free(refs->cache);  } diff --git a/src/repository.c b/src/repository.c index 2b17ba455..a1d2798b3 100644 --- a/src/repository.c +++ b/src/repository.c @@ -58,28 +58,16 @@ typedef struct {   * Callbacks for the ODB cache, implemented   * as a hash table   */ -uint32_t object_table_hash(const void *key) +uint32_t object_table_hash(const void *key, int hash_id)  {  	uint32_t r;  	git_oid *id;  	id = (git_oid *)key; -	memcpy(&r, id->id, sizeof(r)); +	memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r));  	return r;  } -int object_table_hashkey(void *object, const void *key) -{ -	git_object *obj; -	git_oid *oid; - -	obj = (git_object *)object; -	oid = (git_oid *)key; - -	return (git_oid_cmp(oid, &obj->id) == 0); -} - -  /*   * Git repository open methods   * @@ -245,9 +233,9 @@ static git_repository *repository_alloc()  	repo->objects = git_hashtable_alloc(  			OBJECT_TABLE_SIZE,   			object_table_hash, -			object_table_hashkey); +			(git_hash_keyeq_ptr)git_oid_cmp); -	if (repo->objects == NULL) { +	if (repo->objects == NULL) {   		free(repo);  		return NULL;  	} @@ -369,8 +357,8 @@ cleanup:  void git_repository_free(git_repository *repo)  { -	git_hashtable_iterator it;  	git_object *object; +	const git_oid *oid;  	if (repo == NULL)  		return; @@ -380,11 +368,9 @@ void git_repository_free(git_repository *repo)  	free(repo->path_repository);  	free(repo->path_odb); -	git_hashtable_iterator_init(repo->objects, &it); - -	while ((object = (git_object *) -				git_hashtable_iterator_next(&it)) != NULL) +	GIT_HASHTABLE_FOREACH(repo->objects, oid, object, {  		git_object_free(object); +	});  	git_hashtable_free(repo->objects); diff --git a/src/revwalk.c b/src/revwalk.c index e30e543a8..c073be13f 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -28,28 +28,23 @@  #include "revwalk.h"  #include "hashtable.h" -uint32_t git_revwalk__commit_hash(const void *key) +uint32_t git_revwalk__commit_hash(const void *key, int hash_id)  {  	uint32_t r;  	git_commit *commit;  	commit = (git_commit *)key; -	memcpy(&r, commit->object.id.id, sizeof(r)); +	memcpy(&r, commit->object.id.id + (hash_id * sizeof(uint32_t)), sizeof(r));  	return r;  } -int git_revwalk__commit_haskey(void *object, const void *key) +int git_revwalk__commit_keycmp(const void *key_a, const void *key_b)  { -	git_revwalk_commit *walk_commit; -	git_commit *commit_object; - -	walk_commit = (git_revwalk_commit *)object; -	commit_object = (git_commit *)key; - -	return (walk_commit->commit_object == commit_object); +	git_commit *a = (git_commit *)key_a; +	git_commit *b = (git_commit *)key_b; +	return git_oid_cmp(&a->object.id, &b->object.id);  } -  int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)  {  	git_revwalk *walk; @@ -62,7 +57,7 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)  	walk->commits = git_hashtable_alloc(64,  			git_revwalk__commit_hash, -			git_revwalk__commit_haskey); +			git_revwalk__commit_keycmp);  	if (walk->commits == NULL) {  		free(walk); @@ -245,18 +240,15 @@ int git_revwalk_next(git_commit **commit, git_revwalk *walk)  void git_revwalk_reset(git_revwalk *walk)  { -	git_hashtable_iterator it; +	const void *_unused;  	git_revwalk_commit *commit;  	assert(walk); -	git_hashtable_iterator_init(walk->commits, &it); - -	while ((commit = (git_revwalk_commit *) -				git_hashtable_iterator_next(&it)) != NULL) { +	GIT_HASHTABLE_FOREACH(walk->commits, _unused, commit, {  		git_revwalk_list_clear(&commit->parents);  		free(commit); -	} +	});  	git_hashtable_clear(walk->commits);  	git_revwalk_list_clear(&walk->iterator); diff --git a/tests/t07-hashtable.c b/tests/t07-hashtable.c index a0f32194c..76eb0d1a1 100644 --- a/tests/t07-hashtable.c +++ b/tests/t07-hashtable.c @@ -34,32 +34,26 @@ typedef struct _aux_object {  	int visited;  } table_item; -uint32_t hash_func(const void *key) +uint32_t hash_func(const void *key, int hash_id)  {  	uint32_t r;  	git_oid *id;  	id = (git_oid *)key; -	memcpy(&r, id->id, sizeof(r)); +	memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r));  	return r;  } -int hash_haskey(void *item, const void *key) +int hash_cmpkey(const void *a, const void *b)  { -	table_item *obj; -	git_oid *oid; - -	obj = (table_item *)item; -	oid = (git_oid *)key; - -	return (git_oid_cmp(oid, &obj->id) == 0); +	return git_oid_cmp(a, b);  }  BEGIN_TEST("table", table_create)  	git_hashtable *table = NULL; -	table = git_hashtable_alloc(55, hash_func, hash_haskey); +	table = git_hashtable_alloc(55, hash_func, hash_cmpkey);  	must_be_true(table != NULL);  	must_be_true(table->size_mask + 1 == 64); @@ -75,7 +69,7 @@ BEGIN_TEST("table", table_populate)  	table_item *objects;  	git_hashtable *table = NULL; -	table = git_hashtable_alloc(objects_n * 2, hash_func, hash_haskey); +	table = git_hashtable_alloc(objects_n * 2, hash_func, hash_cmpkey);  	must_be_true(table != NULL);  	objects = git__malloc(objects_n * sizeof(table_item)); @@ -123,7 +117,7 @@ BEGIN_TEST("table", table_resize)  	table_item *objects;  	git_hashtable *table = NULL; -	table = git_hashtable_alloc(objects_n, hash_func, hash_haskey); +	table = git_hashtable_alloc(objects_n, hash_func, hash_cmpkey);  	must_be_true(table != NULL);  	objects = git__malloc(objects_n * sizeof(table_item)); @@ -161,11 +155,11 @@ BEGIN_TEST("tableit", table_iterator)  	const int objects_n = 32;  	int i;  	table_item *objects, *ob; +	const void *_unused;  	git_hashtable *table = NULL; -	git_hashtable_iterator iterator; -	table = git_hashtable_alloc(objects_n * 2, hash_func, hash_haskey); +	table = git_hashtable_alloc(objects_n * 2, hash_func, hash_cmpkey);  	must_be_true(table != NULL);  	objects = git__malloc(objects_n * sizeof(table_item)); @@ -177,11 +171,9 @@ BEGIN_TEST("tableit", table_iterator)  		must_pass(git_hashtable_insert(table, &(objects[i].id), &(objects[i])));  	} -	git_hashtable_iterator_init(table, &iterator); - -	/* iterate through all nodes, mark as visited */ -	while ((ob = (table_item *)git_hashtable_iterator_next(&iterator)) != NULL) +	GIT_HASHTABLE_FOREACH(table, _unused, ob,  		ob->visited = 1; +	);  	/* make sure all nodes have been visited */  	for (i = 0; i < objects_n; ++i) @@ -189,7 +181,6 @@ BEGIN_TEST("tableit", table_iterator)  	git_hashtable_free(table);  	free(objects); -  END_TEST | 
