summaryrefslogtreecommitdiff
path: root/remote.c
diff options
context:
space:
mode:
authorPatrick Reynolds <patrick.reynolds@github.com>2014-07-29 14:43:39 +0000
committerJunio C Hamano <gitster@pobox.com>2014-07-30 11:29:33 -0700
commitd0da003d5b1e65f6e52920e42582f43b357782ee (patch)
tree8e2dc93a14690fa10a5424716f7ed1b0a07fb1d8 /remote.c
parent583b61c1af1bac12d6b48b2583ad508b5ab9964c (diff)
downloadgit-d0da003d5b1e65f6e52920e42582f43b357782ee.tar.gz
use a hashmap to make remotes fasterpr/remotes-in-hashmap
Remotes are stored as an array, so looking one up or adding one without duplication is an O(n) operation. Reading an entire config file full of remotes is O(n^2) in the number of remotes. For a repository with tens of thousands of remotes, the running time can hit multiple minutes. Hash tables are way faster. So we add a hashmap from remote name to struct remote and use it for all lookups. The time to add a new remote to a repo that already has 50,000 remotes drops from ~2 minutes to < 1 second. We retain the old array of remotes so iterators proceed in config-file order. Signed-off-by: Patrick Reynolds <patrick.reynolds@github.com> Reviewed-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'remote.c')
-rw-r--r--remote.c63
1 files changed, 46 insertions, 17 deletions
diff --git a/remote.c b/remote.c
index 3d6c86a36f..846cd1969c 100644
--- a/remote.c
+++ b/remote.c
@@ -42,6 +42,7 @@ struct rewrites {
static struct remote **remotes;
static int remotes_alloc;
static int remotes_nr;
+static struct hashmap remotes_hash;
static struct branch **branches;
static int branches_alloc;
@@ -136,26 +137,51 @@ static void add_url_alias(struct remote *remote, const char *url)
add_pushurl_alias(remote, url);
}
+struct remotes_hash_key {
+ const char *str;
+ int len;
+};
+
+static int remotes_hash_cmp(const struct remote *a, const struct remote *b, const struct remotes_hash_key *key)
+{
+ if (key)
+ return strncmp(a->name, key->str, key->len) || a->name[key->len];
+ else
+ return strcmp(a->name, b->name);
+}
+
+static inline void init_remotes_hash(void)
+{
+ if (!remotes_hash.cmpfn)
+ hashmap_init(&remotes_hash, (hashmap_cmp_fn)remotes_hash_cmp, 0);
+}
+
static struct remote *make_remote(const char *name, int len)
{
- struct remote *ret;
- int i;
+ struct remote *ret, *replaced;
+ struct remotes_hash_key lookup;
+ struct hashmap_entry lookup_entry;
- for (i = 0; i < remotes_nr; i++) {
- if (len ? (!strncmp(name, remotes[i]->name, len) &&
- !remotes[i]->name[len]) :
- !strcmp(name, remotes[i]->name))
- return remotes[i];
- }
+ if (!len)
+ len = strlen(name);
+
+ init_remotes_hash();
+ lookup.str = name;
+ lookup.len = len;
+ hashmap_entry_init(&lookup_entry, memhash(name, len));
+
+ if ((ret = hashmap_get(&remotes_hash, &lookup_entry, &lookup)) != NULL)
+ return ret;
ret = xcalloc(1, sizeof(struct remote));
ret->prune = -1; /* unspecified */
ALLOC_GROW(remotes, remotes_nr + 1, remotes_alloc);
remotes[remotes_nr++] = ret;
- if (len)
- ret->name = xstrndup(name, len);
- else
- ret->name = xstrdup(name);
+ ret->name = xstrndup(name, len);
+
+ hashmap_entry_init(ret, lookup_entry.hash);
+ replaced = hashmap_put(&remotes_hash, ret);
+ assert(replaced == NULL); /* no previous entry overwritten */
return ret;
}
@@ -717,13 +743,16 @@ struct remote *pushremote_get(const char *name)
int remote_is_configured(const char *name)
{
- int i;
+ struct remotes_hash_key lookup;
+ struct hashmap_entry lookup_entry;
read_config();
- for (i = 0; i < remotes_nr; i++)
- if (!strcmp(name, remotes[i]->name))
- return 1;
- return 0;
+ init_remotes_hash();
+ lookup.str = name;
+ lookup.len = strlen(name);
+ hashmap_entry_init(&lookup_entry, memhash(name, lookup.len));
+
+ return hashmap_get(&remotes_hash, &lookup_entry, &lookup) != NULL;
}
int for_each_remote(each_remote_fn fn, void *priv)