summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Julian Phillips <julian@quantumfyre.co.uk> 2011-09-29 23:11:42 +0100
committer Junio C Hamano <gitster@pobox.com> 2011-09-30 12:28:34 -0700
commit e9c4c11165e48b8f3fe1b4fc4db513f8e57202fb (patch)
tree 0e5cc781ef4481edeb066d397c98e656d7caaba4
parent b4f223c6367bb7aefa00c746c808f5afa7b85331 (diff)
download git-e9c4c11165e48b8f3fe1b4fc4db513f8e57202fb.tar.gz
refs: Use binary search to lookup refs faster
Currently we linearly search through lists of refs when we need to find a specific ref. This can be very slow if we need to look up a large number of refs. By changing to a binary search we can make this faster.

In order to be able to use a binary search we need to change from using linked lists to arrays, which we can manage using ALLOC_GROW.

We can now also use the standard library qsort function to sort the refs arrays.

Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- refs.c 345
1 files changed, 152 insertions, 193 deletions
diff --git a/refs.c b/refs.c
index d021595578..5835b40b0c 100644
--- a/refs.c
+++ b/refs.c
@@ -8,14 +8,18 @@
#define REF_KNOWS_PEELED 04
#define REF_BROKEN 010
-struct ref_list {
- struct ref_list *next;
+struct ref_entry {
unsigned char flag; /* ISSYMREF? ISPACKED? */
unsigned char sha1[20];
unsigned char peeled[20];
char name[FLEX_ARRAY];
};
+struct ref_array {
+ int nr, alloc;
+ struct ref_entry **refs;
+};
+
static const char *parse_ref_line(char *line, unsigned char *sha1)
{
/*
@@ -44,108 +48,80 @@ static const char *parse_ref_line(char *line, unsigned char *sha1)
return line;
}
-static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
- int flag, struct ref_list *list,
- struct ref_list **new_entry)
+static void add_ref(const char *name, const unsigned char *sha1,
+ int flag, struct ref_array *refs,
+ struct ref_entry **new_entry)
{
int len;
- struct ref_list *entry;
+ struct ref_entry *entry;
/* Allocate it and add it in.. */
len = strlen(name) + 1;
- entry = xmalloc(sizeof(struct ref_list) + len);
+ entry = xmalloc(sizeof(struct ref_entry) + len);
hashcpy(entry->sha1, sha1);
hashclr(entry->peeled);
memcpy(entry->name, name, len);
entry->flag = flag;
- entry->next = list;
if (new_entry)
*new_entry = entry;
- return entry;
+ ALLOC_GROW(refs->refs, refs->nr + 1, refs->alloc);
+ refs->refs[refs->nr++] = entry;
}
-/* merge sort the ref list */
-static struct ref_list *sort_ref_list(struct ref_list *list)
+static int ref_entry_cmp(const void *a, const void *b)
{
- int psize, qsize, last_merge_count, cmp;
- struct ref_list *p, *q, *l, *e;
- struct ref_list *new_list = list;
- int k = 1;
- int merge_count = 0;
+ struct ref_entry *one = *(struct ref_entry **)a;
+ struct ref_entry *two = *(struct ref_entry **)b;
+ return strcmp(one->name, two->name);
+}
- if (!list)
- return list;
+static void sort_ref_array(struct ref_array *array)
+{
+ int i = 0, j = 1;
- do {
- last_merge_count = merge_count;
- merge_count = 0;
+ /* Nothing to sort unless there are at least two entries */
+ if (array->nr < 2)
+ return;
- psize = 0;
+ qsort(array->refs, array->nr, sizeof(*array->refs), ref_entry_cmp);
- p = new_list;
- q = new_list;
- new_list = NULL;
- l = NULL;
+ /* Remove any duplicates from the ref_array */
+ for (; j < array->nr; j++) {
+ struct ref_entry *a = array->refs[i];
+ struct ref_entry *b = array->refs[j];
+ if (!strcmp(a->name, b->name)) {
+ if (hashcmp(a->sha1, b->sha1))
+ die("Duplicated ref, and SHA1s don't match: %s",
+ a->name);
+ warning("Duplicated ref: %s", a->name);
+ continue;
+ }
+ i++;
+ array->refs[i] = array->refs[j];
+ }
+ array->nr = i + 1;
+}
- while (p) {
- merge_count++;
+static struct ref_entry *search_ref_array(struct ref_array *array, const char *name)
+{
+ struct ref_entry *e, **r;
+ int len;
- while (psize < k && q->next) {
- q = q->next;
- psize++;
- }
- qsize = k;
-
- while ((psize > 0) || (qsize > 0 && q)) {
- if (qsize == 0 || !q) {
- e = p;
- p = p->next;
- psize--;
- } else if (psize == 0) {
- e = q;
- q = q->next;
- qsize--;
- } else {
- cmp = strcmp(q->name, p->name);
- if (cmp < 0) {
- e = q;
- q = q->next;
- qsize--;
- } else if (cmp > 0) {
- e = p;
- p = p->next;
- psize--;
- } else {
- if (hashcmp(q->sha1, p->sha1))
- die("Duplicated ref, and SHA1s don't match: %s",
- q->name);
- warning("Duplicated ref: %s", q->name);
- e = q;
- q = q->next;
- qsize--;
- free(e);
- e = p;
- p = p->next;
- psize--;
- }
- }
+ if (name == NULL)
+ return NULL;
- e->next = NULL;
+ len = strlen(name) + 1;
+ e = xmalloc(sizeof(struct ref_entry) + len);
+ memcpy(e->name, name, len);
- if (l)
- l->next = e;
- if (!new_list)
- new_list = e;
- l = e;
- }
+ r = bsearch(&e, array->refs, array->nr, sizeof(*array->refs), ref_entry_cmp);
- p = q;
- };
+ free(e);
- k = k * 2;
- } while ((last_merge_count != merge_count) || (last_merge_count != 1));
+ if (r == NULL)
+ return NULL;
- return new_list;
+ return *r;
}
/*
@@ -155,38 +131,37 @@ static struct ref_list *sort_ref_list(struct ref_list *list)
static struct cached_refs {
char did_loose;
char did_packed;
- struct ref_list *loose;
- struct ref_list *packed;
+ struct ref_array loose;
+ struct ref_array packed;
} cached_refs, submodule_refs;
-static struct ref_list *current_ref;
+static struct ref_entry *current_ref;
-static struct ref_list *extra_refs;
+static struct ref_array extra_refs;
-static void free_ref_list(struct ref_list *list)
+static void free_ref_array(struct ref_array *array)
{
- struct ref_list *next;
- for ( ; list; list = next) {
- next = list->next;
- free(list);
- }
+ int i;
+ for (i = 0; i < array->nr; i++)
+ free(array->refs[i]);
+ free(array->refs);
+ array->nr = array->alloc = 0;
+ array->refs = NULL;
}
static void invalidate_cached_refs(void)
{
struct cached_refs *ca = &cached_refs;
- if (ca->did_loose && ca->loose)
- free_ref_list(ca->loose);
- if (ca->did_packed && ca->packed)
- free_ref_list(ca->packed);
- ca->loose = ca->packed = NULL;
+ if (ca->did_loose)
+ free_ref_array(&ca->loose);
+ if (ca->did_packed)
+ free_ref_array(&ca->packed);
ca->did_loose = ca->did_packed = 0;
}
static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
{
- struct ref_list *list = NULL;
- struct ref_list *last = NULL;
+ struct ref_entry *last = NULL;
char refline[PATH_MAX];
int flag = REF_ISPACKED;
@@ -205,7 +180,7 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
name = parse_ref_line(refline, sha1);
if (name) {
- list = add_ref(name, sha1, flag, list, &last);
+ add_ref(name, sha1, flag, &cached_refs->packed, &last);
continue;
}
if (last &&
@@ -215,21 +190,20 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
!get_sha1_hex(refline + 1, sha1))
hashcpy(last->peeled, sha1);
}
- cached_refs->packed = sort_ref_list(list);
+ sort_ref_array(&cached_refs->packed);
}
void add_extra_ref(const char *name, const unsigned char *sha1, int flag)
{
- extra_refs = add_ref(name, sha1, flag, extra_refs, NULL);
+ add_ref(name, sha1, flag, &extra_refs, NULL);
}
void clear_extra_refs(void)
{
- free_ref_list(extra_refs);
- extra_refs = NULL;
+ free_ref_array(&extra_refs);
}
-static struct ref_list *get_packed_refs(const char *submodule)
+static struct ref_array *get_packed_refs(const char *submodule)
{
const char *packed_refs_file;
struct cached_refs *refs;
@@ -237,7 +211,7 @@ static struct ref_list *get_packed_refs(const char *submodule)
if (submodule) {
packed_refs_file = git_path_submodule(submodule, "packed-refs");
refs = &submodule_refs;
- free_ref_list(refs->packed);
+ free_ref_array(&refs->packed);
} else {
packed_refs_file = git_path("packed-refs");
refs = &cached_refs;
@@ -245,18 +219,17 @@ static struct ref_list *get_packed_refs(const char *submodule)
if (!refs->did_packed || submodule) {
FILE *f = fopen(packed_refs_file, "r");
- refs->packed = NULL;
if (f) {
read_packed_refs(f, refs);
fclose(f);
}
refs->did_packed = 1;
}
- return refs->packed;
+ return &refs->packed;
}
-static struct ref_list *get_ref_dir(const char *submodule, const char *base,
- struct ref_list *list)
+static void get_ref_dir(const char *submodule, const char *base,
+ struct ref_array *array)
{
DIR *dir;
const char *path;
@@ -299,7 +272,7 @@ static struct ref_list *get_ref_dir(const char *submodule, const char *base,
if (stat(refdir, &st) < 0)
continue;
if (S_ISDIR(st.st_mode)) {
- list = get_ref_dir(submodule, ref, list);
+ get_ref_dir(submodule, ref, array);
continue;
}
if (submodule) {
@@ -314,12 +287,11 @@ static struct ref_list *get_ref_dir(const char *submodule, const char *base,
hashclr(sha1);
flag |= REF_BROKEN;
}
- list = add_ref(ref, sha1, flag, list, NULL);
+ add_ref(ref, sha1, flag, array, NULL);
}
free(ref);
closedir(dir);
}
- return list;
}
struct warn_if_dangling_data {
@@ -356,21 +328,21 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname)
for_each_rawref(warn_if_dangling_symref, &data);
}
-static struct ref_list *get_loose_refs(const char *submodule)
+static struct ref_array *get_loose_refs(const char *submodule)
{
if (submodule) {
- free_ref_list(submodule_refs.loose);
- submodule_refs.loose = get_ref_dir(submodule, "refs", NULL);
- submodule_refs.loose = sort_ref_list(submodule_refs.loose);
- return submodule_refs.loose;
+ free_ref_array(&submodule_refs.loose);
+ get_ref_dir(submodule, "refs", &submodule_refs.loose);
+ sort_ref_array(&submodule_refs.loose);
+ return &submodule_refs.loose;
}
if (!cached_refs.did_loose) {
- cached_refs.loose = get_ref_dir(NULL, "refs", NULL);
- cached_refs.loose = sort_ref_list(cached_refs.loose);
+ get_ref_dir(NULL, "refs", &cached_refs.loose);
+ sort_ref_array(&cached_refs.loose);
cached_refs.did_loose = 1;
}
- return cached_refs.loose;
+ return &cached_refs.loose;
}
/* We allow "recursive" symbolic refs. Only within reason, though */
@@ -381,8 +353,8 @@ static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refna
{
FILE *f;
struct cached_refs refs;
- struct ref_list *ref;
- int retval;
+ struct ref_entry *ref;
+ int retval = -1;
strcpy(name + pathlen, "packed-refs");
f = fopen(name, "r");
@@ -390,17 +362,12 @@ static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refna
return -1;
read_packed_refs(f, &refs);
fclose(f);
- ref = refs.packed;
- retval = -1;
- while (ref) {
- if (!strcmp(ref->name, refname)) {
- retval = 0;
- memcpy(result, ref->sha1, 20);
- break;
- }
- ref = ref->next;
+ ref = search_ref_array(&refs.packed, refname);
+ if (ref != NULL) {
+ memcpy(result, ref->sha1, 20);
+ retval = 0;
}
- free_ref_list(refs.packed);
+ free_ref_array(&refs.packed);
return retval;
}
@@ -501,15 +468,13 @@ const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int *
git_snpath(path, sizeof(path), "%s", ref);
/* Special case: non-existing file. */
if (lstat(path, &st) < 0) {
- struct ref_list *list = get_packed_refs(NULL);
- while (list) {
- if (!strcmp(ref, list->name)) {
- hashcpy(sha1, list->sha1);
- if (flag)
- *flag |= REF_ISPACKED;
- return ref;
- }
- list = list->next;
+ struct ref_array *packed = get_packed_refs(NULL);
+ struct ref_entry *r = search_ref_array(packed, ref);
+ if (r != NULL) {
+ hashcpy(sha1, r->sha1);
+ if (flag)
+ *flag |= REF_ISPACKED;
+ return ref;
}
if (reading || errno != ENOENT)
return NULL;
@@ -584,7 +549,7 @@ int read_ref(const char *ref, unsigned char *sha1)
#define DO_FOR_EACH_INCLUDE_BROKEN 01
static int do_one_ref(const char *base, each_ref_fn fn, int trim,
- int flags, void *cb_data, struct ref_list *entry)
+ int flags, void *cb_data, struct ref_entry *entry)
{
if (strncmp(base, entry->name, trim))
return 0;
@@ -630,18 +595,12 @@ int peel_ref(const char *ref, unsigned char *sha1)
return -1;
if ((flag & REF_ISPACKED)) {
- struct ref_list *list = get_packed_refs(NULL);
+ struct ref_array *array = get_packed_refs(NULL);
+ struct ref_entry *r = search_ref_array(array, ref);
- while (list) {
- if (!strcmp(list->name, ref)) {
- if (list->flag & REF_KNOWS_PEELED) {
- hashcpy(sha1, list->peeled);
- return 0;
- }
- /* older pack-refs did not leave peeled ones */
- break;
- }
- list = list->next;
+ if (r != NULL && r->flag & REF_KNOWS_PEELED) {
+ hashcpy(sha1, r->peeled);
+ return 0;
}
}
@@ -660,36 +619,39 @@ fallback:
static int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn,
int trim, int flags, void *cb_data)
{
- int retval = 0;
- struct ref_list *packed = get_packed_refs(submodule);
- struct ref_list *loose = get_loose_refs(submodule);
+ int retval = 0, i, p = 0, l = 0;
+ struct ref_array *packed = get_packed_refs(submodule);
+ struct ref_array *loose = get_loose_refs(submodule);
- struct ref_list *extra;
+ struct ref_array *extra = &extra_refs;
- for (extra = extra_refs; extra; extra = extra->next)
- retval = do_one_ref(base, fn, trim, flags, cb_data, extra);
+ for (i = 0; i < extra->nr; i++)
+ retval = do_one_ref(base, fn, trim, flags, cb_data, extra->refs[i]);
- while (packed && loose) {
- struct ref_list *entry;
- int cmp = strcmp(packed->name, loose->name);
+ while (p < packed->nr && l < loose->nr) {
+ struct ref_entry *entry;
+ int cmp = strcmp(packed->refs[p]->name, loose->refs[l]->name);
if (!cmp) {
- packed = packed->next;
+ p++;
continue;
}
if (cmp > 0) {
- entry = loose;
- loose = loose->next;
+ entry = loose->refs[l++];
} else {
- entry = packed;
- packed = packed->next;
+ entry = packed->refs[p++];
}
retval = do_one_ref(base, fn, trim, flags, cb_data, entry);
if (retval)
goto end_each;
}
- for (packed = packed ? packed : loose; packed; packed = packed->next) {
- retval = do_one_ref(base, fn, trim, flags, cb_data, packed);
+ if (l < loose->nr) {
+ p = l;
+ packed = loose;
+ }
+
+ for (; p < packed->nr; p++) {
+ retval = do_one_ref(base, fn, trim, flags, cb_data, packed->refs[p]);
if (retval)
goto end_each;
}
@@ -980,24 +942,24 @@ static int remove_empty_directories(const char *file)
}
static int is_refname_available(const char *ref, const char *oldref,
- struct ref_list *list, int quiet)
-{
- int namlen = strlen(ref); /* e.g. 'foo/bar' */
- while (list) {
- /* list->name could be 'foo' or 'foo/bar/baz' */
- if (!oldref || strcmp(oldref, list->name)) {
- int len = strlen(list->name);
+ struct ref_array *array, int quiet)
+{
+ int i, namlen = strlen(ref); /* e.g. 'foo/bar' */
+ for (i = 0; i < array->nr; i++ ) {
+ struct ref_entry *entry = array->refs[i];
+ /* entry->name could be 'foo' or 'foo/bar/baz' */
+ if (!oldref || strcmp(oldref, entry->name)) {
+ int len = strlen(entry->name);
int cmplen = (namlen < len) ? namlen : len;
- const char *lead = (namlen < len) ? list->name : ref;
- if (!strncmp(ref, list->name, cmplen) &&
+ const char *lead = (namlen < len) ? entry->name : ref;
+ if (!strncmp(ref, entry->name, cmplen) &&
lead[cmplen] == '/') {
if (!quiet)
error("'%s' exists; cannot create '%s'",
- list->name, ref);
+ entry->name, ref);
return 0;
}
}
- list = list->next;
}
return 1;
}
@@ -1104,18 +1066,13 @@ static struct lock_file packlock;
static int repack_without_ref(const char *refname)
{
- struct ref_list *list, *packed_ref_list;
- int fd;
- int found = 0;
+ struct ref_array *packed;
+ struct ref_entry *ref;
+ int fd, i;
- packed_ref_list = get_packed_refs(NULL);
- for (list = packed_ref_list; list; list = list->next) {
- if (!strcmp(refname, list->name)) {
- found = 1;
- break;
- }
- }
- if (!found)
+ packed = get_packed_refs(NULL);
+ ref = search_ref_array(packed, refname);
+ if (ref == NULL)
return 0;
fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), 0);
if (fd < 0) {
@@ -1123,17 +1080,19 @@ static int repack_without_ref(const char *refname)
return error("cannot delete '%s' from packed refs", refname);
}
- for (list = packed_ref_list; list; list = list->next) {
+ for (i = 0; i < packed->nr; i++) {
char line[PATH_MAX + 100];
int len;
- if (!strcmp(refname, list->name))
+ ref = packed->refs[i];
+
+ if (!strcmp(refname, ref->name))
continue;
len = snprintf(line, sizeof(line), "%s %s\n",
- sha1_to_hex(list->sha1), list->name);
+ sha1_to_hex(ref->sha1), ref->name);
/* this should not happen but just being defensive */
if (len > sizeof(line))
- die("too long a refname '%s'", list->name);
+ die("too long a refname '%s'", ref->name);
write_or_die(fd, line, len);
}
return commit_lock_file(&packlock);