diff options
author | Jeff King <peff@peff.net> | 2016-10-03 19:47:28 -0400 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-10-03 21:03:14 -0700 |
commit | 8e3f52d77854a19cb3fd2adee40be84c8a8bdacc (patch) | |
tree | d342eca5c1880a5286108a8e12e71eee8ca2b44a /sha1_name.c | |
parent | e6c587c733b4634030b353f4024794b08bc86892 (diff) | |
download | git-8e3f52d77854a19cb3fd2adee40be84c8a8bdacc.tar.gz |
find_unique_abbrev: move logic out of get_short_sha1()jk/abbrev-auto
The get_short_sha1() is only about reading short sha1s; we
do call it in a loop to check "is this long enough" for each
object, but otherwise it should not need to know about
things like our default_abbrev setting.
So instead of asking it to set default_automatic_abbrev as a
side-effect, let's just have find_unique_abbrev() pick the
right place to start its loop. This requires a separate
approximate_object_count() function, but that naturally
belongs with the rest of sha1_file.c.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'sha1_name.c')
-rw-r--r-- | sha1_name.c | 60 |
1 files changed, 35 insertions, 25 deletions
diff --git a/sha1_name.c b/sha1_name.c index beb7ab588b..4a6f842692 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -15,7 +15,6 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *); struct disambiguate_state { int len; /* length of prefix in hex chars */ - unsigned int nrobjects; char hex_pfx[GIT_SHA1_HEXSZ + 1]; unsigned char bin_pfx[GIT_SHA1_RAWSZ]; @@ -119,14 +118,6 @@ static void find_short_object_filename(struct disambiguate_state *ds) if (strlen(de->d_name) != 38) continue; - - /* - * We only look at the one subdirectory, and we assume - * each subdirectory is roughly similar, so each - * object we find probably has 255 other objects in - * the other fan-out directories. - */ - ds->nrobjects += 256; if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2)) continue; memcpy(hex + 2, de->d_name, 38); @@ -160,7 +151,6 @@ static void unique_in_pack(struct packed_git *p, open_pack_index(p); num = p->num_objects; - ds->nrobjects += num; last = num; while (first < last) { uint32_t mid = (first + last) / 2; @@ -390,9 +380,6 @@ static int show_ambiguous_object(const unsigned char *sha1, void *data) return 0; } -/* start from our historical default before the automatic abbreviation */ -static int default_automatic_abbrev = FALLBACK_DEFAULT_ABBREV; - static int get_short_sha1(const char *name, int len, unsigned char *sha1, unsigned flags) { @@ -439,14 +426,6 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds); } - if (len < 16 && !status && (flags & GET_SHA1_AUTOMATIC)) { - unsigned int expect_collision = 1 << (len * 2); - if (ds.nrobjects > expect_collision) { - default_automatic_abbrev = len+1; - return SHORT_NAME_AMBIGUOUS; - } - } - return status; } @@ -476,22 +455,53 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data) return ret; } +/* + * Return the slot of the most-significant bit set in "val". There are various + * ways to do this quickly with fls() or __builtin_clzl(), but speed is + * probably not a big deal here. + */ +static unsigned msb(unsigned long val) +{ + unsigned r = 0; + while (val >>= 1) + r++; + return r; +} + int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len) { int status, exists; - int flags = GET_SHA1_QUIETLY; if (len < 0) { - flags |= GET_SHA1_AUTOMATIC; - len = default_automatic_abbrev; + unsigned long count = approximate_object_count(); + /* + * Add one because the MSB only tells us the highest bit set, + * not including the value of all the _other_ bits (so "15" + * is only one off of 2^4, but the MSB is the 3rd bit. + */ + len = msb(count) + 1; + /* + * We now know we have on the order of 2^len objects, which + * expects a collision at 2^(len/2). But we also care about hex + * chars, not bits, and there are 4 bits per hex. So all + * together we need to divide by 2; but we also want to round + * odd numbers up, hence adding one before dividing. + */ + len = (len + 1) / 2; + /* + * For very small repos, we stick with our regular fallback. + */ + if (len < FALLBACK_DEFAULT_ABBREV) + len = FALLBACK_DEFAULT_ABBREV; } + sha1_to_hex_r(hex, sha1); if (len == 40 || !len) return 40; exists = has_sha1_file(sha1); while (len < 40) { unsigned char sha1_ret[20]; - status = get_short_sha1(hex, len, sha1_ret, flags); + status = get_short_sha1(hex, len, sha1_ret, GET_SHA1_QUIETLY); if (exists ? !status : status == SHORT_NAME_NOT_FOUND) { |