diff options
author | Vicent Marti <tanoku@gmail.com> | 2013-09-04 13:16:57 +0200 |
---|---|---|
committer | Vicent Marti <tanoku@gmail.com> | 2013-09-04 13:16:57 +0200 |
commit | 74b38d199ebe74c25180e0baf4bf945c97cd3daf (patch) | |
tree | c44d94c678e9380813b5bd683d6af3a565028869 /src/sha1_lookup.c | |
parent | 6700cb9925741211910e592be58fa3b8b99f48ec (diff) | |
download | libgit2-74b38d199ebe74c25180e0baf4bf945c97cd3daf.tar.gz |
Backport @peff's fix for duplicates in sha1_lookup
Diffstat (limited to 'src/sha1_lookup.c')
-rw-r--r-- | src/sha1_lookup.c | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/src/sha1_lookup.c b/src/sha1_lookup.c index cdcadfaa9..c6b561340 100644 --- a/src/sha1_lookup.c +++ b/src/sha1_lookup.c @@ -109,7 +109,54 @@ int sha1_entry_pos(const void *table, * byte 0 thru (ofs-1) are the same between * lo and hi; ofs is the first byte that is * different. + * + * If ofs==20, then no bytes are different, + * meaning we have entries with duplicate + * keys. We know that we are in a solid run + * of this entry (because the entries are + * sorted, and our lo and hi are the same, + * there can be nothing but this single key + * in between). So we can stop the search. + * Either one of these entries is it (and + * we do not care which), or we do not have + * it. + * + * Furthermore, we know that one of our + * endpoints must be the edge of the run of + * duplicates. For example, given this + * sequence: + * + * idx 0 1 2 3 4 5 + * key A C C C C D + * + * If we are searching for "B", we might + * hit the duplicate run at lo=1, hi=3 + * (e.g., by first mi=3, then mi=0). But we + * can never have lo > 1, because B < C. + * That is, if our key is less than the + * run, we know that "lo" is the edge, but + * we can say nothing of "hi". Similarly, + * if our key is greater than the run, we + * know that "hi" is the edge, but we can + * say nothing of "lo". + * + * Therefore if we do not find it, we also + * know where it would go if it did exist: + * just on the far side of the edge that we + * know about. */ + if (ofs == 20) { + mi = lo; + mi_key = base + elem_size * mi + key_offset; + cmp = memcmp(mi_key, key, 20); + if (!cmp) + return mi; + if (cmp < 0) + return -1 - hi; + else + return -1 - lo; + } + hiv = hi_key[ofs_0]; if (ofs_0 < 19) hiv = (hiv << 8) | hi_key[ofs_0+1]; |