summaryrefslogtreecommitdiff
path: root/src/sha1_lookup.c
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2013-09-04 13:16:57 +0200
committerVicent Marti <tanoku@gmail.com>2013-09-04 13:16:57 +0200
commit74b38d199ebe74c25180e0baf4bf945c97cd3daf (patch)
treec44d94c678e9380813b5bd683d6af3a565028869 /src/sha1_lookup.c
parent6700cb9925741211910e592be58fa3b8b99f48ec (diff)
downloadlibgit2-74b38d199ebe74c25180e0baf4bf945c97cd3daf.tar.gz
Backport @peff's fix for duplicates in sha1_lookup
Diffstat (limited to 'src/sha1_lookup.c')
-rw-r--r--src/sha1_lookup.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/src/sha1_lookup.c b/src/sha1_lookup.c
index cdcadfaa9..c6b561340 100644
--- a/src/sha1_lookup.c
+++ b/src/sha1_lookup.c
@@ -109,7 +109,54 @@ int sha1_entry_pos(const void *table,
* byte 0 thru (ofs-1) are the same between
* lo and hi; ofs is the first byte that is
* different.
+ *
+ * If ofs==20, then no bytes are different,
+ * meaning we have entries with duplicate
+ * keys. We know that we are in a solid run
+ * of this entry (because the entries are
+ * sorted, and our lo and hi are the same,
+ * there can be nothing but this single key
+ * in between). So we can stop the search.
+ * Either one of these entries is it (and
+ * we do not care which), or we do not have
+ * it.
+ *
+ * Furthermore, we know that one of our
+ * endpoints must be the edge of the run of
+ * duplicates. For example, given this
+ * sequence:
+ *
+ * idx 0 1 2 3 4 5
+ * key A C C C C D
+ *
+ * If we are searching for "B", we might
+ * hit the duplicate run at lo=1, hi=3
+ * (e.g., by first mi=3, then mi=0). But we
+ * can never have lo > 1, because B < C.
+ * That is, if our key is less than the
+ * run, we know that "lo" is the edge, but
+ * we can say nothing of "hi". Similarly,
+ * if our key is greater than the run, we
+ * know that "hi" is the edge, but we can
+ * say nothing of "lo".
+ *
+ * Therefore if we do not find it, we also
+ * know where it would go if it did exist:
+ * just on the far side of the edge that we
+ * know about.
*/
+ if (ofs == 20) {
+ mi = lo;
+ mi_key = base + elem_size * mi + key_offset;
+ cmp = memcmp(mi_key, key, 20);
+ if (!cmp)
+ return mi;
+ if (cmp < 0)
+ return -1 - hi;
+ else
+ return -1 - lo;
+ }
+
hiv = hi_key[ofs_0];
if (ofs_0 < 19)
hiv = (hiv << 8) | hi_key[ofs_0+1];