summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- generator.c 9
-rw-r--r-- util1.c 9
2 files changed, 14 insertions, 4 deletions
diff --git a/generator.c b/generator.c
index 935c84f9..21c4a595 100644
--- a/generator.c
+++ b/generator.c
@@ -875,9 +875,12 @@ static struct file_struct *find_fuzzy(struct file_struct *file, struct file_list
len = strlen(name);
suf = find_filename_suffix(name, len, &suf_len);
- dist = fuzzy_distance(name, len, fname, fname_len);
- /* Add some extra weight to how well the suffixes match. */
- dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10;
+ dist = fuzzy_distance(name, len, fname, fname_len, lowest_dist);
+ /* Add some extra weight to how well the suffixes match unless we've already disqualified
+ * this file based on a heuristic. */
+ if (dist < 0xFFFF0000U) {
+ dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len, 0xFFFF0000U) * 10;
+ }
if (DEBUG_GTE(FUZZY, 2)) {
rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF));
diff --git a/util1.c b/util1.c
index 671f3c75..da50ff1e 100644
--- a/util1.c
+++ b/util1.c
@@ -1487,12 +1487,19 @@ const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
#define UNIT (1 << 16)
-uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2)
+uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2, uint32 upperlimit)
{
uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
int32 cost;
unsigned i1, i2;
+ /* Check to see if the Levenshtein distance must be greater than the
+ * upper limit defined by the previously found lowest distance, using
+ * the lower bound that the Levenshtein distance is at least the
+ * difference in length of the two strings. */
+ if ((len1 > len2 ? len1 - len2 : len2 - len1) * UNIT > upperlimit)
+ return 0xFFFFU * UNIT + 1;
+
if (!len1 || !len2) {
if (!len1) {
s1 = s2;