diff options
-rw-r--r-- | generator.c | 9 | ||||
-rw-r--r-- | util1.c | 9 |
2 files changed, 14 insertions, 4 deletions
diff --git a/generator.c b/generator.c
index 935c84f9..21c4a595 100644
--- a/generator.c
+++ b/generator.c
@@ -875,9 +875,12 @@ static struct file_struct *find_fuzzy(struct file_struct *file, struct file_list
 			len = strlen(name);
 			suf = find_filename_suffix(name, len, &suf_len);
 
-			dist = fuzzy_distance(name, len, fname, fname_len);
-			/* Add some extra weight to how well the suffixes match. */
-			dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10;
+			dist = fuzzy_distance(name, len, fname, fname_len, lowest_dist);
+			/* Add some extra weight to how well the suffixes match unless we've already disqualified
+			 * this file based on a heuristic. */
+			if (dist < 0xFFFF0000U) {
+				dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len, 0xFFFF0000U) * 10;
+			}
 			if (DEBUG_GTE(FUZZY, 2)) {
 				rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
 					f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF));
diff --git a/util1.c b/util1.c
--- a/util1.c
+++ b/util1.c
@@ -1487,12 +1487,19 @@ const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
 
 #define UNIT (1 << 16)
 
-uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2)
+uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2, uint32 upperlimit)
 {
 	uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
 	int32 cost;
 	unsigned i1, i2;
 
+	/* Check to see if the Levenshtein distance must be greater than the
+	 * upper limit defined by the previously found lowest distance using
+	 * the heuristic that the Levenshtein distance is greater than the
+	 * difference in length of the two strings */
+	if ((len1 > len2 ? len1 - len2 : len2 - len1) * UNIT > upperlimit)
+		return 0xFFFFU * UNIT + 1;
+
 	if (!len1 || !len2) {
 		if (!len1) {
 			s1 = s2;