summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Wayne Davison <wayned@samba.org> 2011-11-22 08:14:01 -0800
committer: Wayne Davison <wayned@samba.org> 2011-11-23 12:29:25 -0800
commit: 48b51d0004922cb029c55fe921f5e7df1c0bff23 (patch)
tree: 3f5c04e0ea1f5dca95c3eafb3ab9119e51ec3e9d
parent: 7da17144fd764a2420a8d08897475c0b7fdbf956 (diff)
download: rsync-48b51d0004922cb029c55fe921f5e7df1c0bff23.tar.gz
make repeated --fuzzy option look into alt-dest dirs.
-rw-r--r-- generator.c 134
-rw-r--r-- main.c 35
-rw-r--r-- options.c 17
-rw-r--r-- receiver.c 17
-rw-r--r-- rsync.yo 4
5 files changed, 131 insertions, 76 deletions
diff --git a/generator.c b/generator.c
index 25648ce7..df690da6 100644
--- a/generator.c
+++ b/generator.c
@@ -733,56 +733,75 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
/* Try to find a filename in the same dir as "fname" with a similar name. */
-static int find_fuzzy(struct file_struct *file, struct file_list *dirlist)
+static struct file_struct *find_fuzzy(struct file_struct *file, struct file_list *dirlist_array[], uchar *fnamecmp_type_ptr)
{
int fname_len, fname_suf_len;
const char *fname_suf, *fname = file->basename;
uint32 lowest_dist = 25 << 16; /* ignore a distance greater than 25 */
- int j, lowest_j = -1;
+ int i, j;
+ struct file_struct *lowest_fp = NULL;
fname_len = strlen(fname);
fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len);
- for (j = 0; j < dirlist->used; j++) {
- struct file_struct *fp = dirlist->files[j];
- const char *suf, *name;
- int len, suf_len;
- uint32 dist;
+ /* Try to find an exact size+mtime match first. */
+ for (i = 0; i < fuzzy_basis; i++) {
+ struct file_list *dirlist = dirlist_array[i];
- if (!S_ISREG(fp->mode) || !F_LENGTH(fp)
- || fp->flags & FLAG_FILE_SENT)
+ if (!dirlist)
continue;
- name = fp->basename;
+ for (j = 0; j < dirlist->used; j++) {
+ struct file_struct *fp = dirlist->files[j];
- if (F_LENGTH(fp) == F_LENGTH(file)
- && cmp_time(fp->modtime, file->modtime) == 0) {
- if (DEBUG_GTE(FUZZY, 2)) {
- rprintf(FINFO,
- "fuzzy size/modtime match for %s\n",
- name);
+ if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT)
+ continue;
+
+ if (F_LENGTH(fp) == F_LENGTH(file) && cmp_time(fp->modtime, file->modtime) == 0) {
+ if (DEBUG_GTE(FUZZY, 2))
+ rprintf(FINFO, "fuzzy size/modtime match for %s\n", f_name(fp, NULL));
+ *fnamecmp_type_ptr = FNAMECMP_FUZZY + i;
+ return fp;
}
- return j;
+
}
+ }
- len = strlen(name);
- suf = find_filename_suffix(name, len, &suf_len);
+ for (i = 0; i < fuzzy_basis; i++) {
+ struct file_list *dirlist = dirlist_array[i];
- dist = fuzzy_distance(name, len, fname, fname_len);
- /* Add some extra weight to how well the suffixes match. */
- dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len)
- * 10;
- if (DEBUG_GTE(FUZZY, 2)) {
- rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
- name, (int)(dist>>16), (int)(dist&0xFFFF));
- }
- if (dist <= lowest_dist) {
- lowest_dist = dist;
- lowest_j = j;
+ if (!dirlist)
+ continue;
+
+ for (j = 0; j < dirlist->used; j++) {
+ struct file_struct *fp = dirlist->files[j];
+ const char *suf, *name;
+ int len, suf_len;
+ uint32 dist;
+
+ if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT)
+ continue;
+
+ name = fp->basename;
+ len = strlen(name);
+ suf = find_filename_suffix(name, len, &suf_len);
+
+ dist = fuzzy_distance(name, len, fname, fname_len);
+ /* Add some extra weight to how well the suffixes match. */
+ dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10;
+ if (DEBUG_GTE(FUZZY, 2)) {
+ rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
+ f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF));
+ }
+ if (dist <= lowest_dist) {
+ lowest_dist = dist;
+ lowest_fp = fp;
+ *fnamecmp_type_ptr = FNAMECMP_FUZZY + i;
+ }
}
}
- return lowest_j;
+ return lowest_fp;
}
/* Copy a file found in our --copy-dest handling. */
@@ -1128,7 +1147,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
/* Missing dir whose contents are skipped altogether due to
* --ignore-non-existing, daemon exclude, or mkdir failure. */
static struct file_struct *skip_dir = NULL;
- static struct file_list *fuzzy_dirlist = NULL;
+ static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1];
static int need_fuzzy_dirlist = 0;
struct file_struct *fuzzy_file = NULL;
int fd = -1, f_copy = -1;
@@ -1187,10 +1206,13 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
}
if (dry_run > 1 || (dry_missing_dir && is_below(file, dry_missing_dir))) {
+ int i;
parent_is_dry_missing:
- if (fuzzy_dirlist) {
- flist_free(fuzzy_dirlist);
- fuzzy_dirlist = NULL;
+ for (i = 0; i < fuzzy_basis; i++) {
+ if (fuzzy_dirlist[i]) {
+ flist_free(fuzzy_dirlist[i]);
+ fuzzy_dirlist[i] = NULL;
+ }
}
parent_dirname = "";
statret = -1;
@@ -1209,12 +1231,16 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
full_fname(dn));
}
}
- if (fuzzy_dirlist) {
- flist_free(fuzzy_dirlist);
- fuzzy_dirlist = NULL;
- }
- if (fuzzy_basis)
+ if (fuzzy_basis) {
+ int i;
+ for (i = 0; i < fuzzy_basis; i++) {
+ if (fuzzy_dirlist[i]) {
+ flist_free(fuzzy_dirlist[i]);
+ fuzzy_dirlist[i] = NULL;
+ }
+ }
need_fuzzy_dirlist = 1;
+ }
#ifdef SUPPORT_ACLS
if (!preserve_perms)
dflt_perms = default_perms_for_dir(dn);
@@ -1223,8 +1249,17 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
parent_dirname = dn;
if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
+ int i;
strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
- fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
+ for (i = 0; i < fuzzy_basis; i++) {
+ if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
+ continue;
+ fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
+ if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
+ flist_free(fuzzy_dirlist[i]);
+ fuzzy_dirlist[i] = NULL;
+ }
+ }
need_fuzzy_dirlist = 0;
}
@@ -1629,10 +1664,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
} else
partialptr = NULL;
- if (statret != 0 && fuzzy_dirlist) {
- int j = find_fuzzy(file, fuzzy_dirlist);
- if (j >= 0) {
- fuzzy_file = fuzzy_dirlist->files[j];
+ if (statret != 0 && fuzzy_basis) {
+ /* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */
+ fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type);
+ if (fuzzy_file) {
f_name(fuzzy_file, fnamecmpbuf);
if (DEBUG_GTE(FUZZY, 1)) {
rprintf(FINFO, "fuzzy basis selected for %s: %s\n",
@@ -1641,7 +1676,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
sx.st.st_size = F_LENGTH(fuzzy_file);
statret = 0;
fnamecmp = fnamecmpbuf;
- fnamecmp_type = FNAMECMP_FUZZY;
}
}
@@ -1717,10 +1751,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
goto notify_others;
}
- if (fuzzy_dirlist) {
- int j = flist_find(fuzzy_dirlist, file);
+ if (fuzzy_dirlist[0]) {
+ int j = flist_find(fuzzy_dirlist[0], file);
if (j >= 0) /* don't use changing file as future fuzzy basis */
- fuzzy_dirlist->files[j]->flags |= FLAG_FILE_SENT;
+ fuzzy_dirlist[0]->files[j]->flags |= FLAG_FILE_SENT;
}
/* open the file */
@@ -1790,7 +1824,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
iflags |= ITEM_REPORT_CHANGE;
if (fnamecmp_type != FNAMECMP_FNAME)
iflags |= ITEM_BASIS_TYPE_FOLLOWS;
- if (fnamecmp_type == FNAMECMP_FUZZY)
+ if (fnamecmp_type >= FNAMECMP_FUZZY)
iflags |= ITEM_XNAME_FOLLOWS;
itemize(fnamecmp, file, -1, real_ret, &real_sx, iflags, fnamecmp_type,
fuzzy_file ? fuzzy_file->basename : NULL);
diff --git a/main.c b/main.c
index 17ba62d6..93cd50d3 100644
--- a/main.c
+++ b/main.c
@@ -76,6 +76,7 @@ extern size_t bwlimit_writemax;
extern unsigned int module_dirlen;
extern BOOL flist_receiving_enabled;
extern BOOL shutting_down;
+extern int basis_dir_cnt;
extern struct stats stats;
extern char *stdout_format;
extern char *logfile_format;
@@ -705,33 +706,35 @@ static char *get_local_name(struct file_list *flist, char *dest_path)
static void check_alt_basis_dirs(void)
{
STRUCT_STAT st;
- char **dir_p, *slash = strrchr(curr_dir, '/');
-
- for (dir_p = basis_dir; *dir_p; dir_p++) {
- if (dry_run > 1 && **dir_p != '/') {
- int len = curr_dir_len + 1 + strlen(*dir_p) + 1;
+ char *slash = strrchr(curr_dir, '/');
+ int j;
+
+ for (j = 0; j < basis_dir_cnt; j++) {
+ char *bdir = basis_dir[j];
+ int bd_len = strlen(bdir);
+ if (bd_len > 1 && bdir[bd_len-1] == '/')
+ bdir[--bd_len] = '\0';
+ if (dry_run > 1 && *bdir != '/') {
+ int len = curr_dir_len + 1 + bd_len + 1;
char *new = new_array(char, len);
if (!new)
out_of_memory("check_alt_basis_dirs");
- if (slash && strncmp(*dir_p, "../", 3) == 0) {
+ if (slash && strncmp(bdir, "../", 3) == 0) {
/* We want to remove only one leading "../" prefix for
* the directory we couldn't create in dry-run mode:
* this ensures that any other ".." references get
* evaluated the same as they would for a live copy. */
*slash = '\0';
- pathjoin(new, len, curr_dir, *dir_p + 3);
+ pathjoin(new, len, curr_dir, bdir + 3);
*slash = '/';
} else
- pathjoin(new, len, curr_dir, *dir_p);
- *dir_p = new;
- }
- if (do_stat(*dir_p, &st) < 0) {
- rprintf(FWARNING, "%s arg does not exist: %s\n",
- dest_option, *dir_p);
- } else if (!S_ISDIR(st.st_mode)) {
- rprintf(FWARNING, "%s arg is not a dir: %s\n",
- dest_option, *dir_p);
+ pathjoin(new, len, curr_dir, bdir);
+ basis_dir[j] = bdir = new;
}
+ if (do_stat(bdir, &st) < 0)
+ rprintf(FWARNING, "%s arg does not exist: %s\n", dest_option, bdir);
+ else if (!S_ISDIR(st.st_mode))
+ rprintf(FWARNING, "%s arg is not a dir: %s\n", dest_option, bdir);
}
}
diff --git a/options.c b/options.c
index e8db07cf..9e95c86a 100644
--- a/options.c
+++ b/options.c
@@ -955,7 +955,7 @@ static struct poptOption long_options[] = {
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
- {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 },
+ {"fuzzy", 'y', POPT_ARG_NONE, 0, 'y', 0, 0 },
{"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
{"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
{"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
@@ -1500,6 +1500,10 @@ int parse_arguments(int *argc_p, const char ***argv_p)
verbose++;
break;
+ case 'y':
+ fuzzy_basis++;
+ break;
+
case 'q':
quiet++;
break;
@@ -1845,6 +1849,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
}
#endif
+ if (fuzzy_basis > 1)
+ fuzzy_basis = basis_dir_cnt + 1;
+
if (protect_args == 1 && am_server)
return 1;
@@ -2342,6 +2349,11 @@ void server_options(char **args, int *argc_p)
argstr[x++] = 'O';
if (omit_link_times)
argstr[x++] = 'J';
+ if (fuzzy_basis) {
+ argstr[x++] = 'y';
+ if (fuzzy_basis > 1)
+ argstr[x++] = 'y';
+ }
} else {
if (copy_links)
argstr[x++] = 'L';
@@ -2680,9 +2692,6 @@ void server_options(char **args, int *argc_p)
if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
args[ac++] = "--no-implied-dirs";
- if (fuzzy_basis && am_sender)
- args[ac++] = "--fuzzy";
-
if (remove_source_files == 1)
args[ac++] = "--remove-source-files";
else if (remove_source_files)
diff --git a/receiver.c b/receiver.c
index 1819830a..3ab893d0 100644
--- a/receiver.c
+++ b/receiver.c
@@ -699,21 +699,26 @@ int recv_files(int f_in, int f_out, char *local_name)
break;
case FNAMECMP_FUZZY:
if (file->dirname) {
- pathjoin(fnamecmpbuf, MAXPATHLEN,
- file->dirname, xname);
+ pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, file->dirname, xname);
fnamecmp = fnamecmpbuf;
} else
fnamecmp = xname;
break;
default:
- if (fnamecmp_type >= basis_dir_cnt) {
+ if (fnamecmp_type > FNAMECMP_FUZZY && fnamecmp_type-FNAMECMP_FUZZY <= basis_dir_cnt) {
+ fnamecmp_type -= FNAMECMP_FUZZY + 1;
+ if (file->dirname) {
+ stringjoin(fnamecmpbuf, sizeof fnamecmpbuf,
+ basis_dir[fnamecmp_type], "/", file->dirname, "/", xname, NULL);
+ } else
+ pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], xname);
+ } else if (fnamecmp_type >= basis_dir_cnt) {
rprintf(FERROR,
"invalid basis_dir index: %d.\n",
fnamecmp_type);
exit_cleanup(RERR_PROTOCOL);
- }
- pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
- basis_dir[fnamecmp_type], fname);
+ } else
+ pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], fname);
fnamecmp = fnamecmpbuf;
break;
}
diff --git a/rsync.yo b/rsync.yo
index 3c0bfc00..43f264d2 100644
--- a/rsync.yo
+++ b/rsync.yo
@@ -1748,6 +1748,10 @@ looks in the same directory as the destination file for either a file that
has an identical size and modified-time, or a similarly-named file. If
found, rsync uses the fuzzy basis file to try to speed up the transfer.
+If the option is repeated, the fuzzy scan will also be done in any alternate
+destination directories that are specified via bf(--compare-dest),
+bf(--copy-dest), or bf(--link-dest).
+
Note that the use of the bf(--delete) option might get rid of any potential
fuzzy-match files, so either use bf(--delete-after) or specify some
filename exclusions if you need to prevent this.