From ae666de6c68c36daa946946303196f8be96e9cfe Mon Sep 17 00:00:00 2001 From: ben Date: Fri, 12 Apr 2002 04:18:16 +0000 Subject: Added new Select code git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@43 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/destructive_stepping.py | 1 + rdiff-backup/rdiff_backup/header.py | 2 +- rdiff-backup/rdiff_backup/highlevel.py | 33 ++- rdiff-backup/rdiff_backup/rpath.py | 9 +- rdiff-backup/rdiff_backup/selection.py | 274 ++++++++++++++++++---- rdiff-backup/src/destructive_stepping.py | 1 + rdiff-backup/src/globals.py | 8 +- rdiff-backup/src/header.py | 2 +- rdiff-backup/src/highlevel.py | 33 ++- rdiff-backup/src/main.py | 8 +- rdiff-backup/src/rpath.py | 9 +- rdiff-backup/src/selection.py | 274 ++++++++++++++++++---- 12 files changed, 496 insertions(+), 158 deletions(-) diff --git a/rdiff-backup/rdiff_backup/destructive_stepping.py b/rdiff-backup/rdiff_backup/destructive_stepping.py index 80d274e..c4b1191 100644 --- a/rdiff-backup/rdiff_backup/destructive_stepping.py +++ b/rdiff-backup/rdiff_backup/destructive_stepping.py @@ -187,6 +187,7 @@ class DestructiveStepping: destination file. """ + return None # this code is for the test suites only, use Select instead if Globals.exclude_device_files and dsrp.isdev(): return 1 if source: exclude_regexps = Globals.exclude_regexps diff --git a/rdiff-backup/rdiff_backup/header.py b/rdiff-backup/rdiff_backup/header.py index 8d54052..81cbaff 100644 --- a/rdiff-backup/rdiff_backup/header.py +++ b/rdiff-backup/rdiff_backup/header.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # rdiff-backup -- Mirror files while keeping incremental changes -# Version 0.7.2 released April 30, 2002 +# Version 0.7.2 released April 11, 2002 # Copyright (C) 2001, 2002 Ben Escoto # # This program is licensed under the GNU General Public License (GPL). 
diff --git a/rdiff-backup/rdiff_backup/highlevel.py b/rdiff-backup/rdiff_backup/highlevel.py index 2b366f2..54029c7 100644 --- a/rdiff-backup/rdiff_backup/highlevel.py +++ b/rdiff-backup/rdiff_backup/highlevel.py @@ -37,7 +37,7 @@ class HighLevel: SourceS.set_session_info(session_info) DestS.set_session_info(session_info) - src_init_dsiter = SourceS.split_initial_dsiter(src_rpath) + src_init_dsiter = SourceS.split_initial_dsiter() dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter) diffiter = SourceS.get_diffs_and_finalize(dest_sigiter) DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint) @@ -53,7 +53,7 @@ class HighLevel: SourceS.set_session_info(session_info) DestS.set_session_info(session_info) if not session_info: dest_rpath.conn.SaveState.touch_last_file() - src_init_dsiter = SourceS.split_initial_dsiter(src_rpath) + src_init_dsiter = SourceS.split_initial_dsiter() dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter) diffiter = SourceS.get_diffs_and_finalize(dest_sigiter) DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath) @@ -85,16 +85,15 @@ class HLSourceStruct: def set_session_info(cls, session_info): cls._session_info = session_info - def iterate_from(cls, rpath): + def iterate_from(cls): """Supply more aruments to DestructiveStepping.Iterate_from""" - if cls._session_info: - return DestructiveStepping.Iterate_from(rpath, 1, - cls._session_info.last_index) - else: return DestructiveStepping.Iterate_from(rpath, 1) + if cls._session_info is None: Globals.select_source.set_iter() + else: Globals.select_source.set_iter(cls._session_info.last_index) + return Globals.select_source - def split_initial_dsiter(cls, rpath): + def split_initial_dsiter(cls): """Set iterators of all dsrps from rpath, returning one""" - dsiter = cls.iterate_from(rpath) + dsiter = cls.iterate_from() initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) return initial_dsiter1 @@ -131,17 +130,15 @@ class HLDestinationStruct: def 
set_session_info(cls, session_info): cls._session_info = session_info - def iterate_from(cls, rpath): + def iterate_from(cls): """Supply more arguments to DestructiveStepping.Iterate_from""" - if cls._session_info: - return DestructiveStepping.Iterate_from(rpath, None, - cls._session_info.last_index) - else: return DestructiveStepping.Iterate_from(rpath, None) + if cls._session_info is None: Globals.select_mirror.set_iter() + else: Globals.select_mirror.set_iter(cls._session_info.last_index) + return Globals.select_mirror - def split_initial_dsiter(cls, rpath): + def split_initial_dsiter(cls): """Set initial_dsiters (iteration of all dsrps from rpath)""" - dsiter = cls.iterate_from(rpath) - result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) + result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2) return result def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter): @@ -197,7 +194,7 @@ class HLDestinationStruct: def get_sigs(cls, baserp, src_init_iter): """Return signatures of all dissimilar files""" - dest_iters1 = cls.split_initial_dsiter(baserp) + dest_iters1 = cls.split_initial_dsiter() dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1) return RORPIter.Signatures(dissimilars) diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index c1f0a44..d862951 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -404,6 +404,9 @@ class RPath(RORPath): "bar") for "foo/bar" (no base), and ("local", "bin") for "/usr/local/bin" if the base is "/usr". + For the root directory "/", the index is empty and the base is + "/". 
+ """ self.conn = connection self.index = index @@ -487,9 +490,6 @@ class RPath(RORPath): def _getdevnums(self): """Return tuple for special file (major, minor)""" - if Globals.exclude_device_files: - # No point in finding numbers because it will be excluded anyway - return () s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path) return (s >> 8, s & 0xff) @@ -596,10 +596,9 @@ class RPath(RORPath): def helper(dsrp, base_init_output, branch_reduction): if dsrp.isdir(): dsrp.rmdir() else: dsrp.delete() - dsiter = DestructiveStepping.Iterate_from(self, None) itm = IterTreeReducer(lambda x: None, lambda x,y: None, None, helper) - for dsrp in dsiter: itm(dsrp) + for dsrp in Select(self, None).set_iter(): itm(dsrp) itm.getresult() else: self.conn.os.unlink(self.path) self.setdata() diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py index 9f41860..753cac2 100644 --- a/rdiff-backup/rdiff_backup/selection.py +++ b/rdiff-backup/rdiff_backup/selection.py @@ -10,10 +10,18 @@ import re # documentation on what this code does can be found on the man page. # -class FilePrefixError(Exception): +class SelectError(Exception): + """Some error dealing with the Select class""" + pass + +class FilePrefixError(SelectError): """Signals that a specified file doesn't start with correct prefix""" pass +class GlobbingError(SelectError): + """Something has gone wrong when parsing a glob string""" + pass + class Select: """Iterate appropriate DSRPaths in given directory @@ -50,60 +58,97 @@ class Select: """ # This re should not match normal filenames, but usually just globs - glob_re = re.compile(".*[\*\?\[]") + glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S) + + def __init__(self, rpath, source): + """DSRPIterator initializer. - def __init__(self, dsrpath): - """DSRPIterator initializer""" + rpath is the root dir. 
Source is true if rpath is the root of + the source directory, and false for the mirror directory + + """ + assert isinstance(rpath, RPath) self.selection_functions = [] - self.dsrpath = dsrpath - self.prefix = dsrpath.path + self.source = source + if isinstance(rpath, DSRPath): self.dsrpath = rpath + else: self.dsrpath = DSRPath(rpath.conn, rpath.base, + rpath.index, rpath.data) + self.prefix = self.dsrpath.path - def set_iter(self, starting_index = None): - """Initialize more variables. dsrpath should be the root dir""" + def set_iter(self, starting_index = None, sel_func = None): + """Initialize more variables, get ready to iterate + + Will iterate indicies greater than starting_index. Selection + function sel_func is called on each dsrp and is usually + self.Select. Returns self just for convenience. + + """ + if not sel_func: sel_func = self.Select + self.dsrpath.setdata() # this may have changed since Select init if starting_index is not None: + self.starting_index = starting_index self.iter = self.iterate_starting_from(self.dsrpath, - starting_index, self.iterate_starting_from) - else: self.iter = self.Iterate(self.dsrpath, self.Iterate) + self.iterate_starting_from, sel_func) + else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) self.next = self.iter.next self.__iter__ = lambda: self + return self - def Iterate(self, dsrpath, rec_func): + def Iterate(self, dsrpath, rec_func, sel_func): """Return iterator yielding dsrps in dsrpath rec_func is usually the same as this function and is what Iterate uses to find files in subdirectories. It is used in iterate_starting_from. + sel_func is the selection function to use on the dsrps. It is + usually self.Select. 
+ """ - s = self.Select(dsrpath) + s = sel_func(dsrpath) + if not s or DestructiveStepping.initialize(dsrpath, self.source): + return if s == 1: # File is included yield dsrpath if dsrpath.isdir(): - for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp + for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func): + yield dsrp elif s == 2 and dsrpath.isdir(): # Directory is merely scanned - iid = self.iterate_in_dir(dsrpath, rec_func) + iid = self.iterate_in_dir(dsrpath, rec_func, sel_func) try: first = iid.next() except StopIteration: return # no files inside; skip dsrp yield dsrpath yield first for dsrp in iid: yield dsrp - def iterate_in_dir(self, dsrpath, rec_func): + def iterate_in_dir(self, dsrpath, rec_func, sel_func): """Iterate the dsrps in directory dsrpath.""" dir_listing = dsrpath.listdir() dir_listing.sort() for filename in dir_listing: - for dsrp in rec_func(dsrpath.append(filename)): yield dsrp + for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func): + yield dsrp - def iterate_starting_from(self, dsrpath): + def iterate_starting_from(self, dsrpath, rec_func, sel_func): """Like Iterate, but only yield indicies > self.starting_index""" + if DestructiveStepping.initialize(dsrpath, self.source): return if dsrpath.index > self.starting_index: # past starting_index - for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp + for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func): + yield dsrp elif dsrpath.index == self.starting_index[:len(dsrpath.index)]: # May encounter starting index on this branch - for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): - yield dsrp - + for dsrp in self.iterate_in_dir(dsrpath, + self.iterate_starting_from, + sel_func): yield dsrp + + def iterate_with_finalizer(self): + """Like Iterate, but missing some options, and add finalizer""" + finalize = DestructiveStepping.Finalizer() + for dsrp in self: + yield dsrp + finalize(dsrp) + finalize.getresult() + def Select(self, dsrp): """Run 
through the selection functions and return dominant value""" for sf in self.selection_functions: @@ -123,29 +168,68 @@ class Select: information is sent over the link. """ - for opt, arg in argtuples: - if opt == "--exclude": - self.add_selection_func(self.glob_get_sf(arg, 0)) - elif opt == "--exclude-device-files": - self.add_selection_func(self.devfiles_get_sf()) - elif opt == "--exclude-filelist": - self.add_selection_func(self.filelist_get_sf(arg[1], - 0, arg[0])) - elif opt == "--exclude-regexp": - self.add_selection_func(self.regexp_get_sf(arg, 0)) - elif opt == "--include": - self.add_selection_func(self.glob_get_sf(arg, 1)) - elif opt == "--include-filelist": - self.add_selection_func(self.filelist_get_sf(arg[1], - 1, arg[0])) - elif opt == "--include-regexp": - self.add_selection_func(self.regexp_get_sf(arg, 1)) - else: assert 0, "Bad option %s" % opt - - # Exclude rdiff-backup-data directory + try: + for opt, arg in argtuples: + if opt == "--exclude": + self.add_selection_func(self.glob_get_sf(arg, 0)) + elif opt == "--exclude-device-files": + self.add_selection_func(self.devfiles_get_sf()) + elif opt == "--exclude-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 0, arg[0])) + elif opt == "--exclude-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 0)) + elif opt == "--include": + self.add_selection_func(self.glob_get_sf(arg, 1)) + elif opt == "--include-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 1, arg[0])) + elif opt == "--include-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 1)) + else: assert 0, "Bad option %s" % opt + except SelectError, e: self.parse_catch_error(e) + + self.parse_last_excludes() + self.parse_rbdir_exclude() + self.parse_proc_exclude() + + def parse_catch_error(self, exc): + """Deal with selection error exc""" + if isinstance(exc, FilePrefixError): + Log.FatalError( +"""Fatal Error: The file specification + %s +cannot match any files in the base directory + %s +Useful 
file specifications begin with the base directory or some +pattern (such as '**') which matches the base directory.""" % + (exc, self.prefix)) + elif isinstance(e, GlobbingError): + Log.FatalError("Fatal Error while processing expression\n" + "%s" % exc) + else: raise + + def parse_rbdir_exclude(self): + """Add exclusion of rdiff-backup-data dir to front of list""" self.add_selection_func( self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1) + def parse_proc_exclude(self): + """Exclude the /proc directory if starting from /""" + if self.prefix == "/": + self.add_selection_func(self.glob_get_tuple_sf(("proc",), 0), 1) + + def parse_last_excludes(self): + """Exit with error if last selection function isn't an exclude""" + if self.select_functions and not self.selection_functions[-1].exclude: + Log.FatalError( +"""Last selection expression: + %s +only specifies that files be included. Because the default is to +include all files, the expression is redundant. Exiting because this +probably isn't what you meant.""" % + (self.selection_functions[-1].name, self.prefix)) + def add_selection_func(self, sel_func, add_to_start = None): """Add another selection function at the end or beginning""" if add_to_start: self.selection_functions.insert(0, sel_func) @@ -259,8 +343,7 @@ class Select: raise def sel_func(dsrp): - match = regexp.match(dsrp.path) - if match and match.end(0) == len(dsrp.path): return include + if regexp.search(dsrp.path): return include else: return None sel_func.exclude = not include @@ -284,8 +367,8 @@ class Select: assert include == 0 or include == 1 if glob_str == "**": sel_func = lambda dsrp: include elif not self.glob_re.match(glob_str): # normal file - return self.glob_get_filename_sf(glob_str, include) - else: pass ####XXXXXXXXXXXXX + sel_func = self.glob_get_filename_sf(glob_str, include) + else: sel_func = self.glob_get_normal_sf(glob_str, include) sel_func.exclude = not include sel_func.name = "Command-line glob: %s" % glob_str @@ -296,20 +379,18 
@@ class Select: Some of the parsing is better explained in filelist_parse_line. The reason this is split from normal - globbing is so we can check the prefix and give proper - warning. + globbing is things are a lot less complicated if no special + globbing characters are used. """ if not filename.startswith(self.prefix): - Log("Warning: file specification %s does not start with\n" - "prefix %s, ignoring" % (filename, self.prefix), 2) - return lambda x: None # dummy selection function + raise FilePrefixError(filename) index = tuple(filter(lambda x: x, filename[len(self.prefix):].split("/"))) return self.glob_get_tuple_sf(index, include) def glob_get_tuple_sf(self, tuple, include): - """Add selection function based on tuple""" + """Return selection function based on tuple""" def include_sel_func(dsrp): if (dsrp.index == tuple[:len(dsrp.index)] or dsrp.index[:len(tuple)] == tuple): @@ -327,3 +408,94 @@ class Select: sel_func.name = "Tuple select %s" % (tuple,) return sel_func + def glob_get_normal_sf(self, glob_str, include): + """Return selection function based on glob_str + + The basic idea is to turn glob_str into a regular expression, + and just use the normal regular expression. There is a + complication because the selection function should return '2' + (scan) for directories which may contain a file which matches + the glob_str. So we break up the glob string into parts, and + any file which matches an initial sequence of glob parts gets + scanned. + + Thanks to Donovan Baarda who provided some code which did some + things similar to this. 
+ + """ + if glob_str.lower().startswith("ignorecase:"): + re_comp = lambda r: re.compile(r, re.I | re.S) + glob_str = glob_str[len("ignorecase:"):] + else: re_comp = lambda r: re.compile(r, re.S) + + # matches what glob matches and any files in directory + glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str)) + + if glob_str.find("**") != -1: + glob_str = glob_str[:glob_str.find("**")+2] # truncate after ** + + scan_comp_re = re_comp("^(%s)$" % + "|".join(self.glob_get_prefix_res(glob_str))) + + def include_sel_func(dsrp): + if glob_comp_re.match(dsrp.path): return 1 + elif scan_comp_re.match(dsrp.path): return 2 + else: return None + + def exclude_sel_func(dsrp): + if glob_comp_re.match(dsrp.path): return 0 + else: return None + + # Check to make sure prefix is ok + if not include_sel_func(self.dsrpath): raise FilePrefixError(glob_str) + + if include: return include_sel_func + else: return exclude_sel_func + + def glob_get_prefix_res(self, glob_str): + """Return list of regexps equivalent to prefixes of glob_str""" + glob_parts = glob_str.split("/") + if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/ + raise GlobbingError("Consecutive '/'s found in globbing string " + + glob_str) + + prefixes = map(lambda i: "/".join(glob_parts[:i+1]), + range(len(glob_parts))) + # we must make exception for root "/", only dir to end in slash + if prefixes[0] == "": prefixes[0] = "/" + return map(self.glob_to_re, prefixes) + + def glob_to_re(self, pat): + """Returned regular expression equivalent to shell glob pat + + Currently only the ?, *, [], and ** expressions are supported. + Ranges like [a-z] are also currently unsupported. There is no + way to quote these special characters. + + This function taken with minor modifications from efnmatch.py + by Donovan Baarda. 
+ + """ + i, n, res = 0, len(pat), '' + while i < n: + c, s = pat[i], pat[i:i+2] + i = i+1 + if s == '**': + res = res + '.*' + i = i + 1 + elif c == '*': res = res + '[^/]*' + elif c == '?': res = res + '[^/]' + elif c == '[': + j = i + if j < n and pat[j] in '!^': j = j+1 + if j < n and pat[j] == ']': j = j+1 + while j < n and pat[j] != ']': j = j+1 + if j >= n: res = res + '\\[' # interpret the [ literally + else: # Deal with inside of [..] + stuff = pat[i:j].replace('\\','\\\\') + i = j+1 + if stuff[0] in '!^': stuff = '^' + stuff[1:] + res = res + '[' + stuff + ']' + else: res = res + re.escape(c) + return res + diff --git a/rdiff-backup/src/destructive_stepping.py b/rdiff-backup/src/destructive_stepping.py index 80d274e..c4b1191 100644 --- a/rdiff-backup/src/destructive_stepping.py +++ b/rdiff-backup/src/destructive_stepping.py @@ -187,6 +187,7 @@ class DestructiveStepping: destination file. """ + return None # this code is for the test suites only, use Select instead if Globals.exclude_device_files and dsrp.isdev(): return 1 if source: exclude_regexps = Globals.exclude_regexps diff --git a/rdiff-backup/src/globals.py b/rdiff-backup/src/globals.py index fceefa4..c47880c 100644 --- a/rdiff-backup/src/globals.py +++ b/rdiff-backup/src/globals.py @@ -123,8 +123,8 @@ class Globals: # case-insensitive regular expression won't be compressed (applies # to .snapshots and .diffs). The second below will be the # compiled version of the first. 
- no_compression_regexp_string = ".*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \ - "jpg|gif|png|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$" + no_compression_regexp_string = "(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \ + "jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$" no_compression_regexp = None # On the reader and writer connections, the following will be @@ -183,9 +183,9 @@ class Globals: def set_select(cls, source, dsrpath, tuplelist): """Initialize select object using tuplelist""" if source: - cls.select_source = Select(dsrpath) + cls.select_source = Select(dsrpath, 1) cls.select_source.ParseArgs(tuplelist) else: - cls.select_mirror = Select(dsrpath) + cls.select_mirror = Select(dsrpath, None) cls.select_mirror.ParseArgs(tuplelist) set_select = classmethod(set_select) diff --git a/rdiff-backup/src/header.py b/rdiff-backup/src/header.py index 8d54052..81cbaff 100644 --- a/rdiff-backup/src/header.py +++ b/rdiff-backup/src/header.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # rdiff-backup -- Mirror files while keeping incremental changes -# Version 0.7.2 released April 30, 2002 +# Version 0.7.2 released April 11, 2002 # Copyright (C) 2001, 2002 Ben Escoto # # This program is licensed under the GNU General Public License (GPL). 
diff --git a/rdiff-backup/src/highlevel.py b/rdiff-backup/src/highlevel.py index 2b366f2..54029c7 100644 --- a/rdiff-backup/src/highlevel.py +++ b/rdiff-backup/src/highlevel.py @@ -37,7 +37,7 @@ class HighLevel: SourceS.set_session_info(session_info) DestS.set_session_info(session_info) - src_init_dsiter = SourceS.split_initial_dsiter(src_rpath) + src_init_dsiter = SourceS.split_initial_dsiter() dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter) diffiter = SourceS.get_diffs_and_finalize(dest_sigiter) DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint) @@ -53,7 +53,7 @@ class HighLevel: SourceS.set_session_info(session_info) DestS.set_session_info(session_info) if not session_info: dest_rpath.conn.SaveState.touch_last_file() - src_init_dsiter = SourceS.split_initial_dsiter(src_rpath) + src_init_dsiter = SourceS.split_initial_dsiter() dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter) diffiter = SourceS.get_diffs_and_finalize(dest_sigiter) DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath) @@ -85,16 +85,15 @@ class HLSourceStruct: def set_session_info(cls, session_info): cls._session_info = session_info - def iterate_from(cls, rpath): + def iterate_from(cls): """Supply more aruments to DestructiveStepping.Iterate_from""" - if cls._session_info: - return DestructiveStepping.Iterate_from(rpath, 1, - cls._session_info.last_index) - else: return DestructiveStepping.Iterate_from(rpath, 1) + if cls._session_info is None: Globals.select_source.set_iter() + else: Globals.select_source.set_iter(cls._session_info.last_index) + return Globals.select_source - def split_initial_dsiter(cls, rpath): + def split_initial_dsiter(cls): """Set iterators of all dsrps from rpath, returning one""" - dsiter = cls.iterate_from(rpath) + dsiter = cls.iterate_from() initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) return initial_dsiter1 @@ -131,17 +130,15 @@ class HLDestinationStruct: def set_session_info(cls, session_info): 
cls._session_info = session_info - def iterate_from(cls, rpath): + def iterate_from(cls): """Supply more arguments to DestructiveStepping.Iterate_from""" - if cls._session_info: - return DestructiveStepping.Iterate_from(rpath, None, - cls._session_info.last_index) - else: return DestructiveStepping.Iterate_from(rpath, None) + if cls._session_info is None: Globals.select_mirror.set_iter() + else: Globals.select_mirror.set_iter(cls._session_info.last_index) + return Globals.select_mirror - def split_initial_dsiter(cls, rpath): + def split_initial_dsiter(cls): """Set initial_dsiters (iteration of all dsrps from rpath)""" - dsiter = cls.iterate_from(rpath) - result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) + result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2) return result def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter): @@ -197,7 +194,7 @@ class HLDestinationStruct: def get_sigs(cls, baserp, src_init_iter): """Return signatures of all dissimilar files""" - dest_iters1 = cls.split_initial_dsiter(baserp) + dest_iters1 = cls.split_initial_dsiter() dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1) return RORPIter.Signatures(dissimilars) diff --git a/rdiff-backup/src/main.py b/rdiff-backup/src/main.py index 9a0d901..c29dcdf 100755 --- a/rdiff-backup/src/main.py +++ b/rdiff-backup/src/main.py @@ -125,7 +125,7 @@ class Main: sys.exit(1) def misc_setup(self, rps): - """Set default change ownership flag, umask, regular expressions""" + """Set default change ownership flag, umask, Select objects""" if ((len(rps) == 2 and rps[1].conn.os.getuid() == 0) or (len(rps) < 2 and os.getuid() == 0)): # Allow change_ownership if destination connection is root @@ -139,7 +139,7 @@ class Main: rps[1].conn.Globals.set_select(None, rps[1], self.select_mirror_opts) Globals.postset_regexp('no_compression_regexp', - Globals.no_compression_regexp_string, re.I) + Globals.no_compression_regexp_string) def take_action(self, rps): """Do whatever 
self.action says""" @@ -248,8 +248,8 @@ rdiff-backup with the --force option.""" % rpout.path) (rpin.path == "." and rpout.path[0] != '/' and rpout.path[:2] != '..')): # Just a few heuristics, we don't have to get every case - if not DestructiveStepping.isexcluded(rpout, 1): - Log( + if Globals.backup_reader.Globals.select_source \ + .Select(rpout): Log( """Warning: The destination directory '%s' may be contained in the source directory '%s'. This could cause an infinite regress. You may need to use the --exclude option.""" % (rpout.path, rpin.path), 2) diff --git a/rdiff-backup/src/rpath.py b/rdiff-backup/src/rpath.py index c1f0a44..d862951 100644 --- a/rdiff-backup/src/rpath.py +++ b/rdiff-backup/src/rpath.py @@ -404,6 +404,9 @@ class RPath(RORPath): "bar") for "foo/bar" (no base), and ("local", "bin") for "/usr/local/bin" if the base is "/usr". + For the root directory "/", the index is empty and the base is + "/". + """ self.conn = connection self.index = index @@ -487,9 +490,6 @@ class RPath(RORPath): def _getdevnums(self): """Return tuple for special file (major, minor)""" - if Globals.exclude_device_files: - # No point in finding numbers because it will be excluded anyway - return () s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path) return (s >> 8, s & 0xff) @@ -596,10 +596,9 @@ class RPath(RORPath): def helper(dsrp, base_init_output, branch_reduction): if dsrp.isdir(): dsrp.rmdir() else: dsrp.delete() - dsiter = DestructiveStepping.Iterate_from(self, None) itm = IterTreeReducer(lambda x: None, lambda x,y: None, None, helper) - for dsrp in dsiter: itm(dsrp) + for dsrp in Select(self, None).set_iter(): itm(dsrp) itm.getresult() else: self.conn.os.unlink(self.path) self.setdata() diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py index 9f41860..753cac2 100644 --- a/rdiff-backup/src/selection.py +++ b/rdiff-backup/src/selection.py @@ -10,10 +10,18 @@ import re # documentation on what this code does can be found on the 
man page. # -class FilePrefixError(Exception): +class SelectError(Exception): + """Some error dealing with the Select class""" + pass + +class FilePrefixError(SelectError): """Signals that a specified file doesn't start with correct prefix""" pass +class GlobbingError(SelectError): + """Something has gone wrong when parsing a glob string""" + pass + class Select: """Iterate appropriate DSRPaths in given directory @@ -50,60 +58,97 @@ class Select: """ # This re should not match normal filenames, but usually just globs - glob_re = re.compile(".*[\*\?\[]") + glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S) + + def __init__(self, rpath, source): + """DSRPIterator initializer. - def __init__(self, dsrpath): - """DSRPIterator initializer""" + rpath is the root dir. Source is true if rpath is the root of + the source directory, and false for the mirror directory + + """ + assert isinstance(rpath, RPath) self.selection_functions = [] - self.dsrpath = dsrpath - self.prefix = dsrpath.path + self.source = source + if isinstance(rpath, DSRPath): self.dsrpath = rpath + else: self.dsrpath = DSRPath(rpath.conn, rpath.base, + rpath.index, rpath.data) + self.prefix = self.dsrpath.path - def set_iter(self, starting_index = None): - """Initialize more variables. dsrpath should be the root dir""" + def set_iter(self, starting_index = None, sel_func = None): + """Initialize more variables, get ready to iterate + + Will iterate indicies greater than starting_index. Selection + function sel_func is called on each dsrp and is usually + self.Select. Returns self just for convenience. 
+ + """ + if not sel_func: sel_func = self.Select + self.dsrpath.setdata() # this may have changed since Select init if starting_index is not None: + self.starting_index = starting_index self.iter = self.iterate_starting_from(self.dsrpath, - starting_index, self.iterate_starting_from) - else: self.iter = self.Iterate(self.dsrpath, self.Iterate) + self.iterate_starting_from, sel_func) + else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) self.next = self.iter.next self.__iter__ = lambda: self + return self - def Iterate(self, dsrpath, rec_func): + def Iterate(self, dsrpath, rec_func, sel_func): """Return iterator yielding dsrps in dsrpath rec_func is usually the same as this function and is what Iterate uses to find files in subdirectories. It is used in iterate_starting_from. + sel_func is the selection function to use on the dsrps. It is + usually self.Select. + """ - s = self.Select(dsrpath) + s = sel_func(dsrpath) + if not s or DestructiveStepping.initialize(dsrpath, self.source): + return if s == 1: # File is included yield dsrpath if dsrpath.isdir(): - for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp + for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func): + yield dsrp elif s == 2 and dsrpath.isdir(): # Directory is merely scanned - iid = self.iterate_in_dir(dsrpath, rec_func) + iid = self.iterate_in_dir(dsrpath, rec_func, sel_func) try: first = iid.next() except StopIteration: return # no files inside; skip dsrp yield dsrpath yield first for dsrp in iid: yield dsrp - def iterate_in_dir(self, dsrpath, rec_func): + def iterate_in_dir(self, dsrpath, rec_func, sel_func): """Iterate the dsrps in directory dsrpath.""" dir_listing = dsrpath.listdir() dir_listing.sort() for filename in dir_listing: - for dsrp in rec_func(dsrpath.append(filename)): yield dsrp + for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func): + yield dsrp - def iterate_starting_from(self, dsrpath): + def iterate_starting_from(self, dsrpath, 
rec_func, sel_func): """Like Iterate, but only yield indicies > self.starting_index""" + if DestructiveStepping.initialize(dsrpath, self.source): return if dsrpath.index > self.starting_index: # past starting_index - for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp + for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func): + yield dsrp elif dsrpath.index == self.starting_index[:len(dsrpath.index)]: # May encounter starting index on this branch - for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): - yield dsrp - + for dsrp in self.iterate_in_dir(dsrpath, + self.iterate_starting_from, + sel_func): yield dsrp + + def iterate_with_finalizer(self): + """Like Iterate, but missing some options, and add finalizer""" + finalize = DestructiveStepping.Finalizer() + for dsrp in self: + yield dsrp + finalize(dsrp) + finalize.getresult() + def Select(self, dsrp): """Run through the selection functions and return dominant value""" for sf in self.selection_functions: @@ -123,29 +168,68 @@ class Select: information is sent over the link. 
""" - for opt, arg in argtuples: - if opt == "--exclude": - self.add_selection_func(self.glob_get_sf(arg, 0)) - elif opt == "--exclude-device-files": - self.add_selection_func(self.devfiles_get_sf()) - elif opt == "--exclude-filelist": - self.add_selection_func(self.filelist_get_sf(arg[1], - 0, arg[0])) - elif opt == "--exclude-regexp": - self.add_selection_func(self.regexp_get_sf(arg, 0)) - elif opt == "--include": - self.add_selection_func(self.glob_get_sf(arg, 1)) - elif opt == "--include-filelist": - self.add_selection_func(self.filelist_get_sf(arg[1], - 1, arg[0])) - elif opt == "--include-regexp": - self.add_selection_func(self.regexp_get_sf(arg, 1)) - else: assert 0, "Bad option %s" % opt - - # Exclude rdiff-backup-data directory + try: + for opt, arg in argtuples: + if opt == "--exclude": + self.add_selection_func(self.glob_get_sf(arg, 0)) + elif opt == "--exclude-device-files": + self.add_selection_func(self.devfiles_get_sf()) + elif opt == "--exclude-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 0, arg[0])) + elif opt == "--exclude-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 0)) + elif opt == "--include": + self.add_selection_func(self.glob_get_sf(arg, 1)) + elif opt == "--include-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 1, arg[0])) + elif opt == "--include-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 1)) + else: assert 0, "Bad option %s" % opt + except SelectError, e: self.parse_catch_error(e) + + self.parse_last_excludes() + self.parse_rbdir_exclude() + self.parse_proc_exclude() + + def parse_catch_error(self, exc): + """Deal with selection error exc""" + if isinstance(exc, FilePrefixError): + Log.FatalError( +"""Fatal Error: The file specification + %s +cannot match any files in the base directory + %s +Useful file specifications begin with the base directory or some +pattern (such as '**') which matches the base directory.""" % + (exc, self.prefix)) + elif isinstance(e, 
GlobbingError): + Log.FatalError("Fatal Error while processing expression\n" + "%s" % exc) + else: raise + + def parse_rbdir_exclude(self): + """Add exclusion of rdiff-backup-data dir to front of list""" self.add_selection_func( self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1) + def parse_proc_exclude(self): + """Exclude the /proc directory if starting from /""" + if self.prefix == "/": + self.add_selection_func(self.glob_get_tuple_sf(("proc",), 0), 1) + + def parse_last_excludes(self): + """Exit with error if last selection function isn't an exclude""" + if self.selection_functions and not self.selection_functions[-1].exclude: + Log.FatalError( +"""Last selection expression: + %s +only specifies that files be included. Because the default is to +include all files, the expression is redundant. Exiting because this +probably isn't what you meant.""" % + (self.selection_functions[-1].name,)) + def add_selection_func(self, sel_func, add_to_start = None): """Add another selection function at the end or beginning""" if add_to_start: self.selection_functions.insert(0, sel_func) @@ -259,8 +343,7 @@ class Select: raise def sel_func(dsrp): - match = regexp.match(dsrp.path) - if match and match.end(0) == len(dsrp.path): return include + if regexp.search(dsrp.path): return include else: return None sel_func.exclude = not include @@ -284,8 +367,8 @@ class Select: assert include == 0 or include == 1 if glob_str == "**": sel_func = lambda dsrp: include elif not self.glob_re.match(glob_str): # normal file - return self.glob_get_filename_sf(glob_str, include) - else: pass ####XXXXXXXXXXXXX + sel_func = self.glob_get_filename_sf(glob_str, include) + else: sel_func = self.glob_get_normal_sf(glob_str, include) sel_func.exclude = not include sel_func.name = "Command-line glob: %s" % glob_str @@ -296,20 +379,18 @@ class Select: Some of the parsing is better explained in filelist_parse_line. 
The reason this is split from normal - globbing is so we can check the prefix and give proper - warning. + globbing is that things are a lot less complicated if no special + globbing characters are used. """ if not filename.startswith(self.prefix): - Log("Warning: file specification %s does not start with\n" - "prefix %s, ignoring" % (filename, self.prefix), 2) - return lambda x: None # dummy selection function + raise FilePrefixError(filename) index = tuple(filter(lambda x: x, filename[len(self.prefix):].split("/"))) return self.glob_get_tuple_sf(index, include) def glob_get_tuple_sf(self, tuple, include): - """Add selection function based on tuple""" + """Return selection function based on tuple""" def include_sel_func(dsrp): if (dsrp.index == tuple[:len(dsrp.index)] or dsrp.index[:len(tuple)] == tuple): @@ -327,3 +408,94 @@ class Select: sel_func.name = "Tuple select %s" % (tuple,) return sel_func + def glob_get_normal_sf(self, glob_str, include): + """Return selection function based on glob_str + + The basic idea is to turn glob_str into a regular expression, + and just use the normal regular expression. There is a + complication because the selection function should return '2' + (scan) for directories which may contain a file which matches + the glob_str. So we break up the glob string into parts, and + any file which matches an initial sequence of glob parts gets + scanned. + + Thanks to Donovan Baarda who provided some code which did some + things similar to this. 
+ + """ + if glob_str.lower().startswith("ignorecase:"): + re_comp = lambda r: re.compile(r, re.I | re.S) + glob_str = glob_str[len("ignorecase:"):] + else: re_comp = lambda r: re.compile(r, re.S) + + # matches what glob matches and any files in directory + glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str)) + + if glob_str.find("**") != -1: + glob_str = glob_str[:glob_str.find("**")+2] # truncate after ** + + scan_comp_re = re_comp("^(%s)$" % + "|".join(self.glob_get_prefix_res(glob_str))) + + def include_sel_func(dsrp): + if glob_comp_re.match(dsrp.path): return 1 + elif scan_comp_re.match(dsrp.path): return 2 + else: return None + + def exclude_sel_func(dsrp): + if glob_comp_re.match(dsrp.path): return 0 + else: return None + + # Check to make sure prefix is ok + if not include_sel_func(self.dsrpath): raise FilePrefixError(glob_str) + + if include: return include_sel_func + else: return exclude_sel_func + + def glob_get_prefix_res(self, glob_str): + """Return list of regexps equivalent to prefixes of glob_str""" + glob_parts = glob_str.split("/") + if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/ + raise GlobbingError("Consecutive '/'s found in globbing string " + + glob_str) + + prefixes = map(lambda i: "/".join(glob_parts[:i+1]), + range(len(glob_parts))) + # we must make exception for root "/", only dir to end in slash + if prefixes[0] == "": prefixes[0] = "/" + return map(self.glob_to_re, prefixes) + + def glob_to_re(self, pat): + """Return regular expression equivalent to shell glob pat + + Currently only the ?, *, [], and ** expressions are supported. + Ranges like [a-z] are also currently unsupported. There is no + way to quote these special characters. + + This function taken with minor modifications from efnmatch.py + by Donovan Baarda. 
+ + """ + i, n, res = 0, len(pat), '' + while i < n: + c, s = pat[i], pat[i:i+2] + i = i+1 + if s == '**': + res = res + '.*' + i = i + 1 + elif c == '*': res = res + '[^/]*' + elif c == '?': res = res + '[^/]' + elif c == '[': + j = i + if j < n and pat[j] in '!^': j = j+1 + if j < n and pat[j] == ']': j = j+1 + while j < n and pat[j] != ']': j = j+1 + if j >= n: res = res + '\\[' # interpret the [ literally + else: # Deal with inside of [..] + stuff = pat[i:j].replace('\\','\\\\') + i = j+1 + if stuff[0] in '!^': stuff = '^' + stuff[1:] + res = res + '[' + stuff + ']' + else: res = res + re.escape(c) + return res + -- cgit v1.2.1