diff options
author | ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2002-04-12 04:18:16 +0000 |
---|---|---|
committer | ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2002-04-12 04:18:16 +0000 |
commit | ae666de6c68c36daa946946303196f8be96e9cfe (patch) | |
tree | aa81b329fbeddcd3ef984b577f0855b92c2d6ae6 /rdiff-backup/src/selection.py | |
parent | 86d0ad7033cd0a0227c1aa7777270b95bf7c5743 (diff) | |
download | rdiff-backup-ae666de6c68c36daa946946303196f8be96e9cfe.tar.gz |
Added new Select code
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@43 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/src/selection.py')
-rw-r--r-- | rdiff-backup/src/selection.py | 274 |
1 files changed, 223 insertions, 51 deletions
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py index 9f41860..753cac2 100644 --- a/rdiff-backup/src/selection.py +++ b/rdiff-backup/src/selection.py @@ -10,10 +10,18 @@ import re # documentation on what this code does can be found on the man page. # -class FilePrefixError(Exception): +class SelectError(Exception): + """Some error dealing with the Select class""" + pass + +class FilePrefixError(SelectError): """Signals that a specified file doesn't start with correct prefix""" pass +class GlobbingError(SelectError): + """Something has gone wrong when parsing a glob string""" + pass + class Select: """Iterate appropriate DSRPaths in given directory @@ -50,60 +58,97 @@ class Select: """ # This re should not match normal filenames, but usually just globs - glob_re = re.compile(".*[\*\?\[]") + glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S) + + def __init__(self, rpath, source): + """DSRPIterator initializer. - def __init__(self, dsrpath): - """DSRPIterator initializer""" + rpath is the root dir. Source is true if rpath is the root of + the source directory, and false for the mirror directory + + """ + assert isinstance(rpath, RPath) self.selection_functions = [] - self.dsrpath = dsrpath - self.prefix = dsrpath.path + self.source = source + if isinstance(rpath, DSRPath): self.dsrpath = rpath + else: self.dsrpath = DSRPath(rpath.conn, rpath.base, + rpath.index, rpath.data) + self.prefix = self.dsrpath.path - def set_iter(self, starting_index = None): - """Initialize more variables. dsrpath should be the root dir""" + def set_iter(self, starting_index = None, sel_func = None): + """Initialize more variables, get ready to iterate + + Will iterate indicies greater than starting_index. Selection + function sel_func is called on each dsrp and is usually + self.Select. Returns self just for convenience. + + """ + if not sel_func: sel_func = self.Select + self.dsrpath.setdata() # this may have changed since Select init if starting_index is not None: + self.starting_index = starting_index self.iter = self.iterate_starting_from(self.dsrpath, - starting_index, self.iterate_starting_from) - else: self.iter = self.Iterate(self.dsrpath, self.Iterate) + self.iterate_starting_from, sel_func) + else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) self.next = self.iter.next self.__iter__ = lambda: self + return self - def Iterate(self, dsrpath, rec_func): + def Iterate(self, dsrpath, rec_func, sel_func): """Return iterator yielding dsrps in dsrpath rec_func is usually the same as this function and is what Iterate uses to find files in subdirectories. It is used in iterate_starting_from. + sel_func is the selection function to use on the dsrps. It is + usually self.Select. + """ - s = self.Select(dsrpath) + s = sel_func(dsrpath) + if not s or DestructiveStepping.initialize(dsrpath, self.source): + return if s == 1: # File is included yield dsrpath if dsrpath.isdir(): - for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp + for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func): + yield dsrp elif s == 2 and dsrpath.isdir(): # Directory is merely scanned - iid = self.iterate_in_dir(dsrpath, rec_func) + iid = self.iterate_in_dir(dsrpath, rec_func, sel_func) try: first = iid.next() except StopIteration: return # no files inside; skip dsrp yield dsrpath yield first for dsrp in iid: yield dsrp - def iterate_in_dir(self, dsrpath, rec_func): + def iterate_in_dir(self, dsrpath, rec_func, sel_func): """Iterate the dsrps in directory dsrpath.""" dir_listing = dsrpath.listdir() dir_listing.sort() for filename in dir_listing: - for dsrp in rec_func(dsrpath.append(filename)): yield dsrp + for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func): + yield dsrp - def iterate_starting_from(self, dsrpath): + def iterate_starting_from(self, dsrpath, rec_func, sel_func): """Like Iterate, but only yield indicies > self.starting_index""" + if DestructiveStepping.initialize(dsrpath, self.source): return if dsrpath.index > self.starting_index: # past starting_index - for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp + for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func): + yield dsrp elif dsrpath.index == self.starting_index[:len(dsrpath.index)]: # May encounter starting index on this branch - for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): - yield dsrp - + for dsrp in self.iterate_in_dir(dsrpath, + self.iterate_starting_from, + sel_func): yield dsrp + + def iterate_with_finalizer(self): + """Like Iterate, but missing some options, and add finalizer""" + finalize = DestructiveStepping.Finalizer() + for dsrp in self: + yield dsrp + finalize(dsrp) + finalize.getresult() + def Select(self, dsrp): """Run through the selection functions and return dominant value""" for sf in self.selection_functions: @@ -123,29 +168,68 @@ class Select: information is sent over the link. """ - for opt, arg in argtuples: - if opt == "--exclude": - self.add_selection_func(self.glob_get_sf(arg, 0)) - elif opt == "--exclude-device-files": - self.add_selection_func(self.devfiles_get_sf()) - elif opt == "--exclude-filelist": - self.add_selection_func(self.filelist_get_sf(arg[1], - 0, arg[0])) - elif opt == "--exclude-regexp": - self.add_selection_func(self.regexp_get_sf(arg, 0)) - elif opt == "--include": - self.add_selection_func(self.glob_get_sf(arg, 1)) - elif opt == "--include-filelist": - self.add_selection_func(self.filelist_get_sf(arg[1], - 1, arg[0])) - elif opt == "--include-regexp": - self.add_selection_func(self.regexp_get_sf(arg, 1)) - else: assert 0, "Bad option %s" % opt - - # Exclude rdiff-backup-data directory + try: + for opt, arg in argtuples: + if opt == "--exclude": + self.add_selection_func(self.glob_get_sf(arg, 0)) + elif opt == "--exclude-device-files": + self.add_selection_func(self.devfiles_get_sf()) + elif opt == "--exclude-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 0, arg[0])) + elif opt == "--exclude-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 0)) + elif opt == "--include": + self.add_selection_func(self.glob_get_sf(arg, 1)) + elif opt == "--include-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 1, arg[0])) + elif opt == "--include-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 1)) + else: assert 0, "Bad option %s" % opt + except SelectError, e: self.parse_catch_error(e) + + self.parse_last_excludes() + self.parse_rbdir_exclude() + self.parse_proc_exclude() + + def parse_catch_error(self, exc): + """Deal with selection error exc""" + if isinstance(exc, FilePrefixError): + Log.FatalError( +"""Fatal Error: The file specification + %s +cannot match any files in the base directory + %s +Useful file specifications begin with the base directory or some +pattern (such as '**') which matches the base directory.""" % + (exc, self.prefix)) + elif isinstance(e, GlobbingError): + Log.FatalError("Fatal Error while processing expression\n" + "%s" % exc) + else: raise + + def parse_rbdir_exclude(self): + """Add exclusion of rdiff-backup-data dir to front of list""" self.add_selection_func( self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1) + def parse_proc_exclude(self): + """Exclude the /proc directory if starting from /""" + if self.prefix == "/": + self.add_selection_func(self.glob_get_tuple_sf(("proc",), 0), 1) + + def parse_last_excludes(self): + """Exit with error if last selection function isn't an exclude""" + if self.select_functions and not self.selection_functions[-1].exclude: + Log.FatalError( +"""Last selection expression: + %s +only specifies that files be included. Because the default is to +include all files, the expression is redundant. Exiting because this +probably isn't what you meant.""" % + (self.selection_functions[-1].name, self.prefix)) + def add_selection_func(self, sel_func, add_to_start = None): """Add another selection function at the end or beginning""" if add_to_start: self.selection_functions.insert(0, sel_func) @@ -259,8 +343,7 @@ class Select: raise def sel_func(dsrp): - match = regexp.match(dsrp.path) - if match and match.end(0) == len(dsrp.path): return include + if regexp.search(dsrp.path): return include else: return None sel_func.exclude = not include @@ -284,8 +367,8 @@ class Select: assert include == 0 or include == 1 if glob_str == "**": sel_func = lambda dsrp: include elif not self.glob_re.match(glob_str): # normal file - return self.glob_get_filename_sf(glob_str, include) - else: pass ####XXXXXXXXXXXXX + sel_func = self.glob_get_filename_sf(glob_str, include) + else: sel_func = self.glob_get_normal_sf(glob_str, include) sel_func.exclude = not include sel_func.name = "Command-line glob: %s" % glob_str @@ -296,20 +379,18 @@ class Select: Some of the parsing is better explained in filelist_parse_line. The reason this is split from normal - globbing is so we can check the prefix and give proper - warning. + globbing is things are a lot less complicated if no special + globbing characters are used. """ if not filename.startswith(self.prefix): - Log("Warning: file specification %s does not start with\n" - "prefix %s, ignoring" % (filename, self.prefix), 2) - return lambda x: None # dummy selection function + raise FilePrefixError(filename) index = tuple(filter(lambda x: x, filename[len(self.prefix):].split("/"))) return self.glob_get_tuple_sf(index, include) def glob_get_tuple_sf(self, tuple, include): - """Add selection function based on tuple""" + """Return selection function based on tuple""" def include_sel_func(dsrp): if (dsrp.index == tuple[:len(dsrp.index)] or dsrp.index[:len(tuple)] == tuple): @@ -327,3 +408,94 @@ class Select: sel_func.name = "Tuple select %s" % (tuple,) return sel_func + def glob_get_normal_sf(self, glob_str, include): + """Return selection function based on glob_str + + The basic idea is to turn glob_str into a regular expression, + and just use the normal regular expression. There is a + complication because the selection function should return '2' + (scan) for directories which may contain a file which matches + the glob_str. So we break up the glob string into parts, and + any file which matches an initial sequence of glob parts gets + scanned. + + Thanks to Donovan Baarda who provided some code which did some + things similar to this. + + """ + if glob_str.lower().startswith("ignorecase:"): + re_comp = lambda r: re.compile(r, re.I | re.S) + glob_str = glob_str[len("ignorecase:"):] + else: re_comp = lambda r: re.compile(r, re.S) + + # matches what glob matches and any files in directory + glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str)) + + if glob_str.find("**") != -1: + glob_str = glob_str[:glob_str.find("**")+2] # truncate after ** + + scan_comp_re = re_comp("^(%s)$" % + "|".join(self.glob_get_prefix_res(glob_str))) + + def include_sel_func(dsrp): + if glob_comp_re.match(dsrp.path): return 1 + elif scan_comp_re.match(dsrp.path): return 2 + else: return None + + def exclude_sel_func(dsrp): + if glob_comp_re.match(dsrp.path): return 0 + else: return None + + # Check to make sure prefix is ok + if not include_sel_func(self.dsrpath): raise FilePrefixError(glob_str) + + if include: return include_sel_func + else: return exclude_sel_func + + def glob_get_prefix_res(self, glob_str): + """Return list of regexps equivalent to prefixes of glob_str""" + glob_parts = glob_str.split("/") + if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/ + raise GlobbingError("Consecutive '/'s found in globbing string " + + glob_str) + + prefixes = map(lambda i: "/".join(glob_parts[:i+1]), + range(len(glob_parts))) + # we must make exception for root "/", only dir to end in slash + if prefixes[0] == "": prefixes[0] = "/" + return map(self.glob_to_re, prefixes) + + def glob_to_re(self, pat): + """Returned regular expression equivalent to shell glob pat + + Currently only the ?, *, [], and ** expressions are supported. + Ranges like [a-z] are also currently unsupported. There is no + way to quote these special characters. + + This function taken with minor modifications from efnmatch.py + by Donovan Baarda. + + """ + i, n, res = 0, len(pat), '' + while i < n: + c, s = pat[i], pat[i:i+2] + i = i+1 + if s == '**': + res = res + '.*' + i = i + 1 + elif c == '*': res = res + '[^/]*' + elif c == '?': res = res + '[^/]' + elif c == '[': + j = i + if j < n and pat[j] in '!^': j = j+1 + if j < n and pat[j] == ']': j = j+1 + while j < n and pat[j] != ']': j = j+1 + if j >= n: res = res + '\\[' # interpret the [ literally + else: # Deal with inside of [..] + stuff = pat[i:j].replace('\\','\\\\') + i = j+1 + if stuff[0] in '!^': stuff = '^' + stuff[1:] + res = res + '[' + stuff + ']' + else: res = res + re.escape(c) + return res + |