From 2e590bf491d41788edb240b792a8bed5131989ff Mon Sep 17 00:00:00 2001 From: ben Date: Sat, 6 Apr 2002 18:01:51 +0000 Subject: Added selection.py, for better include/exclude options git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@34 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/selection.py | 176 +++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 rdiff-backup/rdiff_backup/selection.py (limited to 'rdiff-backup/rdiff_backup/selection.py') diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py new file mode 100644 index 0000000..75d55ce --- /dev/null +++ b/rdiff-backup/rdiff_backup/selection.py @@ -0,0 +1,176 @@ +execfile("destructive_stepping.py") + +####################################################################### +# +# selection - Provides the iterator-like DSRPIterator class +# +# Parses includes and excludes to yield correct files. More +# documentation on what this code does can be found on the man page. +# + +class FilePrefixError(Exception): + """Signals that a specified file doesn't start with correct prefix""" + pass + + +class Select: + """Iterate appropriate DSRPaths in given directory + + This class acts as an iterator on account of its next() method. + Basically, it just goes through all the files in a directory in + order (depth-first) and subjects each file to a bunch of tests + (selection functions) in order. The first test that includes or + excludes the file means that the file gets included (iterated) or + excluded. The default is include, so with no tests we would just + iterate all the files in the directory in order. + + The one complication to this is that sometimes we don't know + whether or not to include a directory until we examine its + contents. For instance, if we want to include all the **.py + files. If /home/ben/foo.py exists, we should also include /home + and /home/ben, but if these directories contain no **.py files, + they shouldn't be included. For this reason, a test may not + include or exclude a directory, but merely "scan" it. If later a + file in the directory gets included, so does the directory. + + As mentioned above, each test takes the form of a selection + function. The selection function takes a dsrp, and returns: + + None - means the test has nothing to say about the related file + 0 - the file is excluded by the test + 1 - the file is included + 2 - the test says the file (must be directory) should be scanned + + Also, a selection function f has a variable f.exclude which should + be true iff f could potentially exclude some file. This is used + to signal an error if the last function only includes, which would + be redundant and presumably isn't what the user intends. + + """ + def __init__(self, dsrpath, starting_index = None): + """DSRPIterator initializer. dsrpath should be the root dir""" + self.selection_functions = [] + if starting_index: + self.iter = self.iterate_starting_from(dsrpath, starting_index, + self.iterate_starting_from) + else: self.iter = self.Iterate(dsrpath, self.Iterate) + self.next = self.iter.next + + def Iterate(self, dsrpath, rec_func): + """Return iterator yielding dsrps in dsrpath + + rec_func is usually the same as this function and is what + Iterate uses to find files in subdirectories. It is used in + iterate_starting_from. + + """ + s = self.Select(dsrpath) + if s == 1: # File is included + yield dsrpath + if dsrpath.isdir(): + for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp + elif s == 2 and dsrpath.isdir(): # Directory is merely scanned + iid = self.iterate_in_dir(dsrpath, rec_func) + try: first = iid.next() + except StopIteration: return # no files inside; skip dsrp + yield dsrpath + yield first + for dsrp in iid: yield dsrp + + def iterate_in_dir(self, dsrpath, rec_func): + """Iterate the dsrps in directory dsrpath.""" + dir_listing = dsrpath.listdir() + dir_listing.sort() + for filename in dir_listing: + for dsrp in rec_func(dsrpath.append(filename)): yield dsrp + + def iterate_starting_from(self, dsrpath): + """Like Iterate, but only yield indicies > self.starting_index""" + if dsrpath.index > self.starting_index: # past starting_index + for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp + elif dsrpath.index = self.starting_index[:len(dsrpath.index)]: + # May encounter starting index on this branch + for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): + yield dsrp + + def Select(self, dsrp): + """Run through the selection functions and return dominant value""" + for sf in self.selection_functions: + result = sf(dsrp) + if result is not None: return result + return 1 + + def add_selection_func(self, sel_func): + """Add another selection function at the end""" + self.selection_functions.append(sel_func) + + def filelist_add_sf(self, filelist_fp, include, filelist_name): + """Adds selection function by reading list of files + + The format of the filelist is documented in the man page. + filelist_fp should be an (open) file object. + include should be true if this is an include list, false for + an exclude list. + filelist_name is just a string used for logging. + + """ + Log("Reading filelist %s" % filelist_name, 4) + tuple_list, something_excluded = \ + self.filelist_read(filelist_fp, include, filelist_name) + Log("Sorting filelist %s" % filelist_name, 4) + tuple_list.sort() + current_index = 0 + def selection_function(dsrp): + + + def filelist_read(self, filelist_fp, include, filelist_name): + """Read filelist from fp, return (tuplelist, something_excluded)""" + something_excluded, tuple_list = None, [] + prefix_warnings = 0 + while 1: + line = filelist_fp.readline() + if not line: break + try: tuple = self.filelist_parse_line(line, include) + except FilePrefixError, exp: + prefix_warnings += 1 + if prefix_warnings < 6: + Log("Warning: file specification %s in filelist %s\n" + "doesn't start with correct prefix %s, ignoring." % + (exp[0], filelist_name, exp[1]), 2) + if prefix_warnings == 5: + Log("Future prefix errors will not be logged.", 2) + tuple_list.append(tuple) + if not tuple[1]: something_excluded = 1 + return (tuple_list, something_excluded) + + def filelist_parse_line(self, line, include, prefix): + """Parse a single line of a filelist, returning a pair + + pair will be of form (index, include), where index is another + tuple, and include is 1 if the line specifies that we are + including a file. The default is given as an argument. + prefix is the string that the index is relative to. + + """ + line = line.strip() + if line[:2] == "+ ": # Check for "+ "/"- " syntax + include = 1 + line = line[2:] + elif line[:2] == "- ": + include = 0 + line = line[2:] + + if not line.startswith(prefix+"/"): + raise FilePrefixError(line, prefix+"/") + index = filter(lambda x: x, line.split("/")) # remove empties + return (index, include) + + def filelist_pair_match(self, dsrp, pair): + """Return 0/1 if pair excludes/includes dsrp, None if doesn't match""" + index, include = pair + assert include == 0 or include == 1 + if not include and dsrp.index[:len(index)] == index: + return 0 # /foo matches /foo/bar/baz + elif include and index[:len(dsrp.index)] == dsrp.index: + return 1 # /foo/bar implicitly matches /foo for includes only + else: return None -- cgit v1.2.1