summaryrefslogtreecommitdiff
path: root/rdiff-backup/src/selection.py
diff options
context:
space:
mode:
authorben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-04-08 09:14:12 +0000
committerben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-04-08 09:14:12 +0000
commit2fe5f0ee2d2545f05db11b8c227e582e6a9479d9 (patch)
treec5a92739fd8c1ad78acf6374fa3cd8a5c87123b6 /rdiff-backup/src/selection.py
parent6fe0035bb3eaf59d8a7fe13c230c534619dba04b (diff)
downloadrdiff-backup-2fe5f0ee2d2545f05db11b8c227e582e6a9479d9.tar.gz
Various changes to switch to new include/exclude syntax
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@39 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/src/selection.py')
-rw-r--r--rdiff-backup/src/selection.py217
1 files changed, 185 insertions, 32 deletions
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
index 75d55ce..9f41860 100644
--- a/rdiff-backup/src/selection.py
+++ b/rdiff-backup/src/selection.py
@@ -1,4 +1,6 @@
+from __future__ import generators
execfile("destructive_stepping.py")
+import re
#######################################################################
#
@@ -47,14 +49,23 @@ class Select:
be redundant and presumably isn't what the user intends.
"""
- def __init__(self, dsrpath, starting_index = None):
- """DSRPIterator initializer. dsrpath should be the root dir"""
+ # This re should not match normal filenames, but usually just globs
+ glob_re = re.compile(".*[\*\?\[]")
+
+ def __init__(self, dsrpath):
+ """DSRPIterator initializer"""
self.selection_functions = []
- if starting_index:
- self.iter = self.iterate_starting_from(dsrpath, starting_index,
- self.iterate_starting_from)
- else: self.iter = self.Iterate(dsrpath, self.Iterate)
+ self.dsrpath = dsrpath
+ self.prefix = dsrpath.path
+
+ def set_iter(self, starting_index = None):
+ """Initialize more variables. dsrpath should be the root dir"""
+ if starting_index is not None:
+ self.iter = self.iterate_starting_from(self.dsrpath,
+ starting_index, self.iterate_starting_from)
+ else: self.iter = self.Iterate(self.dsrpath, self.Iterate)
self.next = self.iter.next
+ self.__iter__ = lambda: self
def Iterate(self, dsrpath, rec_func):
"""Return iterator yielding dsrps in dsrpath
@@ -88,7 +99,7 @@ class Select:
"""Like Iterate, but only yield indicies > self.starting_index"""
if dsrpath.index > self.starting_index: # past starting_index
for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
- elif dsrpath.index = self.starting_index[:len(dsrpath.index)]:
+ elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch
for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
yield dsrp
@@ -100,50 +111,99 @@ class Select:
if result is not None: return result
return 1
- def add_selection_func(self, sel_func):
- """Add another selection function at the end"""
- self.selection_functions.append(sel_func)
+ def ParseArgs(self, argtuples):
+ """Create selection functions based on list of tuples
+
+ The tuples have the form (option string, additional argument)
+ and are created when the initial commandline arguments are
+ read. The reason for the extra level of processing is that
+ the filelists may only be openable by the main connection, but
+ the selection functions need to be on the backup reader or
+ writer side. When the initial arguments are parsed the right
+ information is sent over the link.
+
+ """
+ for opt, arg in argtuples:
+ if opt == "--exclude":
+ self.add_selection_func(self.glob_get_sf(arg, 0))
+ elif opt == "--exclude-device-files":
+ self.add_selection_func(self.devfiles_get_sf())
+ elif opt == "--exclude-filelist":
+ self.add_selection_func(self.filelist_get_sf(arg[1],
+ 0, arg[0]))
+ elif opt == "--exclude-regexp":
+ self.add_selection_func(self.regexp_get_sf(arg, 0))
+ elif opt == "--include":
+ self.add_selection_func(self.glob_get_sf(arg, 1))
+ elif opt == "--include-filelist":
+ self.add_selection_func(self.filelist_get_sf(arg[1],
+ 1, arg[0]))
+ elif opt == "--include-regexp":
+ self.add_selection_func(self.regexp_get_sf(arg, 1))
+ else: assert 0, "Bad option %s" % opt
+
+ # Exclude rdiff-backup-data directory
+ self.add_selection_func(
+ self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
+
+ def add_selection_func(self, sel_func, add_to_start = None):
+ """Add another selection function at the end or beginning"""
+ if add_to_start: self.selection_functions.insert(0, sel_func)
+ else: self.selection_functions.append(sel_func)
- def filelist_add_sf(self, filelist_fp, include, filelist_name):
- """Adds selection function by reading list of files
+ def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
+ """Return selection function by reading list of files
The format of the filelist is documented in the man page.
filelist_fp should be an (open) file object.
- include should be true if this is an include list, false for
- an exclude list.
+ inc_default should be true if this is an include list,
+ false for an exclude list.
filelist_name is just a string used for logging.
"""
Log("Reading filelist %s" % filelist_name, 4)
tuple_list, something_excluded = \
- self.filelist_read(filelist_fp, include, filelist_name)
+ self.filelist_read(filelist_fp, inc_default, filelist_name)
Log("Sorting filelist %s" % filelist_name, 4)
tuple_list.sort()
- current_index = 0
+ i = [0] # We have to put index in list because of stupid scoping rules
+
def selection_function(dsrp):
-
-
+ if i[0] > len(tuple_list): return inc_default
+ while 1:
+ include, move_on = \
+ self.filelist_pair_match(dsrp, tuple_list[i[0]])
+ if move_on:
+ i[0] += 1
+ if include is None: continue # later line may match
+ return include
+
+ selection_function.exclude = something_excluded
+ selection_function.name = "Filelist: " + filelist_name
+ return selection_function
+
def filelist_read(self, filelist_fp, include, filelist_name):
"""Read filelist from fp, return (tuplelist, something_excluded)"""
something_excluded, tuple_list = None, []
prefix_warnings = 0
- while 1:
- line = filelist_fp.readline()
- if not line: break
+ for line in filelist_fp:
+ if not line.strip(): continue # skip blanks
try: tuple = self.filelist_parse_line(line, include)
except FilePrefixError, exp:
prefix_warnings += 1
if prefix_warnings < 6:
Log("Warning: file specification %s in filelist %s\n"
"doesn't start with correct prefix %s, ignoring." %
- (exp[0], filelist_name, exp[1]), 2)
+ (exp, filelist_name, self.prefix), 2)
if prefix_warnings == 5:
Log("Future prefix errors will not be logged.", 2)
tuple_list.append(tuple)
if not tuple[1]: something_excluded = 1
+ if filelist_fp.close():
+ Log("Error closing filelist %s" % filelist_name, 2)
return (tuple_list, something_excluded)
- def filelist_parse_line(self, line, include, prefix):
+ def filelist_parse_line(self, line, include):
"""Parse a single line of a filelist, returning a pair
pair will be of form (index, include), where index is another
@@ -160,17 +220,110 @@ class Select:
include = 0
line = line[2:]
- if not line.startswith(prefix+"/"):
- raise FilePrefixError(line, prefix+"/")
- index = filter(lambda x: x, line.split("/")) # remove empties
+ if not line.startswith(self.prefix): raise FilePrefixError(line)
+ line = line[len(self.prefix):] # Discard prefix
+ index = tuple(filter(lambda x: x, line.split("/"))) # remove empties
return (index, include)
def filelist_pair_match(self, dsrp, pair):
- """Return 0/1 if pair excludes/includes dsrp, None if doesn't match"""
+ """Matches a filelist tuple against a dsrp
+
+ Returns a pair (include, move_on, definitive). include is
+ None if the tuple doesn't match either way, and 0/1 if the
+ tuple excludes or includes the dsrp.
+
+ move_on is true if the tuple cannot match a later index, and
+ so we should move on to the next tuple in the index.
+
+ """
index, include = pair
+ if include == 1:
+ if index < dsrp.index: return (None, 1)
+ if index == dsrp.index: return (1, 1)
+ elif index[:len(dsrp.index)] == dsrp.index:
+ return (1, None) # /foo/bar implicitly includes /foo
+ else: return (None, None) # dsrp greater, not initial sequence
+ elif include == 0:
+ if dsrp.index[:len(index)] == index:
+ return (0, None) # /foo implicitly excludes /foo/bar
+ elif index < dsrp.index: return (None, 1)
+ else: return (None, None) # dsrp greater, not initial sequence
+ else: assert 0, "Include is %s, should be 0 or 1" % (include,)
+
+ def regexp_get_sf(self, regexp_string, include):
+ """Return selection function given by regexp_string"""
+ assert include == 0 or include == 1
+ try: regexp = re.compile(regexp_string)
+ except:
+ Log("Error compiling regular expression %s" % regexp_string, 1)
+ raise
+
+ def sel_func(dsrp):
+ match = regexp.match(dsrp.path)
+ if match and match.end(0) == len(dsrp.path): return include
+ else: return None
+
+ sel_func.exclude = not include
+ sel_func.name = "Regular expression: %s" % regexp_string
+ return sel_func
+
+ def devfiles_get_sf(self):
+ """Return a selection function to exclude all dev files"""
+ if self.selection_functions:
+ Log("Warning: exclude-device-files is not the first "
+ "selector.\nThis may not be what you intended", 3)
+ def sel_func(dsrp):
+ if dsrp.isdev(): return 0
+ else: return None
+ sel_func.exclude = 1
+ sel_func.name = "Exclude device files"
+ return sel_func
+
+ def glob_get_sf(self, glob_str, include):
+ """Return selection function given by glob string"""
assert include == 0 or include == 1
- if not include and dsrp.index[:len(index)] == index:
- return 0 # /foo matches /foo/bar/baz
- elif include and index[:len(dsrp.index)] == dsrp.index:
- return 1 # /foo/bar implicitly matches /foo for includes only
- else: return None
+ if glob_str == "**": sel_func = lambda dsrp: include
+ elif not self.glob_re.match(glob_str): # normal file
+ return self.glob_get_filename_sf(glob_str, include)
+ else: pass ####XXXXXXXXXXXXX
+
+ sel_func.exclude = not include
+ sel_func.name = "Command-line glob: %s" % glob_str
+ return sel_func
+
+ def glob_get_filename_sf(self, filename, include):
+ """Get a selection function given a normal filename
+
+ Some of the parsing is better explained in
+ filelist_parse_line. The reason this is split from normal
+ globbing is so we can check the prefix and give proper
+ warning.
+
+ """
+ if not filename.startswith(self.prefix):
+ Log("Warning: file specification %s does not start with\n"
+ "prefix %s, ignoring" % (filename, self.prefix), 2)
+ return lambda x: None # dummy selection function
+ index = tuple(filter(lambda x: x,
+ filename[len(self.prefix):].split("/")))
+ return self.glob_get_tuple_sf(index, include)
+
+ def glob_get_tuple_sf(self, tuple, include):
+ """Add selection function based on tuple"""
+ def include_sel_func(dsrp):
+ if (dsrp.index == tuple[:len(dsrp.index)] or
+ dsrp.index[:len(tuple)] == tuple):
+ return 1 # /foo/bar implicitly matches /foo, vice-versa
+ else: return None
+
+ def exclude_sel_func(dsrp):
+ if dsrp.index[:len(tuple)] == tuple:
+ return 0 # /foo excludes /foo/bar, not vice-versa
+ else: return None
+
+ if include == 1: sel_func = include_sel_func
+ elif include == 0: sel_func = exclude_sel_func
+ sel_func.exclude = not include
+ sel_func.name = "Tuple select %s" % (tuple,)
+ return sel_func
+