summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-04-06 18:01:51 +0000
committerben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-04-06 18:01:51 +0000
commit2e590bf491d41788edb240b792a8bed5131989ff (patch)
treebababc374dea9ffc1ed62bb2ed4d9bd98fbf1da7
parent22b413aa454b3a6375c5546bd0480853df164f2e (diff)
downloadrdiff-backup-2e590bf491d41788edb240b792a8bed5131989ff.tar.gz
Added selection.py, for better include/exclude options
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@34 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r--rdiff-backup/rdiff_backup/increment.py2
-rw-r--r--rdiff-backup/rdiff_backup/selection.py176
-rwxr-xr-xrdiff-backup/src/Make5
-rw-r--r--rdiff-backup/src/increment.py2
-rw-r--r--rdiff-backup/src/selection.py176
5 files changed, 357 insertions, 4 deletions
diff --git a/rdiff-backup/rdiff_backup/increment.py b/rdiff-backup/rdiff_backup/increment.py
index 1bbdd39..2bef1ff 100644
--- a/rdiff-backup/rdiff_backup/increment.py
+++ b/rdiff-backup/rdiff_backup/increment.py
@@ -1,4 +1,4 @@
-execfile("destructive_stepping.py")
+execfile("selection.py")
#######################################################################
#
diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py
new file mode 100644
index 0000000..75d55ce
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/selection.py
@@ -0,0 +1,176 @@
+execfile("destructive_stepping.py")
+
+#######################################################################
+#
+# selection - Provides the iterator-like DSRPIterator class
+#
+# Parses includes and excludes to yield correct files. More
+# documentation on what this code does can be found on the man page.
+#
+
+class FilePrefixError(Exception):
+ """Signals that a specified file doesn't start with correct prefix"""
+ pass
+
+
+class Select:
+ """Iterate appropriate DSRPaths in given directory
+
+ This class acts as an iterator on account of its next() method.
+ Basically, it just goes through all the files in a directory in
+ order (depth-first) and subjects each file to a bunch of tests
+ (selection functions) in order. The first test that includes or
+ excludes the file means that the file gets included (iterated) or
+ excluded. The default is include, so with no tests we would just
+ iterate all the files in the directory in order.
+
+ The one complication to this is that sometimes we don't know
+ whether or not to include a directory until we examine its
+ contents. For instance, if we want to include all the **.py
+ files. If /home/ben/foo.py exists, we should also include /home
+ and /home/ben, but if these directories contain no **.py files,
+ they shouldn't be included. For this reason, a test may not
+ include or exclude a directory, but merely "scan" it. If later a
+ file in the directory gets included, so does the directory.
+
+ As mentioned above, each test takes the form of a selection
+ function. The selection function takes a dsrp, and returns:
+
+ None - means the test has nothing to say about the related file
+ 0 - the file is excluded by the test
+ 1 - the file is included
+ 2 - the test says the file (must be directory) should be scanned
+
+ Also, a selection function f has a variable f.exclude which should
+ be true iff f could potentially exclude some file. This is used
+ to signal an error if the last function only includes, which would
+ be redundant and presumably isn't what the user intends.
+
+ """
+ def __init__(self, dsrpath, starting_index = None):
+ """DSRPIterator initializer. dsrpath should be the root dir"""
+ self.selection_functions = []
+ if starting_index:
+ self.iter = self.iterate_starting_from(dsrpath, starting_index,
+ self.iterate_starting_from)
+ else: self.iter = self.Iterate(dsrpath, self.Iterate)
+ self.next = self.iter.next
+
+ def Iterate(self, dsrpath, rec_func):
+ """Return iterator yielding dsrps in dsrpath
+
+ rec_func is usually the same as this function and is what
+ Iterate uses to find files in subdirectories. It is used in
+ iterate_starting_from.
+
+ """
+ s = self.Select(dsrpath)
+ if s == 1: # File is included
+ yield dsrpath
+ if dsrpath.isdir():
+ for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp
+ elif s == 2 and dsrpath.isdir(): # Directory is merely scanned
+ iid = self.iterate_in_dir(dsrpath, rec_func)
+ try: first = iid.next()
+ except StopIteration: return # no files inside; skip dsrp
+ yield dsrpath
+ yield first
+ for dsrp in iid: yield dsrp
+
+ def iterate_in_dir(self, dsrpath, rec_func):
+ """Iterate the dsrps in directory dsrpath."""
+ dir_listing = dsrpath.listdir()
+ dir_listing.sort()
+ for filename in dir_listing:
+ for dsrp in rec_func(dsrpath.append(filename)): yield dsrp
+
+ def iterate_starting_from(self, dsrpath):
+ """Like Iterate, but only yield indicies > self.starting_index"""
+ if dsrpath.index > self.starting_index: # past starting_index
+ for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
+ elif dsrpath.index = self.starting_index[:len(dsrpath.index)]:
+ # May encounter starting index on this branch
+ for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
+ yield dsrp
+
+ def Select(self, dsrp):
+ """Run through the selection functions and return dominant value"""
+ for sf in self.selection_functions:
+ result = sf(dsrp)
+ if result is not None: return result
+ return 1
+
+ def add_selection_func(self, sel_func):
+ """Add another selection function at the end"""
+ self.selection_functions.append(sel_func)
+
+ def filelist_add_sf(self, filelist_fp, include, filelist_name):
+ """Adds selection function by reading list of files
+
+ The format of the filelist is documented in the man page.
+ filelist_fp should be an (open) file object.
+ include should be true if this is an include list, false for
+ an exclude list.
+ filelist_name is just a string used for logging.
+
+ """
+ Log("Reading filelist %s" % filelist_name, 4)
+ tuple_list, something_excluded = \
+ self.filelist_read(filelist_fp, include, filelist_name)
+ Log("Sorting filelist %s" % filelist_name, 4)
+ tuple_list.sort()
+ current_index = 0
+ def selection_function(dsrp):
+
+
+ def filelist_read(self, filelist_fp, include, filelist_name):
+ """Read filelist from fp, return (tuplelist, something_excluded)"""
+ something_excluded, tuple_list = None, []
+ prefix_warnings = 0
+ while 1:
+ line = filelist_fp.readline()
+ if not line: break
+ try: tuple = self.filelist_parse_line(line, include)
+ except FilePrefixError, exp:
+ prefix_warnings += 1
+ if prefix_warnings < 6:
+ Log("Warning: file specification %s in filelist %s\n"
+ "doesn't start with correct prefix %s, ignoring." %
+ (exp[0], filelist_name, exp[1]), 2)
+ if prefix_warnings == 5:
+ Log("Future prefix errors will not be logged.", 2)
+ tuple_list.append(tuple)
+ if not tuple[1]: something_excluded = 1
+ return (tuple_list, something_excluded)
+
+ def filelist_parse_line(self, line, include, prefix):
+ """Parse a single line of a filelist, returning a pair
+
+ pair will be of form (index, include), where index is another
+ tuple, and include is 1 if the line specifies that we are
+ including a file. The default is given as an argument.
+ prefix is the string that the index is relative to.
+
+ """
+ line = line.strip()
+ if line[:2] == "+ ": # Check for "+ "/"- " syntax
+ include = 1
+ line = line[2:]
+ elif line[:2] == "- ":
+ include = 0
+ line = line[2:]
+
+ if not line.startswith(prefix+"/"):
+ raise FilePrefixError(line, prefix+"/")
+ index = filter(lambda x: x, line.split("/")) # remove empties
+ return (index, include)
+
+ def filelist_pair_match(self, dsrp, pair):
+ """Return 0/1 if pair excludes/includes dsrp, None if doesn't match"""
+ index, include = pair
+ assert include == 0 or include == 1
+ if not include and dsrp.index[:len(index)] == index:
+ return 0 # /foo matches /foo/bar/baz
+ elif include and index[:len(dsrp.index)] == dsrp.index:
+ return 1 # /foo/bar implicitly matches /foo for includes only
+ else: return None
diff --git a/rdiff-backup/src/Make b/rdiff-backup/src/Make
index cc7c69f..6e66656 100755
--- a/rdiff-backup/src/Make
+++ b/rdiff-backup/src/Make
@@ -23,8 +23,9 @@ def mystrip(filename):
files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py",
"iterfile.py", "rdiff.py", "connection.py", "rpath.py",
"hardlink.py", "robust.py", "rorpiter.py",
- "destructive_stepping.py", "increment.py", "restore.py",
- "manage.py", "highlevel.py", "setconnections.py", "main.py"]
+ "destructive_stepping.py", "selection.py", "increment.py",
+ "restore.py", "manage.py", "highlevel.py",
+ "setconnections.py", "main.py"]
os.system("cp header.py rdiff-backup")
diff --git a/rdiff-backup/src/increment.py b/rdiff-backup/src/increment.py
index 1bbdd39..2bef1ff 100644
--- a/rdiff-backup/src/increment.py
+++ b/rdiff-backup/src/increment.py
@@ -1,4 +1,4 @@
-execfile("destructive_stepping.py")
+execfile("selection.py")
#######################################################################
#
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
new file mode 100644
index 0000000..75d55ce
--- /dev/null
+++ b/rdiff-backup/src/selection.py
@@ -0,0 +1,176 @@
+execfile("destructive_stepping.py")
+
+#######################################################################
+#
+# selection - Provides the iterator-like DSRPIterator class
+#
+# Parses includes and excludes to yield correct files. More
+# documentation on what this code does can be found on the man page.
+#
+
+class FilePrefixError(Exception):
+ """Signals that a specified file doesn't start with correct prefix"""
+ pass
+
+
+class Select:
+ """Iterate appropriate DSRPaths in given directory
+
+ This class acts as an iterator on account of its next() method.
+ Basically, it just goes through all the files in a directory in
+ order (depth-first) and subjects each file to a bunch of tests
+ (selection functions) in order. The first test that includes or
+ excludes the file means that the file gets included (iterated) or
+ excluded. The default is include, so with no tests we would just
+ iterate all the files in the directory in order.
+
+ The one complication to this is that sometimes we don't know
+ whether or not to include a directory until we examine its
+ contents. For instance, if we want to include all the **.py
+ files. If /home/ben/foo.py exists, we should also include /home
+ and /home/ben, but if these directories contain no **.py files,
+ they shouldn't be included. For this reason, a test may not
+ include or exclude a directory, but merely "scan" it. If later a
+ file in the directory gets included, so does the directory.
+
+ As mentioned above, each test takes the form of a selection
+ function. The selection function takes a dsrp, and returns:
+
+ None - means the test has nothing to say about the related file
+ 0 - the file is excluded by the test
+ 1 - the file is included
+ 2 - the test says the file (must be directory) should be scanned
+
+ Also, a selection function f has a variable f.exclude which should
+ be true iff f could potentially exclude some file. This is used
+ to signal an error if the last function only includes, which would
+ be redundant and presumably isn't what the user intends.
+
+ """
+ def __init__(self, dsrpath, starting_index = None):
+ """DSRPIterator initializer. dsrpath should be the root dir"""
+ self.selection_functions = []
+ if starting_index:
+ self.iter = self.iterate_starting_from(dsrpath, starting_index,
+ self.iterate_starting_from)
+ else: self.iter = self.Iterate(dsrpath, self.Iterate)
+ self.next = self.iter.next
+
+ def Iterate(self, dsrpath, rec_func):
+ """Return iterator yielding dsrps in dsrpath
+
+ rec_func is usually the same as this function and is what
+ Iterate uses to find files in subdirectories. It is used in
+ iterate_starting_from.
+
+ """
+ s = self.Select(dsrpath)
+ if s == 1: # File is included
+ yield dsrpath
+ if dsrpath.isdir():
+ for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp
+ elif s == 2 and dsrpath.isdir(): # Directory is merely scanned
+ iid = self.iterate_in_dir(dsrpath, rec_func)
+ try: first = iid.next()
+ except StopIteration: return # no files inside; skip dsrp
+ yield dsrpath
+ yield first
+ for dsrp in iid: yield dsrp
+
+ def iterate_in_dir(self, dsrpath, rec_func):
+ """Iterate the dsrps in directory dsrpath."""
+ dir_listing = dsrpath.listdir()
+ dir_listing.sort()
+ for filename in dir_listing:
+ for dsrp in rec_func(dsrpath.append(filename)): yield dsrp
+
+ def iterate_starting_from(self, dsrpath):
+ """Like Iterate, but only yield indicies > self.starting_index"""
+ if dsrpath.index > self.starting_index: # past starting_index
+ for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
+ elif dsrpath.index = self.starting_index[:len(dsrpath.index)]:
+ # May encounter starting index on this branch
+ for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
+ yield dsrp
+
+ def Select(self, dsrp):
+ """Run through the selection functions and return dominant value"""
+ for sf in self.selection_functions:
+ result = sf(dsrp)
+ if result is not None: return result
+ return 1
+
+ def add_selection_func(self, sel_func):
+ """Add another selection function at the end"""
+ self.selection_functions.append(sel_func)
+
+ def filelist_add_sf(self, filelist_fp, include, filelist_name):
+ """Adds selection function by reading list of files
+
+ The format of the filelist is documented in the man page.
+ filelist_fp should be an (open) file object.
+ include should be true if this is an include list, false for
+ an exclude list.
+ filelist_name is just a string used for logging.
+
+ """
+ Log("Reading filelist %s" % filelist_name, 4)
+ tuple_list, something_excluded = \
+ self.filelist_read(filelist_fp, include, filelist_name)
+ Log("Sorting filelist %s" % filelist_name, 4)
+ tuple_list.sort()
+ current_index = 0
+ def selection_function(dsrp):
+
+
+ def filelist_read(self, filelist_fp, include, filelist_name):
+ """Read filelist from fp, return (tuplelist, something_excluded)"""
+ something_excluded, tuple_list = None, []
+ prefix_warnings = 0
+ while 1:
+ line = filelist_fp.readline()
+ if not line: break
+ try: tuple = self.filelist_parse_line(line, include)
+ except FilePrefixError, exp:
+ prefix_warnings += 1
+ if prefix_warnings < 6:
+ Log("Warning: file specification %s in filelist %s\n"
+ "doesn't start with correct prefix %s, ignoring." %
+ (exp[0], filelist_name, exp[1]), 2)
+ if prefix_warnings == 5:
+ Log("Future prefix errors will not be logged.", 2)
+ tuple_list.append(tuple)
+ if not tuple[1]: something_excluded = 1
+ return (tuple_list, something_excluded)
+
+ def filelist_parse_line(self, line, include, prefix):
+ """Parse a single line of a filelist, returning a pair
+
+ pair will be of form (index, include), where index is another
+ tuple, and include is 1 if the line specifies that we are
+ including a file. The default is given as an argument.
+ prefix is the string that the index is relative to.
+
+ """
+ line = line.strip()
+ if line[:2] == "+ ": # Check for "+ "/"- " syntax
+ include = 1
+ line = line[2:]
+ elif line[:2] == "- ":
+ include = 0
+ line = line[2:]
+
+ if not line.startswith(prefix+"/"):
+ raise FilePrefixError(line, prefix+"/")
+ index = filter(lambda x: x, line.split("/")) # remove empties
+ return (index, include)
+
+ def filelist_pair_match(self, dsrp, pair):
+ """Return 0/1 if pair excludes/includes dsrp, None if doesn't match"""
+ index, include = pair
+ assert include == 0 or include == 1
+ if not include and dsrp.index[:len(index)] == index:
+ return 0 # /foo matches /foo/bar/baz
+ elif include and index[:len(dsrp.index)] == dsrp.index:
+ return 1 # /foo/bar implicitly matches /foo for includes only
+ else: return None