summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-04-12 04:18:16 +0000
committer ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-04-12 04:18:16 +0000
commit ae666de6c68c36daa946946303196f8be96e9cfe (patch)
tree aa81b329fbeddcd3ef984b577f0855b92c2d6ae6
parent 86d0ad7033cd0a0227c1aa7777270b95bf7c5743 (diff)
download rdiff-backup-ae666de6c68c36daa946946303196f8be96e9cfe.tar.gz
Added new Select code
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@43 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r-- rdiff-backup/rdiff_backup/destructive_stepping.py 1
-rw-r--r-- rdiff-backup/rdiff_backup/header.py 2
-rw-r--r-- rdiff-backup/rdiff_backup/highlevel.py 33
-rw-r--r-- rdiff-backup/rdiff_backup/rpath.py 9
-rw-r--r-- rdiff-backup/rdiff_backup/selection.py 274
-rw-r--r-- rdiff-backup/src/destructive_stepping.py 1
-rw-r--r-- rdiff-backup/src/globals.py 8
-rw-r--r-- rdiff-backup/src/header.py 2
-rw-r--r-- rdiff-backup/src/highlevel.py 33
-rwxr-xr-x rdiff-backup/src/main.py 8
-rw-r--r-- rdiff-backup/src/rpath.py 9
-rw-r--r-- rdiff-backup/src/selection.py 274
12 files changed, 496 insertions, 158 deletions
diff --git a/rdiff-backup/rdiff_backup/destructive_stepping.py b/rdiff-backup/rdiff_backup/destructive_stepping.py
index 80d274e..c4b1191 100644
--- a/rdiff-backup/rdiff_backup/destructive_stepping.py
+++ b/rdiff-backup/rdiff_backup/destructive_stepping.py
@@ -187,6 +187,7 @@ class DestructiveStepping:
destination file.
"""
+ return None # this code is for the test suites only, use Select instead
if Globals.exclude_device_files and dsrp.isdev(): return 1
if source: exclude_regexps = Globals.exclude_regexps
diff --git a/rdiff-backup/rdiff_backup/header.py b/rdiff-backup/rdiff_backup/header.py
index 8d54052..81cbaff 100644
--- a/rdiff-backup/rdiff_backup/header.py
+++ b/rdiff-backup/rdiff_backup/header.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
#
# rdiff-backup -- Mirror files while keeping incremental changes
-# Version 0.7.2 released April 30, 2002
+# Version 0.7.2 released April 11, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
#
# This program is licensed under the GNU General Public License (GPL).
diff --git a/rdiff-backup/rdiff_backup/highlevel.py b/rdiff-backup/rdiff_backup/highlevel.py
index 2b366f2..54029c7 100644
--- a/rdiff-backup/rdiff_backup/highlevel.py
+++ b/rdiff-backup/rdiff_backup/highlevel.py
@@ -37,7 +37,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
- src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+ src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint)
@@ -53,7 +53,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
if not session_info: dest_rpath.conn.SaveState.touch_last_file()
- src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+ src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath)
@@ -85,16 +85,15 @@ class HLSourceStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
- def iterate_from(cls, rpath):
+ def iterate_from(cls):
"""Supply more aruments to DestructiveStepping.Iterate_from"""
- if cls._session_info:
- return DestructiveStepping.Iterate_from(rpath, 1,
- cls._session_info.last_index)
- else: return DestructiveStepping.Iterate_from(rpath, 1)
+ if cls._session_info is None: Globals.select_source.set_iter()
+ else: Globals.select_source.set_iter(cls._session_info.last_index)
+ return Globals.select_source
- def split_initial_dsiter(cls, rpath):
+ def split_initial_dsiter(cls):
"""Set iterators of all dsrps from rpath, returning one"""
- dsiter = cls.iterate_from(rpath)
+ dsiter = cls.iterate_from()
initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
return initial_dsiter1
@@ -131,17 +130,15 @@ class HLDestinationStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
- def iterate_from(cls, rpath):
+ def iterate_from(cls):
"""Supply more arguments to DestructiveStepping.Iterate_from"""
- if cls._session_info:
- return DestructiveStepping.Iterate_from(rpath, None,
- cls._session_info.last_index)
- else: return DestructiveStepping.Iterate_from(rpath, None)
+ if cls._session_info is None: Globals.select_mirror.set_iter()
+ else: Globals.select_mirror.set_iter(cls._session_info.last_index)
+ return Globals.select_mirror
- def split_initial_dsiter(cls, rpath):
+ def split_initial_dsiter(cls):
"""Set initial_dsiters (iteration of all dsrps from rpath)"""
- dsiter = cls.iterate_from(rpath)
- result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
+ result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2)
return result
def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter):
@@ -197,7 +194,7 @@ class HLDestinationStruct:
def get_sigs(cls, baserp, src_init_iter):
"""Return signatures of all dissimilar files"""
- dest_iters1 = cls.split_initial_dsiter(baserp)
+ dest_iters1 = cls.split_initial_dsiter()
dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1)
return RORPIter.Signatures(dissimilars)
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index c1f0a44..d862951 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -404,6 +404,9 @@ class RPath(RORPath):
"bar") for "foo/bar" (no base), and ("local", "bin") for
"/usr/local/bin" if the base is "/usr".
+ For the root directory "/", the index is empty and the base is
+ "/".
+
"""
self.conn = connection
self.index = index
@@ -487,9 +490,6 @@ class RPath(RORPath):
def _getdevnums(self):
"""Return tuple for special file (major, minor)"""
- if Globals.exclude_device_files:
- # No point in finding numbers because it will be excluded anyway
- return ()
s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path)
return (s >> 8, s & 0xff)
@@ -596,10 +596,9 @@ class RPath(RORPath):
def helper(dsrp, base_init_output, branch_reduction):
if dsrp.isdir(): dsrp.rmdir()
else: dsrp.delete()
- dsiter = DestructiveStepping.Iterate_from(self, None)
itm = IterTreeReducer(lambda x: None, lambda x,y: None, None,
helper)
- for dsrp in dsiter: itm(dsrp)
+ for dsrp in Select(self, None).set_iter(): itm(dsrp)
itm.getresult()
else: self.conn.os.unlink(self.path)
self.setdata()
diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py
index 9f41860..753cac2 100644
--- a/rdiff-backup/rdiff_backup/selection.py
+++ b/rdiff-backup/rdiff_backup/selection.py
@@ -10,10 +10,18 @@ import re
# documentation on what this code does can be found on the man page.
#
-class FilePrefixError(Exception):
+class SelectError(Exception):
+ """Some error dealing with the Select class"""
+ pass
+
+class FilePrefixError(SelectError):
"""Signals that a specified file doesn't start with correct prefix"""
pass
+class GlobbingError(SelectError):
+ """Something has gone wrong when parsing a glob string"""
+ pass
+
class Select:
"""Iterate appropriate DSRPaths in given directory
@@ -50,60 +58,97 @@ class Select:
"""
# This re should not match normal filenames, but usually just globs
- glob_re = re.compile(".*[\*\?\[]")
+ glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
+
+ def __init__(self, rpath, source):
+ """DSRPIterator initializer.
- def __init__(self, dsrpath):
- """DSRPIterator initializer"""
+ rpath is the root dir. Source is true if rpath is the root of
+ the source directory, and false for the mirror directory
+
+ """
+ assert isinstance(rpath, RPath)
self.selection_functions = []
- self.dsrpath = dsrpath
- self.prefix = dsrpath.path
+ self.source = source
+ if isinstance(rpath, DSRPath): self.dsrpath = rpath
+ else: self.dsrpath = DSRPath(rpath.conn, rpath.base,
+ rpath.index, rpath.data)
+ self.prefix = self.dsrpath.path
- def set_iter(self, starting_index = None):
- """Initialize more variables. dsrpath should be the root dir"""
+ def set_iter(self, starting_index = None, sel_func = None):
+ """Initialize more variables, get ready to iterate
+
+ Will iterate indicies greater than starting_index. Selection
+ function sel_func is called on each dsrp and is usually
+ self.Select. Returns self just for convenience.
+
+ """
+ if not sel_func: sel_func = self.Select
+ self.dsrpath.setdata() # this may have changed since Select init
if starting_index is not None:
+ self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
- starting_index, self.iterate_starting_from)
- else: self.iter = self.Iterate(self.dsrpath, self.Iterate)
+ self.iterate_starting_from, sel_func)
+ else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
self.next = self.iter.next
self.__iter__ = lambda: self
+ return self
- def Iterate(self, dsrpath, rec_func):
+ def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
rec_func is usually the same as this function and is what
Iterate uses to find files in subdirectories. It is used in
iterate_starting_from.
+ sel_func is the selection function to use on the dsrps. It is
+ usually self.Select.
+
"""
- s = self.Select(dsrpath)
+ s = sel_func(dsrpath)
+ if not s or DestructiveStepping.initialize(dsrpath, self.source):
+ return
if s == 1: # File is included
yield dsrpath
if dsrpath.isdir():
- for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp
+ for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func):
+ yield dsrp
elif s == 2 and dsrpath.isdir(): # Directory is merely scanned
- iid = self.iterate_in_dir(dsrpath, rec_func)
+ iid = self.iterate_in_dir(dsrpath, rec_func, sel_func)
try: first = iid.next()
except StopIteration: return # no files inside; skip dsrp
yield dsrpath
yield first
for dsrp in iid: yield dsrp
- def iterate_in_dir(self, dsrpath, rec_func):
+ def iterate_in_dir(self, dsrpath, rec_func, sel_func):
"""Iterate the dsrps in directory dsrpath."""
dir_listing = dsrpath.listdir()
dir_listing.sort()
for filename in dir_listing:
- for dsrp in rec_func(dsrpath.append(filename)): yield dsrp
+ for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func):
+ yield dsrp
- def iterate_starting_from(self, dsrpath):
+ def iterate_starting_from(self, dsrpath, rec_func, sel_func):
"""Like Iterate, but only yield indicies > self.starting_index"""
+ if DestructiveStepping.initialize(dsrpath, self.source): return
if dsrpath.index > self.starting_index: # past starting_index
- for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
+ for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func):
+ yield dsrp
elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch
- for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
- yield dsrp
-
+ for dsrp in self.iterate_in_dir(dsrpath,
+ self.iterate_starting_from,
+ sel_func): yield dsrp
+
+ def iterate_with_finalizer(self):
+ """Like Iterate, but missing some options, and add finalizer"""
+ finalize = DestructiveStepping.Finalizer()
+ for dsrp in self:
+ yield dsrp
+ finalize(dsrp)
+ finalize.getresult()
+
def Select(self, dsrp):
"""Run through the selection functions and return dominant value"""
for sf in self.selection_functions:
@@ -123,29 +168,68 @@ class Select:
information is sent over the link.
"""
- for opt, arg in argtuples:
- if opt == "--exclude":
- self.add_selection_func(self.glob_get_sf(arg, 0))
- elif opt == "--exclude-device-files":
- self.add_selection_func(self.devfiles_get_sf())
- elif opt == "--exclude-filelist":
- self.add_selection_func(self.filelist_get_sf(arg[1],
- 0, arg[0]))
- elif opt == "--exclude-regexp":
- self.add_selection_func(self.regexp_get_sf(arg, 0))
- elif opt == "--include":
- self.add_selection_func(self.glob_get_sf(arg, 1))
- elif opt == "--include-filelist":
- self.add_selection_func(self.filelist_get_sf(arg[1],
- 1, arg[0]))
- elif opt == "--include-regexp":
- self.add_selection_func(self.regexp_get_sf(arg, 1))
- else: assert 0, "Bad option %s" % opt
-
- # Exclude rdiff-backup-data directory
+ try:
+ for opt, arg in argtuples:
+ if opt == "--exclude":
+ self.add_selection_func(self.glob_get_sf(arg, 0))
+ elif opt == "--exclude-device-files":
+ self.add_selection_func(self.devfiles_get_sf())
+ elif opt == "--exclude-filelist":
+ self.add_selection_func(self.filelist_get_sf(arg[1],
+ 0, arg[0]))
+ elif opt == "--exclude-regexp":
+ self.add_selection_func(self.regexp_get_sf(arg, 0))
+ elif opt == "--include":
+ self.add_selection_func(self.glob_get_sf(arg, 1))
+ elif opt == "--include-filelist":
+ self.add_selection_func(self.filelist_get_sf(arg[1],
+ 1, arg[0]))
+ elif opt == "--include-regexp":
+ self.add_selection_func(self.regexp_get_sf(arg, 1))
+ else: assert 0, "Bad option %s" % opt
+ except SelectError, e: self.parse_catch_error(e)
+
+ self.parse_last_excludes()
+ self.parse_rbdir_exclude()
+ self.parse_proc_exclude()
+
+ def parse_catch_error(self, exc):
+ """Deal with selection error exc"""
+ if isinstance(exc, FilePrefixError):
+ Log.FatalError(
+"""Fatal Error: The file specification
+ %s
+cannot match any files in the base directory
+ %s
+Useful file specifications begin with the base directory or some
+pattern (such as '**') which matches the base directory.""" %
+ (exc, self.prefix))
+ elif isinstance(e, GlobbingError):
+ Log.FatalError("Fatal Error while processing expression\n"
+ "%s" % exc)
+ else: raise
+
+ def parse_rbdir_exclude(self):
+ """Add exclusion of rdiff-backup-data dir to front of list"""
self.add_selection_func(
self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
+ def parse_proc_exclude(self):
+ """Exclude the /proc directory if starting from /"""
+ if self.prefix == "/":
+ self.add_selection_func(self.glob_get_tuple_sf(("proc",), 0), 1)
+
+ def parse_last_excludes(self):
+ """Exit with error if last selection function isn't an exclude"""
+ if self.select_functions and not self.selection_functions[-1].exclude:
+ Log.FatalError(
+"""Last selection expression:
+ %s
+only specifies that files be included. Because the default is to
+include all files, the expression is redundant. Exiting because this
+probably isn't what you meant.""" %
+ (self.selection_functions[-1].name, self.prefix))
+
def add_selection_func(self, sel_func, add_to_start = None):
"""Add another selection function at the end or beginning"""
if add_to_start: self.selection_functions.insert(0, sel_func)
@@ -259,8 +343,7 @@ class Select:
raise
def sel_func(dsrp):
- match = regexp.match(dsrp.path)
- if match and match.end(0) == len(dsrp.path): return include
+ if regexp.search(dsrp.path): return include
else: return None
sel_func.exclude = not include
@@ -284,8 +367,8 @@ class Select:
assert include == 0 or include == 1
if glob_str == "**": sel_func = lambda dsrp: include
elif not self.glob_re.match(glob_str): # normal file
- return self.glob_get_filename_sf(glob_str, include)
- else: pass ####XXXXXXXXXXXXX
+ sel_func = self.glob_get_filename_sf(glob_str, include)
+ else: sel_func = self.glob_get_normal_sf(glob_str, include)
sel_func.exclude = not include
sel_func.name = "Command-line glob: %s" % glob_str
@@ -296,20 +379,18 @@ class Select:
Some of the parsing is better explained in
filelist_parse_line. The reason this is split from normal
- globbing is so we can check the prefix and give proper
- warning.
+ globbing is things are a lot less complicated if no special
+ globbing characters are used.
"""
if not filename.startswith(self.prefix):
- Log("Warning: file specification %s does not start with\n"
- "prefix %s, ignoring" % (filename, self.prefix), 2)
- return lambda x: None # dummy selection function
+ raise FilePrefixError(filename)
index = tuple(filter(lambda x: x,
filename[len(self.prefix):].split("/")))
return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
- """Add selection function based on tuple"""
+ """Return selection function based on tuple"""
def include_sel_func(dsrp):
if (dsrp.index == tuple[:len(dsrp.index)] or
dsrp.index[:len(tuple)] == tuple):
@@ -327,3 +408,94 @@ class Select:
sel_func.name = "Tuple select %s" % (tuple,)
return sel_func
+ def glob_get_normal_sf(self, glob_str, include):
+ """Return selection function based on glob_str
+
+ The basic idea is to turn glob_str into a regular expression,
+ and just use the normal regular expression. There is a
+ complication because the selection function should return '2'
+ (scan) for directories which may contain a file which matches
+ the glob_str. So we break up the glob string into parts, and
+ any file which matches an initial sequence of glob parts gets
+ scanned.
+
+ Thanks to Donovan Baarda who provided some code which did some
+ things similar to this.
+
+ """
+ if glob_str.lower().startswith("ignorecase:"):
+ re_comp = lambda r: re.compile(r, re.I | re.S)
+ glob_str = glob_str[len("ignorecase:"):]
+ else: re_comp = lambda r: re.compile(r, re.S)
+
+ # matches what glob matches and any files in directory
+ glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str))
+
+ if glob_str.find("**") != -1:
+ glob_str = glob_str[:glob_str.find("**")+2] # truncate after **
+
+ scan_comp_re = re_comp("^(%s)$" %
+ "|".join(self.glob_get_prefix_res(glob_str)))
+
+ def include_sel_func(dsrp):
+ if glob_comp_re.match(dsrp.path): return 1
+ elif scan_comp_re.match(dsrp.path): return 2
+ else: return None
+
+ def exclude_sel_func(dsrp):
+ if glob_comp_re.match(dsrp.path): return 0
+ else: return None
+
+ # Check to make sure prefix is ok
+ if not include_sel_func(self.dsrpath): raise FilePrefixError(glob_str)
+
+ if include: return include_sel_func
+ else: return exclude_sel_func
+
+ def glob_get_prefix_res(self, glob_str):
+ """Return list of regexps equivalent to prefixes of glob_str"""
+ glob_parts = glob_str.split("/")
+ if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/
+ raise GlobbingError("Consecutive '/'s found in globbing string "
+ + glob_str)
+
+ prefixes = map(lambda i: "/".join(glob_parts[:i+1]),
+ range(len(glob_parts)))
+ # we must make exception for root "/", only dir to end in slash
+ if prefixes[0] == "": prefixes[0] = "/"
+ return map(self.glob_to_re, prefixes)
+
+ def glob_to_re(self, pat):
+ """Returned regular expression equivalent to shell glob pat
+
+ Currently only the ?, *, [], and ** expressions are supported.
+ Ranges like [a-z] are also currently unsupported. There is no
+ way to quote these special characters.
+
+ This function taken with minor modifications from efnmatch.py
+ by Donovan Baarda.
+
+ """
+ i, n, res = 0, len(pat), ''
+ while i < n:
+ c, s = pat[i], pat[i:i+2]
+ i = i+1
+ if s == '**':
+ res = res + '.*'
+ i = i + 1
+ elif c == '*': res = res + '[^/]*'
+ elif c == '?': res = res + '[^/]'
+ elif c == '[':
+ j = i
+ if j < n and pat[j] in '!^': j = j+1
+ if j < n and pat[j] == ']': j = j+1
+ while j < n and pat[j] != ']': j = j+1
+ if j >= n: res = res + '\\[' # interpret the [ literally
+ else: # Deal with inside of [..]
+ stuff = pat[i:j].replace('\\','\\\\')
+ i = j+1
+ if stuff[0] in '!^': stuff = '^' + stuff[1:]
+ res = res + '[' + stuff + ']'
+ else: res = res + re.escape(c)
+ return res
+
diff --git a/rdiff-backup/src/destructive_stepping.py b/rdiff-backup/src/destructive_stepping.py
index 80d274e..c4b1191 100644
--- a/rdiff-backup/src/destructive_stepping.py
+++ b/rdiff-backup/src/destructive_stepping.py
@@ -187,6 +187,7 @@ class DestructiveStepping:
destination file.
"""
+ return None # this code is for the test suites only, use Select instead
if Globals.exclude_device_files and dsrp.isdev(): return 1
if source: exclude_regexps = Globals.exclude_regexps
diff --git a/rdiff-backup/src/globals.py b/rdiff-backup/src/globals.py
index fceefa4..c47880c 100644
--- a/rdiff-backup/src/globals.py
+++ b/rdiff-backup/src/globals.py
@@ -123,8 +123,8 @@ class Globals:
# case-insensitive regular expression won't be compressed (applies
# to .snapshots and .diffs). The second below will be the
# compiled version of the first.
- no_compression_regexp_string = ".*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
- "jpg|gif|png|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
+ no_compression_regexp_string = "(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
+ "jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
no_compression_regexp = None
# On the reader and writer connections, the following will be
@@ -183,9 +183,9 @@ class Globals:
def set_select(cls, source, dsrpath, tuplelist):
"""Initialize select object using tuplelist"""
if source:
- cls.select_source = Select(dsrpath)
+ cls.select_source = Select(dsrpath, 1)
cls.select_source.ParseArgs(tuplelist)
else:
- cls.select_mirror = Select(dsrpath)
+ cls.select_mirror = Select(dsrpath, None)
cls.select_mirror.ParseArgs(tuplelist)
set_select = classmethod(set_select)
diff --git a/rdiff-backup/src/header.py b/rdiff-backup/src/header.py
index 8d54052..81cbaff 100644
--- a/rdiff-backup/src/header.py
+++ b/rdiff-backup/src/header.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
#
# rdiff-backup -- Mirror files while keeping incremental changes
-# Version 0.7.2 released April 30, 2002
+# Version 0.7.2 released April 11, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
#
# This program is licensed under the GNU General Public License (GPL).
diff --git a/rdiff-backup/src/highlevel.py b/rdiff-backup/src/highlevel.py
index 2b366f2..54029c7 100644
--- a/rdiff-backup/src/highlevel.py
+++ b/rdiff-backup/src/highlevel.py
@@ -37,7 +37,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
- src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+ src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint)
@@ -53,7 +53,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
if not session_info: dest_rpath.conn.SaveState.touch_last_file()
- src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+ src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath)
@@ -85,16 +85,15 @@ class HLSourceStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
- def iterate_from(cls, rpath):
+ def iterate_from(cls):
"""Supply more aruments to DestructiveStepping.Iterate_from"""
- if cls._session_info:
- return DestructiveStepping.Iterate_from(rpath, 1,
- cls._session_info.last_index)
- else: return DestructiveStepping.Iterate_from(rpath, 1)
+ if cls._session_info is None: Globals.select_source.set_iter()
+ else: Globals.select_source.set_iter(cls._session_info.last_index)
+ return Globals.select_source
- def split_initial_dsiter(cls, rpath):
+ def split_initial_dsiter(cls):
"""Set iterators of all dsrps from rpath, returning one"""
- dsiter = cls.iterate_from(rpath)
+ dsiter = cls.iterate_from()
initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
return initial_dsiter1
@@ -131,17 +130,15 @@ class HLDestinationStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
- def iterate_from(cls, rpath):
+ def iterate_from(cls):
"""Supply more arguments to DestructiveStepping.Iterate_from"""
- if cls._session_info:
- return DestructiveStepping.Iterate_from(rpath, None,
- cls._session_info.last_index)
- else: return DestructiveStepping.Iterate_from(rpath, None)
+ if cls._session_info is None: Globals.select_mirror.set_iter()
+ else: Globals.select_mirror.set_iter(cls._session_info.last_index)
+ return Globals.select_mirror
- def split_initial_dsiter(cls, rpath):
+ def split_initial_dsiter(cls):
"""Set initial_dsiters (iteration of all dsrps from rpath)"""
- dsiter = cls.iterate_from(rpath)
- result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
+ result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2)
return result
def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter):
@@ -197,7 +194,7 @@ class HLDestinationStruct:
def get_sigs(cls, baserp, src_init_iter):
"""Return signatures of all dissimilar files"""
- dest_iters1 = cls.split_initial_dsiter(baserp)
+ dest_iters1 = cls.split_initial_dsiter()
dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1)
return RORPIter.Signatures(dissimilars)
diff --git a/rdiff-backup/src/main.py b/rdiff-backup/src/main.py
index 9a0d901..c29dcdf 100755
--- a/rdiff-backup/src/main.py
+++ b/rdiff-backup/src/main.py
@@ -125,7 +125,7 @@ class Main:
sys.exit(1)
def misc_setup(self, rps):
- """Set default change ownership flag, umask, regular expressions"""
+ """Set default change ownership flag, umask, Select objects"""
if ((len(rps) == 2 and rps[1].conn.os.getuid() == 0) or
(len(rps) < 2 and os.getuid() == 0)):
# Allow change_ownership if destination connection is root
@@ -139,7 +139,7 @@ class Main:
rps[1].conn.Globals.set_select(None, rps[1],
self.select_mirror_opts)
Globals.postset_regexp('no_compression_regexp',
- Globals.no_compression_regexp_string, re.I)
+ Globals.no_compression_regexp_string)
def take_action(self, rps):
"""Do whatever self.action says"""
@@ -248,8 +248,8 @@ rdiff-backup with the --force option.""" % rpout.path)
(rpin.path == "." and rpout.path[0] != '/' and
rpout.path[:2] != '..')):
# Just a few heuristics, we don't have to get every case
- if not DestructiveStepping.isexcluded(rpout, 1):
- Log(
+ if Globals.backup_reader.Globals.select_source \
+ .Select(rpout): Log(
"""Warning: The destination directory '%s' may be contained in the
source directory '%s'. This could cause an infinite regress. You
may need to use the --exclude option.""" % (rpout.path, rpin.path), 2)
diff --git a/rdiff-backup/src/rpath.py b/rdiff-backup/src/rpath.py
index c1f0a44..d862951 100644
--- a/rdiff-backup/src/rpath.py
+++ b/rdiff-backup/src/rpath.py
@@ -404,6 +404,9 @@ class RPath(RORPath):
"bar") for "foo/bar" (no base), and ("local", "bin") for
"/usr/local/bin" if the base is "/usr".
+ For the root directory "/", the index is empty and the base is
+ "/".
+
"""
self.conn = connection
self.index = index
@@ -487,9 +490,6 @@ class RPath(RORPath):
def _getdevnums(self):
"""Return tuple for special file (major, minor)"""
- if Globals.exclude_device_files:
- # No point in finding numbers because it will be excluded anyway
- return ()
s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path)
return (s >> 8, s & 0xff)
@@ -596,10 +596,9 @@ class RPath(RORPath):
def helper(dsrp, base_init_output, branch_reduction):
if dsrp.isdir(): dsrp.rmdir()
else: dsrp.delete()
- dsiter = DestructiveStepping.Iterate_from(self, None)
itm = IterTreeReducer(lambda x: None, lambda x,y: None, None,
helper)
- for dsrp in dsiter: itm(dsrp)
+ for dsrp in Select(self, None).set_iter(): itm(dsrp)
itm.getresult()
else: self.conn.os.unlink(self.path)
self.setdata()
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
index 9f41860..753cac2 100644
--- a/rdiff-backup/src/selection.py
+++ b/rdiff-backup/src/selection.py
@@ -10,10 +10,18 @@ import re
# documentation on what this code does can be found on the man page.
#
-class FilePrefixError(Exception):
+class SelectError(Exception):
+ """Some error dealing with the Select class"""
+ pass
+
+class FilePrefixError(SelectError):
"""Signals that a specified file doesn't start with correct prefix"""
pass
+class GlobbingError(SelectError):
+ """Something has gone wrong when parsing a glob string"""
+ pass
+
class Select:
"""Iterate appropriate DSRPaths in given directory
@@ -50,60 +58,97 @@ class Select:
"""
# This re should not match normal filenames, but usually just globs
- glob_re = re.compile(".*[\*\?\[]")
+ glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
+
+ def __init__(self, rpath, source):
+ """DSRPIterator initializer.
- def __init__(self, dsrpath):
- """DSRPIterator initializer"""
+ rpath is the root dir. Source is true if rpath is the root of
+ the source directory, and false for the mirror directory
+
+ """
+ assert isinstance(rpath, RPath)
self.selection_functions = []
- self.dsrpath = dsrpath
- self.prefix = dsrpath.path
+ self.source = source
+ if isinstance(rpath, DSRPath): self.dsrpath = rpath
+ else: self.dsrpath = DSRPath(rpath.conn, rpath.base,
+ rpath.index, rpath.data)
+ self.prefix = self.dsrpath.path
- def set_iter(self, starting_index = None):
- """Initialize more variables. dsrpath should be the root dir"""
+ def set_iter(self, starting_index = None, sel_func = None):
+ """Initialize more variables, get ready to iterate
+
+ Will iterate indicies greater than starting_index. Selection
+ function sel_func is called on each dsrp and is usually
+ self.Select. Returns self just for convenience.
+
+ """
+ if not sel_func: sel_func = self.Select
+ self.dsrpath.setdata() # this may have changed since Select init
if starting_index is not None:
+ self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
- starting_index, self.iterate_starting_from)
- else: self.iter = self.Iterate(self.dsrpath, self.Iterate)
+ self.iterate_starting_from, sel_func)
+ else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
self.next = self.iter.next
self.__iter__ = lambda: self
+ return self
- def Iterate(self, dsrpath, rec_func):
+ def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
rec_func is usually the same as this function and is what
Iterate uses to find files in subdirectories. It is used in
iterate_starting_from.
+ sel_func is the selection function to use on the dsrps. It is
+ usually self.Select.
+
"""
- s = self.Select(dsrpath)
+ s = sel_func(dsrpath)
+ if not s or DestructiveStepping.initialize(dsrpath, self.source):
+ return
if s == 1: # File is included
yield dsrpath
if dsrpath.isdir():
- for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp
+ for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func):
+ yield dsrp
elif s == 2 and dsrpath.isdir(): # Directory is merely scanned
- iid = self.iterate_in_dir(dsrpath, rec_func)
+ iid = self.iterate_in_dir(dsrpath, rec_func, sel_func)
try: first = iid.next()
except StopIteration: return # no files inside; skip dsrp
yield dsrpath
yield first
for dsrp in iid: yield dsrp
- def iterate_in_dir(self, dsrpath, rec_func):
+ def iterate_in_dir(self, dsrpath, rec_func, sel_func):
"""Iterate the dsrps in directory dsrpath."""
dir_listing = dsrpath.listdir()
dir_listing.sort()
for filename in dir_listing:
- for dsrp in rec_func(dsrpath.append(filename)): yield dsrp
+ for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func):
+ yield dsrp
- def iterate_starting_from(self, dsrpath):
+ def iterate_starting_from(self, dsrpath, rec_func, sel_func):
"""Like Iterate, but only yield indicies > self.starting_index"""
+ if DestructiveStepping.initialize(dsrpath, self.source): return
if dsrpath.index > self.starting_index: # past starting_index
- for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
+ for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func):
+ yield dsrp
elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch
- for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
- yield dsrp
-
+ for dsrp in self.iterate_in_dir(dsrpath,
+ self.iterate_starting_from,
+ sel_func): yield dsrp
+
+ def iterate_with_finalizer(self):
+ """Like Iterate, but missing some options, and add finalizer"""
+ finalize = DestructiveStepping.Finalizer()
+ for dsrp in self:
+ yield dsrp
+ finalize(dsrp)
+ finalize.getresult()
+
def Select(self, dsrp):
"""Run through the selection functions and return dominant value"""
for sf in self.selection_functions:
@@ -123,29 +168,68 @@ class Select:
information is sent over the link.
"""
- for opt, arg in argtuples:
- if opt == "--exclude":
- self.add_selection_func(self.glob_get_sf(arg, 0))
- elif opt == "--exclude-device-files":
- self.add_selection_func(self.devfiles_get_sf())
- elif opt == "--exclude-filelist":
- self.add_selection_func(self.filelist_get_sf(arg[1],
- 0, arg[0]))
- elif opt == "--exclude-regexp":
- self.add_selection_func(self.regexp_get_sf(arg, 0))
- elif opt == "--include":
- self.add_selection_func(self.glob_get_sf(arg, 1))
- elif opt == "--include-filelist":
- self.add_selection_func(self.filelist_get_sf(arg[1],
- 1, arg[0]))
- elif opt == "--include-regexp":
- self.add_selection_func(self.regexp_get_sf(arg, 1))
- else: assert 0, "Bad option %s" % opt
-
- # Exclude rdiff-backup-data directory
+ try:
+ for opt, arg in argtuples:
+ if opt == "--exclude":
+ self.add_selection_func(self.glob_get_sf(arg, 0))
+ elif opt == "--exclude-device-files":
+ self.add_selection_func(self.devfiles_get_sf())
+ elif opt == "--exclude-filelist":
+ self.add_selection_func(self.filelist_get_sf(arg[1],
+ 0, arg[0]))
+ elif opt == "--exclude-regexp":
+ self.add_selection_func(self.regexp_get_sf(arg, 0))
+ elif opt == "--include":
+ self.add_selection_func(self.glob_get_sf(arg, 1))
+ elif opt == "--include-filelist":
+ self.add_selection_func(self.filelist_get_sf(arg[1],
+ 1, arg[0]))
+ elif opt == "--include-regexp":
+ self.add_selection_func(self.regexp_get_sf(arg, 1))
+ else: assert 0, "Bad option %s" % opt
+ except SelectError, e: self.parse_catch_error(e)
+
+ self.parse_last_excludes()
+ self.parse_rbdir_exclude()
+ self.parse_proc_exclude()
+
def parse_catch_error(self, exc):
    """Deal with selection error exc

    exc is the exception caught while the selection arguments were
    being parsed.  A FilePrefixError or GlobbingError is reported
    through Log.FatalError (presumably terminating the program --
    confirm against Log); anything else is re-raised.

    This must be called from inside the except block that caught
    exc, because the fall-through case uses a bare raise.

    """
    if isinstance(exc, FilePrefixError):
        Log.FatalError(
"""Fatal Error: The file specification
 %s
cannot match any files in the base directory
 %s
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""" %
            (exc, self.prefix))
    elif isinstance(exc, GlobbingError):
        # Test the parameter exc; the name e does not exist in this scope
        Log.FatalError("Fatal Error while processing expression\n"
                       "%s" % exc)
    else:
        # Re-raise the exception currently being handled by the caller
        raise
+
def parse_rbdir_exclude(self):
    """Put an exclusion of the rdiff-backup-data dir first in the list

    The selection function is built from the index tuple
    ("rdiff-backup-data",) with include set to 0, and added with
    add_to_start set to 1.

    """
    rbdir_sf = self.glob_get_tuple_sf(("rdiff-backup-data",), 0)
    self.add_selection_func(rbdir_sf, 1)
def parse_proc_exclude(self):
    """Exclude the proc directory when the prefix is the root "/"

    Nothing is added for any other prefix.  The exclusion is built
    from the index tuple ("proc",) and added with add_to_start set
    to 1.

    """
    if self.prefix != "/":
        return
    proc_sf = self.glob_get_tuple_sf(("proc",), 0)
    self.add_selection_func(proc_sf, 1)
+
def parse_last_excludes(self):
    """Exit with error if last selection function isn't an exclude

    Does nothing when no selection functions have been added, or
    when the last one has a true exclude attribute.  Otherwise the
    last function's name is reported through Log.FatalError
    (presumably terminating the program -- confirm against Log).

    """
    if not self.selection_functions:
        return
    last_sf = self.selection_functions[-1]
    if last_sf.exclude:
        return
    # The message has a single %s, so pass exactly one value
    Log.FatalError(
"""Last selection expression:
 %s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""" %
        (last_sf.name,))
+
def add_selection_func(self, sel_func, add_to_start = None):
"""Add another selection function at the end or beginning"""
if add_to_start: self.selection_functions.insert(0, sel_func)
@@ -259,8 +343,7 @@ class Select:
raise
def sel_func(dsrp):
- match = regexp.match(dsrp.path)
- if match and match.end(0) == len(dsrp.path): return include
+ if regexp.search(dsrp.path): return include
else: return None
sel_func.exclude = not include
@@ -284,8 +367,8 @@ class Select:
assert include == 0 or include == 1
if glob_str == "**": sel_func = lambda dsrp: include
elif not self.glob_re.match(glob_str): # normal file
- return self.glob_get_filename_sf(glob_str, include)
- else: pass ####XXXXXXXXXXXXX
+ sel_func = self.glob_get_filename_sf(glob_str, include)
+ else: sel_func = self.glob_get_normal_sf(glob_str, include)
sel_func.exclude = not include
sel_func.name = "Command-line glob: %s" % glob_str
@@ -296,20 +379,18 @@ class Select:
Some of the parsing is better explained in
filelist_parse_line. The reason this is split from normal
- globbing is so we can check the prefix and give proper
- warning.
+ globbing is things are a lot less complicated if no special
+ globbing characters are used.
"""
if not filename.startswith(self.prefix):
- Log("Warning: file specification %s does not start with\n"
- "prefix %s, ignoring" % (filename, self.prefix), 2)
- return lambda x: None # dummy selection function
+ raise FilePrefixError(filename)
index = tuple(filter(lambda x: x,
filename[len(self.prefix):].split("/")))
return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
- """Add selection function based on tuple"""
+ """Return selection function based on tuple"""
def include_sel_func(dsrp):
if (dsrp.index == tuple[:len(dsrp.index)] or
dsrp.index[:len(tuple)] == tuple):
@@ -327,3 +408,94 @@ class Select:
sel_func.name = "Tuple select %s" % (tuple,)
return sel_func
def glob_get_normal_sf(self, glob_str, include):
    """Return selection function based on glob_str

    glob_str is converted to a regular expression and paths are
    matched against it.  A directory that might contain a matching
    file has to be scanned (return value 2), so the glob is also
    split into its leading parts and any path matching one of
    those prefixes is scanned.

    A glob_str beginning with "ignorecase:" (in any case) has that
    tag removed and is matched case-insensitively.

    The include version returns 1 for a match, 2 for a prefix
    match and None otherwise; the exclude version returns 0 for a
    match and None otherwise.  FilePrefixError is raised if the
    include version gives a false result for self.dsrpath.

    Thanks to Donovan Baarda who provided some code which did some
    things similar to this.

    """
    tag = "ignorecase:"
    flags = re.S
    if glob_str.lower().startswith(tag):
        flags = flags | re.I
        glob_str = glob_str[len(tag):]

    # Matches whatever the glob matches, plus anything beneath it
    full_re = re.compile("^%s($|/)" % self.glob_to_re(glob_str), flags)

    # Only the part up to and including the first ** is used for prefixes
    star_pos = glob_str.find("**")
    if star_pos != -1:
        glob_str = glob_str[:star_pos + 2]

    prefix_alternatives = "|".join(self.glob_get_prefix_res(glob_str))
    scan_re = re.compile("^(%s)$" % prefix_alternatives, flags)

    def include_sel_func(dsrp):
        if full_re.match(dsrp.path):
            return 1
        if scan_re.match(dsrp.path):
            return 2
        return None

    def exclude_sel_func(dsrp):
        if full_re.match(dsrp.path):
            return 0
        return None

    # The base path itself must at least be scanned by this glob
    if not include_sel_func(self.dsrpath):
        raise FilePrefixError(glob_str)

    if include:
        return include_sel_func
    return exclude_sel_func
+
def glob_get_prefix_res(self, glob_str):
    """Return list of regexps equivalent to prefixes of glob_str

    glob_str is split on "/" and every leading run of parts is
    joined back together and converted with glob_to_re.  An empty
    first prefix stands for the root and is replaced by "/".

    Raises GlobbingError if an empty part occurs anywhere except
    first or last, i.e. glob_str has consecutive slashes.

    """
    parts = glob_str.split("/")
    # "" is fine as the first or last part, as in /foo/
    if "" in parts[1:-1]:
        raise GlobbingError("Consecutive '/'s found in globbing string "
                            + glob_str)

    prefixes = ["/".join(parts[:end]) for end in range(1, len(parts) + 1)]
    # The root "/" is the only directory whose name ends in a slash
    if prefixes[0] == "":
        prefixes[0] = "/"
    return [self.glob_to_re(prefix) for prefix in prefixes]
+
def glob_to_re(self, pat):
    """Return a regular expression string equivalent to shell glob pat

    Translation rules:
      **     becomes .*     (may cross "/")
      *      becomes [^/]*
      ?      becomes [^/]
      [...]  becomes a character class holding the same text, with
             backslashes doubled and a leading ! turned into ^
      [      with no closing ] is taken literally
    Every other character is passed through re.escape.  There is no
    way to quote the special characters.

    This function taken with minor modifications from efnmatch.py
    by Donovan Baarda.

    """
    pieces = []
    pos, length = 0, len(pat)
    while pos < length:
        char = pat[pos]
        pair = pat[pos:pos + 2]
        pos += 1
        if pair == '**':
            pieces.append('.*')
            pos += 1  # skip the second star as well
        elif char == '*':
            pieces.append('[^/]*')
        elif char == '?':
            pieces.append('[^/]')
        elif char == '[':
            # Find the closing bracket; a ] directly after [ or
            # after a leading ! or ^ belongs to the set itself
            close = pos
            if close < length and pat[close] in '!^':
                close += 1
            if close < length and pat[close] == ']':
                close += 1
            while close < length and pat[close] != ']':
                close += 1
            if close >= length:
                # no closing bracket, interpret the [ literally
                pieces.append('\\[')
            else:
                members = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if members[0] in '!^':
                    members = '^' + members[1:]
                pieces.append('[' + members + ']')
        else:
            pieces.append(re.escape(char))
    return ''.join(pieces)
+