summaryrefslogtreecommitdiff
path: root/rdiff-backup/src/selection.py
diff options
context:
space:
mode:
Diffstat (limited to 'rdiff-backup/src/selection.py')
-rw-r--r--rdiff-backup/src/selection.py650
1 files changed, 0 insertions, 650 deletions
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
deleted file mode 100644
index 9fb43fc..0000000
--- a/rdiff-backup/src/selection.py
+++ /dev/null
@@ -1,650 +0,0 @@
-# Copyright 2002 Ben Escoto
-#
-# This file is part of rdiff-backup.
-#
-# rdiff-backup is free software; you can redistribute it and/or modify
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 2 of the License, or (at your
-# option) any later version.
-#
-# rdiff-backup is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with rdiff-backup; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
-# USA
-
-"""Iterate exactly the requested files in a directory
-
-Parses includes and excludes to yield correct files. More
-documentation on what this code does can be found on the man page.
-
-"""
-
-from __future__ import generators
-import re
-from log import *
-from robust import *
-from destructive_stepping import *
-import FilenameMapping
-
-
-class SelectError(Exception):
- """Some error dealing with the Select class"""
- pass
-
-class FilePrefixError(SelectError):
- """Signals that a specified file doesn't start with correct prefix"""
- pass
-
-class GlobbingError(SelectError):
- """Something has gone wrong when parsing a glob string"""
- pass
-
-
-class Select:
- """Iterate appropriate DSRPaths in given directory
-
- This class acts as an iterator on account of its next() method.
- Basically, it just goes through all the files in a directory in
- order (depth-first) and subjects each file to a bunch of tests
- (selection functions) in order. The first test that includes or
- excludes the file means that the file gets included (iterated) or
- excluded. The default is include, so with no tests we would just
- iterate all the files in the directory in order.
-
- The one complication to this is that sometimes we don't know
- whether or not to include a directory until we examine its
- contents. For instance, if we want to include all the **.py
- files. If /home/ben/foo.py exists, we should also include /home
- and /home/ben, but if these directories contain no **.py files,
- they shouldn't be included. For this reason, a test may not
- include or exclude a directory, but merely "scan" it. If later a
- file in the directory gets included, so does the directory.
-
- As mentioned above, each test takes the form of a selection
- function. The selection function takes a dsrp, and returns:
-
- None - means the test has nothing to say about the related file
- 0 - the file is excluded by the test
- 1 - the file is included
- 2 - the test says the file (must be directory) should be scanned
-
- Also, a selection function f has a variable f.exclude which should
- be true iff f could potentially exclude some file. This is used
- to signal an error if the last function only includes, which would
- be redundant and presumably isn't what the user intends.
-
- """
- # This re should not match normal filenames, but usually just globs
- glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
-
- def __init__(self, dsrpath, quoted_filenames = None):
- """DSRPIterator initializer. dsrp is the root directory
-
- When files have quoted characters in them, quoted_filenames
- should be true. Then RPath's index will be the unquoted
- version.
-
- """
- assert isinstance(dsrpath, DSRPath)
- self.selection_functions = []
- self.dsrpath = dsrpath
- self.prefix = self.dsrpath.path
- self.quoting_on = Globals.quoting_enabled and quoted_filenames
-
- def set_iter(self, starting_index = None, iterate_parents = None,
- sel_func = None):
- """Initialize more variables, get ready to iterate
-
- Will iterate indicies greater than starting_index. If
- iterate_parents is true, will also include parents of
- starting_index in iteration. Selection function sel_func is
- called on each dsrp and is usually self.Select. Returns self
- just for convenience.
-
- """
- if not sel_func: sel_func = self.Select
- self.dsrpath.setdata() # this may have changed since Select init
- if starting_index is not None:
- self.starting_index = starting_index
- self.iter = self.iterate_starting_from(self.dsrpath,
- self.iterate_starting_from, sel_func)
- elif self.quoting_on:
- self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
- else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
-
- # only iterate parents if we are not starting from beginning
- self.iterate_parents = starting_index is not None and iterate_parents
- self.next = self.iter.next
- self.__iter__ = lambda: self
- return self
-
- def Iterate_fast(self, dsrpath, sel_func):
- """Like Iterate, but don't recur, saving time
-
- Only handles standard case (quoting off, starting from
- beginning).
-
- """
- def error_handler(exc, filename):
- Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
- return None
-
- def diryield(dsrpath):
- """Generate relevant files in directory dsrpath
-
- Returns (dsrp, num) where num == 0 means dsrp should be
- generated normally, num == 1 means the dsrp is a directory
- and should be included iff something inside is included.
-
- """
- for filename in Robust.listrp(dsrpath):
- new_dsrp = Robust.check_common_error(error_handler,
- dsrpath.append, (filename,))
- if new_dsrp:
- s = sel_func(new_dsrp)
- if s == 1: yield (new_dsrp, 0)
- elif s == 2 and new_dsrp.isdir(): yield (new_dsrp, 1)
-
- yield dsrpath
- diryield_stack = [diryield(dsrpath)]
- delayed_dsrp_stack = []
-
- while diryield_stack:
- try: dsrp, val = diryield_stack[-1].next()
- except StopIteration:
- diryield_stack.pop()
- if delayed_dsrp_stack: delayed_dsrp_stack.pop()
- continue
- if val == 0:
- if delayed_dsrp_stack:
- for delayed_dsrp in delayed_dsrp_stack: yield delayed_dsrp
- del delayed_dsrp_stack[:]
- yield dsrp
- if dsrp.isdir(): diryield_stack.append(diryield(dsrp))
- elif val == 1:
- delayed_dsrp_stack.append(dsrp)
- diryield_stack.append(diryield(dsrp))
-
- def Iterate(self, dsrpath, rec_func, sel_func):
- """Return iterator yielding dsrps in dsrpath
-
- rec_func is usually the same as this function and is what
- Iterate uses to find files in subdirectories. It is used in
- iterate_starting_from.
-
- sel_func is the selection function to use on the dsrps. It is
- usually self.Select.
-
- """
- s = sel_func(dsrpath)
- if s == 0: return
- elif s == 1: # File is included
- yield dsrpath
- if dsrpath.isdir():
- for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func):
- yield dsrp
- elif s == 2:
- if dsrpath.isdir(): # Directory is merely scanned
- iid = self.iterate_in_dir(dsrpath, rec_func, sel_func)
- try: first = iid.next()
- except StopIteration: return # no files inside; skip dsrp
- yield dsrpath
- yield first
- for dsrp in iid: yield dsrp
- else: assert 0, "Invalid selection result %s" % (str(s),)
-
- def iterate_in_dir(self, dsrpath, rec_func, sel_func):
- """Iterate the dsrps in directory dsrpath."""
- def error_handler(exc, filename):
- Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
- return None
-
- if self.quoting_on:
- for subdir in FilenameMapping.get_quoted_dir_children(dsrpath):
- for dsrp in rec_func(subdir, rec_func, sel_func):
- yield dsrp
- else:
- for filename in Robust.listrp(dsrpath):
- new_dsrp = Robust.check_common_error(
- error_handler, dsrpath.append, [filename])
- if new_dsrp:
- for dsrp in rec_func(new_dsrp, rec_func, sel_func):
- yield dsrp
-
- def iterate_starting_from(self, dsrpath, rec_func, sel_func):
- """Like Iterate, but only yield indicies > self.starting_index"""
- if dsrpath.index > self.starting_index: # past starting_index
- for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func):
- yield dsrp
- elif (dsrpath.index == self.starting_index[:len(dsrpath.index)]
- and dsrpath.isdir()):
- # May encounter starting index on this branch
- if self.iterate_parents: yield dsrpath
- for dsrp in self.iterate_in_dir(dsrpath,
- self.iterate_starting_from,
- sel_func): yield dsrp
-
- def iterate_with_finalizer(self):
- """Like Iterate, but missing some options, and add finalizer"""
- finalize = IterTreeReducer(DestructiveSteppingFinalizer, ())
- for dsrp in self:
- yield dsrp
- finalize(dsrp.index, dsrp)
- finalize.Finish()
-
- def Select(self, dsrp):
- """Run through the selection functions and return dominant val 0/1/2"""
- for sf in self.selection_functions:
- result = sf(dsrp)
- if result is not None: return result
- return 1
-
- def ParseArgs(self, argtuples, filelists):
- """Create selection functions based on list of tuples
-
- The tuples have the form (option string, additional argument)
- and are created when the initial commandline arguments are
- read. The reason for the extra level of processing is that
- the filelists may only be openable by the main connection, but
- the selection functions need to be on the backup reader or
- writer side. When the initial arguments are parsed the right
- information is sent over the link.
-
- """
- filelists_index = 0
- try:
- for opt, arg in argtuples:
- if opt == "--exclude":
- self.add_selection_func(self.glob_get_sf(arg, 0))
- elif opt == "--exclude-device-files":
- self.add_selection_func(self.devfiles_get_sf(0))
- elif opt == "--exclude-filelist":
- self.add_selection_func(self.filelist_get_sf(
- filelists[filelists_index], 0, arg))
- filelists_index += 1
- elif opt == "--exclude-globbing-filelist":
- map(self.add_selection_func,
- self.filelist_globbing_get_sfs(
- filelists[filelists_index], 0, arg))
- filelists_index += 1
- elif opt == "--exclude-other-filesystems":
- self.add_selection_func(self.other_filesystems_get_sf(0))
- elif opt == "--exclude-regexp":
- self.add_selection_func(self.regexp_get_sf(arg, 0))
- elif opt == "--exclude-special-files":
- self.add_selection_func(self.special_get_sf(0))
- elif opt == "--include":
- self.add_selection_func(self.glob_get_sf(arg, 1))
- elif opt == "--include-filelist":
- self.add_selection_func(self.filelist_get_sf(
- filelists[filelists_index], 1, arg))
- filelists_index += 1
- elif opt == "--include-globbing-filelist":
- map(self.add_selection_func,
- self.filelist_globbing_get_sfs(
- filelists[filelists_index], 1, arg))
- filelists_index += 1
- elif opt == "--include-regexp":
- self.add_selection_func(self.regexp_get_sf(arg, 1))
- else: assert 0, "Bad selection option %s" % opt
- except IOError: pass#SelectError, e: self.parse_catch_error(e)
- assert filelists_index == len(filelists)
-
- self.parse_last_excludes()
- self.parse_rbdir_exclude()
-
- def parse_catch_error(self, exc):
- """Deal with selection error exc"""
- if isinstance(exc, FilePrefixError):
- Log.FatalError(
-"""Fatal Error: The file specification
-' %s'
-cannot match any files in the base directory
-' %s'
-Useful file specifications begin with the base directory or some
-pattern (such as '**') which matches the base directory.""" %
- (exc, self.prefix))
- elif isinstance(e, GlobbingError):
- Log.FatalError("Fatal Error while processing expression\n"
- "%s" % exc)
- else: raise
-
- def parse_rbdir_exclude(self):
- """Add exclusion of rdiff-backup-data dir to front of list"""
- self.add_selection_func(
- self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
-
- def parse_last_excludes(self):
- """Exit with error if last selection function isn't an exclude"""
- if (self.selection_functions and
- not self.selection_functions[-1].exclude):
- Log.FatalError(
-"""Last selection expression:
- %s
-only specifies that files be included. Because the default is to
-include all files, the expression is redundant. Exiting because this
-probably isn't what you meant.""" %
- (self.selection_functions[-1].name,))
-
- def add_selection_func(self, sel_func, add_to_start = None):
- """Add another selection function at the end or beginning"""
- if add_to_start: self.selection_functions.insert(0, sel_func)
- else: self.selection_functions.append(sel_func)
-
- def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
- """Return selection function by reading list of files
-
- The format of the filelist is documented in the man page.
- filelist_fp should be an (open) file object.
- inc_default should be true if this is an include list,
- false for an exclude list.
- filelist_name is just a string used for logging.
-
- """
- Log("Reading filelist %s" % filelist_name, 4)
- tuple_list, something_excluded = \
- self.filelist_read(filelist_fp, inc_default, filelist_name)
- Log("Sorting filelist %s" % filelist_name, 4)
- tuple_list.sort()
- i = [0] # We have to put index in list because of stupid scoping rules
-
- def selection_function(dsrp):
- while 1:
- if i[0] >= len(tuple_list): return None
- include, move_on = \
- self.filelist_pair_match(dsrp, tuple_list[i[0]])
- if move_on:
- i[0] += 1
- if include is None: continue # later line may match
- return include
-
- selection_function.exclude = something_excluded or inc_default == 0
- selection_function.name = "Filelist: " + filelist_name
- return selection_function
-
- def filelist_read(self, filelist_fp, include, filelist_name):
- """Read filelist from fp, return (tuplelist, something_excluded)"""
- prefix_warnings = [0]
- def incr_warnings(exc):
- """Warn if prefix is incorrect"""
- prefix_warnings[0] += 1
- if prefix_warnings[0] < 6:
- Log("Warning: file specification '%s' in filelist %s\n"
- "doesn't start with correct prefix %s. Ignoring." %
- (exc, filelist_name, self.prefix), 2)
- if prefix_warnings[0] == 5:
- Log("Future prefix errors will not be logged.", 2)
-
- something_excluded, tuple_list = None, []
- separator = Globals.null_separator and "\0" or "\n"
- for line in filelist_fp.read().split(separator):
- if not line: continue # skip blanks
- try: tuple = self.filelist_parse_line(line, include)
- except FilePrefixError, exc:
- incr_warnings(exc)
- continue
- tuple_list.append(tuple)
- if not tuple[1]: something_excluded = 1
- if filelist_fp.close():
- Log("Error closing filelist %s" % filelist_name, 2)
- return (tuple_list, something_excluded)
-
- def filelist_parse_line(self, line, include):
- """Parse a single line of a filelist, returning a pair
-
- pair will be of form (index, include), where index is another
- tuple, and include is 1 if the line specifies that we are
- including a file. The default is given as an argument.
- prefix is the string that the index is relative to.
-
- """
- if line[:2] == "+ ": # Check for "+ "/"- " syntax
- include = 1
- line = line[2:]
- elif line[:2] == "- ":
- include = 0
- line = line[2:]
-
- if not line.startswith(self.prefix): raise FilePrefixError(line)
- line = line[len(self.prefix):] # Discard prefix
- index = tuple(filter(lambda x: x, line.split("/"))) # remove empties
- return (index, include)
-
- def filelist_pair_match(self, dsrp, pair):
- """Matches a filelist tuple against a dsrp
-
- Returns a pair (include, move_on). include is None if the
- tuple doesn't match either way, and 0/1 if the tuple excludes
- or includes the dsrp.
-
- move_on is true if the tuple cannot match a later index, and
- so we should move on to the next tuple in the index.
-
- """
- index, include = pair
- if include == 1:
- if index < dsrp.index: return (None, 1)
- if index == dsrp.index: return (1, 1)
- elif index[:len(dsrp.index)] == dsrp.index:
- return (1, None) # /foo/bar implicitly includes /foo
- else: return (None, None) # dsrp greater, not initial sequence
- elif include == 0:
- if dsrp.index[:len(index)] == index:
- return (0, None) # /foo implicitly excludes /foo/bar
- elif index < dsrp.index: return (None, 1)
- else: return (None, None) # dsrp greater, not initial sequence
- else: assert 0, "Include is %s, should be 0 or 1" % (include,)
-
- def filelist_globbing_get_sfs(self, filelist_fp, inc_default, list_name):
- """Return list of selection functions by reading fileobj
-
- filelist_fp should be an open file object
- inc_default is true iff this is an include list
- list_name is just the name of the list, used for logging
- See the man page on --[include/exclude]-globbing-filelist
-
- """
- Log("Reading globbing filelist %s" % list_name, 4)
- separator = Globals.null_separator and "\0" or "\n"
- for line in filelist_fp.read().split(separator):
- if not line: continue # skip blanks
- if line[:2] == "+ ": yield self.glob_get_sf(line[2:], 1)
- elif line[:2] == "- ": yield self.glob_get_sf(line[2:], 0)
- else: yield self.glob_get_sf(line, inc_default)
-
- def other_filesystems_get_sf(self, include):
- """Return selection function matching files on other filesystems"""
- assert include == 0 or include == 1
- root_devloc = self.dsrpath.getdevloc()
- def sel_func(dsrp):
- if dsrp.getdevloc() == root_devloc: return None
- else: return include
- sel_func.exclude = not include
- sel_func.name = "Match other filesystems"
- return sel_func
-
- def regexp_get_sf(self, regexp_string, include):
- """Return selection function given by regexp_string"""
- assert include == 0 or include == 1
- try: regexp = re.compile(regexp_string)
- except:
- Log("Error compiling regular expression %s" % regexp_string, 1)
- raise
-
- def sel_func(dsrp):
- if regexp.search(dsrp.path): return include
- else: return None
-
- sel_func.exclude = not include
- sel_func.name = "Regular expression: %s" % regexp_string
- return sel_func
-
- def devfiles_get_sf(self, include):
- """Return a selection function matching all dev files"""
- if self.selection_functions:
- Log("Warning: exclude-device-files is not the first "
- "selector.\nThis may not be what you intended", 3)
- def sel_func(dsrp):
- if dsrp.isdev(): return include
- else: return None
- sel_func.exclude = not include
- sel_func.name = (include and "include" or "exclude") + " device files"
- return sel_func
-
- def special_get_sf(self, include):
- """Return sel function matching sockets, symlinks, sockets, devs"""
- if self.selection_functions:
- Log("Warning: exclude-special-files is not the first "
- "selector.\nThis may not be what you intended", 3)
- def sel_func(dsrp):
- if dsrp.issym() or dsrp.issock() or dsrp.isfifo() or dsrp.isdev():
- return include
- else: return None
- sel_func.exclude = not include
- sel_func.name = (include and "include" or "exclude") + " special files"
- return sel_func
-
- def glob_get_sf(self, glob_str, include):
- """Return selection function given by glob string"""
- assert include == 0 or include == 1
- if glob_str == "**": sel_func = lambda dsrp: include
- elif not self.glob_re.match(glob_str): # normal file
- sel_func = self.glob_get_filename_sf(glob_str, include)
- else: sel_func = self.glob_get_normal_sf(glob_str, include)
-
- sel_func.exclude = not include
- sel_func.name = "Command-line %s glob: %s" % \
- (include and "include" or "exclude", glob_str)
- return sel_func
-
- def glob_get_filename_sf(self, filename, include):
- """Get a selection function given a normal filename
-
- Some of the parsing is better explained in
- filelist_parse_line. The reason this is split from normal
- globbing is things are a lot less complicated if no special
- globbing characters are used.
-
- """
- if not filename.startswith(self.prefix):
- raise FilePrefixError(filename)
- index = tuple(filter(lambda x: x,
- filename[len(self.prefix):].split("/")))
- return self.glob_get_tuple_sf(index, include)
-
- def glob_get_tuple_sf(self, tuple, include):
- """Return selection function based on tuple"""
- def include_sel_func(dsrp):
- if (dsrp.index == tuple[:len(dsrp.index)] or
- dsrp.index[:len(tuple)] == tuple):
- return 1 # /foo/bar implicitly matches /foo, vice-versa
- else: return None
-
- def exclude_sel_func(dsrp):
- if dsrp.index[:len(tuple)] == tuple:
- return 0 # /foo excludes /foo/bar, not vice-versa
- else: return None
-
- if include == 1: sel_func = include_sel_func
- elif include == 0: sel_func = exclude_sel_func
- sel_func.exclude = not include
- sel_func.name = "Tuple select %s" % (tuple,)
- return sel_func
-
- def glob_get_normal_sf(self, glob_str, include):
- """Return selection function based on glob_str
-
- The basic idea is to turn glob_str into a regular expression,
- and just use the normal regular expression. There is a
- complication because the selection function should return '2'
- (scan) for directories which may contain a file which matches
- the glob_str. So we break up the glob string into parts, and
- any file which matches an initial sequence of glob parts gets
- scanned.
-
- Thanks to Donovan Baarda who provided some code which did some
- things similar to this.
-
- """
- if glob_str.lower().startswith("ignorecase:"):
- re_comp = lambda r: re.compile(r, re.I | re.S)
- glob_str = glob_str[len("ignorecase:"):]
- else: re_comp = lambda r: re.compile(r, re.S)
-
- # matches what glob matches and any files in directory
- glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str))
-
- if glob_str.find("**") != -1:
- glob_str = glob_str[:glob_str.find("**")+2] # truncate after **
-
- scan_comp_re = re_comp("^(%s)$" %
- "|".join(self.glob_get_prefix_res(glob_str)))
-
- def include_sel_func(dsrp):
- if glob_comp_re.match(dsrp.path): return 1
- elif scan_comp_re.match(dsrp.path): return 2
- else: return None
-
- def exclude_sel_func(dsrp):
- if glob_comp_re.match(dsrp.path): return 0
- else: return None
-
- # Check to make sure prefix is ok
- if not include_sel_func(self.dsrpath): raise FilePrefixError(glob_str)
-
- if include: return include_sel_func
- else: return exclude_sel_func
-
- def glob_get_prefix_res(self, glob_str):
- """Return list of regexps equivalent to prefixes of glob_str"""
- glob_parts = glob_str.split("/")
- if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/
- raise GlobbingError("Consecutive '/'s found in globbing string "
- + glob_str)
-
- prefixes = map(lambda i: "/".join(glob_parts[:i+1]),
- range(len(glob_parts)))
- # we must make exception for root "/", only dir to end in slash
- if prefixes[0] == "": prefixes[0] = "/"
- return map(self.glob_to_re, prefixes)
-
- def glob_to_re(self, pat):
- """Returned regular expression equivalent to shell glob pat
-
- Currently only the ?, *, [], and ** expressions are supported.
- Ranges like [a-z] are also currently unsupported. There is no
- way to quote these special characters.
-
- This function taken with minor modifications from efnmatch.py
- by Donovan Baarda.
-
- """
- i, n, res = 0, len(pat), ''
- while i < n:
- c, s = pat[i], pat[i:i+2]
- i = i+1
- if s == '**':
- res = res + '.*'
- i = i + 1
- elif c == '*': res = res + '[^/]*'
- elif c == '?': res = res + '[^/]'
- elif c == '[':
- j = i
- if j < n and pat[j] in '!^': j = j+1
- if j < n and pat[j] == ']': j = j+1
- while j < n and pat[j] != ']': j = j+1
- if j >= n: res = res + '\\[' # interpret the [ literally
- else: # Deal with inside of [..]
- stuff = pat[i:j].replace('\\','\\\\')
- i = j+1
- if stuff[0] in '!^': stuff = '^' + stuff[1:]
- res = res + '[' + stuff + ']'
- else: res = res + re.escape(c)
- return res
-
-