Various changes to switch to new include/exclude syntax

git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@39 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
author: ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-04-08 09:14:12 +0000
committer: ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-04-08 09:14:12 +0000
commit: 2fe5f0ee2d2545f05db11b8c227e582e6a9479d9 (patch)
tree: c5a92739fd8c1ad78acf6374fa3cd8a5c87123b6 /rdiff-backup/src/selection.py
parent: 6fe0035bb3eaf59d8a7fe13c230c534619dba04b (diff)
download: rdiff-backup-2fe5f0ee2d2545f05db11b8c227e582e6a9479d9.tar.gz
1 files changed, 185 insertions, 32 deletions
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
index 75d55ce..9f41860 100644
--- a/rdiff-backup/src/selection.py
+++ b/rdiff-backup/src/selection.py
@@ -1,4 +1,6 @@
+from __future__ import generators
 execfile("destructive_stepping.py")
+import re
 
 #######################################################################
 #
@@ -47,14 +49,23 @@ class Select:
 	be redundant and presumably isn't what the user intends.
 
 	"""
-	def __init__(self, dsrpath, starting_index = None):
-		"""DSRPIterator initializer.  dsrpath should be the root dir"""
+	# This re should not match normal filenames, but usually just globs
+	glob_re = re.compile(".*[\*\?\[]")
+
+	def __init__(self, dsrpath):
+		"""Select initializer.  dsrpath should be the root dir"""
 		self.selection_functions = []
-		if starting_index:
-			self.iter = self.iterate_starting_from(dsrpath, starting_index,
-												   self.iterate_starting_from)
-		else: self.iter = self.Iterate(dsrpath, self.Iterate)
+		self.dsrpath = dsrpath
+		self.prefix = dsrpath.path
+	
+	def set_iter(self, starting_index = None):
+		"""Set up self.iter, iterating from starting_index if it is given"""
+		if starting_index is not None:
+			self.iter = self.iterate_starting_from(self.dsrpath,
+							   starting_index, self.iterate_starting_from)
+		else: self.iter = self.Iterate(self.dsrpath, self.Iterate)
 		self.next = self.iter.next
+		self.__iter__ = lambda: self
 
 	def Iterate(self, dsrpath, rec_func):
 		"""Return iterator yielding dsrps in dsrpath
@@ -88,7 +99,7 @@ class Select:
 		"""Like Iterate, but only yield indicies > self.starting_index"""
 		if dsrpath.index > self.starting_index: # past starting_index
 			for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
-		elif dsrpath.index = self.starting_index[:len(dsrpath.index)]:
+		elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
 			# May encounter starting index on this branch
 			for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
 				yield dsrp
@@ -100,50 +111,99 @@ class Select:
 			if result is not None: return result
 		return 1
 
-	def add_selection_func(self, sel_func):
-		"""Add another selection function at the end"""
-		self.selection_functions.append(sel_func)
+	def ParseArgs(self, argtuples):
+		"""Create selection functions based on list of tuples
+
+		The tuples have the form (option string, additional argument)
+		and are created when the initial commandline arguments are
+		read.  The reason for the extra level of processing is that
+		the filelists may only be openable by the main connection, but
+		the selection functions need to be on the backup reader or
+		writer side.  When the initial arguments are parsed the right
+		information is sent over the link.
+
+		"""
+		for opt, arg in argtuples:
+			if opt == "--exclude":
+				self.add_selection_func(self.glob_get_sf(arg, 0))
+			elif opt == "--exclude-device-files":
+				self.add_selection_func(self.devfiles_get_sf())
+			elif opt == "--exclude-filelist":
+				self.add_selection_func(self.filelist_get_sf(arg[1],
+															 0, arg[0]))
+			elif opt == "--exclude-regexp":
+				self.add_selection_func(self.regexp_get_sf(arg, 0))
+			elif opt == "--include":
+				self.add_selection_func(self.glob_get_sf(arg, 1))
+			elif opt == "--include-filelist":
+				self.add_selection_func(self.filelist_get_sf(arg[1],
+															 1, arg[0]))
+			elif opt == "--include-regexp":
+				self.add_selection_func(self.regexp_get_sf(arg, 1))
+			else: assert 0, "Bad option %s" % opt
+
+		# Exclude rdiff-backup-data directory
+		self.add_selection_func(
+			self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
+
+	def add_selection_func(self, sel_func, add_to_start = None):
+		"""Add another selection function at the end or beginning"""
+		if add_to_start: self.selection_functions.insert(0, sel_func)
+		else: self.selection_functions.append(sel_func)
 
-	def filelist_add_sf(self, filelist_fp, include, filelist_name):
-		"""Adds selection function by reading list of files
+	def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
+		"""Return selection function by reading list of files
 
 		The format of the filelist is documented in the man page.
 		filelist_fp should be an (open) file object.
-		include should be true if this is an include list, false for
-		an exclude list.
+		inc_default should be true if this is an include list,
+		false for an exclude list.
 		filelist_name is just a string used for logging.
 
 		"""
 		Log("Reading filelist %s" % filelist_name, 4)
 		tuple_list, something_excluded = \
-					self.filelist_read(filelist_fp, include, filelist_name)
+					self.filelist_read(filelist_fp, inc_default, filelist_name)
 		Log("Sorting filelist %s" % filelist_name, 4)
 		tuple_list.sort()
-		current_index = 0
+		i = [0] # We have to put index in list because of stupid scoping rules
+
 		def selection_function(dsrp):
-			
-		
+			if i[0] > len(tuple_list): return inc_default
+			while 1:
+				include, move_on = \
+						 self.filelist_pair_match(dsrp, tuple_list[i[0]])
+				if move_on:
+					i[0] += 1
+					if include is None: continue # later line may match
+				return include
+
+		selection_function.exclude = something_excluded
+		selection_function.name = "Filelist: " + filelist_name
+		return selection_function
+
 	def filelist_read(self, filelist_fp, include, filelist_name):
 		"""Read filelist from fp, return (tuplelist, something_excluded)"""
 		something_excluded, tuple_list = None, []
 		prefix_warnings = 0
-		while 1:
-			line = filelist_fp.readline()
-			if not line: break
+		for line in filelist_fp:
+			if not line.strip(): continue # skip blanks
 			try: tuple = self.filelist_parse_line(line, include)
 			except FilePrefixError, exp:
 				prefix_warnings += 1
 				if prefix_warnings < 6:
 					Log("Warning: file specification %s in filelist %s\n"
 						"doesn't start with correct prefix %s, ignoring." %
-						(exp[0], filelist_name, exp[1]), 2)
+						(exp, filelist_name, self.prefix), 2)
 					if prefix_warnings == 5:
 						Log("Future prefix errors will not be logged.", 2)
 			tuple_list.append(tuple)
 			if not tuple[1]: something_excluded = 1
+		if filelist_fp.close():
+			Log("Error closing filelist %s" % filelist_name, 2)
 		return (tuple_list, something_excluded)
 
-	def filelist_parse_line(self, line, include, prefix):
+	def filelist_parse_line(self, line, include):
 		"""Parse a single line of a filelist, returning a pair
 
 		pair will be of form (index, include), where index is another
@@ -160,17 +220,110 @@ class Select:
 			include = 0
 			line = line[2:]
 
-		if not line.startswith(prefix+"/"):
-			raise FilePrefixError(line, prefix+"/")
-		index = filter(lambda x: x, line.split("/")) # remove empties
+		if not line.startswith(self.prefix): raise FilePrefixError(line)
+		line = line[len(self.prefix):] # Discard prefix
+		index = tuple(filter(lambda x: x, line.split("/"))) # remove empties
 		return (index, include)
 
 	def filelist_pair_match(self, dsrp, pair):
-		"""Return 0/1 if pair excludes/includes dsrp, None if doesn't match"""
+		"""Matches a filelist tuple against a dsrp
+
+		Returns a pair (include, move_on).  include is
+		None if the tuple doesn't match either way, and 0/1 if the
+		tuple excludes or includes the dsrp.
+
+		move_on is true if the tuple cannot match a later index, and
+		so we should move on to the next tuple in the list.
+
+		"""
 		index, include = pair
+		if include == 1:
+			if index < dsrp.index: return (None, 1)
+			if index == dsrp.index: return (1, 1)
+			elif index[:len(dsrp.index)] == dsrp.index:
+				return (1, None) # /foo/bar implicitly includes /foo
+			else: return (None, None) # dsrp greater, not initial sequence
+		elif include == 0:
+			if dsrp.index[:len(index)] == index:
+				return (0, None) # /foo implicitly excludes /foo/bar
+			elif index < dsrp.index: return (None, 1)
+			else: return (None, None) # dsrp greater, not initial sequence
+		else: assert 0, "Include is %s, should be 0 or 1" % (include,)
+
+	def regexp_get_sf(self, regexp_string, include):
+		"""Return selection function given by regexp_string"""
+		assert include == 0 or include == 1
+		try: regexp = re.compile(regexp_string)
+		except:
+			Log("Error compiling regular expression %s" % regexp_string, 1)
+			raise
+		
+		def sel_func(dsrp):
+			match = regexp.match(dsrp.path)
+			if match and match.end(0) == len(dsrp.path): return include
+			else: return None
+
+		sel_func.exclude = not include
+		sel_func.name = "Regular expression: %s" % regexp_string
+		return sel_func
+
+	def devfiles_get_sf(self):
+		"""Return a selection function to exclude all dev files"""
+		if self.selection_functions:
+			Log("Warning: exclude-device-files is not the first "
+				"selector.\nThis may not be what you intended", 3)
+		def sel_func(dsrp):
+			if dsrp.isdev(): return 0
+			else: return None
+		sel_func.exclude = 1
+		sel_func.name = "Exclude device files"
+		return sel_func
+
+	def glob_get_sf(self, glob_str, include):
+		"""Return selection function given by glob string"""
 		assert include == 0 or include == 1
-		if not include and dsrp.index[:len(index)] == index:
-			return 0 # /foo matches /foo/bar/baz
-		elif include and index[:len(dsrp.index)] == dsrp.index:
-			return 1 # /foo/bar implicitly matches /foo for includes only
-		else: return None
+		if glob_str == "**": sel_func = lambda dsrp: include
+		elif not self.glob_re.match(glob_str): # normal file
+			return self.glob_get_filename_sf(glob_str, include)
+		else: pass ####XXXXXXXXXXXXX
+
+		sel_func.exclude = not include
+		sel_func.name = "Command-line glob: %s" % glob_str
+		return sel_func
+
+	def glob_get_filename_sf(self, filename, include):
+		"""Get a selection function given a normal filename
+
+		Some of the parsing is better explained in
+		filelist_parse_line.  The reason this is split from normal
+		globbing is so we can check the prefix and give proper
+		warning.
+
+		"""
+		if not filename.startswith(self.prefix):
+			Log("Warning: file specification %s does not start with\n"
+				"prefix %s, ignoring" % (filename, self.prefix), 2)
+			return lambda x: None # dummy selection function
+		index = tuple(filter(lambda x: x,
+							 filename[len(self.prefix):].split("/")))
+		return self.glob_get_tuple_sf(index, include)
+
+	def glob_get_tuple_sf(self, tuple, include):
+		"""Return selection function based on tuple"""
+		def include_sel_func(dsrp):
+			if (dsrp.index == tuple[:len(dsrp.index)] or
+				dsrp.index[:len(tuple)] == tuple):
+				return 1 # /foo/bar implicitly matches /foo, vice-versa
+			else: return None
+
+		def exclude_sel_func(dsrp):
+			if dsrp.index[:len(tuple)] == tuple:
+				return 0 # /foo excludes /foo/bar, not vice-versa
+			else: return None
+
+		if include == 1: sel_func = include_sel_func
+		elif include == 0: sel_func = exclude_sel_func
+		sel_func.exclude = not include
+		sel_func.name = "Tuple select %s" % (tuple,)
+		return sel_func
+
author	ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2002-04-08 09:14:12 +0000
committer	ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2002-04-08 09:14:12 +0000
commit	2fe5f0ee2d2545f05db11b8c227e582e6a9479d9 (patch)
tree	c5a92739fd8c1ad78acf6374fa3cd8a5c87123b6 /rdiff-backup/src/selection.py
parent	6fe0035bb3eaf59d8a7fe13c230c534619dba04b (diff)
download	rdiff-backup-2fe5f0ee2d2545f05db11b8c227e582e6a9479d9.tar.gz