4 files changed, 207 insertions, 76 deletions
diff --git a/rdiff-backup/rdiff_backup/cmodule.c b/rdiff-backup/rdiff_backup/cmodule.c
index 30aac47..a5f1c65 100644
--- a/rdiff-backup/rdiff_backup/cmodule.c
+++ b/rdiff-backup/rdiff_backup/cmodule.c
@@ -27,6 +27,7 @@
 #include <unistd.h>
 #include <errno.h>
 
+
 /* Some of the following code to define major/minor taken from code by
  * Jörg Schilling's star archiver.
  */
@@ -202,7 +203,6 @@ static PyObject *c_make_file_dict(self, args)
   return return_val;
 }
 
-
 /* Convert python long into 7 byte string */
 static PyObject *long2str(self, args)
 	 PyObject *self;
@@ -247,12 +247,138 @@ static PyObject *str2long(self, args)
 }
 
 
+/* --------------------------------------------------------------------- *
+ * This section is still GPL'd, but was copied from the libmisc
+ * section of getfacl by Andreas Gruenbacher
+ * <a.gruenbacher@computer.org>.  I'm just copying the code to
+ * preserve quoting compatibility between (get|set)f(acl|attr) and
+ * rdiff-backup.  Taken on 8/24/2003.
+ * --------------------------------------------------------------------- */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+int high_water_alloc(void **buf, size_t *bufsize, size_t newsize)
+{
+#define CHUNK_SIZE	256
+	/*
+	 * Grow *buf with realloc() only when *bufsize is below newsize,
+	 * rounding the request up to a multiple of CHUNK_SIZE; the buffer
+	 * is never shrunk.  Returns 0 on success, 1 if realloc() fails.
+	 */
+	if (*bufsize < newsize) {
+		void *newbuf;
+		/* Mask rounding relies on CHUNK_SIZE being a power of two. */
+		newsize = (newsize + CHUNK_SIZE-1) & ~(CHUNK_SIZE-1);
+		newbuf = realloc(*buf, newsize);
+		if (!newbuf)
+			return 1;
+		/* Publish the new buffer only after realloc() succeeded. */
+		*buf = newbuf;
+		*bufsize = newsize;
+	}
+	return 0;
+}
+
+const char *quote(const char *str)
+{
+	static char *quoted_str;
+	static size_t quoted_str_len;
+	const unsigned char *s;
+	char *q;
+	size_t nonpr;
+	/* Escape non-printable, space, backslash and '=' bytes as 3-digit octal. */
+	if (!str)
+		return str;
+	/* Pass 1: count bytes to escape; s is left on the terminating NUL. */
+	for (nonpr = 0, s = (unsigned char *)str; *s != '\0'; s++)
+		if (!isprint(*s) || isspace(*s) || *s == '\\' || *s == '=')
+			nonpr++;
+	if (nonpr == 0)
+		return str;
+	/* Room for every input byte, 3 extra bytes per escape, and the NUL. */
+	if (high_water_alloc((void **)&quoted_str, &quoted_str_len,
+			     (size_t)(s - (const unsigned char *)str) + nonpr * 3 + 1))
+		return NULL;
+	for (s = (unsigned char *)str, q = quoted_str; *s != '\0'; s++) {
+		if (!isprint(*s) || isspace(*s) || *s == '\\' || *s == '=') {
+			*q++ = '\\';
+			*q++ = '0' + ((*s >> 6)    );
+			*q++ = '0' + ((*s >> 3) & 7);
+			*q++ = '0' + ((*s     ) & 7);
+		} else
+			*q++ = *s;
+	}
+	*q++ = '\0';
+	/* Result lives in a static buffer reused by later calls that escape. */
+	return quoted_str;
+}
+
+char *unquote(char *str)
+{
+	unsigned char *s, *t;
+	/* Decode backslash + 3 octal digit escapes in place; returns str. */
+	if (!str)
+		return str;
+	/* Skip to the first backslash; bytes before it stay where they are. */
+	for (s = (unsigned char *)str; *s != '\0'; s++)
+		if (*s == '\\')
+			break;
+	if (*s == '\0')
+		return str;
+
+#define isoctal(c) \
+	((c) >= '0' && (c) <= '7')
+	/* s reads and t writes; t never gets ahead of s while compacting. */
+	t = s;
+	do {
+		if (*s == '\\' &&
+		    isoctal(*(s+1)) && isoctal(*(s+2)) && isoctal(*(s+3))) {
+			*t++ = ((*(s+1) - '0') << 6) +
+			       ((*(s+2) - '0') << 3) +
+			       ((*(s+3) - '0')     );
+			s += 3;
+		} else
+			*t++ = *s;
+	} while (*s++ != '\0');
+	/* The final iteration copied the terminating NUL as well. */
+	return str;
+}
+
+/* ------------- End Gruenbacher section ------------------------------- */
+
+/* Python binding for quote(): one string argument in, quoted string out */
+static PyObject *acl_quote(PyObject *self, PyObject *args)
+{
+  char *s;
+  /* NOTE(review): a NULL from quote() goes straight to Py_BuildValue. */
+  if (!PyArg_ParseTuple(args, "s", &s)) return NULL;
+  return Py_BuildValue("s", quote(s));
+}
+
+/* Python binding for unquote(): one string argument in, unquoted string out */
+static PyObject *acl_unquote(PyObject *self, PyObject *args)
+{
+  char *s;
+  /* NOTE(review): unquote() edits s in place -- confirm s may be modified. */
+  if (!PyArg_ParseTuple(args, "s", &s)) return NULL;
+  return Py_BuildValue("s", unquote(s));
+}
+
+
+/* ------------- Python export lists -------------------------------- */
+
 static PyMethodDef CMethods[] = {
   {"make_file_dict", c_make_file_dict, METH_VARARGS,
    "Make dictionary from file stat"},
   {"long2str", long2str, METH_VARARGS, "Convert python long to 7 byte string"},
   {"str2long", str2long, METH_VARARGS, "Convert 7 byte string to python long"},
   {"sync", my_sync, METH_VARARGS, "sync buffers to disk"},
+  {"acl_quote", acl_quote, METH_VARARGS,
+   "Quote string, escaping non-printables"},
+  {"acl_unquote", acl_unquote, METH_VARARGS,
+   "Unquote string, producing original input to quote"},
   {NULL, NULL, 0, NULL}
 };
 
@@ -266,4 +392,3 @@ void initC(void)
 											NULL, NULL);
   PyDict_SetItemString(d, "UnknownFileTypeError", UnknownFileTypeError);
 }
-
diff --git a/rdiff-backup/rdiff_backup/eas_acls.py b/rdiff-backup/rdiff_backup/eas_acls.py
index 4b4d169..e543597 100644
--- a/rdiff-backup/rdiff_backup/eas_acls.py
+++ b/rdiff-backup/rdiff_backup/eas_acls.py
@@ -30,8 +30,7 @@ from __future__ import generators
 import base64, errno, re
 try: import posix1e
 except ImportError: pass
-import static, Globals, metadata, connection, rorpiter, log
-
+import static, Globals, metadata, connection, rorpiter, log, C, rpath
 
 class ExtendedAttributes:
 	"""Hold a file's extended attribute information"""
@@ -104,12 +103,12 @@ def ea_compare_rps(rp1, rp2):
 
 def EA2Record(ea):
 	"""Convert ExtendedAttributes object to text record"""
-	str_list = ['# file: %s' % ea.get_indexpath()]
+	str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath())]
 	for (name, val) in ea.attr_dict.iteritems():
 		if not val: str_list.append(name)
 		else:
 			encoded_val = base64.encodestring(val).replace('\n', '')
-			str_list.append('%s=0s%s' % (name, encoded_val))
+			str_list.append('%s=0s%s' % (C.acl_quote(name), encoded_val))
 	return '\n'.join(str_list)+'\n'
 
 def Record2EA(record):
@@ -120,7 +119,7 @@ def Record2EA(record):
 		raise metadata.ParsingError("Bad record beginning: " + first[:8])
 	filename = first[8:]
 	if filename == '.': index = ()
-	else: index = tuple(filename.split('/'))
+	else: index = tuple(C.acl_unquote(filename).split('/'))
 	ea = ExtendedAttributes(index)
 
 	for line in lines:
@@ -137,27 +136,15 @@ def Record2EA(record):
 			ea.set(name, base64.decodestring(encoded_val))
 	return ea
 
-def quote_path(path):
-	"""Quote a path for use EA/ACL records.
-
-	Right now no quoting!!!  Change this to reflect the updated
-	quoting style of getfattr/setfattr when they are changed.
-
-	"""
-	return path
-
 
 class EAExtractor(metadata.FlatExtractor):
 	"""Iterate ExtendedAttributes objects from the EA information file"""
-	record_boundary_regexp = re.compile("\\n# file:")
+	record_boundary_regexp = re.compile('(?:\\n|^)(# file: (.*?))\\n')
 	record_to_object = staticmethod(Record2EA)
-	def get_index_re(self, index):
-		"""Find start of EA record with given index"""
-		if not index: indexpath = '.'
-		else: indexpath = '/'.join(index)
-		# Right now there is no quoting, due to a bug in
-		# getfacl/setfacl.  Replace later when bug fixed.
-		return re.compile('(^|\\n)(# file: %s\\n)' % indexpath)
+	def filename_to_index(self, filename):
+		"""Unquote filename with C.acl_unquote and split on '/'; '.' gives ()"""
+		if filename == '.': return ()
+		else: return tuple(C.acl_unquote(filename).split('/'))
 
 class ExtendedAttributesFile(metadata.FlatFile):
 	"""Store/retrieve EAs from extended_attributes file"""
@@ -171,7 +158,7 @@ class ExtendedAttributesFile(metadata.FlatFile):
 			"""Add EA information in ea_iter to rorp_iter"""
 			collated = rorpiter.CollateIterators(rorp_iter, ea_iter)
 			for rorp, ea in collated:
-				assert rorp, (rorp, (ea.index, ea.attr_dict), rest_time)
+				assert rorp, (rorp, (ea.index, ea.attr_dict), time)
 				if not ea: ea = ExtendedAttributes(rorp.index)
 				rorp.set_ea(ea)
 				yield rorp
@@ -311,7 +298,7 @@ def acl_compare_rps(rp1, rp2):
 
 def ACL2Record(acl):
 	"""Convert an AccessControlList object into a text record"""
-	start = "# file: %s\n%s" % (acl.get_indexpath(), acl.acl_text)
+	start = "# file: %s\n%s" % (C.acl_quote(acl.get_indexpath()), acl.acl_text)
 	if not acl.def_acl_text: return start
 	default_lines = acl.def_acl_text.strip().split('\n')
 	default_text = '\ndefault:'.join(default_lines)
@@ -325,7 +312,7 @@ def Record2ACL(record):
 		raise metadata.ParsingError("Bad record beginning: "+ first_line)
 	filename = first_line[8:]
 	if filename == '.': index = ()
-	else: index = tuple(filename.split('/'))
+	else: index = tuple(C.acl_unquote(filename).split('/'))
 
 	normal_entries = []; default_entries = []
 	for line in lines:
@@ -393,3 +380,25 @@ def GetCombinedMetadataIter(rbdir, time, restrict_index = None,
 			metadata_iter, rbdir, time, restrict_index)
 	return metadata_iter
 
+
+def rpath_acl_get(rp):
+	"""Return an AccessControlList object for rpath rp.
+
+	Assigned to rpath.acl_get just below, overriding the function of
+	that name in the rpath module.  Nothing is read if rp is a symlink.
+	"""
+	acl = AccessControlList(rp.index)
+	if not rp.issym(): acl.read_from_rp(rp)
+	return acl
+rpath.acl_get = rpath_acl_get
+
+def rpath_ea_get(rp):
+	"""Return an ExtendedAttributes object for rpath rp.
+
+	Assigned to rpath.ea_get just below, overriding the function of
+	that name in the rpath module.  Symlinks are not read (would be followed).
+	"""
+	ea = ExtendedAttributes(rp.index)
+	if not rp.issym(): ea.read_from_rp(rp)
+	return ea
+rpath.ea_get = rpath_ea_get
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index eec8e3e..2388cfa 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -122,9 +122,7 @@ def Record2RORP(record_string):
 	"""
 	data_dict = {}
 	for field, data in line_parsing_regexp.findall(record_string):
-		if field == "File":
-			if data == ".": index = ()
-			else: index = tuple(unquote_path(data).split("/"))
+		if field == "File": index = quoted_filename_to_index(data)
 		elif field == "Type":
 			if data == "None": data_dict['type'] = None
 			else: data_dict['type'] = data
@@ -174,12 +172,23 @@ def unquote_path(quoted_string):
 		return two_chars
 	return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string)
 
+def quoted_filename_to_index(quoted_filename):
+	"""Return index tuple for quoted filename; '.' gives the empty tuple"""
+	if quoted_filename == '.': return ()
+	else: return tuple(unquote_path(quoted_filename).split('/'))
 
 class FlatExtractor:
 	"""Controls iterating objects from flat file"""
-	# The following two should be set in subclasses
-	record_boundary_regexp = None # Matches beginning of next record
-	record_to_object = None # Function that converts text record to object
+
+	# Set this in subclass.  record_boundary_regexp should match
+	# beginning of next record.  The first group should start at the
+	# beginning of the record.  The second group should contain the
+	# (possibly quoted) filename.
+	record_boundary_regexp = None
+
+	# Set in subclass to function that converts text record to object
+	record_to_object = None
+
 	def __init__(self, fileobj):
 		self.fileobj = fileobj # holds file object we are reading from
 		self.buf = "" # holds the next part of the file
@@ -187,10 +196,10 @@ class FlatExtractor:
 		self.blocksize = 32 * 1024
 
 	def get_next_pos(self):
-		"""Return position of next record in buffer"""
+		"""Return position of next record in buffer, or end pos if none"""
 		while 1:
-			m = self.record_boundary_regexp.search(self.buf)
-			if m: return m.start(0)+1 # the +1 skips the newline
+			m = self.record_boundary_regexp.search(self.buf, 1)
+			if m: return m.start(1)
 			else: # add next block to the buffer, loop again
 				newbuf = self.fileobj.read(self.blocksize)
 				if not newbuf:
@@ -218,27 +227,20 @@ class FlatExtractor:
 
 		"""
 		assert not self.buf or self.buf.endswith("\n")
-		begin_re = self.get_index_re(index)
 		while 1:
-			m = begin_re.search(self.buf)
-			if m:
-				self.buf = self.buf[m.start(2):]
-				return
 			self.buf = self.fileobj.read(self.blocksize)
 			self.buf += self.fileobj.readline()
 			if not self.buf:
 				self.at_end = 1
 				return
-
-	def get_index_re(self, index):
-		"""Return regular expression used to find index.
-
-		Override this in sub classes.  The regular expression's second
-		group needs to start at the beginning of the record that
-		contains information about the object with the given index.
-
-		"""
-		assert 0, "Just a placeholder, must override this in subclasses"
+			while 1:
+				m = self.record_boundary_regexp.search(self.buf)
+				if not m: break
+				cur_index = self.filename_to_index(m.group(2))
+				if cur_index >= index:
+					self.buf = self.buf[m.start(1):]
+					return
+				else: self.buf = self.buf[m.end(1):]
 
 	def iterate_starting_with(self, index):
 		"""Iterate objects whose index starts with given index"""
@@ -256,24 +258,24 @@ class FlatExtractor:
 			self.buf = self.buf[next_pos:]
 		assert not self.close()
 
+	def filename_to_index(self, filename):
+		"""Translate filename, possibly quoted, into an index tuple
+
+		The filename is the second group matched by
+		record_boundary_regexp.
+
+		"""
+		assert 0 # subclass
+
 	def close(self):
 		"""Return value of closing associated file"""
 		return self.fileobj.close()
 
 class RorpExtractor(FlatExtractor):
 	"""Iterate rorps from metadata file"""
-	record_boundary_regexp = re.compile("\\nFile")
+	record_boundary_regexp = re.compile("(?:\\n|^)(File (.*?))\\n")
 	record_to_object = staticmethod(Record2RORP)
-	def get_index_re(self, index):
-		"""Find start of rorp record with given index"""
-		indexpath = index and '/'.join(index) or '.'
-		# Must double all backslashes, because they will be
-		# reinterpreted.  For instance, to search for index \n
-		# (newline), it will be \\n (backslash n) in the file, so the
-		# regular expression is "File \\\\n\\n" (File two backslash n
-		# backslash n)
-		double_quote = re.sub("\\\\", "\\\\\\\\", indexpath)
-		return re.compile("(^|\\n)(File %s\\n)" % (double_quote,))
+	filename_to_index = staticmethod(quoted_filename_to_index)
 
 
 class FlatFile:
@@ -339,7 +341,7 @@ class FlatFile:
 			else: compressed = cls._rp.get_indexpath().endswith('.gz')
 
 		fileobj = cls._rp.open('rb', compress = compressed)
-		if restrict_index is None: return cls._extractor(fileobj).iterate()
+		if not restrict_index: return cls._extractor(fileobj).iterate()
 		else:
 			re = cls._extractor(fileobj)
 			return re.iterate_starting_with(restrict_index)
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 43f14e3..c1641a5 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -1001,10 +1001,7 @@ class RPath(RORPath):
 	def get_acl(self):
 		"""Return access control list object, setting if necessary"""
 		try: acl = self.data['acl']
-		except KeyError:
-			acl = eas_acls.AccessControlList(self.index)
-			if not self.issym(): acl.read_from_rp(self)
-			self.data['acl'] = acl
+		except KeyError: acl = self.data['acl'] = acl_get(self)
 		return acl
 
 	def write_acl(self, acl):
@@ -1015,14 +1012,7 @@ class RPath(RORPath):
 	def get_ea(self):
 		"""Return extended attributes object, setting if necessary"""
 		try: ea = self.data['ea']
-		except KeyError:
-			ea = eas_acls.ExtendedAttributes(self.index)
-			if not self.issym():
-				# Don't read from symlinks because they will be
-				# followed.  Update this when llistxattr,
-				# etc. available
-				ea.read_from_rp(self)
-			self.data['ea'] = ea
+		except KeyError: ea = self.data['ea'] = ea_get(self)
 		return ea
 
 	def write_ea(self, ea):
@@ -1068,4 +1058,9 @@ class RPathFileHook:
 		self.closing_thunk()
 		return result
 
-import eas_acls # Put at end to avoid regress
+
+# These two are overwritten by the eas_acls.py module.  We can't
+# import that module directly because of circular dependency
+# problems.
+def acl_get(rp): assert 0
+def ea_get(rp): assert 0