From 6b72b10acde7061c1180a59d6eea3c0c20780683 Mon Sep 17 00:00:00 2001
From: ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>
Date: Mon, 17 Jun 2002 03:24:05 +0000
Subject: Many optimizations - up to 3x speed improvement

git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@134 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
---
 rdiff-backup/rdiff_backup/Globals.py              |   2 +-
 rdiff-backup/rdiff_backup/cmodule.c               | 139 ++++++++++++++++++++++
 rdiff-backup/rdiff_backup/connection.py           |  16 +--
 rdiff-backup/rdiff_backup/destructive_stepping.py |  11 +-
 rdiff-backup/rdiff_backup/iterfile.py             |  96 +++++++--------
 rdiff-backup/rdiff_backup/profiled_rdb.py         |   6 +-
 rdiff-backup/rdiff_backup/rpath.py                |   7 +-
 rdiff-backup/rdiff_backup/selection.py            |  50 +++++++-
 rdiff-backup/src/Globals.py                       |   2 +-
 rdiff-backup/src/cmodule.c                        | 139 ++++++++++++++++++++++
 rdiff-backup/src/connection.py                    |  16 +--
 rdiff-backup/src/destructive_stepping.py          |  11 +-
 rdiff-backup/src/iterfile.py                      |  96 +++++++--------
 rdiff-backup/src/profiled_rdb.py                  |   6 +-
 rdiff-backup/src/rpath.py                         |   7 +-
 rdiff-backup/src/selection.py                     |  50 +++++++-
 16 files changed, 512 insertions(+), 142 deletions(-)
 create mode 100644 rdiff-backup/rdiff_backup/cmodule.c
 create mode 100644 rdiff-backup/src/cmodule.c

(limited to 'rdiff-backup')

diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py
index ca6e8d1..f3e4474 100644
--- a/rdiff-backup/rdiff_backup/Globals.py
+++ b/rdiff-backup/rdiff_backup/Globals.py
@@ -1,7 +1,7 @@
 import re, os
 
 # The current version of rdiff-backup
-version = "0.8.0"
+version = "0.9.0"
 
 # If this is set, use this value in seconds as the current time
 # instead of reading it from the clock.
diff --git a/rdiff-backup/rdiff_backup/cmodule.c b/rdiff-backup/rdiff_backup/cmodule.c
new file mode 100644
index 0000000..ea10415
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/cmodule.c
@@ -0,0 +1,139 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <Python.h>
+#include <errno.h>
+
+static PyObject *c_make_file_dict(self, args)
+	 PyObject *self;
+	 PyObject *args;
+{
+  /* Return a dict describing filename as seen by lstat().  A missing
+     path gives {"type": None}; any other failure raises OSError. */
+  char *filename, filetype[5];
+  struct stat sbuf;
+  mode_t mode;
+
+  if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
+  if (lstat(filename, &sbuf) != 0) {
+	if (errno == ENOENT || errno == ENOTDIR)
+	  return Py_BuildValue("{s:s}", "type", NULL);
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+  }
+  mode = sbuf.st_mode;
+
+  /* Build return dictionary; perms keeps every permission bit (07777) */
+  if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
+	/* Regular files, directories, sockets, and fifos */
+	if (S_ISREG(mode)) strcpy(filetype, "reg");
+	else if (S_ISDIR(mode)) strcpy(filetype, "dir");
+	else if (S_ISSOCK(mode)) strcpy(filetype, "sock");
+	else strcpy(filetype, "fifo");
+	return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}",
+						 "type", filetype,
+						 "size", (long int)sbuf.st_size,
+						 "perms", (int)(mode & 07777),
+						 "uid", (int)sbuf.st_uid,
+						 "gid", (int)sbuf.st_gid,
+						 "inode", (long int)sbuf.st_ino,
+						 "devloc", (int)sbuf.st_dev,
+						 "nlink", (int)sbuf.st_nlink,
+						 "mtime", (long int)sbuf.st_mtime,
+						 "atime", (long int)sbuf.st_atime);
+  } else if (S_ISLNK(mode)) {
+	/* Symbolic links */
+	char linkname[1024];
+	int len_link = readlink(filename, linkname, 1023);
+	if (len_link < 0) {
+	  PyErr_SetFromErrno(PyExc_OSError);
+	  return NULL;
+	}
+
+	linkname[len_link] = '\0';
+	return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}",
+						 "type", "sym",
+						 "size", (long int)sbuf.st_size,
+						 "perms", (int)(mode & 07777),
+						 "uid", (int)sbuf.st_uid,
+						 "gid", (int)sbuf.st_gid,
+						 "inode", (long int)sbuf.st_ino,
+						 "devloc", (int)sbuf.st_dev,
+						 "nlink", (int)sbuf.st_nlink,
+						 "linkname", linkname);
+  } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+	/* Device files.  As for symlinks no mtime/atime is stored: the old
+	   format string never consumed those arguments.  "N" (not "O")
+	   hands the tuple's only reference over to the dictionary. */
+	char devtype[2];
+	int devnums = (int)sbuf.st_rdev;
+	if (S_ISCHR(mode)) strcpy(devtype, "c");
+	else strcpy(devtype, "b");
+	return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:N}",
+						 "type", "dev",
+						 "size", (long int)sbuf.st_size,
+						 "perms", (int)(mode & 07777),
+						 "uid", (int)sbuf.st_uid,
+						 "gid", (int)sbuf.st_gid,
+						 "inode", (long int)sbuf.st_ino,
+						 "devloc", (int)sbuf.st_dev,
+						 "nlink", (int)sbuf.st_nlink,
+						 "devnums", Py_BuildValue("(s,i,i)", devtype,
+												  devnums >> 8,
+												  devnums & 0xff));
+  } else {
+	/* Unrecognized file type - pretend it isn't there */
+	errno = ENOENT;
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+  }
+}
+
+static PyObject *long2str(self, args)
+	 PyObject *self;
+	 PyObject *args;
+{
+  /* Encode a Python long as 7 bytes, least significant byte first */
+  unsigned char s[7];
+  int sindex;
+  unsigned long long int l;
+  PyObject *pylong;
+  if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL;
+  l = PyLong_AsUnsignedLongLong(pylong);
+  if (PyErr_Occurred()) return NULL;  /* conversion failed, e.g. not a long */
+  for(sindex = 0; sindex <= 6; sindex++) {
+	s[sindex] = l % 256;
+	l /= 256;
+  }
+  return Py_BuildValue("s#", s, 7);
+}
+
+static PyObject *str2long(self, args)
+	 PyObject *self;
+	 PyObject *args;
+{
+  /* Decode a 7 byte string (least significant byte first) to a long */
+  unsigned char *s;
+  unsigned long long int l = 0;
+  int sindex, ssize;
+  if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
+  if (ssize != 7)  /* raise rather than hand back a bogus -1 length */
+	return PyErr_Format(PyExc_ValueError, "need 7 byte string, got %d", ssize);
+  for(sindex=6; sindex >= 0; sindex--) l = l*256 + s[sindex];
+  return PyLong_FromUnsignedLongLong(l);
+}
+
+static PyMethodDef CMethods[] = {
+  {"make_file_dict", c_make_file_dict, METH_VARARGS,
+   "Make dictionary from file stat"},
+  {"long2str", long2str, METH_VARARGS,
+   "Convert long int to 7 byte string"},
+  {"str2long", str2long, METH_VARARGS,
+   "Convert 7 byte string to long int"},
+  {NULL, NULL, 0, NULL}
+};
+/* Module init function: registers CMethods as extension module "C" */
+void initC(void)
+{
+  (void) Py_InitModule("C", CMethods);
+}
+
diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py
index 74d413d..c557dd5 100644
--- a/rdiff-backup/rdiff_backup/connection.py
+++ b/rdiff-backup/rdiff_backup/connection.py
@@ -1,5 +1,5 @@
 from __future__ import generators
-import types, os, tempfile, cPickle, shutil, traceback
+import types, os, tempfile, cPickle, shutil, traceback, pickle
 
 #######################################################################
 #
@@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection):
 
 	def _putobj(self, obj, req_num):
 		"""Send a generic python obj down the outpipe"""
-		self._write("o", cPickle.dumps(obj, 1), req_num)
+		# for some reason there is an error when cPickle is used below..
+		self._write("o", pickle.dumps(obj, 1), req_num)
 
 	def _putbuf(self, buf, req_num):
 		"""Send buffer buf down the outpipe"""
@@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection):
 
 	def _write(self, headerchar, data, req_num):
 		"""Write header and then data to the pipe"""
-		self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
+		self.outpipe.write(headerchar + chr(req_num) +
+						   C.long2str(long(len(data))))
 		self.outpipe.write(data)
 		self.outpipe.flush()
 
@@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection):
 		"""Read length bytes from inpipe, returning result"""
 		return self.inpipe.read(length)
 
-	def _s2l(self, s):
+	def _s2l_old(self, s):
 		"""Convert string to long int"""
 		assert len(s) == 7
 		l = 0L
 		for i in range(7): l = l*256 + ord(s[i])
 		return l
 
-	def _l2s(self, l):
+	def _l2s_old(self, l):
 		"""Convert long int to string"""
 		s = ""
 		for i in range(7):
@@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection):
 		try:
 			format_string, req_num, length = (header_string[0],
 											  ord(header_string[1]),
-											  self._s2l(header_string[2:]))
+											  C.str2long(header_string[2:]))
 		except IndexError: raise ConnectionError()
 		if format_string == "q": raise ConnectionQuit("Received quit signal")
 
@@ -490,7 +492,7 @@ class VirtualFile:
 
 # everything has to be available here for remote connection's use, but
 # put at bottom to reduce circularities.
-import Globals, Time, Rdiff, Hardlink, FilenameMapping
+import Globals, Time, Rdiff, Hardlink, FilenameMapping, C
 from static import *
 from lazy import *
 from log import *
diff --git a/rdiff-backup/rdiff_backup/destructive_stepping.py b/rdiff-backup/rdiff_backup/destructive_stepping.py
index a64ecbc..d0a4998 100644
--- a/rdiff-backup/rdiff_backup/destructive_stepping.py
+++ b/rdiff-backup/rdiff_backup/destructive_stepping.py
@@ -31,7 +31,7 @@ class DSRPath(RPath):
 	newmtime - holds the new mtime
 
 	"""
-	def __init__(self, source, *args):
+	def __init__(self, source, conn_or_rp, base = 0, index = ()):
 		"""Initialize DSRP
 
 		Source should be true iff the DSRPath is taken from the
@@ -42,10 +42,11 @@ class DSRPath(RPath):
 		otherwise use the same arguments as the RPath initializer.
 
 		"""
-		if len(args) == 1 and isinstance(args[0], RPath):
-			rp = args[0]
-			RPath.__init__(self, rp.conn, rp.base, rp.index)
-		else: RPath.__init__(self, *args)
+		if base == 0:
+			assert isinstance(conn_or_rp, RPath)
+			RPath.__init__(self, conn_or_rp.conn,
+						   conn_or_rp.base, conn_or_rp.index)
+		else: RPath.__init__(self, conn_or_rp, base, index)
 
 		if source != "bypass":
 			# "bypass" val is used when unpackaging over connection
diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py
index 26cc952..4211441 100644
--- a/rdiff-backup/rdiff_backup/iterfile.py
+++ b/rdiff-backup/rdiff_backup/iterfile.py
@@ -1,5 +1,5 @@
-import cPickle
-import Globals
+import cPickle, array
+import Globals, C
 
 #######################################################################
 #
@@ -13,7 +13,7 @@ class UnwrapFile:
 	def __init__(self, file):
 		self.file = file
 
-	def _s2l(self, s):
+	def _s2l_old(self, s):
 		"""Convert string to long int"""
 		assert len(s) == 7
 		l = 0L
@@ -31,8 +31,9 @@ class UnwrapFile:
 		"""
 		header = self.file.read(8)
 		if not header: return None, None
-		assert len(header) == 8, "Header is only %d bytes" % len(header)
-		type, length = header[0], self._s2l(header[1:])
+		if len(header) != 8:
+			assert None, "Header %s is only %d bytes" % (header, len(header))
+		type, length = header[0], C.str2long(header[1:])
 		buf = self.file.read(length)
 		if type == "o": return type, cPickle.loads(buf)
 		else: return type, buf
@@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile):
 		"""
 		UnwrapFile.__init__(self, iwf.file)
 		self.iwf = iwf
-		self.bufferlist = [initial_data]
-		self.bufferlen = len(initial_data)
+		self.buffer = initial_data
 		self.closed = None
 
-	def check_consistency(self):
-		l = len("".join(self.bufferlist))
-		assert l == self.bufferlen, \
-			   "Length of IVF bufferlist doesn't match (%s, %s)" % \
-			   (l, self.bufferlen)
-
 	def read(self, length):
+		"""Read length bytes from the file, updating buffers as necessary"""
 		assert not self.closed
 		if self.iwf.currently_in_file:
-			while length >= self.bufferlen:
+			while length >= len(self.buffer):
 				if not self.addtobuffer(): break
 
-		real_len = min(length, self.bufferlen)
-		combined_buffer = "".join(self.bufferlist)
-		assert len(combined_buffer) == self.bufferlen, \
-			   (len(combined_buffer), self.bufferlen)
-		self.bufferlist = [combined_buffer[real_len:]]
-		self.bufferlen = self.bufferlen - real_len
-		return combined_buffer[:real_len]
+		real_len = min(length, len(self.buffer))
+		return_val = self.buffer[:real_len]
+		self.buffer = self.buffer[real_len:]
+		return return_val
 			
 	def addtobuffer(self):
 		"""Read a chunk from the file and add it to the buffer"""
@@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile):
 		type, data = self._get()
 		assert type == "c", "Type is %s instead of c" % type
 		if data:
-			self.bufferlen = self.bufferlen + len(data)
-			self.bufferlist.append(data)
+			self.buffer += data
 			return 1
 		else:
 			self.iwf.currently_in_file = None
@@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile):
 		"""Currently just reads whats left and discards it"""
 		while self.iwf.currently_in_file:
 			self.addtobuffer()
-			self.bufferlist = []
-			self.bufferlen = 0
+			self.buffer = ""
 		self.closed = 1
 
 
@@ -145,45 +135,43 @@ class FileWrappingIter:
 	def __init__(self, iter):
 		"""Initialize with iter"""
 		self.iter = iter
-		self.bufferlist = []
-		self.bufferlen = 0L
+		self.array_buf = array.array('c')
 		self.currently_in_file = None
 		self.closed = None
 
 	def read(self, length):
 		"""Return next length bytes in file"""
 		assert not self.closed
-		while self.bufferlen < length:
+		while len(self.array_buf) < length:
 			if not self.addtobuffer(): break
 
-		combined_buffer = "".join(self.bufferlist)
-		assert len(combined_buffer) == self.bufferlen
-		real_len = min(self.bufferlen, length)
-		self.bufferlen = self.bufferlen - real_len
-		self.bufferlist = [combined_buffer[real_len:]]
-		return combined_buffer[:real_len]
+		result = self.array_buf[:length].tostring()
+		del self.array_buf[:length]
+		return result
 
 	def addtobuffer(self):
-		"""Updates self.bufferlist and self.bufferlen, adding on a chunk
+		"""Updates self.array_buf, adding a chunk from the iterator.
 
 		Returns None if we have reached the end of the iterator,
 		otherwise return true.
 
 		"""
+		array_buf = self.array_buf
 		if self.currently_in_file:
-			buf = "c" + self.addfromfile()
+			array_buf.fromstring("c")
+			array_buf.fromstring(self.addfromfile())
 		else:
 			try: currentobj = self.iter.next()
 			except StopIteration: return None
 			if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
 				self.currently_in_file = currentobj
-				buf = "f" + self.addfromfile()
+				array_buf.fromstring("f")
+				array_buf.fromstring(self.addfromfile())
 			else:
 				pickle = cPickle.dumps(currentobj, 1)
-				buf = "o" + self._l2s(len(pickle)) + pickle
-				
-		self.bufferlist.append(buf)
-		self.bufferlen = self.bufferlen + len(buf)
+				array_buf.fromstring("o")
+				array_buf.fromstring(C.long2str(long(len(pickle))))
+				array_buf.fromstring(pickle)
 		return 1
 
 	def addfromfile(self):
@@ -192,9 +180,9 @@ class FileWrappingIter:
 		if not buf:
 			assert not self.currently_in_file.close()
 			self.currently_in_file = None
-		return self._l2s(len(buf)) + buf
+		return C.long2str(long(len(buf))) + buf
 
-	def _l2s(self, l):
+	def _l2s_old(self, l):
 		"""Convert long int to string of 7 characters"""
 		s = ""
 		for i in range(7):
@@ -210,26 +198,28 @@ class BufferedRead:
 	"""Buffer the .read() calls to the given file
 
 	This is used to lessen overhead and latency when a file is sent
-	over a connection.
+	over a connection.  Profiling said that arrays were faster than
+	strings here.
 
 	"""
 	def __init__(self, file):
 		self.file = file
-		self.buffer = ""
+		self.array_buf = array.array('c')
 		self.bufsize = Globals.conn_bufsize
 
 	def read(self, l = -1):
+		array_buf = self.array_buf
 		if l < 0: # Read as much as possible
-			result = self.buffer + self.file.read()
-			self.buffer = ""
+			result = array_buf.tostring() + self.file.read()
+			del array_buf[:]
 			return result
 
-		if len(self.buffer) < l: # Try to make buffer as long as l
-			self.buffer += self.file.read(max(self.bufsize,
-											  l - len(self.buffer)))
-		actual_size = min(l, len(self.buffer))
-		result = self.buffer[:actual_size]
-		self.buffer = self.buffer[actual_size:]
+		if len(array_buf) < l: # Try to make buffer at least as long as l
+			array_buf.fromstring(self.file.read(max(self.bufsize, l)))
+		result = array_buf[:l].tostring()
+		del array_buf[:l]
 		return result
 
 	def close(self): return self.file.close()
+
+from log import *
diff --git a/rdiff-backup/rdiff_backup/profiled_rdb.py b/rdiff-backup/rdiff_backup/profiled_rdb.py
index a2f30ea..7412847 100755
--- a/rdiff-backup/rdiff_backup/profiled_rdb.py
+++ b/rdiff-backup/rdiff_backup/profiled_rdb.py
@@ -8,9 +8,9 @@ statistics afterwards.
 """
 
 __no_execute__ = 1
-execfile("main.py")
-import profile, pstats
-profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output")
+import sys, rdiff_backup.Main, profile, pstats
+profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
+			"profile-output")
 p = pstats.Stats("profile-output")
 p.sort_stats('time')
 p.print_stats(40)
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 73910be..b6d9b70 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -217,6 +217,7 @@ class RORPath(RPathStatic):
 				(not Globals.change_ownership or self.issym())):
 				# Don't compare gid/uid for symlinks or if not change_ownership
 				pass
+			elif key == 'atime' and not Globals.preserve_atime: pass
 			elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
 			elif (not other.data.has_key(key) or
 				  self.data[key] != other.data[key]): return None
@@ -425,7 +426,7 @@ class RPath(RORPath):
 		if base is not None: self.path = "/".join((base,) + index)
 		self.file = None
 		if data or base is None: self.data = data
-		else: self.setdata()
+		else: self.data = self.conn.C.make_file_dict(self.path)
 
 	def __str__(self):
 		return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
@@ -448,6 +449,10 @@ class RPath(RORPath):
 		self.path = "/".join((self.base,) + self.index)
 
 	def setdata(self):
+		"""Set data dictionary using C extension"""
+		self.data = self.conn.C.make_file_dict(self.path)
+
+	def setdata_old(self):
 		"""Create the data dictionary"""
 		statblock = self.conn.RPathStatic.tupled_lstat(self.path)
 		if statblock is None:
diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py
index 4fee9ee..05436c1 100644
--- a/rdiff-backup/rdiff_backup/selection.py
+++ b/rdiff-backup/rdiff_backup/selection.py
@@ -94,7 +94,9 @@ class Select:
 			self.starting_index = starting_index
 			self.iter = self.iterate_starting_from(self.dsrpath,
 						            self.iterate_starting_from, sel_func)
-		else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+		elif self.quoting_on:
+			self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+		else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
 
 		# only iterate parents if we are not starting from beginning
 		self.iterate_parents = starting_index is not None and iterate_parents
@@ -102,6 +104,52 @@ class Select:
 		self.__iter__ = lambda: self
 		return self
 
+	def Iterate_fast(self, dsrpath, sel_func):
+		"""Like Iterate, but with half the recursion depth, saving time
+
+		Harder to read than Iterate/iterate_in_dir, and it doesn't handle
+		the quoting case.  For a directory sel_func returns 0 (skip it),
+		1 (yield it, then scan it) or 2 (scan it, yielding the directory
+		itself only once something inside it is yielded).
+		"""
+		def diryield(dsrpath):
+			# The handler lives here so it names the directory being listed
+			def error_handler(exc, filename):
+				Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
+				return None
+			s = sel_func(dsrpath)
+			if s == 0: return
+			elif s == 1:
+				yield dsrpath
+				for filename in Robust.listrp(dsrpath):
+					new_dsrp = Robust.check_common_error(error_handler,
+											  dsrpath.append, [filename])
+					if new_dsrp:
+						if new_dsrp.isdir():
+							for dsrp in diryield(new_dsrp): yield dsrp
+						elif sel_func(new_dsrp) == 1: yield new_dsrp
+			elif s == 2:
+				yielded_something = None
+				for filename in Robust.listrp(dsrpath):
+					new_dsrp = Robust.check_common_error(error_handler,
+											  dsrpath.append, [filename])
+					if new_dsrp:
+						if new_dsrp.isdir():
+							for dsrp in diryield(new_dsrp):
+								if not yielded_something:
+									yielded_something = 1
+									yield dsrpath
+								yield dsrp
+						elif sel_func(new_dsrp) == 1:
+							if not yielded_something:
+								yielded_something = 1
+								yield dsrpath
+							yield new_dsrp
+
+		if dsrpath.isdir():
+			for dsrp in diryield(dsrpath): yield dsrp
+		elif sel_func(dsrpath) == 1: yield dsrpath
+
 	def Iterate(self, dsrpath, rec_func, sel_func):
 		"""Return iterator yielding dsrps in dsrpath
 
diff --git a/rdiff-backup/src/Globals.py b/rdiff-backup/src/Globals.py
index ca6e8d1..f3e4474 100644
--- a/rdiff-backup/src/Globals.py
+++ b/rdiff-backup/src/Globals.py
@@ -1,7 +1,7 @@
 import re, os
 
 # The current version of rdiff-backup
-version = "0.8.0"
+version = "0.9.0"
 
 # If this is set, use this value in seconds as the current time
 # instead of reading it from the clock.
diff --git a/rdiff-backup/src/cmodule.c b/rdiff-backup/src/cmodule.c
new file mode 100644
index 0000000..ea10415
--- /dev/null
+++ b/rdiff-backup/src/cmodule.c
@@ -0,0 +1,139 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <Python.h>
+#include <errno.h>
+
+static PyObject *c_make_file_dict(self, args)
+	 PyObject *self;
+	 PyObject *args;
+{
+  /* Return a dict describing filename as seen by lstat().  A missing
+     path gives {"type": None}; any other failure raises OSError. */
+  char *filename, filetype[5];
+  struct stat sbuf;
+  mode_t mode;
+
+  if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
+  if (lstat(filename, &sbuf) != 0) {
+	if (errno == ENOENT || errno == ENOTDIR)
+	  return Py_BuildValue("{s:s}", "type", NULL);
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+  }
+  mode = sbuf.st_mode;
+
+  /* Build return dictionary; perms keeps every permission bit (07777) */
+  if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
+	/* Regular files, directories, sockets, and fifos */
+	if (S_ISREG(mode)) strcpy(filetype, "reg");
+	else if (S_ISDIR(mode)) strcpy(filetype, "dir");
+	else if (S_ISSOCK(mode)) strcpy(filetype, "sock");
+	else strcpy(filetype, "fifo");
+	return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}",
+						 "type", filetype,
+						 "size", (long int)sbuf.st_size,
+						 "perms", (int)(mode & 07777),
+						 "uid", (int)sbuf.st_uid,
+						 "gid", (int)sbuf.st_gid,
+						 "inode", (long int)sbuf.st_ino,
+						 "devloc", (int)sbuf.st_dev,
+						 "nlink", (int)sbuf.st_nlink,
+						 "mtime", (long int)sbuf.st_mtime,
+						 "atime", (long int)sbuf.st_atime);
+  } else if (S_ISLNK(mode)) {
+	/* Symbolic links */
+	char linkname[1024];
+	int len_link = readlink(filename, linkname, 1023);
+	if (len_link < 0) {
+	  PyErr_SetFromErrno(PyExc_OSError);
+	  return NULL;
+	}
+
+	linkname[len_link] = '\0';
+	return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}",
+						 "type", "sym",
+						 "size", (long int)sbuf.st_size,
+						 "perms", (int)(mode & 07777),
+						 "uid", (int)sbuf.st_uid,
+						 "gid", (int)sbuf.st_gid,
+						 "inode", (long int)sbuf.st_ino,
+						 "devloc", (int)sbuf.st_dev,
+						 "nlink", (int)sbuf.st_nlink,
+						 "linkname", linkname);
+  } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+	/* Device files.  As for symlinks no mtime/atime is stored: the old
+	   format string never consumed those arguments.  "N" (not "O")
+	   hands the tuple's only reference over to the dictionary. */
+	char devtype[2];
+	int devnums = (int)sbuf.st_rdev;
+	if (S_ISCHR(mode)) strcpy(devtype, "c");
+	else strcpy(devtype, "b");
+	return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:N}",
+						 "type", "dev",
+						 "size", (long int)sbuf.st_size,
+						 "perms", (int)(mode & 07777),
+						 "uid", (int)sbuf.st_uid,
+						 "gid", (int)sbuf.st_gid,
+						 "inode", (long int)sbuf.st_ino,
+						 "devloc", (int)sbuf.st_dev,
+						 "nlink", (int)sbuf.st_nlink,
+						 "devnums", Py_BuildValue("(s,i,i)", devtype,
+												  devnums >> 8,
+												  devnums & 0xff));
+  } else {
+	/* Unrecognized file type - pretend it isn't there */
+	errno = ENOENT;
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+  }
+}
+
+static PyObject *long2str(self, args)
+	 PyObject *self;
+	 PyObject *args;
+{
+  /* Encode a Python long as 7 bytes, least significant byte first */
+  unsigned char s[7];
+  int sindex;
+  unsigned long long int l;
+  PyObject *pylong;
+  if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL;
+  l = PyLong_AsUnsignedLongLong(pylong);
+  if (PyErr_Occurred()) return NULL;  /* conversion failed, e.g. not a long */
+  for(sindex = 0; sindex <= 6; sindex++) {
+	s[sindex] = l % 256;
+	l /= 256;
+  }
+  return Py_BuildValue("s#", s, 7);
+}
+
+static PyObject *str2long(self, args)
+	 PyObject *self;
+	 PyObject *args;
+{
+  /* Decode a 7 byte string (least significant byte first) to a long */
+  unsigned char *s;
+  unsigned long long int l = 0;
+  int sindex, ssize;
+  if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
+  if (ssize != 7)  /* raise rather than hand back a bogus -1 length */
+	return PyErr_Format(PyExc_ValueError, "need 7 byte string, got %d", ssize);
+  for(sindex=6; sindex >= 0; sindex--) l = l*256 + s[sindex];
+  return PyLong_FromUnsignedLongLong(l);
+}
+
+static PyMethodDef CMethods[] = {
+  {"make_file_dict", c_make_file_dict, METH_VARARGS,
+   "Make dictionary from file stat"},
+  {"long2str", long2str, METH_VARARGS,
+   "Convert long int to 7 byte string"},
+  {"str2long", str2long, METH_VARARGS,
+   "Convert 7 byte string to long int"},
+  {NULL, NULL, 0, NULL}
+};
+/* Module init function: registers CMethods as extension module "C" */
+void initC(void)
+{
+  (void) Py_InitModule("C", CMethods);
+}
+
diff --git a/rdiff-backup/src/connection.py b/rdiff-backup/src/connection.py
index 74d413d..c557dd5 100644
--- a/rdiff-backup/src/connection.py
+++ b/rdiff-backup/src/connection.py
@@ -1,5 +1,5 @@
 from __future__ import generators
-import types, os, tempfile, cPickle, shutil, traceback
+import types, os, tempfile, cPickle, shutil, traceback, pickle
 
 #######################################################################
 #
@@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection):
 
 	def _putobj(self, obj, req_num):
 		"""Send a generic python obj down the outpipe"""
-		self._write("o", cPickle.dumps(obj, 1), req_num)
+		# for some reason there is an error when cPickle is used below..
+		self._write("o", pickle.dumps(obj, 1), req_num)
 
 	def _putbuf(self, buf, req_num):
 		"""Send buffer buf down the outpipe"""
@@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection):
 
 	def _write(self, headerchar, data, req_num):
 		"""Write header and then data to the pipe"""
-		self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
+		self.outpipe.write(headerchar + chr(req_num) +
+						   C.long2str(long(len(data))))
 		self.outpipe.write(data)
 		self.outpipe.flush()
 
@@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection):
 		"""Read length bytes from inpipe, returning result"""
 		return self.inpipe.read(length)
 
-	def _s2l(self, s):
+	def _s2l_old(self, s):
 		"""Convert string to long int"""
 		assert len(s) == 7
 		l = 0L
 		for i in range(7): l = l*256 + ord(s[i])
 		return l
 
-	def _l2s(self, l):
+	def _l2s_old(self, l):
 		"""Convert long int to string"""
 		s = ""
 		for i in range(7):
@@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection):
 		try:
 			format_string, req_num, length = (header_string[0],
 											  ord(header_string[1]),
-											  self._s2l(header_string[2:]))
+											  C.str2long(header_string[2:]))
 		except IndexError: raise ConnectionError()
 		if format_string == "q": raise ConnectionQuit("Received quit signal")
 
@@ -490,7 +492,7 @@ class VirtualFile:
 
 # everything has to be available here for remote connection's use, but
 # put at bottom to reduce circularities.
-import Globals, Time, Rdiff, Hardlink, FilenameMapping
+import Globals, Time, Rdiff, Hardlink, FilenameMapping, C
 from static import *
 from lazy import *
 from log import *
diff --git a/rdiff-backup/src/destructive_stepping.py b/rdiff-backup/src/destructive_stepping.py
index a64ecbc..d0a4998 100644
--- a/rdiff-backup/src/destructive_stepping.py
+++ b/rdiff-backup/src/destructive_stepping.py
@@ -31,7 +31,7 @@ class DSRPath(RPath):
 	newmtime - holds the new mtime
 
 	"""
-	def __init__(self, source, *args):
+	def __init__(self, source, conn_or_rp, base = 0, index = ()):
 		"""Initialize DSRP
 
 		Source should be true iff the DSRPath is taken from the
@@ -42,10 +42,11 @@ class DSRPath(RPath):
 		otherwise use the same arguments as the RPath initializer.
 
 		"""
-		if len(args) == 1 and isinstance(args[0], RPath):
-			rp = args[0]
-			RPath.__init__(self, rp.conn, rp.base, rp.index)
-		else: RPath.__init__(self, *args)
+		if base == 0:
+			assert isinstance(conn_or_rp, RPath)
+			RPath.__init__(self, conn_or_rp.conn,
+						   conn_or_rp.base, conn_or_rp.index)
+		else: RPath.__init__(self, conn_or_rp, base, index)
 
 		if source != "bypass":
 			# "bypass" val is used when unpackaging over connection
diff --git a/rdiff-backup/src/iterfile.py b/rdiff-backup/src/iterfile.py
index 26cc952..4211441 100644
--- a/rdiff-backup/src/iterfile.py
+++ b/rdiff-backup/src/iterfile.py
@@ -1,5 +1,5 @@
-import cPickle
-import Globals
+import cPickle, array
+import Globals, C
 
 #######################################################################
 #
@@ -13,7 +13,7 @@ class UnwrapFile:
 	def __init__(self, file):
 		self.file = file
 
-	def _s2l(self, s):
+	def _s2l_old(self, s):
 		"""Convert string to long int"""
 		assert len(s) == 7
 		l = 0L
@@ -31,8 +31,9 @@ class UnwrapFile:
 		"""
 		header = self.file.read(8)
 		if not header: return None, None
-		assert len(header) == 8, "Header is only %d bytes" % len(header)
-		type, length = header[0], self._s2l(header[1:])
+		if len(header) != 8:
+			assert None, "Header %s is only %d bytes" % (header, len(header))
+		type, length = header[0], C.str2long(header[1:])
 		buf = self.file.read(length)
 		if type == "o": return type, cPickle.loads(buf)
 		else: return type, buf
@@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile):
 		"""
 		UnwrapFile.__init__(self, iwf.file)
 		self.iwf = iwf
-		self.bufferlist = [initial_data]
-		self.bufferlen = len(initial_data)
+		self.buffer = initial_data
 		self.closed = None
 
-	def check_consistency(self):
-		l = len("".join(self.bufferlist))
-		assert l == self.bufferlen, \
-			   "Length of IVF bufferlist doesn't match (%s, %s)" % \
-			   (l, self.bufferlen)
-
 	def read(self, length):
+		"""Read length bytes from the file, updating buffers as necessary"""
 		assert not self.closed
 		if self.iwf.currently_in_file:
-			while length >= self.bufferlen:
+			while length >= len(self.buffer):
 				if not self.addtobuffer(): break
 
-		real_len = min(length, self.bufferlen)
-		combined_buffer = "".join(self.bufferlist)
-		assert len(combined_buffer) == self.bufferlen, \
-			   (len(combined_buffer), self.bufferlen)
-		self.bufferlist = [combined_buffer[real_len:]]
-		self.bufferlen = self.bufferlen - real_len
-		return combined_buffer[:real_len]
+		real_len = min(length, len(self.buffer))
+		return_val = self.buffer[:real_len]
+		self.buffer = self.buffer[real_len:]
+		return return_val
 			
 	def addtobuffer(self):
 		"""Read a chunk from the file and add it to the buffer"""
@@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile):
 		type, data = self._get()
 		assert type == "c", "Type is %s instead of c" % type
 		if data:
-			self.bufferlen = self.bufferlen + len(data)
-			self.bufferlist.append(data)
+			self.buffer += data
 			return 1
 		else:
 			self.iwf.currently_in_file = None
@@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile):
 		"""Currently just reads whats left and discards it"""
 		while self.iwf.currently_in_file:
 			self.addtobuffer()
-			self.bufferlist = []
-			self.bufferlen = 0
+			self.buffer = ""
 		self.closed = 1
 
 
@@ -145,45 +135,43 @@ class FileWrappingIter:
 	def __init__(self, iter):
 		"""Initialize with iter"""
 		self.iter = iter
-		self.bufferlist = []
-		self.bufferlen = 0L
+		self.array_buf = array.array('c')
 		self.currently_in_file = None
 		self.closed = None
 
 	def read(self, length):
 		"""Return next length bytes in file"""
 		assert not self.closed
-		while self.bufferlen < length:
+		while len(self.array_buf) < length:
 			if not self.addtobuffer(): break
 
-		combined_buffer = "".join(self.bufferlist)
-		assert len(combined_buffer) == self.bufferlen
-		real_len = min(self.bufferlen, length)
-		self.bufferlen = self.bufferlen - real_len
-		self.bufferlist = [combined_buffer[real_len:]]
-		return combined_buffer[:real_len]
+		result = self.array_buf[:length].tostring()
+		del self.array_buf[:length]
+		return result
 
 	def addtobuffer(self):
-		"""Updates self.bufferlist and self.bufferlen, adding on a chunk
+		"""Updates self.array_buf, adding a chunk from the iterator.
 
 		Returns None if we have reached the end of the iterator,
 		otherwise return true.
 
 		"""
+		array_buf = self.array_buf
 		if self.currently_in_file:
-			buf = "c" + self.addfromfile()
+			array_buf.fromstring("c")
+			array_buf.fromstring(self.addfromfile())
 		else:
 			try: currentobj = self.iter.next()
 			except StopIteration: return None
 			if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
 				self.currently_in_file = currentobj
-				buf = "f" + self.addfromfile()
+				array_buf.fromstring("f")
+				array_buf.fromstring(self.addfromfile())
 			else:
 				pickle = cPickle.dumps(currentobj, 1)
-				buf = "o" + self._l2s(len(pickle)) + pickle
-				
-		self.bufferlist.append(buf)
-		self.bufferlen = self.bufferlen + len(buf)
+				array_buf.fromstring("o")
+				array_buf.fromstring(C.long2str(long(len(pickle))))
+				array_buf.fromstring(pickle)
 		return 1
 
 	def addfromfile(self):
@@ -192,9 +180,9 @@ class FileWrappingIter:
 		if not buf:
 			assert not self.currently_in_file.close()
 			self.currently_in_file = None
-		return self._l2s(len(buf)) + buf
+		return C.long2str(long(len(buf))) + buf
 
-	def _l2s(self, l):
+	def _l2s_old(self, l):
 		"""Convert long int to string of 7 characters"""
 		s = ""
 		for i in range(7):
@@ -210,26 +198,28 @@ class BufferedRead:
 	"""Buffer the .read() calls to the given file
 
 	This is used to lessen overhead and latency when a file is sent
-	over a connection.
+	over a connection.  Profiling said that arrays were faster than
+	strings here.
 
 	"""
 	def __init__(self, file):
 		self.file = file
-		self.buffer = ""
+		self.array_buf = array.array('c')
 		self.bufsize = Globals.conn_bufsize
 
 	def read(self, l = -1):
+		array_buf = self.array_buf
 		if l < 0: # Read as much as possible
-			result = self.buffer + self.file.read()
-			self.buffer = ""
+			result = array_buf.tostring() + self.file.read()
+			del array_buf[:]
 			return result
 
-		if len(self.buffer) < l: # Try to make buffer as long as l
-			self.buffer += self.file.read(max(self.bufsize,
-											  l - len(self.buffer)))
-		actual_size = min(l, len(self.buffer))
-		result = self.buffer[:actual_size]
-		self.buffer = self.buffer[actual_size:]
+		if len(array_buf) < l: # Try to make buffer at least as long as l
+			array_buf.fromstring(self.file.read(max(self.bufsize, l)))
+		result = array_buf[:l].tostring()
+		del array_buf[:l]
 		return result
 
 	def close(self): return self.file.close()
+
+from log import *
diff --git a/rdiff-backup/src/profiled_rdb.py b/rdiff-backup/src/profiled_rdb.py
index a2f30ea..7412847 100755
--- a/rdiff-backup/src/profiled_rdb.py
+++ b/rdiff-backup/src/profiled_rdb.py
@@ -8,9 +8,9 @@ statistics afterwards.
 """
 
 __no_execute__ = 1
-execfile("main.py")
-import profile, pstats
-profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output")
+import sys, rdiff_backup.Main, profile, pstats
+profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
+			"profile-output")
 p = pstats.Stats("profile-output")
 p.sort_stats('time')
 p.print_stats(40)
diff --git a/rdiff-backup/src/rpath.py b/rdiff-backup/src/rpath.py
index 73910be..b6d9b70 100644
--- a/rdiff-backup/src/rpath.py
+++ b/rdiff-backup/src/rpath.py
@@ -217,6 +217,7 @@ class RORPath(RPathStatic):
 				(not Globals.change_ownership or self.issym())):
 				# Don't compare gid/uid for symlinks or if not change_ownership
 				pass
+			elif key == 'atime' and not Globals.preserve_atime: pass
 			elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
 			elif (not other.data.has_key(key) or
 				  self.data[key] != other.data[key]): return None
@@ -425,7 +426,7 @@ class RPath(RORPath):
 		if base is not None: self.path = "/".join((base,) + index)
 		self.file = None
 		if data or base is None: self.data = data
-		else: self.setdata()
+		else: self.data = self.conn.C.make_file_dict(self.path)
 
 	def __str__(self):
 		return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
@@ -448,6 +449,10 @@ class RPath(RORPath):
 		self.path = "/".join((self.base,) + self.index)
 
 	def setdata(self):
+		"""Set data dictionary using C extension"""
+		self.data = self.conn.C.make_file_dict(self.path)
+
+	def setdata_old(self):
 		"""Create the data dictionary"""
 		statblock = self.conn.RPathStatic.tupled_lstat(self.path)
 		if statblock is None:
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
index 4fee9ee..05436c1 100644
--- a/rdiff-backup/src/selection.py
+++ b/rdiff-backup/src/selection.py
@@ -94,7 +94,9 @@ class Select:
 			self.starting_index = starting_index
 			self.iter = self.iterate_starting_from(self.dsrpath,
 						            self.iterate_starting_from, sel_func)
-		else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+		elif self.quoting_on:
+			self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+		else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
 
 		# only iterate parents if we are not starting from beginning
 		self.iterate_parents = starting_index is not None and iterate_parents
@@ -102,6 +104,52 @@ class Select:
 		self.__iter__ = lambda: self
 		return self
 
+	def Iterate_fast(self, dsrpath, sel_func):
+		"""Like Iterate, but with less recursion, saving time
+
+		This is a bit harder to read than Iterate/iterate_in_dir, but
+		it should be faster because it only recurses to half as much
+		depth.  It doesn't handle the quoting case.
+
+		"""
+		def error_handler(exc, filename):
+			Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
+			return None
+
+		def diryield(dsrpath):
+			s = sel_func(dsrpath)
+			if s == 0: return
+			elif s == 1:
+				yield dsrpath
+				for filename in Robust.listrp(dsrpath):
+					new_dsrp = Robust.check_common_error(error_handler,
+											  dsrpath.append, [filename])
+					if new_dsrp:
+						if new_dsrp.isdir():
+							for dsrp in diryield(new_dsrp): yield dsrp
+						elif sel_func(new_dsrp) == 1: yield new_dsrp
+			elif s == 2:
+				yielded_something = None
+				for filename in Robust.listrp(dsrpath):
+					new_dsrp = Robust.check_common_error(error_handler,
+											  dsrpath.append, [filename])
+					if new_dsrp:
+						if new_dsrp.isdir():
+							for dsrp in diryield(new_dsrp):
+								if not yielded_something:
+									yielded_something = 1
+									yield dsrpath
+								yield dsrp
+						elif sel_func(new_dsrp) == 1:
+							if not yielded_something:
+								yielded_something = 1
+								yield dsrpath
+							yield new_dsrp
+
+		if dsrpath.isdir():
+			for dsrp in diryield(dsrpath): yield dsrp
+		elif sel_func(dsrpath) == 1: yield dsrpath
+
 	def Iterate(self, dsrpath, rec_func, sel_func):
 		"""Return iterator yielding dsrps in dsrpath
 
-- 
cgit v1.2.1