author     ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>  2002-06-17 03:24:05 +0000
committer  ben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>  2002-06-17 03:24:05 +0000
commit     6b72b10acde7061c1180a59d6eea3c0c20780683 (patch)
tree       1284913e6d64c09994149f0da28551029b522731
parent     30a98a1fa0d78837915e9b26b35c9abfd0783cf3 (diff)
download   rdiff-backup-6b72b10acde7061c1180a59d6eea3c0c20780683.tar.gz
Many optimizations - up to 3x speed improvement
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@134 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r--   rdiff-backup/rdiff_backup/Globals.py                 2
-rw-r--r--   rdiff-backup/rdiff_backup/cmodule.c                139
-rw-r--r--   rdiff-backup/rdiff_backup/connection.py             16
-rw-r--r--   rdiff-backup/rdiff_backup/destructive_stepping.py   11
-rw-r--r--   rdiff-backup/rdiff_backup/iterfile.py               96
-rwxr-xr-x   rdiff-backup/rdiff_backup/profiled_rdb.py            6
-rw-r--r--   rdiff-backup/rdiff_backup/rpath.py                   7
-rw-r--r--   rdiff-backup/rdiff_backup/selection.py              50
-rw-r--r--   rdiff-backup/src/Globals.py                          2
-rw-r--r--   rdiff-backup/src/cmodule.c                         139
-rw-r--r--   rdiff-backup/src/connection.py                      16
-rw-r--r--   rdiff-backup/src/destructive_stepping.py            11
-rw-r--r--   rdiff-backup/src/iterfile.py                        96
-rwxr-xr-x   rdiff-backup/src/profiled_rdb.py                     6
-rw-r--r--   rdiff-backup/src/rpath.py                            7
-rw-r--r--   rdiff-backup/src/selection.py                       50
16 files changed, 512 insertions, 142 deletions
diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py
index ca6e8d1..f3e4474 100644
--- a/rdiff-backup/rdiff_backup/Globals.py
+++ b/rdiff-backup/rdiff_backup/Globals.py
@@ -1,7 +1,7 @@
import re, os
# The current version of rdiff-backup
-version = "0.8.0"
+version = "0.9.0"
# If this is set, use this value in seconds as the current time
# instead of reading it from the clock.
diff --git a/rdiff-backup/rdiff_backup/cmodule.c b/rdiff-backup/rdiff_backup/cmodule.c
new file mode 100644
index 0000000..ea10415
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/cmodule.c
@@ -0,0 +1,139 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <Python.h>
+#include <errno.h>
+
+static PyObject *c_make_file_dict(self, args)
+ PyObject *self;
+ PyObject *args;
+{
+ char *filename, filetype[5];
+ struct stat sbuf;
+ mode_t mode;
+
+ if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
+ if (lstat(filename, &sbuf) != 0) {
+ if (errno == ENOENT || errno == ENOTDIR)
+ return Py_BuildValue("{s:s}", "type", NULL);
+ else {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ }
+ mode = sbuf.st_mode;
+
+ /* Build return dictionary from stat struct */
+ if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
+ /* Regular files, directories, sockets, and fifos */
+ if S_ISREG(mode) strcpy(filetype, "reg");
+ else if S_ISDIR(mode) strcpy(filetype, "dir");
+ else if S_ISSOCK(mode) strcpy(filetype, "sock");
+ else strcpy(filetype, "fifo");
+ return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}",
+ "type", filetype,
+ "size", (long int)sbuf.st_size,
+ "perms", (int)(mode & S_IRWXU),
+ "uid", (int)sbuf.st_uid,
+ "gid", (int)sbuf.st_gid,
+ "inode", (long int)sbuf.st_ino,
+ "devloc", (int)sbuf.st_dev,
+ "nlink", (int)sbuf.st_nlink,
+ "mtime", (long int)sbuf.st_mtime,
+ "atime", (long int)sbuf.st_atime);
+ } else if S_ISLNK(mode) {
+ /* Symbolic links */
+ char linkname[1024];
+ int len_link = readlink(filename, linkname, 1023);
+ if (len_link < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ linkname[len_link] = '\0';
+ return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}",
+ "type", "sym",
+ "size", (long int)sbuf.st_size,
+ "perms", (int)(mode & S_IRWXU),
+ "uid", (int)sbuf.st_uid,
+ "gid", (int)sbuf.st_gid,
+ "inode", (long int)sbuf.st_ino,
+ "devloc", (int)sbuf.st_dev,
+ "nlink", (int)sbuf.st_nlink,
+ "linkname", linkname);
+ } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+ /* Device files */
+ char devtype[2];
+ int devnums = (int)sbuf.st_rdev;
+ if S_ISCHR(mode) strcpy(devtype, "c");
+ else strcpy(devtype, "b");
+ return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:O}",
+ "type", "dev",
+ "size", (long int)sbuf.st_size,
+ "perms", (int)(mode & S_IRWXU),
+ "uid", (int)sbuf.st_uid,
+ "gid", (int)sbuf.st_gid,
+ "inode", (long int)sbuf.st_ino,
+ "devloc", (int)sbuf.st_dev,
+ "nlink", (int)sbuf.st_nlink,
+ "devnums", Py_BuildValue("(s,i,i)", devtype,
+ devnums >> 8,
+ devnums & 0xff),
+ "mtime", (long int)sbuf.st_mtime,
+ "atime", (long int)sbuf.st_atime);
+ } else {
+ /* Unrecognized file type - pretend it isn't there */
+ errno = ENOENT;
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+}
+
+static PyObject *long2str(self, args)
+ PyObject *self;
+ PyObject *args;
+{
+ unsigned char s[7];
+ int sindex;
+ unsigned long long int l;
+ PyObject *pylong;
+
+ if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL;
+ l = PyLong_AsUnsignedLongLong(pylong);
+ for(sindex = 0; sindex <= 6; sindex++) {
+ s[sindex] = l % 256;
+ l /= 256;
+ }
+ return Py_BuildValue("s#", s, 7);
+}
+
+static PyObject *str2long(self, args)
+ PyObject *self;
+ PyObject *args;
+{
+ unsigned char *s;
+ unsigned long long int l = 0;
+ int sindex, ssize;
+
+ if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
+ if (ssize != 7) return Py_BuildValue("i", -1);
+ for(sindex=6; sindex >= 0; sindex--)
+ l = l*256 + s[sindex];
+ return PyLong_FromLongLong(l);
+}
+
+static PyMethodDef CMethods[] = {
+ {"make_file_dict", c_make_file_dict, METH_VARARGS,
+ "Make dictionary from file stat"},
+ {"long2str", long2str, METH_VARARGS,
+ "Convert long int to 7 byte string"},
+ {"str2long", str2long, METH_VARARGS,
+ "Convert 7 byte string to long int"},
+ {NULL, NULL, 0, NULL}
+};
+
+void initC(void)
+{
+ (void) Py_InitModule("C", CMethods);
+}
+
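The new C helpers long2str and str2long pack a length into a fixed 7-byte string, least-significant byte first. For reference, a pure-Python sketch of the same encoding (hypothetical py_long2str/py_str2long names; the commit itself only keeps the old Python versions around as _l2s_old/_s2l_old):

    # Pack a non-negative long into 7 bytes, LSB first, mirroring long2str()
    def py_long2str(l):
        s = ""
        for i in range(7):
            s = s + chr(l % 256)
            l = l / 256
        return s

    # Inverse of the above, mirroring str2long()
    def py_str2long(s):
        assert len(s) == 7
        l = 0L
        for i in range(6, -1, -1):
            l = l * 256 + ord(s[i])
        return l

    assert py_str2long(py_long2str(123456789L)) == 123456789L
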
diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py
index 74d413d..c557dd5 100644
--- a/rdiff-backup/rdiff_backup/connection.py
+++ b/rdiff-backup/rdiff_backup/connection.py
@@ -1,5 +1,5 @@
from __future__ import generators
-import types, os, tempfile, cPickle, shutil, traceback
+import types, os, tempfile, cPickle, shutil, traceback, pickle
#######################################################################
#
@@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection):
def _putobj(self, obj, req_num):
"""Send a generic python obj down the outpipe"""
- self._write("o", cPickle.dumps(obj, 1), req_num)
+ # for some reason there is an error when cPickle is used below..
+ self._write("o", pickle.dumps(obj, 1), req_num)
def _putbuf(self, buf, req_num):
"""Send buffer buf down the outpipe"""
@@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection):
def _write(self, headerchar, data, req_num):
"""Write header and then data to the pipe"""
- self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
+ self.outpipe.write(headerchar + chr(req_num) +
+ C.long2str(long(len(data))))
self.outpipe.write(data)
self.outpipe.flush()
@@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection):
"""Read length bytes from inpipe, returning result"""
return self.inpipe.read(length)
- def _s2l(self, s):
+ def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
for i in range(7): l = l*256 + ord(s[i])
return l
- def _l2s(self, l):
+ def _l2s_old(self, l):
"""Convert long int to string"""
s = ""
for i in range(7):
@@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection):
try:
format_string, req_num, length = (header_string[0],
ord(header_string[1]),
- self._s2l(header_string[2:]))
+ C.str2long(header_string[2:]))
except IndexError: raise ConnectionError()
if format_string == "q": raise ConnectionQuit("Received quit signal")
@@ -490,7 +492,7 @@ class VirtualFile:
# everything has to be available here for remote connection's use, but
# put at bottom to reduce circularities.
-import Globals, Time, Rdiff, Hardlink, FilenameMapping
+import Globals, Time, Rdiff, Hardlink, FilenameMapping, C
from static import *
from lazy import *
from log import *
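After this change every message on the pipe is framed by a 9-byte header: a one-character format code (for example "o" for a pickled object or "q" for quit), a one-byte request number, and the payload length encoded by C.long2str. A rough sketch of that framing, reusing the pure-Python pair above (hypothetical helper names, not part of the commit):

    def pack_header(headerchar, req_num, data_len):
        # 1-byte format code + 1-byte request number + 7-byte length
        return headerchar + chr(req_num) + py_long2str(long(data_len))

    def unpack_header(header_string):
        assert len(header_string) == 9
        return (header_string[0],                 # format code
                ord(header_string[1]),            # request number
                py_str2long(header_string[2:]))   # payload length
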
diff --git a/rdiff-backup/rdiff_backup/destructive_stepping.py b/rdiff-backup/rdiff_backup/destructive_stepping.py
index a64ecbc..d0a4998 100644
--- a/rdiff-backup/rdiff_backup/destructive_stepping.py
+++ b/rdiff-backup/rdiff_backup/destructive_stepping.py
@@ -31,7 +31,7 @@ class DSRPath(RPath):
newmtime - holds the new mtime
"""
- def __init__(self, source, *args):
+ def __init__(self, source, conn_or_rp, base = 0, index = ()):
"""Initialize DSRP
Source should be true iff the DSRPath is taken from the
@@ -42,10 +42,11 @@ class DSRPath(RPath):
otherwise use the same arguments as the RPath initializer.
"""
- if len(args) == 1 and isinstance(args[0], RPath):
- rp = args[0]
- RPath.__init__(self, rp.conn, rp.base, rp.index)
- else: RPath.__init__(self, *args)
+ if base == 0:
+ assert isinstance(conn_or_rp, RPath)
+ RPath.__init__(self, conn_or_rp.conn,
+ conn_or_rp.base, conn_or_rp.index)
+ else: RPath.__init__(self, conn_or_rp, base, index)
if source != "bypass":
# "bypass" val is used when unpackaging over connection
diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py
index 26cc952..4211441 100644
--- a/rdiff-backup/rdiff_backup/iterfile.py
+++ b/rdiff-backup/rdiff_backup/iterfile.py
@@ -1,5 +1,5 @@
-import cPickle
-import Globals
+import cPickle, array
+import Globals, C
#######################################################################
#
@@ -13,7 +13,7 @@ class UnwrapFile:
def __init__(self, file):
self.file = file
- def _s2l(self, s):
+ def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
@@ -31,8 +31,9 @@ class UnwrapFile:
"""
header = self.file.read(8)
if not header: return None, None
- assert len(header) == 8, "Header is only %d bytes" % len(header)
- type, length = header[0], self._s2l(header[1:])
+ if len(header) != 8:
+ assert None, "Header %s is only %d bytes" % (header, len(header))
+ type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
if type == "o": return type, cPickle.loads(buf)
else: return type, buf
@@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile):
"""
UnwrapFile.__init__(self, iwf.file)
self.iwf = iwf
- self.bufferlist = [initial_data]
- self.bufferlen = len(initial_data)
+ self.buffer = initial_data
self.closed = None
- def check_consistency(self):
- l = len("".join(self.bufferlist))
- assert l == self.bufferlen, \
- "Length of IVF bufferlist doesn't match (%s, %s)" % \
- (l, self.bufferlen)
-
def read(self, length):
+ """Read length bytes from the file, updating buffers as necessary"""
assert not self.closed
if self.iwf.currently_in_file:
- while length >= self.bufferlen:
+ while length >= len(self.buffer):
if not self.addtobuffer(): break
- real_len = min(length, self.bufferlen)
- combined_buffer = "".join(self.bufferlist)
- assert len(combined_buffer) == self.bufferlen, \
- (len(combined_buffer), self.bufferlen)
- self.bufferlist = [combined_buffer[real_len:]]
- self.bufferlen = self.bufferlen - real_len
- return combined_buffer[:real_len]
+ real_len = min(length, len(self.buffer))
+ return_val = self.buffer[:real_len]
+ self.buffer = self.buffer[real_len:]
+ return return_val
def addtobuffer(self):
"""Read a chunk from the file and add it to the buffer"""
@@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile):
type, data = self._get()
assert type == "c", "Type is %s instead of c" % type
if data:
- self.bufferlen = self.bufferlen + len(data)
- self.bufferlist.append(data)
+ self.buffer += data
return 1
else:
self.iwf.currently_in_file = None
@@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile):
"""Currently just reads whats left and discards it"""
while self.iwf.currently_in_file:
self.addtobuffer()
- self.bufferlist = []
- self.bufferlen = 0
+ self.buffer = ""
self.closed = 1
@@ -145,45 +135,43 @@ class FileWrappingIter:
def __init__(self, iter):
"""Initialize with iter"""
self.iter = iter
- self.bufferlist = []
- self.bufferlen = 0L
+ self.array_buf = array.array('c')
self.currently_in_file = None
self.closed = None
def read(self, length):
"""Return next length bytes in file"""
assert not self.closed
- while self.bufferlen < length:
+ while len(self.array_buf) < length:
if not self.addtobuffer(): break
- combined_buffer = "".join(self.bufferlist)
- assert len(combined_buffer) == self.bufferlen
- real_len = min(self.bufferlen, length)
- self.bufferlen = self.bufferlen - real_len
- self.bufferlist = [combined_buffer[real_len:]]
- return combined_buffer[:real_len]
+ result = self.array_buf[:length].tostring()
+ del self.array_buf[:length]
+ return result
def addtobuffer(self):
- """Updates self.bufferlist and self.bufferlen, adding on a chunk
+ """Updates self.buffer, adding a chunk from the iterator.
Returns None if we have reached the end of the iterator,
otherwise return true.
"""
+ array_buf = self.array_buf
if self.currently_in_file:
- buf = "c" + self.addfromfile()
+ array_buf.fromstring("c")
+ array_buf.fromstring(self.addfromfile())
else:
try: currentobj = self.iter.next()
except StopIteration: return None
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
- buf = "f" + self.addfromfile()
+ array_buf.fromstring("f")
+ array_buf.fromstring(self.addfromfile())
else:
pickle = cPickle.dumps(currentobj, 1)
- buf = "o" + self._l2s(len(pickle)) + pickle
-
- self.bufferlist.append(buf)
- self.bufferlen = self.bufferlen + len(buf)
+ array_buf.fromstring("o")
+ array_buf.fromstring(C.long2str(long(len(pickle))))
+ array_buf.fromstring(pickle)
return 1
def addfromfile(self):
@@ -192,9 +180,9 @@ class FileWrappingIter:
if not buf:
assert not self.currently_in_file.close()
self.currently_in_file = None
- return self._l2s(len(buf)) + buf
+ return C.long2str(long(len(buf))) + buf
- def _l2s(self, l):
+ def _l2s_old(self, l):
"""Convert long int to string of 7 characters"""
s = ""
for i in range(7):
@@ -210,26 +198,28 @@ class BufferedRead:
"""Buffer the .read() calls to the given file
This is used to lessen overhead and latency when a file is sent
- over a connection.
+ over a connection. Profiling said that arrays were faster than
+ strings here.
"""
def __init__(self, file):
self.file = file
- self.buffer = ""
+ self.array_buf = array.array('c')
self.bufsize = Globals.conn_bufsize
def read(self, l = -1):
+ array_buf = self.array_buf
if l < 0: # Read as much as possible
- result = self.buffer + self.file.read()
- self.buffer = ""
+ result = array_buf.tostring() + self.file.read()
+ del array_buf[:]
return result
- if len(self.buffer) < l: # Try to make buffer as long as l
- self.buffer += self.file.read(max(self.bufsize,
- l - len(self.buffer)))
- actual_size = min(l, len(self.buffer))
- result = self.buffer[:actual_size]
- self.buffer = self.buffer[actual_size:]
+ if len(array_buf) < l: # Try to make buffer at least as long as l
+ array_buf.fromstring(self.file.read(max(self.bufsize, l)))
+ result = array_buf[:l].tostring()
+ del array_buf[:l]
return result
def close(self): return self.file.close()
+
+from log import *
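The rewritten buffering drops the list-of-strings plus running-length bookkeeping in favour of a single array.array('c'), which the updated docstring credits to profiling. The pattern in isolation looks roughly like this (a minimal Python 2 sketch, not code from the commit):

    import array

    buf = array.array('c')            # character array used as a byte buffer
    buf.fromstring("first chunk ")    # append raw bytes cheaply
    buf.fromstring("second chunk")

    head = buf[:5].tostring()         # take the first five bytes...
    del buf[:5]                       # ...and remove them from the buffer
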
diff --git a/rdiff-backup/rdiff_backup/profiled_rdb.py b/rdiff-backup/rdiff_backup/profiled_rdb.py
index a2f30ea..7412847 100755
--- a/rdiff-backup/rdiff_backup/profiled_rdb.py
+++ b/rdiff-backup/rdiff_backup/profiled_rdb.py
@@ -8,9 +8,9 @@ statistics afterwards.
"""
__no_execute__ = 1
-execfile("main.py")
-import profile, pstats
-profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output")
+import sys, rdiff_backup.Main, profile, pstats
+profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
+ "profile-output")
p = pstats.Stats("profile-output")
p.sort_stats('time')
p.print_stats(40)
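Because the wrapper now imports rdiff_backup.Main instead of execfile-ing main.py, it can be run like the normal entry point; presumably an invocation along these lines profiles a whole backup and prints the 40 most time-consuming functions:

    ./profiled_rdb.py /source/dir /backup/target
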
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 73910be..b6d9b70 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -217,6 +217,7 @@ class RORPath(RPathStatic):
(not Globals.change_ownership or self.issym())):
# Don't compare gid/uid for symlinks or if not change_ownership
pass
+ elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return None
@@ -425,7 +426,7 @@ class RPath(RORPath):
if base is not None: self.path = "/".join((base,) + index)
self.file = None
if data or base is None: self.data = data
- else: self.setdata()
+ else: self.data = self.conn.C.make_file_dict(self.path)
def __str__(self):
return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
@@ -448,6 +449,10 @@ class RPath(RORPath):
self.path = "/".join((self.base,) + self.index)
def setdata(self):
+ """Set data dictionary using C extension"""
+ self.data = self.conn.C.make_file_dict(self.path)
+
+ def setdata_old(self):
"""Create the data dictionary"""
statblock = self.conn.RPathStatic.tupled_lstat(self.path)
if statblock is None:
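RPath.setdata (and the constructor's fallback) now builds self.data directly from the C extension. Judging from c_make_file_dict above, the dictionary carries "type", "size", "perms", "uid", "gid", "inode", "devloc" and "nlink", plus "mtime"/"atime", "linkname" or "devnums" depending on the file type. A hedged usage sketch (hypothetical path, assuming the usual RPath(connection, base) construction):

    rp = RPath(Globals.local_connection, "/etc/hostname")
    rp.setdata()                        # routed through conn.C.make_file_dict
    if rp.data["type"] == "reg":
        print rp.data["size"], rp.data["mtime"]
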
diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py
index 4fee9ee..05436c1 100644
--- a/rdiff-backup/rdiff_backup/selection.py
+++ b/rdiff-backup/rdiff_backup/selection.py
@@ -94,7 +94,9 @@ class Select:
self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
self.iterate_starting_from, sel_func)
- else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+ elif self.quoting_on:
+ self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+ else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
# only iterate parents if we are not starting from beginning
self.iterate_parents = starting_index is not None and iterate_parents
@@ -102,6 +104,52 @@ class Select:
self.__iter__ = lambda: self
return self
+ def Iterate_fast(self, dsrpath, sel_func):
+ """Like Iterate, but don't recur, saving time
+
+ This is a bit harder to read than Iterate/iterate_in_dir, but
+ it should be faster because it only recurs to half as much
+ depth. It doesn't handle the quoting case.
+
+ """
+ def error_handler(exc, filename):
+ Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
+ return None
+
+ def diryield(dsrpath):
+ s = sel_func(dsrpath)
+ if s == 0: return
+ elif s == 1:
+ yield dsrpath
+ for filename in Robust.listrp(dsrpath):
+ new_dsrp = Robust.check_common_error(error_handler,
+ dsrpath.append, [filename])
+ if new_dsrp:
+ if new_dsrp.isdir():
+ for dsrp in diryield(new_dsrp): yield dsrp
+ elif sel_func(new_dsrp) == 1: yield new_dsrp
+ elif s == 2:
+ yielded_something = None
+ for filename in Robust.listrp(dsrpath):
+ new_dsrp = Robust.check_common_error(error_handler,
+ dsrpath.append, [filename])
+ if new_dsrp:
+ if new_dsrp.isdir():
+ for dsrp in diryield(new_dsrp):
+ if not yielded_something:
+ yielded_something = 1
+ yield dsrpath
+ yield dsrp
+ elif sel_func(new_dsrp) == 1:
+ if not yielded_something:
+ yielded_something = 1
+ yield dsrpath
+ yield new_dsrp
+
+ if dsrpath.isdir():
+ for dsrp in diryield(dsrpath): yield dsrp
+ elif sel_func(dsrpath) == 1: yield dsrpath
+
def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
diff --git a/rdiff-backup/src/Globals.py b/rdiff-backup/src/Globals.py
index ca6e8d1..f3e4474 100644
--- a/rdiff-backup/src/Globals.py
+++ b/rdiff-backup/src/Globals.py
@@ -1,7 +1,7 @@
import re, os
# The current version of rdiff-backup
-version = "0.8.0"
+version = "0.9.0"
# If this is set, use this value in seconds as the current time
# instead of reading it from the clock.
diff --git a/rdiff-backup/src/cmodule.c b/rdiff-backup/src/cmodule.c
new file mode 100644
index 0000000..ea10415
--- /dev/null
+++ b/rdiff-backup/src/cmodule.c
@@ -0,0 +1,139 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <Python.h>
+#include <errno.h>
+
+static PyObject *c_make_file_dict(self, args)
+ PyObject *self;
+ PyObject *args;
+{
+ char *filename, filetype[5];
+ struct stat sbuf;
+ mode_t mode;
+
+ if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
+ if (lstat(filename, &sbuf) != 0) {
+ if (errno == ENOENT || errno == ENOTDIR)
+ return Py_BuildValue("{s:s}", "type", NULL);
+ else {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ }
+ mode = sbuf.st_mode;
+
+ /* Build return dictionary from stat struct */
+ if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
+ /* Regular files, directories, sockets, and fifos */
+ if S_ISREG(mode) strcpy(filetype, "reg");
+ else if S_ISDIR(mode) strcpy(filetype, "dir");
+ else if S_ISSOCK(mode) strcpy(filetype, "sock");
+ else strcpy(filetype, "fifo");
+ return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}",
+ "type", filetype,
+ "size", (long int)sbuf.st_size,
+ "perms", (int)(mode & S_IRWXU),
+ "uid", (int)sbuf.st_uid,
+ "gid", (int)sbuf.st_gid,
+ "inode", (long int)sbuf.st_ino,
+ "devloc", (int)sbuf.st_dev,
+ "nlink", (int)sbuf.st_nlink,
+ "mtime", (long int)sbuf.st_mtime,
+ "atime", (long int)sbuf.st_atime);
+ } else if S_ISLNK(mode) {
+ /* Symbolic links */
+ char linkname[1024];
+ int len_link = readlink(filename, linkname, 1023);
+ if (len_link < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ linkname[len_link] = '\0';
+ return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}",
+ "type", "sym",
+ "size", (long int)sbuf.st_size,
+ "perms", (int)(mode & S_IRWXU),
+ "uid", (int)sbuf.st_uid,
+ "gid", (int)sbuf.st_gid,
+ "inode", (long int)sbuf.st_ino,
+ "devloc", (int)sbuf.st_dev,
+ "nlink", (int)sbuf.st_nlink,
+ "linkname", linkname);
+ } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+ /* Device files */
+ char devtype[2];
+ int devnums = (int)sbuf.st_rdev;
+ if S_ISCHR(mode) strcpy(devtype, "c");
+ else strcpy(devtype, "b");
+ return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:O}",
+ "type", "dev",
+ "size", (long int)sbuf.st_size,
+ "perms", (int)(mode & S_IRWXU),
+ "uid", (int)sbuf.st_uid,
+ "gid", (int)sbuf.st_gid,
+ "inode", (long int)sbuf.st_ino,
+ "devloc", (int)sbuf.st_dev,
+ "nlink", (int)sbuf.st_nlink,
+ "devnums", Py_BuildValue("(s,i,i)", devtype,
+ devnums >> 8,
+ devnums & 0xff),
+ "mtime", (long int)sbuf.st_mtime,
+ "atime", (long int)sbuf.st_atime);
+ } else {
+ /* Unrecognized file type - pretend it isn't there */
+ errno = ENOENT;
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+}
+
+static PyObject *long2str(self, args)
+ PyObject *self;
+ PyObject *args;
+{
+ unsigned char s[7];
+ int sindex;
+ unsigned long long int l;
+ PyObject *pylong;
+
+ if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL;
+ l = PyLong_AsUnsignedLongLong(pylong);
+ for(sindex = 0; sindex <= 6; sindex++) {
+ s[sindex] = l % 256;
+ l /= 256;
+ }
+ return Py_BuildValue("s#", s, 7);
+}
+
+static PyObject *str2long(self, args)
+ PyObject *self;
+ PyObject *args;
+{
+ unsigned char *s;
+ unsigned long long int l = 0;
+ int sindex, ssize;
+
+ if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
+ if (ssize != 7) return Py_BuildValue("i", -1);
+ for(sindex=6; sindex >= 0; sindex--)
+ l = l*256 + s[sindex];
+ return PyLong_FromLongLong(l);
+}
+
+static PyMethodDef CMethods[] = {
+ {"make_file_dict", c_make_file_dict, METH_VARARGS,
+ "Make dictionary from file stat"},
+ {"long2str", long2str, METH_VARARGS,
+ "Convert long int to 7 byte string"},
+ {"str2long", str2long, METH_VARARGS,
+ "Convert 7 byte string to long int"},
+ {NULL, NULL, 0, NULL}
+};
+
+void initC(void)
+{
+ (void) Py_InitModule("C", CMethods);
+}
+
diff --git a/rdiff-backup/src/connection.py b/rdiff-backup/src/connection.py
index 74d413d..c557dd5 100644
--- a/rdiff-backup/src/connection.py
+++ b/rdiff-backup/src/connection.py
@@ -1,5 +1,5 @@
from __future__ import generators
-import types, os, tempfile, cPickle, shutil, traceback
+import types, os, tempfile, cPickle, shutil, traceback, pickle
#######################################################################
#
@@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection):
def _putobj(self, obj, req_num):
"""Send a generic python obj down the outpipe"""
- self._write("o", cPickle.dumps(obj, 1), req_num)
+ # for some reason there is an error when cPickle is used below..
+ self._write("o", pickle.dumps(obj, 1), req_num)
def _putbuf(self, buf, req_num):
"""Send buffer buf down the outpipe"""
@@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection):
def _write(self, headerchar, data, req_num):
"""Write header and then data to the pipe"""
- self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
+ self.outpipe.write(headerchar + chr(req_num) +
+ C.long2str(long(len(data))))
self.outpipe.write(data)
self.outpipe.flush()
@@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection):
"""Read length bytes from inpipe, returning result"""
return self.inpipe.read(length)
- def _s2l(self, s):
+ def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
for i in range(7): l = l*256 + ord(s[i])
return l
- def _l2s(self, l):
+ def _l2s_old(self, l):
"""Convert long int to string"""
s = ""
for i in range(7):
@@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection):
try:
format_string, req_num, length = (header_string[0],
ord(header_string[1]),
- self._s2l(header_string[2:]))
+ C.str2long(header_string[2:]))
except IndexError: raise ConnectionError()
if format_string == "q": raise ConnectionQuit("Received quit signal")
@@ -490,7 +492,7 @@ class VirtualFile:
# everything has to be available here for remote connection's use, but
# put at bottom to reduce circularities.
-import Globals, Time, Rdiff, Hardlink, FilenameMapping
+import Globals, Time, Rdiff, Hardlink, FilenameMapping, C
from static import *
from lazy import *
from log import *
diff --git a/rdiff-backup/src/destructive_stepping.py b/rdiff-backup/src/destructive_stepping.py
index a64ecbc..d0a4998 100644
--- a/rdiff-backup/src/destructive_stepping.py
+++ b/rdiff-backup/src/destructive_stepping.py
@@ -31,7 +31,7 @@ class DSRPath(RPath):
newmtime - holds the new mtime
"""
- def __init__(self, source, *args):
+ def __init__(self, source, conn_or_rp, base = 0, index = ()):
"""Initialize DSRP
Source should be true iff the DSRPath is taken from the
@@ -42,10 +42,11 @@ class DSRPath(RPath):
otherwise use the same arguments as the RPath initializer.
"""
- if len(args) == 1 and isinstance(args[0], RPath):
- rp = args[0]
- RPath.__init__(self, rp.conn, rp.base, rp.index)
- else: RPath.__init__(self, *args)
+ if base == 0:
+ assert isinstance(conn_or_rp, RPath)
+ RPath.__init__(self, conn_or_rp.conn,
+ conn_or_rp.base, conn_or_rp.index)
+ else: RPath.__init__(self, conn_or_rp, base, index)
if source != "bypass":
# "bypass" val is used when unpackaging over connection
diff --git a/rdiff-backup/src/iterfile.py b/rdiff-backup/src/iterfile.py
index 26cc952..4211441 100644
--- a/rdiff-backup/src/iterfile.py
+++ b/rdiff-backup/src/iterfile.py
@@ -1,5 +1,5 @@
-import cPickle
-import Globals
+import cPickle, array
+import Globals, C
#######################################################################
#
@@ -13,7 +13,7 @@ class UnwrapFile:
def __init__(self, file):
self.file = file
- def _s2l(self, s):
+ def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
@@ -31,8 +31,9 @@ class UnwrapFile:
"""
header = self.file.read(8)
if not header: return None, None
- assert len(header) == 8, "Header is only %d bytes" % len(header)
- type, length = header[0], self._s2l(header[1:])
+ if len(header) != 8:
+ assert None, "Header %s is only %d bytes" % (header, len(header))
+ type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
if type == "o": return type, cPickle.loads(buf)
else: return type, buf
@@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile):
"""
UnwrapFile.__init__(self, iwf.file)
self.iwf = iwf
- self.bufferlist = [initial_data]
- self.bufferlen = len(initial_data)
+ self.buffer = initial_data
self.closed = None
- def check_consistency(self):
- l = len("".join(self.bufferlist))
- assert l == self.bufferlen, \
- "Length of IVF bufferlist doesn't match (%s, %s)" % \
- (l, self.bufferlen)
-
def read(self, length):
+ """Read length bytes from the file, updating buffers as necessary"""
assert not self.closed
if self.iwf.currently_in_file:
- while length >= self.bufferlen:
+ while length >= len(self.buffer):
if not self.addtobuffer(): break
- real_len = min(length, self.bufferlen)
- combined_buffer = "".join(self.bufferlist)
- assert len(combined_buffer) == self.bufferlen, \
- (len(combined_buffer), self.bufferlen)
- self.bufferlist = [combined_buffer[real_len:]]
- self.bufferlen = self.bufferlen - real_len
- return combined_buffer[:real_len]
+ real_len = min(length, len(self.buffer))
+ return_val = self.buffer[:real_len]
+ self.buffer = self.buffer[real_len:]
+ return return_val
def addtobuffer(self):
"""Read a chunk from the file and add it to the buffer"""
@@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile):
type, data = self._get()
assert type == "c", "Type is %s instead of c" % type
if data:
- self.bufferlen = self.bufferlen + len(data)
- self.bufferlist.append(data)
+ self.buffer += data
return 1
else:
self.iwf.currently_in_file = None
@@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile):
"""Currently just reads whats left and discards it"""
while self.iwf.currently_in_file:
self.addtobuffer()
- self.bufferlist = []
- self.bufferlen = 0
+ self.buffer = ""
self.closed = 1
@@ -145,45 +135,43 @@ class FileWrappingIter:
def __init__(self, iter):
"""Initialize with iter"""
self.iter = iter
- self.bufferlist = []
- self.bufferlen = 0L
+ self.array_buf = array.array('c')
self.currently_in_file = None
self.closed = None
def read(self, length):
"""Return next length bytes in file"""
assert not self.closed
- while self.bufferlen < length:
+ while len(self.array_buf) < length:
if not self.addtobuffer(): break
- combined_buffer = "".join(self.bufferlist)
- assert len(combined_buffer) == self.bufferlen
- real_len = min(self.bufferlen, length)
- self.bufferlen = self.bufferlen - real_len
- self.bufferlist = [combined_buffer[real_len:]]
- return combined_buffer[:real_len]
+ result = self.array_buf[:length].tostring()
+ del self.array_buf[:length]
+ return result
def addtobuffer(self):
- """Updates self.bufferlist and self.bufferlen, adding on a chunk
+ """Updates self.buffer, adding a chunk from the iterator.
Returns None if we have reached the end of the iterator,
otherwise return true.
"""
+ array_buf = self.array_buf
if self.currently_in_file:
- buf = "c" + self.addfromfile()
+ array_buf.fromstring("c")
+ array_buf.fromstring(self.addfromfile())
else:
try: currentobj = self.iter.next()
except StopIteration: return None
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
- buf = "f" + self.addfromfile()
+ array_buf.fromstring("f")
+ array_buf.fromstring(self.addfromfile())
else:
pickle = cPickle.dumps(currentobj, 1)
- buf = "o" + self._l2s(len(pickle)) + pickle
-
- self.bufferlist.append(buf)
- self.bufferlen = self.bufferlen + len(buf)
+ array_buf.fromstring("o")
+ array_buf.fromstring(C.long2str(long(len(pickle))))
+ array_buf.fromstring(pickle)
return 1
def addfromfile(self):
@@ -192,9 +180,9 @@ class FileWrappingIter:
if not buf:
assert not self.currently_in_file.close()
self.currently_in_file = None
- return self._l2s(len(buf)) + buf
+ return C.long2str(long(len(buf))) + buf
- def _l2s(self, l):
+ def _l2s_old(self, l):
"""Convert long int to string of 7 characters"""
s = ""
for i in range(7):
@@ -210,26 +198,28 @@ class BufferedRead:
"""Buffer the .read() calls to the given file
This is used to lessen overhead and latency when a file is sent
- over a connection.
+ over a connection. Profiling said that arrays were faster than
+ strings here.
"""
def __init__(self, file):
self.file = file
- self.buffer = ""
+ self.array_buf = array.array('c')
self.bufsize = Globals.conn_bufsize
def read(self, l = -1):
+ array_buf = self.array_buf
if l < 0: # Read as much as possible
- result = self.buffer + self.file.read()
- self.buffer = ""
+ result = array_buf.tostring() + self.file.read()
+ del array_buf[:]
return result
- if len(self.buffer) < l: # Try to make buffer as long as l
- self.buffer += self.file.read(max(self.bufsize,
- l - len(self.buffer)))
- actual_size = min(l, len(self.buffer))
- result = self.buffer[:actual_size]
- self.buffer = self.buffer[actual_size:]
+ if len(array_buf) < l: # Try to make buffer at least as long as l
+ array_buf.fromstring(self.file.read(max(self.bufsize, l)))
+ result = array_buf[:l].tostring()
+ del array_buf[:l]
return result
def close(self): return self.file.close()
+
+from log import *
diff --git a/rdiff-backup/src/profiled_rdb.py b/rdiff-backup/src/profiled_rdb.py
index a2f30ea..7412847 100755
--- a/rdiff-backup/src/profiled_rdb.py
+++ b/rdiff-backup/src/profiled_rdb.py
@@ -8,9 +8,9 @@ statistics afterwards.
"""
__no_execute__ = 1
-execfile("main.py")
-import profile, pstats
-profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output")
+import sys, rdiff_backup.Main, profile, pstats
+profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
+ "profile-output")
p = pstats.Stats("profile-output")
p.sort_stats('time')
p.print_stats(40)
diff --git a/rdiff-backup/src/rpath.py b/rdiff-backup/src/rpath.py
index 73910be..b6d9b70 100644
--- a/rdiff-backup/src/rpath.py
+++ b/rdiff-backup/src/rpath.py
@@ -217,6 +217,7 @@ class RORPath(RPathStatic):
(not Globals.change_ownership or self.issym())):
# Don't compare gid/uid for symlinks or if not change_ownership
pass
+ elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return None
@@ -425,7 +426,7 @@ class RPath(RORPath):
if base is not None: self.path = "/".join((base,) + index)
self.file = None
if data or base is None: self.data = data
- else: self.setdata()
+ else: self.data = self.conn.C.make_file_dict(self.path)
def __str__(self):
return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
@@ -448,6 +449,10 @@ class RPath(RORPath):
self.path = "/".join((self.base,) + self.index)
def setdata(self):
+ """Set data dictionary using C extension"""
+ self.data = self.conn.C.make_file_dict(self.path)
+
+ def setdata_old(self):
"""Create the data dictionary"""
statblock = self.conn.RPathStatic.tupled_lstat(self.path)
if statblock is None:
diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py
index 4fee9ee..05436c1 100644
--- a/rdiff-backup/src/selection.py
+++ b/rdiff-backup/src/selection.py
@@ -94,7 +94,9 @@ class Select:
self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
self.iterate_starting_from, sel_func)
- else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+ elif self.quoting_on:
+ self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+ else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
# only iterate parents if we are not starting from beginning
self.iterate_parents = starting_index is not None and iterate_parents
@@ -102,6 +104,52 @@ class Select:
self.__iter__ = lambda: self
return self
+ def Iterate_fast(self, dsrpath, sel_func):
+ """Like Iterate, but don't recur, saving time
+
+ This is a bit harder to read than Iterate/iterate_in_dir, but
+ it should be faster because it only recurs to half as much
+ depth. It doesn't handle the quoting case.
+
+ """
+ def error_handler(exc, filename):
+ Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
+ return None
+
+ def diryield(dsrpath):
+ s = sel_func(dsrpath)
+ if s == 0: return
+ elif s == 1:
+ yield dsrpath
+ for filename in Robust.listrp(dsrpath):
+ new_dsrp = Robust.check_common_error(error_handler,
+ dsrpath.append, [filename])
+ if new_dsrp:
+ if new_dsrp.isdir():
+ for dsrp in diryield(new_dsrp): yield dsrp
+ elif sel_func(new_dsrp) == 1: yield new_dsrp
+ elif s == 2:
+ yielded_something = None
+ for filename in Robust.listrp(dsrpath):
+ new_dsrp = Robust.check_common_error(error_handler,
+ dsrpath.append, [filename])
+ if new_dsrp:
+ if new_dsrp.isdir():
+ for dsrp in diryield(new_dsrp):
+ if not yielded_something:
+ yielded_something = 1
+ yield dsrpath
+ yield dsrp
+ elif sel_func(new_dsrp) == 1:
+ if not yielded_something:
+ yielded_something = 1
+ yield dsrpath
+ yield new_dsrp
+
+ if dsrpath.isdir():
+ for dsrp in diryield(dsrpath): yield dsrp
+ elif sel_func(dsrpath) == 1: yield dsrpath
+
def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath