From 6b72b10acde7061c1180a59d6eea3c0c20780683 Mon Sep 17 00:00:00 2001 From: ben Date: Mon, 17 Jun 2002 03:24:05 +0000 Subject: Many optimizations - up to 3x speed improvement git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@134 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/Globals.py | 2 +- rdiff-backup/rdiff_backup/cmodule.c | 139 ++++++++++++++++++++++ rdiff-backup/rdiff_backup/connection.py | 16 +-- rdiff-backup/rdiff_backup/destructive_stepping.py | 11 +- rdiff-backup/rdiff_backup/iterfile.py | 96 +++++++-------- rdiff-backup/rdiff_backup/profiled_rdb.py | 6 +- rdiff-backup/rdiff_backup/rpath.py | 7 +- rdiff-backup/rdiff_backup/selection.py | 50 +++++++- rdiff-backup/src/Globals.py | 2 +- rdiff-backup/src/cmodule.c | 139 ++++++++++++++++++++++ rdiff-backup/src/connection.py | 16 +-- rdiff-backup/src/destructive_stepping.py | 11 +- rdiff-backup/src/iterfile.py | 96 +++++++-------- rdiff-backup/src/profiled_rdb.py | 6 +- rdiff-backup/src/rpath.py | 7 +- rdiff-backup/src/selection.py | 50 +++++++- 16 files changed, 512 insertions(+), 142 deletions(-) create mode 100644 rdiff-backup/rdiff_backup/cmodule.c create mode 100644 rdiff-backup/src/cmodule.c (limited to 'rdiff-backup') diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py index ca6e8d1..f3e4474 100644 --- a/rdiff-backup/rdiff_backup/Globals.py +++ b/rdiff-backup/rdiff_backup/Globals.py @@ -1,7 +1,7 @@ import re, os # The current version of rdiff-backup -version = "0.8.0" +version = "0.9.0" # If this is set, use this value in seconds as the current time # instead of reading it from the clock. diff --git a/rdiff-backup/rdiff_backup/cmodule.c b/rdiff-backup/rdiff_backup/cmodule.c new file mode 100644 index 0000000..ea10415 --- /dev/null +++ b/rdiff-backup/rdiff_backup/cmodule.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include + +static PyObject *c_make_file_dict(self, args) + PyObject *self; + PyObject *args; +{ + char *filename, filetype[5]; + struct stat sbuf; + mode_t mode; + + if (!PyArg_ParseTuple(args, "s", &filename)) return NULL; + if (lstat(filename, &sbuf) != 0) { + if (errno == ENOENT || errno == ENOTDIR) + return Py_BuildValue("{s:s}", "type", NULL); + else { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + } + mode = sbuf.st_mode; + + /* Build return dictionary from stat struct */ + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) { + /* Regular files, directories, sockets, and fifos */ + if S_ISREG(mode) strcpy(filetype, "reg"); + else if S_ISDIR(mode) strcpy(filetype, "dir"); + else if S_ISSOCK(mode) strcpy(filetype, "sock"); + else strcpy(filetype, "fifo"); + return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}", + "type", filetype, + "size", (long int)sbuf.st_size, + "perms", (int)(mode & S_IRWXU), + "uid", (int)sbuf.st_uid, + "gid", (int)sbuf.st_gid, + "inode", (long int)sbuf.st_ino, + "devloc", (int)sbuf.st_dev, + "nlink", (int)sbuf.st_nlink, + "mtime", (long int)sbuf.st_mtime, + "atime", (long int)sbuf.st_atime); + } else if S_ISLNK(mode) { + /* Symbolic links */ + char linkname[1024]; + int len_link = readlink(filename, linkname, 1023); + if (len_link < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + linkname[len_link] = '\0'; + return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}", + "type", "sym", + "size", (long int)sbuf.st_size, + "perms", (int)(mode & S_IRWXU), + "uid", (int)sbuf.st_uid, + "gid", (int)sbuf.st_gid, + "inode", (long int)sbuf.st_ino, + "devloc", (int)sbuf.st_dev, + "nlink", (int)sbuf.st_nlink, + "linkname", linkname); + } else if (S_ISCHR(mode) || S_ISBLK(mode)) { + /* Device files */ + char devtype[2]; + int devnums = (int)sbuf.st_rdev; + if S_ISCHR(mode) strcpy(devtype, "c"); + else strcpy(devtype, "b"); + return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:O}", + "type", "dev", + "size", (long int)sbuf.st_size, + "perms", (int)(mode & S_IRWXU), + "uid", (int)sbuf.st_uid, + "gid", (int)sbuf.st_gid, + "inode", (long int)sbuf.st_ino, + "devloc", (int)sbuf.st_dev, + "nlink", (int)sbuf.st_nlink, + "devnums", Py_BuildValue("(s,i,i)", devtype, + devnums >> 8, + devnums & 0xff), + "mtime", (long int)sbuf.st_mtime, + "atime", (long int)sbuf.st_atime); + } else { + /* Unrecognized file type - pretend it isn't there */ + errno = ENOENT; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } +} + +static PyObject *long2str(self, args) + PyObject *self; + PyObject *args; +{ + unsigned char s[7]; + int sindex; + unsigned long long int l; + PyObject *pylong; + + if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL; + l = PyLong_AsUnsignedLongLong(pylong); + for(sindex = 0; sindex <= 6; sindex++) { + s[sindex] = l % 256; + l /= 256; + } + return Py_BuildValue("s#", s, 7); +} + +static PyObject *str2long(self, args) + PyObject *self; + PyObject *args; +{ + unsigned char *s; + unsigned long long int l = 0; + int sindex, ssize; + + if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL; + if (ssize != 7) return Py_BuildValue("i", -1); + for(sindex=6; sindex >= 0; sindex--) + l = l*256 + s[sindex]; + return PyLong_FromLongLong(l); +} + +static PyMethodDef CMethods[] = { + {"make_file_dict", c_make_file_dict, METH_VARARGS, + "Make dictionary from file stat"}, + {"long2str", long2str, METH_VARARGS, + "Convert long int to 7 byte string"}, + {"str2long", str2long, METH_VARARGS, + "Convert 7 byte string to long int"}, + {NULL, NULL, 0, NULL} +}; + +void initC(void) +{ + (void) Py_InitModule("C", CMethods); +} + diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py index 74d413d..c557dd5 100644 --- a/rdiff-backup/rdiff_backup/connection.py +++ b/rdiff-backup/rdiff_backup/connection.py @@ -1,5 +1,5 @@ from __future__ import generators -import types, os, tempfile, cPickle, shutil, traceback +import types, os, tempfile, cPickle, shutil, traceback, pickle ####################################################################### # @@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection): def _putobj(self, obj, req_num): """Send a generic python obj down the outpipe""" - self._write("o", cPickle.dumps(obj, 1), req_num) + # for some reason there is an error when cPickle is used below.. + self._write("o", pickle.dumps(obj, 1), req_num) def _putbuf(self, buf, req_num): """Send buffer buf down the outpipe""" @@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection): def _write(self, headerchar, data, req_num): """Write header and then data to the pipe""" - self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data))) + self.outpipe.write(headerchar + chr(req_num) + + C.long2str(long(len(data)))) self.outpipe.write(data) self.outpipe.flush() @@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection): """Read length bytes from inpipe, returning result""" return self.inpipe.read(length) - def _s2l(self, s): + def _s2l_old(self, s): """Convert string to long int""" assert len(s) == 7 l = 0L for i in range(7): l = l*256 + ord(s[i]) return l - def _l2s(self, l): + def _l2s_old(self, l): """Convert long int to string""" s = "" for i in range(7): @@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection): try: format_string, req_num, length = (header_string[0], ord(header_string[1]), - self._s2l(header_string[2:])) + C.str2long(header_string[2:])) except IndexError: raise ConnectionError() if format_string == "q": raise ConnectionQuit("Received quit signal") @@ -490,7 +492,7 @@ class VirtualFile: # everything has to be available here for remote connection's use, but # put at bottom to reduce circularities. -import Globals, Time, Rdiff, Hardlink, FilenameMapping +import Globals, Time, Rdiff, Hardlink, FilenameMapping, C from static import * from lazy import * from log import * diff --git a/rdiff-backup/rdiff_backup/destructive_stepping.py b/rdiff-backup/rdiff_backup/destructive_stepping.py index a64ecbc..d0a4998 100644 --- a/rdiff-backup/rdiff_backup/destructive_stepping.py +++ b/rdiff-backup/rdiff_backup/destructive_stepping.py @@ -31,7 +31,7 @@ class DSRPath(RPath): newmtime - holds the new mtime """ - def __init__(self, source, *args): + def __init__(self, source, conn_or_rp, base = 0, index = ()): """Initialize DSRP Source should be true iff the DSRPath is taken from the @@ -42,10 +42,11 @@ class DSRPath(RPath): otherwise use the same arguments as the RPath initializer. """ - if len(args) == 1 and isinstance(args[0], RPath): - rp = args[0] - RPath.__init__(self, rp.conn, rp.base, rp.index) - else: RPath.__init__(self, *args) + if base == 0: + assert isinstance(conn_or_rp, RPath) + RPath.__init__(self, conn_or_rp.conn, + conn_or_rp.base, conn_or_rp.index) + else: RPath.__init__(self, conn_or_rp, base, index) if source != "bypass": # "bypass" val is used when unpackaging over connection diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py index 26cc952..4211441 100644 --- a/rdiff-backup/rdiff_backup/iterfile.py +++ b/rdiff-backup/rdiff_backup/iterfile.py @@ -1,5 +1,5 @@ -import cPickle -import Globals +import cPickle, array +import Globals, C ####################################################################### # @@ -13,7 +13,7 @@ class UnwrapFile: def __init__(self, file): self.file = file - def _s2l(self, s): + def _s2l_old(self, s): """Convert string to long int""" assert len(s) == 7 l = 0L @@ -31,8 +31,9 @@ class UnwrapFile: """ header = self.file.read(8) if not header: return None, None - assert len(header) == 8, "Header is only %d bytes" % len(header) - type, length = header[0], self._s2l(header[1:]) + if len(header) != 8: + assert None, "Header %s is only %d bytes" % (header, len(header)) + type, length = header[0], C.str2long(header[1:]) buf = self.file.read(length) if type == "o": return type, cPickle.loads(buf) else: return type, buf @@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile): """ UnwrapFile.__init__(self, iwf.file) self.iwf = iwf - self.bufferlist = [initial_data] - self.bufferlen = len(initial_data) + self.buffer = initial_data self.closed = None - def check_consistency(self): - l = len("".join(self.bufferlist)) - assert l == self.bufferlen, \ - "Length of IVF bufferlist doesn't match (%s, %s)" % \ - (l, self.bufferlen) - def read(self, length): + """Read length bytes from the file, updating buffers as necessary""" assert not self.closed if self.iwf.currently_in_file: - while length >= self.bufferlen: + while length >= len(self.buffer): if not self.addtobuffer(): break - real_len = min(length, self.bufferlen) - combined_buffer = "".join(self.bufferlist) - assert len(combined_buffer) == self.bufferlen, \ - (len(combined_buffer), self.bufferlen) - self.bufferlist = [combined_buffer[real_len:]] - self.bufferlen = self.bufferlen - real_len - return combined_buffer[:real_len] + real_len = min(length, len(self.buffer)) + return_val = self.buffer[:real_len] + self.buffer = self.buffer[real_len:] + return return_val def addtobuffer(self): """Read a chunk from the file and add it to the buffer""" @@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile): type, data = self._get() assert type == "c", "Type is %s instead of c" % type if data: - self.bufferlen = self.bufferlen + len(data) - self.bufferlist.append(data) + self.buffer += data return 1 else: self.iwf.currently_in_file = None @@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile): """Currently just reads whats left and discards it""" while self.iwf.currently_in_file: self.addtobuffer() - self.bufferlist = [] - self.bufferlen = 0 + self.buffer = "" self.closed = 1 @@ -145,45 +135,43 @@ class FileWrappingIter: def __init__(self, iter): """Initialize with iter""" self.iter = iter - self.bufferlist = [] - self.bufferlen = 0L + self.array_buf = array.array('c') self.currently_in_file = None self.closed = None def read(self, length): """Return next length bytes in file""" assert not self.closed - while self.bufferlen < length: + while len(self.array_buf) < length: if not self.addtobuffer(): break - combined_buffer = "".join(self.bufferlist) - assert len(combined_buffer) == self.bufferlen - real_len = min(self.bufferlen, length) - self.bufferlen = self.bufferlen - real_len - self.bufferlist = [combined_buffer[real_len:]] - return combined_buffer[:real_len] + result = self.array_buf[:length].tostring() + del self.array_buf[:length] + return result def addtobuffer(self): - """Updates self.bufferlist and self.bufferlen, adding on a chunk + """Updates self.buffer, adding a chunk from the iterator. Returns None if we have reached the end of the iterator, otherwise return true. """ + array_buf = self.array_buf if self.currently_in_file: - buf = "c" + self.addfromfile() + array_buf.fromstring("c") + array_buf.fromstring(self.addfromfile()) else: try: currentobj = self.iter.next() except StopIteration: return None if hasattr(currentobj, "read") and hasattr(currentobj, "close"): self.currently_in_file = currentobj - buf = "f" + self.addfromfile() + array_buf.fromstring("f") + array_buf.fromstring(self.addfromfile()) else: pickle = cPickle.dumps(currentobj, 1) - buf = "o" + self._l2s(len(pickle)) + pickle - - self.bufferlist.append(buf) - self.bufferlen = self.bufferlen + len(buf) + array_buf.fromstring("o") + array_buf.fromstring(C.long2str(long(len(pickle)))) + array_buf.fromstring(pickle) return 1 def addfromfile(self): @@ -192,9 +180,9 @@ class FileWrappingIter: if not buf: assert not self.currently_in_file.close() self.currently_in_file = None - return self._l2s(len(buf)) + buf + return C.long2str(long(len(buf))) + buf - def _l2s(self, l): + def _l2s_old(self, l): """Convert long int to string of 7 characters""" s = "" for i in range(7): @@ -210,26 +198,28 @@ class BufferedRead: """Buffer the .read() calls to the given file This is used to lessen overhead and latency when a file is sent - over a connection. + over a connection. Profiling said that arrays were faster than + strings here. """ def __init__(self, file): self.file = file - self.buffer = "" + self.array_buf = array.array('c') self.bufsize = Globals.conn_bufsize def read(self, l = -1): + array_buf = self.array_buf if l < 0: # Read as much as possible - result = self.buffer + self.file.read() - self.buffer = "" + result = array_buf.tostring() + self.file.read() + del array_buf[:] return result - if len(self.buffer) < l: # Try to make buffer as long as l - self.buffer += self.file.read(max(self.bufsize, - l - len(self.buffer))) - actual_size = min(l, len(self.buffer)) - result = self.buffer[:actual_size] - self.buffer = self.buffer[actual_size:] + if len(array_buf) < l: # Try to make buffer at least as long as l + array_buf.fromstring(self.file.read(max(self.bufsize, l))) + result = array_buf[:l].tostring() + del array_buf[:l] return result def close(self): return self.file.close() + +from log import * diff --git a/rdiff-backup/rdiff_backup/profiled_rdb.py b/rdiff-backup/rdiff_backup/profiled_rdb.py index a2f30ea..7412847 100755 --- a/rdiff-backup/rdiff_backup/profiled_rdb.py +++ b/rdiff-backup/rdiff_backup/profiled_rdb.py @@ -8,9 +8,9 @@ statistics afterwards. """ __no_execute__ = 1 -execfile("main.py") -import profile, pstats -profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output") +import sys, rdiff_backup.Main, profile, pstats +profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]), + "profile-output") p = pstats.Stats("profile-output") p.sort_stats('time') p.print_stats(40) diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 73910be..b6d9b70 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -217,6 +217,7 @@ class RORPath(RPathStatic): (not Globals.change_ownership or self.issym())): # Don't compare gid/uid for symlinks or if not change_ownership pass + elif key == 'atime' and not Globals.preserve_atime: pass elif key == 'devloc' or key == 'inode' or key == 'nlink': pass elif (not other.data.has_key(key) or self.data[key] != other.data[key]): return None @@ -425,7 +426,7 @@ class RPath(RORPath): if base is not None: self.path = "/".join((base,) + index) self.file = None if data or base is None: self.data = data - else: self.setdata() + else: self.data = self.conn.C.make_file_dict(self.path) def __str__(self): return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index, @@ -448,6 +449,10 @@ class RPath(RORPath): self.path = "/".join((self.base,) + self.index) def setdata(self): + """Set data dictionary using C extension""" + self.data = self.conn.C.make_file_dict(self.path) + + def setdata_old(self): """Create the data dictionary""" statblock = self.conn.RPathStatic.tupled_lstat(self.path) if statblock is None: diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py index 4fee9ee..05436c1 100644 --- a/rdiff-backup/rdiff_backup/selection.py +++ b/rdiff-backup/rdiff_backup/selection.py @@ -94,7 +94,9 @@ class Select: self.starting_index = starting_index self.iter = self.iterate_starting_from(self.dsrpath, self.iterate_starting_from, sel_func) - else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) + elif self.quoting_on: + self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) + else: self.iter = self.Iterate_fast(self.dsrpath, sel_func) # only iterate parents if we are not starting from beginning self.iterate_parents = starting_index is not None and iterate_parents @@ -102,6 +104,52 @@ class Select: self.__iter__ = lambda: self return self + def Iterate_fast(self, dsrpath, sel_func): + """Like Iterate, but don't recur, saving time + + This is a bit harder to read than Iterate/iterate_in_dir, but + it should be faster because it only recurs to half as much + depth. It doesn't handle the quoting case. + + """ + def error_handler(exc, filename): + Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2) + return None + + def diryield(dsrpath): + s = sel_func(dsrpath) + if s == 0: return + elif s == 1: + yield dsrpath + for filename in Robust.listrp(dsrpath): + new_dsrp = Robust.check_common_error(error_handler, + dsrpath.append, [filename]) + if new_dsrp: + if new_dsrp.isdir(): + for dsrp in diryield(new_dsrp): yield dsrp + elif sel_func(new_dsrp) == 1: yield new_dsrp + elif s == 2: + yielded_something = None + for filename in Robust.listrp(dsrpath): + new_dsrp = Robust.check_common_error(error_handler, + dsrpath.append, [filename]) + if new_dsrp: + if new_dsrp.isdir(): + for dsrp in diryield(new_dsrp): + if not yielded_something: + yielded_something = 1 + yield dsrpath + yield dsrp + elif sel_func(new_dsrp) == 1: + if not yielded_something: + yielded_something = 1 + yield dsrpath + yield new_dsrp + + if dsrpath.isdir(): + for dsrp in diryield(dsrpath): yield dsrp + elif sel_func(dsrpath) == 1: yield dsrpath + def Iterate(self, dsrpath, rec_func, sel_func): """Return iterator yielding dsrps in dsrpath diff --git a/rdiff-backup/src/Globals.py b/rdiff-backup/src/Globals.py index ca6e8d1..f3e4474 100644 --- a/rdiff-backup/src/Globals.py +++ b/rdiff-backup/src/Globals.py @@ -1,7 +1,7 @@ import re, os # The current version of rdiff-backup -version = "0.8.0" +version = "0.9.0" # If this is set, use this value in seconds as the current time # instead of reading it from the clock. diff --git a/rdiff-backup/src/cmodule.c b/rdiff-backup/src/cmodule.c new file mode 100644 index 0000000..ea10415 --- /dev/null +++ b/rdiff-backup/src/cmodule.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include + +static PyObject *c_make_file_dict(self, args) + PyObject *self; + PyObject *args; +{ + char *filename, filetype[5]; + struct stat sbuf; + mode_t mode; + + if (!PyArg_ParseTuple(args, "s", &filename)) return NULL; + if (lstat(filename, &sbuf) != 0) { + if (errno == ENOENT || errno == ENOTDIR) + return Py_BuildValue("{s:s}", "type", NULL); + else { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + } + mode = sbuf.st_mode; + + /* Build return dictionary from stat struct */ + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) { + /* Regular files, directories, sockets, and fifos */ + if S_ISREG(mode) strcpy(filetype, "reg"); + else if S_ISDIR(mode) strcpy(filetype, "dir"); + else if S_ISSOCK(mode) strcpy(filetype, "sock"); + else strcpy(filetype, "fifo"); + return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}", + "type", filetype, + "size", (long int)sbuf.st_size, + "perms", (int)(mode & S_IRWXU), + "uid", (int)sbuf.st_uid, + "gid", (int)sbuf.st_gid, + "inode", (long int)sbuf.st_ino, + "devloc", (int)sbuf.st_dev, + "nlink", (int)sbuf.st_nlink, + "mtime", (long int)sbuf.st_mtime, + "atime", (long int)sbuf.st_atime); + } else if S_ISLNK(mode) { + /* Symbolic links */ + char linkname[1024]; + int len_link = readlink(filename, linkname, 1023); + if (len_link < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + linkname[len_link] = '\0'; + return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}", + "type", "sym", + "size", (long int)sbuf.st_size, + "perms", (int)(mode & S_IRWXU), + "uid", (int)sbuf.st_uid, + "gid", (int)sbuf.st_gid, + "inode", (long int)sbuf.st_ino, + "devloc", (int)sbuf.st_dev, + "nlink", (int)sbuf.st_nlink, + "linkname", linkname); + } else if (S_ISCHR(mode) || S_ISBLK(mode)) { + /* Device files */ + char devtype[2]; + int devnums = (int)sbuf.st_rdev; + if S_ISCHR(mode) strcpy(devtype, "c"); + else strcpy(devtype, "b"); + return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:O}", + "type", "dev", + "size", (long int)sbuf.st_size, + "perms", (int)(mode & S_IRWXU), + "uid", (int)sbuf.st_uid, + "gid", (int)sbuf.st_gid, + "inode", (long int)sbuf.st_ino, + "devloc", (int)sbuf.st_dev, + "nlink", (int)sbuf.st_nlink, + "devnums", Py_BuildValue("(s,i,i)", devtype, + devnums >> 8, + devnums & 0xff), + "mtime", (long int)sbuf.st_mtime, + "atime", (long int)sbuf.st_atime); + } else { + /* Unrecognized file type - pretend it isn't there */ + errno = ENOENT; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } +} + +static PyObject *long2str(self, args) + PyObject *self; + PyObject *args; +{ + unsigned char s[7]; + int sindex; + unsigned long long int l; + PyObject *pylong; + + if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL; + l = PyLong_AsUnsignedLongLong(pylong); + for(sindex = 0; sindex <= 6; sindex++) { + s[sindex] = l % 256; + l /= 256; + } + return Py_BuildValue("s#", s, 7); +} + +static PyObject *str2long(self, args) + PyObject *self; + PyObject *args; +{ + unsigned char *s; + unsigned long long int l = 0; + int sindex, ssize; + + if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL; + if (ssize != 7) return Py_BuildValue("i", -1); + for(sindex=6; sindex >= 0; sindex--) + l = l*256 + s[sindex]; + return PyLong_FromLongLong(l); +} + +static PyMethodDef CMethods[] = { + {"make_file_dict", c_make_file_dict, METH_VARARGS, + "Make dictionary from file stat"}, + {"long2str", long2str, METH_VARARGS, + "Convert long int to 7 byte string"}, + {"str2long", str2long, METH_VARARGS, + "Convert 7 byte string to long int"}, + {NULL, NULL, 0, NULL} +}; + +void initC(void) +{ + (void) Py_InitModule("C", CMethods); +} + diff --git a/rdiff-backup/src/connection.py b/rdiff-backup/src/connection.py index 74d413d..c557dd5 100644 --- a/rdiff-backup/src/connection.py +++ b/rdiff-backup/src/connection.py @@ -1,5 +1,5 @@ from __future__ import generators -import types, os, tempfile, cPickle, shutil, traceback +import types, os, tempfile, cPickle, shutil, traceback, pickle ####################################################################### # @@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection): def _putobj(self, obj, req_num): """Send a generic python obj down the outpipe""" - self._write("o", cPickle.dumps(obj, 1), req_num) + # for some reason there is an error when cPickle is used below.. + self._write("o", pickle.dumps(obj, 1), req_num) def _putbuf(self, buf, req_num): """Send buffer buf down the outpipe""" @@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection): def _write(self, headerchar, data, req_num): """Write header and then data to the pipe""" - self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data))) + self.outpipe.write(headerchar + chr(req_num) + + C.long2str(long(len(data)))) self.outpipe.write(data) self.outpipe.flush() @@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection): """Read length bytes from inpipe, returning result""" return self.inpipe.read(length) - def _s2l(self, s): + def _s2l_old(self, s): """Convert string to long int""" assert len(s) == 7 l = 0L for i in range(7): l = l*256 + ord(s[i]) return l - def _l2s(self, l): + def _l2s_old(self, l): """Convert long int to string""" s = "" for i in range(7): @@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection): try: format_string, req_num, length = (header_string[0], ord(header_string[1]), - self._s2l(header_string[2:])) + C.str2long(header_string[2:])) except IndexError: raise ConnectionError() if format_string == "q": raise ConnectionQuit("Received quit signal") @@ -490,7 +492,7 @@ class VirtualFile: # everything has to be available here for remote connection's use, but # put at bottom to reduce circularities. -import Globals, Time, Rdiff, Hardlink, FilenameMapping +import Globals, Time, Rdiff, Hardlink, FilenameMapping, C from static import * from lazy import * from log import * diff --git a/rdiff-backup/src/destructive_stepping.py b/rdiff-backup/src/destructive_stepping.py index a64ecbc..d0a4998 100644 --- a/rdiff-backup/src/destructive_stepping.py +++ b/rdiff-backup/src/destructive_stepping.py @@ -31,7 +31,7 @@ class DSRPath(RPath): newmtime - holds the new mtime """ - def __init__(self, source, *args): + def __init__(self, source, conn_or_rp, base = 0, index = ()): """Initialize DSRP Source should be true iff the DSRPath is taken from the @@ -42,10 +42,11 @@ class DSRPath(RPath): otherwise use the same arguments as the RPath initializer. """ - if len(args) == 1 and isinstance(args[0], RPath): - rp = args[0] - RPath.__init__(self, rp.conn, rp.base, rp.index) - else: RPath.__init__(self, *args) + if base == 0: + assert isinstance(conn_or_rp, RPath) + RPath.__init__(self, conn_or_rp.conn, + conn_or_rp.base, conn_or_rp.index) + else: RPath.__init__(self, conn_or_rp, base, index) if source != "bypass": # "bypass" val is used when unpackaging over connection diff --git a/rdiff-backup/src/iterfile.py b/rdiff-backup/src/iterfile.py index 26cc952..4211441 100644 --- a/rdiff-backup/src/iterfile.py +++ b/rdiff-backup/src/iterfile.py @@ -1,5 +1,5 @@ -import cPickle -import Globals +import cPickle, array +import Globals, C ####################################################################### # @@ -13,7 +13,7 @@ class UnwrapFile: def __init__(self, file): self.file = file - def _s2l(self, s): + def _s2l_old(self, s): """Convert string to long int""" assert len(s) == 7 l = 0L @@ -31,8 +31,9 @@ class UnwrapFile: """ header = self.file.read(8) if not header: return None, None - assert len(header) == 8, "Header is only %d bytes" % len(header) - type, length = header[0], self._s2l(header[1:]) + if len(header) != 8: + assert None, "Header %s is only %d bytes" % (header, len(header)) + type, length = header[0], C.str2long(header[1:]) buf = self.file.read(length) if type == "o": return type, cPickle.loads(buf) else: return type, buf @@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile): """ UnwrapFile.__init__(self, iwf.file) self.iwf = iwf - self.bufferlist = [initial_data] - self.bufferlen = len(initial_data) + self.buffer = initial_data self.closed = None - def check_consistency(self): - l = len("".join(self.bufferlist)) - assert l == self.bufferlen, \ - "Length of IVF bufferlist doesn't match (%s, %s)" % \ - (l, self.bufferlen) - def read(self, length): + """Read length bytes from the file, updating buffers as necessary""" assert not self.closed if self.iwf.currently_in_file: - while length >= self.bufferlen: + while length >= len(self.buffer): if not self.addtobuffer(): break - real_len = min(length, self.bufferlen) - combined_buffer = "".join(self.bufferlist) - assert len(combined_buffer) == self.bufferlen, \ - (len(combined_buffer), self.bufferlen) - self.bufferlist = [combined_buffer[real_len:]] - self.bufferlen = self.bufferlen - real_len - return combined_buffer[:real_len] + real_len = min(length, len(self.buffer)) + return_val = self.buffer[:real_len] + self.buffer = self.buffer[real_len:] + return return_val def addtobuffer(self): """Read a chunk from the file and add it to the buffer""" @@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile): type, data = self._get() assert type == "c", "Type is %s instead of c" % type if data: - self.bufferlen = self.bufferlen + len(data) - self.bufferlist.append(data) + self.buffer += data return 1 else: self.iwf.currently_in_file = None @@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile): """Currently just reads whats left and discards it""" while self.iwf.currently_in_file: self.addtobuffer() - self.bufferlist = [] - self.bufferlen = 0 + self.buffer = "" self.closed = 1 @@ -145,45 +135,43 @@ class FileWrappingIter: def __init__(self, iter): """Initialize with iter""" self.iter = iter - self.bufferlist = [] - self.bufferlen = 0L + self.array_buf = array.array('c') self.currently_in_file = None self.closed = None def read(self, length): """Return next length bytes in file""" assert not self.closed - while self.bufferlen < length: + while len(self.array_buf) < length: if not self.addtobuffer(): break - combined_buffer = "".join(self.bufferlist) - assert len(combined_buffer) == self.bufferlen - real_len = min(self.bufferlen, length) - self.bufferlen = self.bufferlen - real_len - self.bufferlist = [combined_buffer[real_len:]] - return combined_buffer[:real_len] + result = self.array_buf[:length].tostring() + del self.array_buf[:length] + return result def addtobuffer(self): - """Updates self.bufferlist and self.bufferlen, adding on a chunk + """Updates self.buffer, adding a chunk from the iterator. Returns None if we have reached the end of the iterator, otherwise return true. """ + array_buf = self.array_buf if self.currently_in_file: - buf = "c" + self.addfromfile() + array_buf.fromstring("c") + array_buf.fromstring(self.addfromfile()) else: try: currentobj = self.iter.next() except StopIteration: return None if hasattr(currentobj, "read") and hasattr(currentobj, "close"): self.currently_in_file = currentobj - buf = "f" + self.addfromfile() + array_buf.fromstring("f") + array_buf.fromstring(self.addfromfile()) else: pickle = cPickle.dumps(currentobj, 1) - buf = "o" + self._l2s(len(pickle)) + pickle - - self.bufferlist.append(buf) - self.bufferlen = self.bufferlen + len(buf) + array_buf.fromstring("o") + array_buf.fromstring(C.long2str(long(len(pickle)))) + array_buf.fromstring(pickle) return 1 def addfromfile(self): @@ -192,9 +180,9 @@ class FileWrappingIter: if not buf: assert not self.currently_in_file.close() self.currently_in_file = None - return self._l2s(len(buf)) + buf + return C.long2str(long(len(buf))) + buf - def _l2s(self, l): + def _l2s_old(self, l): """Convert long int to string of 7 characters""" s = "" for i in range(7): @@ -210,26 +198,28 @@ class BufferedRead: """Buffer the .read() calls to the given file This is used to lessen overhead and latency when a file is sent - over a connection. + over a connection. Profiling said that arrays were faster than + strings here. """ def __init__(self, file): self.file = file - self.buffer = "" + self.array_buf = array.array('c') self.bufsize = Globals.conn_bufsize def read(self, l = -1): + array_buf = self.array_buf if l < 0: # Read as much as possible - result = self.buffer + self.file.read() - self.buffer = "" + result = array_buf.tostring() + self.file.read() + del array_buf[:] return result - if len(self.buffer) < l: # Try to make buffer as long as l - self.buffer += self.file.read(max(self.bufsize, - l - len(self.buffer))) - actual_size = min(l, len(self.buffer)) - result = self.buffer[:actual_size] - self.buffer = self.buffer[actual_size:] + if len(array_buf) < l: # Try to make buffer at least as long as l + array_buf.fromstring(self.file.read(max(self.bufsize, l))) + result = array_buf[:l].tostring() + del array_buf[:l] return result def close(self): return self.file.close() + +from log import * diff --git a/rdiff-backup/src/profiled_rdb.py b/rdiff-backup/src/profiled_rdb.py index a2f30ea..7412847 100755 --- a/rdiff-backup/src/profiled_rdb.py +++ b/rdiff-backup/src/profiled_rdb.py @@ -8,9 +8,9 @@ statistics afterwards. """ __no_execute__ = 1 -execfile("main.py") -import profile, pstats -profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output") +import sys, rdiff_backup.Main, profile, pstats +profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]), + "profile-output") p = pstats.Stats("profile-output") p.sort_stats('time') p.print_stats(40) diff --git a/rdiff-backup/src/rpath.py b/rdiff-backup/src/rpath.py index 73910be..b6d9b70 100644 --- a/rdiff-backup/src/rpath.py +++ b/rdiff-backup/src/rpath.py @@ -217,6 +217,7 @@ class RORPath(RPathStatic): (not Globals.change_ownership or self.issym())): # Don't compare gid/uid for symlinks or if not change_ownership pass + elif key == 'atime' and not Globals.preserve_atime: pass elif key == 'devloc' or key == 'inode' or key == 'nlink': pass elif (not other.data.has_key(key) or self.data[key] != other.data[key]): return None @@ -425,7 +426,7 @@ class RPath(RORPath): if base is not None: self.path = "/".join((base,) + index) self.file = None if data or base is None: self.data = data - else: self.setdata() + else: self.data = self.conn.C.make_file_dict(self.path) def __str__(self): return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index, @@ -448,6 +449,10 @@ class RPath(RORPath): self.path = "/".join((self.base,) + self.index) def setdata(self): + """Set data dictionary using C extension""" + self.data = self.conn.C.make_file_dict(self.path) + + def setdata_old(self): """Create the data dictionary""" statblock = self.conn.RPathStatic.tupled_lstat(self.path) if statblock is None: diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py index 4fee9ee..05436c1 100644 --- a/rdiff-backup/src/selection.py +++ b/rdiff-backup/src/selection.py @@ -94,7 +94,9 @@ class Select: self.starting_index = starting_index self.iter = self.iterate_starting_from(self.dsrpath, self.iterate_starting_from, sel_func) - else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) + elif self.quoting_on: + self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func) + else: self.iter = self.Iterate_fast(self.dsrpath, sel_func) # only iterate parents if we are not starting from beginning self.iterate_parents = starting_index is not None and iterate_parents @@ -102,6 +104,52 @@ class Select: self.__iter__ = lambda: self return self + def Iterate_fast(self, dsrpath, sel_func): + """Like Iterate, but don't recur, saving time + + This is a bit harder to read than Iterate/iterate_in_dir, but + it should be faster because it only recurs to half as much + depth. It doesn't handle the quoting case. + + """ + def error_handler(exc, filename): + Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2) + return None + + def diryield(dsrpath): + s = sel_func(dsrpath) + if s == 0: return + elif s == 1: + yield dsrpath + for filename in Robust.listrp(dsrpath): + new_dsrp = Robust.check_common_error(error_handler, + dsrpath.append, [filename]) + if new_dsrp: + if new_dsrp.isdir(): + for dsrp in diryield(new_dsrp): yield dsrp + elif sel_func(new_dsrp) == 1: yield new_dsrp + elif s == 2: + yielded_something = None + for filename in Robust.listrp(dsrpath): + new_dsrp = Robust.check_common_error(error_handler, + dsrpath.append, [filename]) + if new_dsrp: + if new_dsrp.isdir(): + for dsrp in diryield(new_dsrp): + if not yielded_something: + yielded_something = 1 + yield dsrpath + yield dsrp + elif sel_func(new_dsrp) == 1: + if not yielded_something: + yielded_something = 1 + yield dsrpath + yield new_dsrp + + if dsrpath.isdir(): + for dsrp in diryield(dsrpath): yield dsrp + elif sel_func(dsrpath) == 1: yield dsrpath + def Iterate(self, dsrpath, rec_func, sel_func): """Return iterator yielding dsrps in dsrpath -- cgit v1.2.1