From 7610a21761c90ee32fa93d0ad0768fbbfe93e6d7 Mon Sep 17 00:00:00 2001 From: bescoto Date: Tue, 22 Jul 2003 10:04:10 +0000 Subject: Final changes for 0.12.1, including librsync block size scaling git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/branches/r0-12@354 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 7 +++- rdiff-backup/dist/makedist | 5 ++- rdiff-backup/dist/makerpm | 15 ++++---- rdiff-backup/dist/rdiff-backup.spec | 51 -------------------------- rdiff-backup/dist/rdiff-backup.spec.template | 55 ++++++++++++++++++++++++++++ rdiff-backup/rdiff_backup/Rdiff.py | 27 ++++++++++---- rdiff-backup/rdiff_backup/_librsyncmodule.c | 5 ++- rdiff-backup/rdiff_backup/librsync.py | 8 ++-- rdiff-backup/testing/librsynctest.py | 52 +++++++++++++++++--------- rdiff-backup/testing/rdifftest.py | 4 +- 10 files changed, 135 insertions(+), 94 deletions(-) delete mode 100644 rdiff-backup/dist/rdiff-backup.spec create mode 100644 rdiff-backup/dist/rdiff-backup.spec.template diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG index 3d1a8b6..f6e4a7d 100644 --- a/rdiff-backup/CHANGELOG +++ b/rdiff-backup/CHANGELOG @@ -1,4 +1,4 @@ -New in v0.12.1 (2003/07/??) +New in v0.12.1 (2003/07/22) --------------------------- Added --no-change-dir-inc-perms switch, to avoid some weird errors on @@ -8,6 +8,11 @@ report. Fixed bug when regressing destination directory made with --windows-mode. Reported by Tucker Sylvestro. +The librsync blocksize is now chosen based on filesize. This should +make operations on large files faster (in some cases, orders of +magnitude faster). Thanks to Ty! Boyack for bringing this issue to my +attention. + New in v0.12.0 (2003/06/26) --------------------------- diff --git a/rdiff-backup/dist/makedist b/rdiff-backup/dist/makedist index d41ab90..a0a0a6e 100755 --- a/rdiff-backup/dist/makedist +++ b/rdiff-backup/dist/makedist @@ -7,7 +7,7 @@ DistDir = "dist" # Various details about the files must also be specified by the rpm # spec template. -spec_template = "dist/rdiff-backup.spec" +spec_template = "dist/rdiff-backup.spec.template" #redhat_spec_template = "dist/rdiff-backup.rh7x.spec" def CopyMan(destination, version): @@ -115,7 +115,8 @@ def MakeTar(): def MakeSpecFile(): """Create spec file using spec template""" - specfile = "rdiff-backup-%s-2.spec" % Version + #specfile = "rdiff-backup-%s-2.spec" % Version + specfile = "rdiff-backup.spec" # Fedora standard name VersionedCopy(spec_template, specfile) return specfile diff --git a/rdiff-backup/dist/makerpm b/rdiff-backup/dist/makerpm index cddfdb2..25fee1f 100755 --- a/rdiff-backup/dist/makerpm +++ b/rdiff-backup/dist/makerpm @@ -6,23 +6,24 @@ rpmroot = "/home/ben/rpm" if len(sys.argv) == 2: version = sys.argv[1] - specfile = "rdiff-backup-%s-2.spec" % version + specfile = "rdiff-backup.spec" print "Using specfile %s" % specfile else: print "Syntax: %s version_number" % sys.argv[0] sys.exit(1) -base = ".".join(specfile.split(".")[:-1]) -srcrpm = base+".src.rpm" -i386rpm = base+".i386.rpm" -source_rpm = base+".src.rpm" -tarfile = "-".join(base.split("-")[:-1]) + ".tar.gz" +base = "rdiff-backup-%s" % (version,) +tarfile = base + ".tar.gz" +rpmbase = base + "-0.fdr.1" # Fedora suffix, with release number 1 +i386_rpm = rpmbase + ".i386.rpm" +source_rpm = rpmbase + ".src.rpm" + # These assume the rpm root directory $HOME/rpm. The # nonstandard location allows for building by non-root user. assert not os.system("cp %s %s/SOURCES" % (tarfile, rpmroot)) #assert not os.system("rpm -ba --sign -vv --target i386 " + specfile) assert not os.system("rpmbuild -ba -v --sign " + specfile) -assert not os.system("mv %s/RPMS/i386/%s ." % (rpmroot, i386rpm)) +assert not os.system("mv %s/RPMS/i386/%s ." % (rpmroot, i386_rpm)) assert not os.system("mv %s/SRPMS/%s ." % (rpmroot, source_rpm)) diff --git a/rdiff-backup/dist/rdiff-backup.spec b/rdiff-backup/dist/rdiff-backup.spec deleted file mode 100644 index 2073b51..0000000 --- a/rdiff-backup/dist/rdiff-backup.spec +++ /dev/null @@ -1,51 +0,0 @@ -%define PYTHON_NAME %((rpm -q --quiet python2 && echo python2) || echo python) - -Version: $version -Summary: Convenient and transparent local/remote incremental mirror/backup -Name: rdiff-backup -Release: 2 -URL: http://www.stanford.edu/~bescoto/rdiff-backup/ -Source: %{name}-%{version}.tar.gz -Copyright: GPL -Group: Applications/Archiving -BuildRoot: %{_tmppath}/%{name}-root -requires: librsync >= 0.9.5.1, %{PYTHON_NAME} >= 2.2 -BuildPrereq: %{PYTHON_NAME}-devel >= 2.2, librsync-devel >= 0.9.5.1 - -%description -rdiff-backup is a script, written in Python, that backs up one -directory to another and is intended to be run periodically (nightly -from cron for instance). The target directory ends up a copy of the -source directory, but extra reverse diffs are stored in the target -directory, so you can still recover files lost some time ago. The idea -is to combine the best features of a mirror and an incremental -backup. rdiff-backup can also operate in a bandwidth efficient manner -over a pipe, like rsync. Thus you can use rdiff-backup and ssh to -securely back a hard drive up to a remote location, and only the -differences from the previous backup will be transmitted. - -%prep -%setup -q - -%build -%{PYTHON_NAME} setup.py build - -%install -%{PYTHON_NAME} setup.py install --prefix=$RPM_BUILD_ROOT/usr - -%clean -[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT - -%files -%defattr(-,root,root) -/usr/bin/rdiff-backup -/usr/share/man/man1 -/usr/lib -%doc CHANGELOG COPYING FAQ.html README - -%changelog -* Sun Jan 19 2002 Troels Arvin -- Builds, no matter if Python 2.2 is called python2-2.2 or python-2.2. - -* Sun Nov 4 2001 Ben Escoto -- Initial RPM diff --git a/rdiff-backup/dist/rdiff-backup.spec.template b/rdiff-backup/dist/rdiff-backup.spec.template new file mode 100644 index 0000000..0249336 --- /dev/null +++ b/rdiff-backup/dist/rdiff-backup.spec.template @@ -0,0 +1,55 @@ +%define PYTHON_NAME %((rpm -q --quiet python2 && echo python2) || echo python) + +Version: $version +Summary: Convenient and transparent local/remote incremental mirror/backup +Name: rdiff-backup +Release: 0.fdr.1 +Epoch: 0 +URL: http://rdiff-backup.stanford.edu/ +Source: http://rdiff-backup.stanford.edu/%{name}-%{version}.tar.gz +License: GPL +Group: Applications/Archiving +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +Requires: librsync >= 0.9.5.1, %{PYTHON_NAME} >= 2.2 +BuildPrereq: %{PYTHON_NAME}-devel >= 2.2, librsync-devel >= 0.9.5.1 + +%description +rdiff-backup is a script, written in Python, that backs up one +directory to another and is intended to be run periodically (nightly +from cron for instance). The target directory ends up a copy of the +source directory, but extra reverse diffs are stored in the target +directory, so you can still recover files lost some time ago. The idea +is to combine the best features of a mirror and an incremental +backup. rdiff-backup can also operate in a bandwidth efficient manner +over a pipe, like rsync. Thus you can use rdiff-backup and ssh to +securely back a hard drive up to a remote location, and only the +differences from the previous backup will be transmitted. + +%prep +%setup -q + +%build +%{PYTHON_NAME} setup.py build + +%install +%{PYTHON_NAME} setup.py install --prefix=$RPM_BUILD_ROOT/usr + +%clean +[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%{_bindir}/rdiff-backup +%{_mandir}/man1/rdiff-backup* +%{_libdir}/ +%doc CHANGELOG COPYING FAQ.html README + +%changelog +* Sun Jul 20 2003 Ben Escoto +- Minor changes to comply with Fedora standards. + +* Sun Jan 19 2002 Troels Arvin +- Builds, no matter if Python 2.2 is called python2-2.2 or python-2.2. + +* Sun Nov 4 2001 Ben Escoto +- Initial RPM diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py index ee688c1..0070ceb 100644 --- a/rdiff-backup/rdiff_backup/Rdiff.py +++ b/rdiff-backup/rdiff_backup/Rdiff.py @@ -23,10 +23,24 @@ import os, librsync import Globals, log, static, TempFile, rpath -def get_signature(rp): +def get_signature(rp, blocksize = None): """Take signature of rpin file and return in file object""" - log.Log("Getting signature of %s" % rp.get_indexpath(), 7) - return librsync.SigFile(rp.open("rb")) + if not blocksize: blocksize = find_blocksize(rp.getsize()) + log.Log("Getting signature of %s with blocksize %s" % + (rp.get_indexpath(), blocksize), 7) + return librsync.SigFile(rp.open("rb"), blocksize) + +def find_blocksize(file_len): + """Return a reasonable block size to use on files of length file_len + + If the block size is too big, deltas will be bigger than is + necessary. If the block size is too small, making deltas and + patching can take a really long time. + + """ + if file_len < 1024000: return 512 # set minimum of 512 bytes + else: # Split file into about 2000 pieces, rounding to 512 + return long((file_len/(2000*512))*512) def get_delta_sigfileobj(sig_fileobj, rp_new): """Like get_delta but signature is in a file object""" @@ -43,8 +57,7 @@ def write_delta(basis, new, delta, compress = None): """Write rdiff delta which brings basis to new""" log.Log("Writing delta %s from %s -> %s" % (basis.path, new.path, delta.path), 7) - sigfile = librsync.SigFile(basis.open("rb")) - deltafile = librsync.DeltaFile(sigfile, new.open("rb")) + deltafile = librsync.DeltaFile(get_signature(basis), new.open("rb")) delta.write_from_fileobj(deltafile, compress) def write_patched_fp(basis_fp, delta_fp, out_fp): @@ -79,8 +92,8 @@ def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None): def copy_local(rpin, rpout, rpnew = None): """Write rpnew == rpin using rpout as basis. rpout and rpnew local""" assert rpout.conn is Globals.local_connection - sigfile = librsync.SigFile(rpout.open("rb")) - deltafile = rpin.conn.librsync.DeltaFile(sigfile, rpin.open("rb")) + deltafile = rpin.conn.librsync.DeltaFile(get_signature(rpout), + rpin.open("rb")) patched_file = librsync.PatchedFile(rpout.open("rb"), deltafile) if rpnew: rpnew.write_from_fileobj(patched_file) diff --git a/rdiff-backup/rdiff_backup/_librsyncmodule.c b/rdiff-backup/rdiff_backup/_librsyncmodule.c index 9b24d6a..7a78580 100644 --- a/rdiff-backup/rdiff_backup/_librsyncmodule.c +++ b/rdiff-backup/rdiff_backup/_librsyncmodule.c @@ -41,15 +41,16 @@ static PyObject* _librsync_new_sigmaker(PyObject* self, PyObject* args) { _librsync_SigMakerObject* sm; + long blocklen; - if (!PyArg_ParseTuple(args,":new_sigmaker")) + if (!PyArg_ParseTuple(args, "l:new_sigmaker", &blocklen)) return NULL; sm = PyObject_New(_librsync_SigMakerObject, &_librsync_SigMakerType); if (sm == NULL) return NULL; sm->x_attr = NULL; - sm->sig_job = rs_sig_begin((size_t)RS_DEFAULT_BLOCK_LEN, + sm->sig_job = rs_sig_begin((size_t)blocklen, (size_t)RS_DEFAULT_STRONG_LEN); return (PyObject*)sm; } diff --git a/rdiff-backup/rdiff_backup/librsync.py b/rdiff-backup/rdiff_backup/librsync.py index eb5e235..a8e37ca 100644 --- a/rdiff-backup/rdiff_backup/librsync.py +++ b/rdiff-backup/rdiff_backup/librsync.py @@ -108,7 +108,7 @@ class LikeFile: class SigFile(LikeFile): """File-like object which incrementally generates a librsync signature""" - def __init__(self, infile): + def __init__(self, infile, blocksize = _librsync.RS_DEFAULT_BLOCK_LEN): """SigFile initializer - takes basis file basis file only needs to have read() and close() methods. It @@ -116,7 +116,7 @@ class SigFile(LikeFile): """ LikeFile.__init__(self, infile) - try: self.maker = _librsync.new_sigmaker() + try: self.maker = _librsync.new_sigmaker(blocksize) except _librsync.librsyncError, e: raise librsyncError(str(e)) class DeltaFile(LikeFile): @@ -163,9 +163,9 @@ class SigGenerator: module, not filelike object """ - def __init__(self): + def __init__(self, blocksize = _librsync.RS_DEFAULT_BLOCK_LEN): """Return new signature instance""" - try: self.sig_maker = _librsync.new_sigmaker() + try: self.sig_maker = _librsync.new_sigmaker(blocksize) except _librsync.librsyncError, e: raise librsyncError(str(e)) self.gotsig = None self.buffer = "" diff --git a/rdiff-backup/testing/librsynctest.py b/rdiff-backup/testing/librsynctest.py index 54dc4dd..2c56a8a 100644 --- a/rdiff-backup/testing/librsynctest.py +++ b/rdiff-backup/testing/librsynctest.py @@ -1,16 +1,22 @@ import unittest, random from commontest import * -from rdiff_backup import librsync - -def MakeRandomFile(path): - """Writes a random file of length between 10000 and 100000""" - fp = open(path, "w") - randseq = [] - for i in xrange(random.randrange(5000, 30000)): - randseq.append(chr(random.randrange(256))) - fp.write("".join(randseq)) - fp.close() +from rdiff_backup import librsync, log + +def MakeRandomFile(path, length = None): + """Writes a random file of given length, or random len if unspecified""" + if not length: length = random.randrange(5000, 100000) + fp = open(path, "wb") + fp_random = open('/dev/urandom', 'rb') + # Old slow way, may still be of use on systems without /dev/urandom + #randseq = [] + #for i in xrange(random.randrange(5000, 30000)): + # randseq.append(chr(random.randrange(256))) + #fp.write("".join(randseq)) + fp.write(fp_random.read(length)) + + fp.close() + fp_random.close() class LibrsyncTest(unittest.TestCase): """Test various librsync wrapper functions""" @@ -20,23 +26,33 @@ class LibrsyncTest(unittest.TestCase): sig = RPath(Globals.local_connection, "testfiles/signature") sig2 = RPath(Globals.local_connection, "testfiles/signature2") delta = RPath(Globals.local_connection, "testfiles/delta") - def testSigFile(self): - """Make sure SigFile generates same data as rdiff""" - for i in range(5): - MakeRandomFile(self.basis.path) - self.sig.delete() - assert not os.system("rdiff signature %s %s" % - (self.basis.path, self.sig.path)) + def sig_file_test_helper(self, blocksize, iterations, file_len = None): + """Compare SigFile output to rdiff output at given blocksize""" + for i in range(iterations): + MakeRandomFile(self.basis.path, file_len) + if self.sig.lstat(): self.sig.delete() + assert not os.system("rdiff -b %s signature %s %s" % + (blocksize, self.basis.path, self.sig.path)) fp = self.sig.open("rb") rdiff_sig = fp.read() fp.close() - sf = librsync.SigFile(self.basis.open("rb")) + sf = librsync.SigFile(self.basis.open("rb"), blocksize) librsync_sig = sf.read() sf.close() assert rdiff_sig == librsync_sig, \ (len(rdiff_sig), len(librsync_sig)) + + def testSigFile(self): + """Make sure SigFile generates same data as rdiff, blocksize 512""" + self.sig_file_test_helper(512, 5) + + def testSigFile2(self): + """Test SigFile like above, but try various blocksize""" + self.sig_file_test_helper(2048, 1, 60000) + self.sig_file_test_helper(7168, 1, 6000) + self.sig_file_test_helper(204800, 1, 40*1024*1024) def testSigGenerator(self): """Test SigGenerator, make sure it's same as SigFile""" diff --git a/rdiff-backup/testing/rdifftest.py b/rdiff-backup/testing/rdifftest.py index 6079f1a..d5bde4c 100644 --- a/rdiff-backup/testing/rdifftest.py +++ b/rdiff-backup/testing/rdifftest.py @@ -2,7 +2,7 @@ import unittest, random from commontest import * from rdiff_backup import Globals, Rdiff, selection, log, rpath -Log.setverbosity(6) +Log.setverbosity(7) def MakeRandomFile(path): """Writes a random file of length between 10000 and 100000""" @@ -28,7 +28,7 @@ class RdiffTest(unittest.TestCase): sig = rpath.RPath(self.lc, "testfiles/various_file_types/regular_file.sig") sigfp = sig.open("r") - rfsig = Rdiff.get_signature(RPath(self.lc, "testfiles/various_file_types/regular_file")) + rfsig = Rdiff.get_signature(RPath(self.lc, "testfiles/various_file_types/regular_file"), 2048) assert rpath.cmpfileobj(sigfp, rfsig) sigfp.close() rfsig.close() -- cgit v1.2.1