From b53bfd4d41252426cb050ef896676034d92e3ef7 Mon Sep 17 00:00:00 2001 From: bescoto Date: Tue, 31 Dec 2002 08:46:22 +0000 Subject: Various changes for v0.11.1 (see CHANGELOG) git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@256 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 22 +- rdiff-backup/dist/makedist | 26 +- rdiff-backup/dist/makerpm | 18 +- rdiff-backup/rdiff-backup.1 | 16 +- rdiff-backup/rdiff_backup/Hardlink.py | 188 +------- rdiff-backup/rdiff_backup/Main.py | 80 ++-- rdiff-backup/rdiff_backup/Rdiff.py | 99 ++--- rdiff-backup/rdiff_backup/Security.py | 18 +- rdiff-backup/rdiff_backup/SetConnections.py | 1 + rdiff-backup/rdiff_backup/Time.py | 2 +- rdiff-backup/rdiff_backup/backup.py | 234 +++++++--- rdiff-backup/rdiff_backup/connection.py | 23 +- rdiff-backup/rdiff_backup/increment.py | 422 ++---------------- rdiff-backup/rdiff_backup/manage.py | 9 +- rdiff-backup/rdiff_backup/metadata.py | 14 +- rdiff-backup/rdiff_backup/restore.py | 660 ++++++++++++++++------------ rdiff-backup/rdiff_backup/robust.py | 523 +--------------------- rdiff-backup/rdiff_backup/rorpiter.py | 115 ++--- rdiff-backup/rdiff_backup/rpath.py | 61 ++- rdiff-backup/rdiff_backup/selection.py | 38 +- rdiff-backup/rdiff_backup/statistics.py | 167 +++---- rdiff-backup/testing/benchmark.py | 141 ++++++ rdiff-backup/testing/commontest.py | 13 +- rdiff-backup/testing/finaltest.py | 73 +-- rdiff-backup/testing/hardlinktest.py | 113 +++-- rdiff-backup/testing/incrementtest.py | 98 +---- rdiff-backup/testing/metadatatest.py | 3 +- rdiff-backup/testing/rdifftest.py | 37 +- rdiff-backup/testing/regressiontest.py | 154 ++----- rdiff-backup/testing/restoretest.py | 255 ++++++----- rdiff-backup/testing/robusttest.py | 60 +-- rdiff-backup/testing/roottest.py | 68 ++- rdiff-backup/testing/rorpitertest.py | 19 - rdiff-backup/testing/selectiontest.py | 26 +- rdiff-backup/testing/statisticstest.py | 93 +++- rdiff-backup/testing/timetest.py | 2 +- 36 files changed, 
1499 insertions(+), 2392 deletions(-) create mode 100644 rdiff-backup/testing/benchmark.py diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG index b431dca..5bd54b3 100644 --- a/rdiff-backup/CHANGELOG +++ b/rdiff-backup/CHANGELOG @@ -1,4 +1,4 @@ -New in v0.11.1 (2002/12/??) +New in v0.11.1 (2002/12/31) --------------------------- **Warning** Various features have been removed from this version, so @@ -27,8 +27,15 @@ The following features have been removed: still generated, the directory statistics file no longer is, because the new code structure makes it less inconvenient. + The various --exclude and --include options no longer work when + restoring. This may be added later if there is demand. + + --windows-mode and filename quoting doesn't work. There have been + several requests for this in the past, so it will probably be + re-added in the next version. + Extensive refactoring. A lot of rdiff-backup's code was structured as -if it was still in one file, so it didn't make enough use of Python's +if it were still in one file, so it didn't make enough use of Python's module system. Now rdiff-backup writes metadata (uid, gid, mtime, etc.) to a @@ -42,9 +49,13 @@ some ramifications: metadata, so it may not be necessary to traverse the whole mirror directory. This can reduce file access on the destination side. + Even when the --no-hard-links option is given when backing up, + link relationships can be restored properly. However, if this + option is given, mirror files will not be linked together. - - + Special file types like device and sockets which cannot be created + on the remote side for some reason can still be backed up and + restored properly. Fixed bug with the --{include|exclude}-globbing-filelist options (reported by Claus Herwig). @@ -54,7 +65,8 @@ given date, and added Bud Bruegger's patch to that. The format and information this option provides will probably change in the near future. 
- +Restoring is now pipelined for better high latency performance, and +unchanged files in the target directory will not be recopied. New in v0.11.0 (2002/10/05) diff --git a/rdiff-backup/dist/makedist b/rdiff-backup/dist/makedist index bf68184..c1822c3 100755 --- a/rdiff-backup/dist/makedist +++ b/rdiff-backup/dist/makedist @@ -2,7 +2,7 @@ import os, re, shutil, time, sys, getopt -SourceDir = "src" +SourceDir = "rdiff_backup" DistDir = "dist" # Various details about the files must also be specified by the rpm @@ -89,15 +89,15 @@ def MakeTar(): assert not os.system("cp %s %s" % (filename, tardir)), filename os.mkdir(tardir+"/rdiff_backup") - for filename in ["connection.py", "destructive_stepping.py", + for filename in ["backup.py", "connection.py", "FilenameMapping.py", "Hardlink.py", - "highlevel.py", "increment.py", "__init__.py", - "iterfile.py", "lazy.py", "librsync.py", - "log.py", "Main.py", "manage.py", "MiscStats.py", - "Rdiff.py", "restore.py", "rlist.py", - "robust.py", "rorpiter.py", "rpath.py", "Security.py", - "selection.py", "SetConnections.py", "static.py", - "statistics.py", "Time.py"]: + "increment.py", "__init__.py", "iterfile.py", + "lazy.py", "librsync.py", "log.py", "Main.py", + "manage.py", "metadata.py", "Rdiff.py", + "restore.py", "robust.py", "rorpiter.py", + "rpath.py", "Security.py", "selection.py", + "SetConnections.py", "static.py", + "statistics.py", "TempFile.py", "Time.py"]: assert not os.system("cp %s/%s %s/rdiff_backup" % (SourceDir, filename, tardir)), filename @@ -137,10 +137,10 @@ def parse_cmdline(arglist): def Main(): action = parse_cmdline(sys.argv[1:]) - if action == "FAQ": - print "Making FAQ" - MakeFAQ() - else: + print "Making FAQ" + MakeFAQ() + + if action != "FAQ": assert action == "All" print "Processing version " + Version tarfile = MakeTar() diff --git a/rdiff-backup/dist/makerpm b/rdiff-backup/dist/makerpm index 8f59f65..2f275b2 100755 --- a/rdiff-backup/dist/makerpm +++ b/rdiff-backup/dist/makerpm @@ -2,7 +2,7 @@ 
import os, sys, re -SourceDir = "src" +rpmroot = "/home/ben/rpm" if len(sys.argv) == 2: version = sys.argv[1] @@ -20,15 +20,9 @@ tarfile = "-".join(base.split("-")[:-1]) + ".tar.gz" # These assume the rpm root directory $HOME/rpm. The # nonstandard location allows for building by non-root user. -assert not os.system("cp %s $HOME/rpm/SOURCES" % (tarfile,)) -assert not os.system("rpm -ba --sign -vv --target i386 " + specfile) -assert not os.system("cp $HOME/rpm/RPMS/i386/%s ." % i386rpm) -assert not os.system("cp $HOME/rpm/SRPMS/%s ." % source_rpm) +assert not os.system("cp %s %s/SOURCES" % (tarfile, rpmroot)) +#assert not os.system("rpm -ba --sign -vv --target i386 " + specfile) +assert not os.system("rpmbuild -ba -v " + specfile) +assert not os.system("cp %s/RPMS/i386/%s ." % (rpmroot, i386rpm)) +assert not os.system("cp %s/SRPMS/%s ." % (rpmroot, source_rpm)) -# Old root RPM instructions -#assert not os.system("install -o root -g root -m 644 %s " -# "/usr/src/redhat/SOURCES" % (tarfile,)) -#assert not os.system("install -o ben -g ben -m 644 " -# "/usr/src/redhat/RPMS/i386/%s ." % i386rpm) -#assert not os.system("install -o ben -g ben -m 644 " -# "/usr/src/redhat/SRPMS/%s ." % source_rpm) diff --git a/rdiff-backup/rdiff-backup.1 b/rdiff-backup/rdiff-backup.1 index 03f52ee..5dc8c18 100644 --- a/rdiff-backup/rdiff-backup.1 +++ b/rdiff-backup/rdiff-backup.1 @@ -217,16 +217,12 @@ The default is "(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$" .TP .BI --no-hard-links -Don't preserve hard links from source to mirror directories. -Otherwise, no increment files will themselves be hard linked, but a -hard link database will be written so that hard links from any dataset -will be recreated if originally present. If many hard linked files -are present, this option can drastically decrease memory usage. -.TP -.B --no-resume -Do not resume last aborted backup even if it falls within the resume -window. 
-.TP +Don't replicate hard links on destination side. Note that because +metadata is written to a separate file, hard link information will not +be lost even if the --no-hard-links option is given (however, mirror +files will not be linked). If many hard-linked files are present, +this option can drastically increase memory usage. +.TP .B --null-separator Use nulls (\\0) instead of newlines (\\n) as line separators, which may help when dealing with filenames containing newlines. This diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py index 6ff9fbd..0eefe42 100644 --- a/rdiff-backup/rdiff_backup/Hardlink.py +++ b/rdiff-backup/rdiff_backup/Hardlink.py @@ -21,12 +21,12 @@ If the preserve_hardlinks option is selected, linked files in the source directory will be linked in the mirror directory. Linked files -are treated like any other with respect to incrementing, but a -database of all links will be recorded at each session, so linked -files can still be restored from the increments. +are treated like any other with respect to incrementing, but their +link status can be retrieved because their device location and inode # +is written in the metadata file. -All these functions are meant to be executed on the destination -side. The source side should only transmit inode information. +All these functions are meant to be executed on the mirror side. The +source side should only transmit inode information. """ @@ -67,58 +67,6 @@ def clear_dictionaries(): _src_index_indicies = _dest_index_indicies = _restore_index_path = None -# The keys of this dictionary are (inode, devloc) pairs on the source -# side. The values are (numlinks, index) pairs, where numlinks are -# the number of files currently linked to this spot, and index is the -# index of the first file so linked. 
-_src_inode_index_dict = {} -_dest_inode_index_dict = {} - - -#def rorp_eq(src_rorp, dest_rorp): -# """Return true if source and dest rorp are equal as far as hardlinking -# -# This also processes the src_rorp, adding it if necessary to the -# inode dictionary. -# -# """ -# if not src_rorp.isreg(): return 1 # only reg files can be hard linked -# if src_rorp.getnumlinks() == 1: return dest_rorp.getnumlinks() == 1 -# -# src_linked_index = process_rorp(src_rorp, _src_inode_index_dict) -# if dest_rorp.getnumlinks() == 1: return 0 -# dest_linked_index = process_rorp(dest_rorp, _dest_inode_index_dict) -# return src_linked_index == dest_linked_index - -def process_rorp(rorp, inode_dict): - """Add inode info and returns index src_rorp is linked to, or None""" - key_pair = (rorp.getinode(), rorp.getdevloc()) - try: num, linked_index = inode_dict[key_pair] - except KeyError: - inode_dict[key_pair] = (1, src_rorp.index) - return None - inode_dict[key_pair] = (num+1, linked_index) - - if num+1 == src_rorp.getnumlinks(): del _inode_index_dict[key_pair] - else: _inode_index_dict[key_pair] = (num+1, linked_index) - return linked_index - -def get_linked_index(src_rorp): - """Return the index a src_rorp is linked to, or None - - Also deletes the src_rorp's entry in the dictionary if we have - accumulated all the hard link references. 
- - """ - key_pair = (rorp.getinode(), rorp.getdevloc()) - try: num, linked_index = _src_inode_index_dict[key_pair] - except KeyError: return None - if num == src_rorp.getnumlinks(): - del _src_inode_index_dict[key_pair] - - - - def get_inode_key(rorp): """Return rorp's key for _inode_ dictionaries""" return (rorp.getinode(), rorp.getdevloc()) @@ -190,6 +138,10 @@ def islinked(rorp): """True if rorp's index is already linked to something on src side""" return len(get_indicies(rorp, 1)) >= 2 +def get_link_index(rorp): + """Return first index on target side rorp is already linked to""" + return get_indicies(rorp, 1)[0] + def restore_link(index, rpath): """Restores a linked file by linking it @@ -214,129 +166,13 @@ def restore_link(index, rpath): _restore_index_path[index] = rpath.path return None -def link_rp(src_rorp, dest_rpath, dest_root = None): - """Make dest_rpath into a link analogous to that of src_rorp""" +def link_rp(diff_rorp, dest_rpath, dest_root = None): + """Make dest_rpath into a link using link flag in diff_rorp""" if not dest_root: dest_root = dest_rpath # use base of dest_rpath dest_link_rpath = rpath.RPath(dest_root.conn, dest_root.base, - get_indicies(src_rorp, 1)[0]) + diff_rorp.get_link_flag()) dest_rpath.hardlink(dest_link_rpath.path) -def write_linkdict(rpath, dict, compress = None): - """Write link data to the rbdata dir - - It is stored as the a big pickled dictionary dated to match - the current hardlinks. 
- - """ - assert (Globals.isbackup_writer and - rpath.conn is Globals.local_connection) - tf = TempFile.new(rpath) - def init(): - fp = tf.open("wb", compress) - cPickle.dump(dict, fp) - assert not fp.close() - tf.setdata() - robust.make_tf_robustaction(init, (tf,), (rpath,)).execute() - -def get_linkrp(data_rpath, time, prefix): - """Return RPath of linkdata, or None if cannot find""" - for rp in map(data_rpath.append, data_rpath.listdir()): - if (rp.isincfile() and rp.getincbase_str() == prefix and - (rp.getinctype() == 'snapshot' or rp.getinctype() == 'data') - and Time.stringtotime(rp.getinctime()) == time): - return rp - return None - -def get_linkdata(data_rpath, time, prefix = 'hardlink_data'): - """Return index dictionary written by write_linkdata at time""" - rp = get_linkrp(data_rpath, time, prefix) - if not rp: return None - fp = rp.open("rb", rp.isinccompressed()) - index_dict = cPickle.load(fp) - assert not fp.close() - return index_dict - -def final_writedata(): - """Write final checkpoint data to rbdir after successful backup""" - global final_inc - if _src_index_indicies: - log.Log("Writing hard link data", 6) - if Globals.compression: - final_inc = Globals.rbdir.append("hardlink_data.%s.data.gz" % - Time.curtimestr) - else: final_inc = Globals.rbdir.append("hardlink_data.%s.data" % - Time.curtimestr) - write_linkdict(final_inc, _src_index_indicies, Globals.compression) - else: # no hardlinks, so writing unnecessary - final_inc = None - -def retrieve_final(time): - """Set source index dictionary from hardlink_data file if avail""" - global _src_index_indicies - hd = get_linkdata(Globals.rbdir, time) - if hd is None: return None - _src_index_indicies = hd - return 1 - -def final_checkpoint(data_rpath): - """Write contents of the four dictionaries to the data dir - - If rdiff-backup receives a fatal error, it may still be able - to save the contents of the four hard link dictionaries. 
- Because these dictionaries may be big, they are not saved - after every 20 seconds or whatever, but just at the end. - """ - log.Log("Writing intermediate hard link data to disk", 2) - src_inode_rp = data_rpath.append("hardlink_source_inode_checkpoint." - "%s.data" % Time.curtimestr) - src_index_rp = data_rpath.append("hardlink_source_index_checkpoint." - "%s.data" % Time.curtimestr) - dest_inode_rp = data_rpath.append("hardlink_dest_inode_checkpoint." - "%s.data" % Time.curtimestr) - dest_index_rp = data_rpath.append("hardlink_dest_index_checkpoint." - "%s.data" % Time.curtimestr) - for (rp, dict) in ((src_inode_rp, _src_inode_indicies), - (src_index_rp, _src_index_indicies), - (dest_inode_rp, _dest_inode_indicies), - (dest_index_rp, _dest_index_indicies)): - write_linkdict(rp, dict) - -def retrieve_checkpoint(data_rpath, time): - """Retrieve hardlink data from final checkpoint - - Return true if the retrieval worked, false otherwise. - - """ - global _src_inode_indicies, _src_index_indicies - global _dest_inode_indicies, _dest_index_indicies - try: - src_inode = get_linkdata(data_rpath, time, - "hardlink_source_inode_checkpoint") - src_index = get_linkdata(data_rpath, time, - "hardlink_source_index_checkpoint") - dest_inode = get_linkdata(data_rpath, time, - "hardlink_dest_inode_checkpoint") - dest_index = get_linkdata(data_rpath, time, - "hardlink_dest_index_checkpoint") - except cPickle.UnpicklingError: - log.Log("Unpickling Error", 2) - return None - if (src_inode is None or src_index is None or - dest_inode is None or dest_index is None): return None - _src_inode_indicies, _src_index_indicies = src_inode, src_index - _dest_inode_indicies, _dest_index_indicies = dest_inode, dest_index - return 1 - -def remove_all_checkpoints(): - """Remove all hardlink checkpoint information from directory""" - prefix_list = ["hardlink_source_inode_checkpoint", - "hardlink_source_index_checkpoint", - "hardlink_dest_inode_checkpoint", - "hardlink_dest_index_checkpoint"] - for 
rp in map(Globals.rbdir.append, Globals.rbdir.listdir()): - if (rp.isincfile() and rp.getincbase_str() in prefix_list and - (rp.getinctype() == 'snapshot' or rp.getinctype() == 'data')): - rp.delete() diff --git a/rdiff-backup/rdiff_backup/Main.py b/rdiff-backup/rdiff_backup/Main.py index 6ffa355..3921c73 100644 --- a/rdiff-backup/rdiff_backup/Main.py +++ b/rdiff-backup/rdiff_backup/Main.py @@ -1,3 +1,4 @@ + # Copyright 2002 Ben Escoto # # This file is part of rdiff-backup. @@ -21,9 +22,9 @@ from __future__ import generators import getopt, sys, re, os -from log import Log +from log import Log, LoggerError import Globals, Time, SetConnections, selection, robust, rpath, \ - manage, highlevel, connection, restore, FilenameMapping, \ + manage, backup, connection, restore, FilenameMapping, \ Security, Hardlink @@ -146,6 +147,7 @@ def parse_cmdlineoptions(arglist): Globals.set('quoting_enabled', 1) Globals.set('preserve_hardlinks', 0) select_opts.append(("--exclude-special-files", None)) + assert 0, "Windows mode doesn't work in this version!" 
elif opt == '--windows-time-format': Globals.set('time_separator', "_") else: Log.FatalError("Unknown option %s" % opt) @@ -184,13 +186,6 @@ def commandline_error(message): def misc_setup(rps): """Set default change ownership flag, umask, relay regexps""" - if ((len(rps) == 2 and rps[1].conn.os.getuid() == 0) or - (len(rps) < 2 and os.getuid() == 0)): - # Allow change_ownership if destination connection is root - for conn in Globals.connections: - conn.Globals.set('change_ownership', 1) - for rp in rps: rp.setdata() # Update with userinfo - os.umask(077) Time.setcurtime(Globals.current_time) FilenameMapping.set_init_quote_vals() @@ -240,14 +235,14 @@ def Backup(rpin, rpout): backup_init_dirs(rpin, rpout) if prevtime: Time.setprevtime(prevtime) - highlevel.Mirror_and_increment(rpin, rpout, incdir) - else: highlevel.Mirror(rpin, rpout) + backup.Mirror_and_increment(rpin, rpout, incdir) + else: backup.Mirror(rpin, rpout) rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout) def backup_set_select(rpin): """Create Select objects on source connection""" - rpin.conn.highlevel.HLSourceStruct.set_source_select(rpin, select_opts, - *select_files) + rpin.conn.backup.SourceStruct.set_source_select(rpin, select_opts, + *select_files) def backup_init_dirs(rpin, rpout): """Make sure rpin and rpout are valid, init data dir and logging""" @@ -322,8 +317,7 @@ def backup_get_mirrortime(): """Warning: duplicate current_mirror files found. Perhaps something went wrong during your last backup? 
Using """ + mirrorrps[-1].path, 2) - timestr = mirrorrps[-1].getinctime() - return Time.stringtotime(timestr) + return mirrorrps[-1].getinctime() def backup_touch_curmirror_local(rpin, rpout): """Make a file like current_mirror.time.data to record time @@ -348,8 +342,7 @@ def Restore(src_rp, dest_rp = None): """ rpin, rpout = restore_check_paths(src_rp, dest_rp) - time = Time.stringtotime(rpin.getinctime()) - restore_common(rpin, rpout, time) + restore_common(rpin, rpout, rpin.getinctime()) def RestoreAsOf(rpin, target): """Secondary syntax for restore operation @@ -365,12 +358,14 @@ def RestoreAsOf(rpin, target): def restore_common(rpin, target, time): """Restore operation common to Restore and RestoreAsOf""" + if target.conn.os.getuid() == 0: + SetConnections.UpdateGlobal('change_ownership', 1) mirror_root, index = restore_get_root(rpin) mirror = mirror_root.new_index(index) inc_rpath = datadir.append_path('increments', index) restore_init_select(mirror_root, target) restore_start_log(rpin, target, time) - restore.Restore(inc_rpath, mirror, target, time) + restore.Restore(mirror, inc_rpath, target, time) Log("Restore ended", 4) def restore_start_log(rpin, target, time): @@ -397,9 +392,9 @@ Try restoring from an increment file (the filenames look like if not rpout: rpout = rpath.RPath(Globals.local_connection, rpin.getincbase_str()) - if rpout.lstat(): + if rpout.lstat() and not force: Log.FatalError("Restore target %s already exists, " - "and will not be overwritten." % rpout.path) + "specify --force to overwrite." 
% rpout.path) return rpin, rpout def restore_init_select(rpin, rpout): @@ -462,15 +457,13 @@ def restore_get_root(rpin): def ListIncrements(rp): """Print out a summary of the increments and their times""" mirror_root, index = restore_get_root(rp) - Globals.rbdir = datadir = \ - mirror_root.append_path("rdiff-backup-data") - mirrorrp = mirror_root.new_index(index) - inc_rpath = datadir.append_path('increments', index) + mirror_rp = mirror_root.new_index(index) + inc_rpath = Globals.rbdir.append_path('increments', index) incs = restore.get_inclist(inc_rpath) - mirror_time = restore.get_mirror_time() + mirror_time = restore.MirrorStruct.get_mirror_time() if Globals.parsable_output: - print manage.describe_incs_parsable(incs, mirror_time, mirrorrp) - else: print manage.describe_incs_human(incs, mirror_time, mirrorrp) + print manage.describe_incs_parsable(incs, mirror_time, mirror_rp) + else: print manage.describe_incs_human(incs, mirror_time, mirror_rp) def CalculateAverage(rps): @@ -493,8 +486,8 @@ def RemoveOlderThan(rootrp): timep = Time.timetopretty(time) Log("Deleting increment(s) before %s" % timep, 4) - times_in_secs = map(lambda inc: Time.stringtotime(inc.getinctime()), - restore.get_inclist(datadir.append("increments"))) + times_in_secs = [inc.getinctime() for inc in + restore.get_inclist(datadir.append("increments"))] times_in_secs = filter(lambda t: t < time, times_in_secs) if not times_in_secs: Log.FatalError("No increments older than %s found" % timep) @@ -517,30 +510,7 @@ def ListChangedSince(rp): try: rest_time = Time.genstrtotime(restore_timestr) except Time.TimeException, exc: Log.FatalError(str(exc)) mirror_root, index = restore_get_root(rp) - Globals.rbdir = datadir = mirror_root.append_path("rdiff-backup-data") - mirror_time = restore.get_mirror_time() - - def get_rids_recursive(rid): - """Yield all the rids under rid that have inc newer than rest_time""" - yield rid - for sub_rid in restore.yield_rids(rid, rest_time, mirror_time): - for sub_sub_rid 
in get_rids_recursive(sub_rid): yield sub_sub_rid - - def determineChangeType(incList): - "returns the type of change determined from incList" - assert len(incList) > 0 - last_inc_type = incList[-1].getinctype() # examine earliest change - if last_inc_type == 'snapshot': return "misc change" - elif last_inc_type == 'missing': return "new file" - elif last_inc_type == 'diff': return "modified" - elif last_inc_type == 'dir': return "dir change" - else: return "Unknown!" - - inc_rpath = datadir.append_path('increments', index) - inc_list = restore.get_inclist(inc_rpath) - root_rid = restore.RestoreIncrementData(index, inc_rpath, inc_list) - for rid in get_rids_recursive(root_rid): - if rid.inc_list: - print "%-11s: %s" % (determineChangeType(rid.inc_list), - rid.get_indexpath()) + mirror_rp = mirror_root.new_index(index) + inc_rp = mirror_rp.append_path("increments", index) + restore.ListChangedSince(mirror_rp, inc_rp, rest_time) diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py index 39dcb8a..7821141 100644 --- a/rdiff-backup/rdiff_backup/Rdiff.py +++ b/rdiff-backup/rdiff_backup/Rdiff.py @@ -17,75 +17,53 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA -"""Invoke rdiff utility to make signatures, deltas, or patch - -All these operations should be done in a relatively safe manner using -RobustAction and the like. 
- -""" +"""Invoke rdiff utility to make signatures, deltas, or patch""" import os, librsync -from log import Log -import robust, TempFile, Globals +import Globals, log, static, TempFile, rpath def get_signature(rp): """Take signature of rpin file and return in file object""" - Log("Getting signature of %s" % rp.path, 7) + log.Log("Getting signature of %s" % rp.path, 7) return librsync.SigFile(rp.open("rb")) def get_delta_sigfileobj(sig_fileobj, rp_new): """Like get_delta but signature is in a file object""" - Log("Getting delta of %s with signature stream" % (rp_new.path,), 7) + log.Log("Getting delta of %s with signature stream" % (rp_new.path,), 7) return librsync.DeltaFile(sig_fileobj, rp_new.open("rb")) def get_delta_sigrp(rp_signature, rp_new): """Take signature rp and new rp, return delta file object""" - Log("Getting delta of %s with signature %s" % - (rp_new.path, rp_signature.get_indexpath()), 7) + log.Log("Getting delta of %s with signature %s" % + (rp_new.path, rp_signature.get_indexpath()), 7) return librsync.DeltaFile(rp_signature.open("rb"), rp_new.open("rb")) -def write_delta_action(basis, new, delta, compress = None): - """Return action writing delta which brings basis to new - - If compress is true, the output of rdiff will be gzipped - before written to delta. 
- - """ - delta_tf = TempFile.new(delta) - def init(): write_delta(basis, new, delta_tf, compress) - return robust.make_tf_robustaction(init, delta_tf, delta) - def write_delta(basis, new, delta, compress = None): """Write rdiff delta which brings basis to new""" - Log("Writing delta %s from %s -> %s" % - (basis.path, new.path, delta.path), 7) + log.Log("Writing delta %s from %s -> %s" % + (basis.path, new.path, delta.path), 7) sigfile = librsync.SigFile(basis.open("rb")) deltafile = librsync.DeltaFile(sigfile, new.open("rb")) delta.write_from_fileobj(deltafile, compress) -def patch_action(rp_basis, rp_delta, rp_out = None, out_tf = None, - delta_compressed = None): - """Return RobustAction which patches rp_basis with rp_delta +def write_patched_fp(basis_fp, delta_fp, out_fp): + """Write patched file to out_fp given input fps. Closes input files""" + rpath.copyfileobj(librsync.PatchedFile(basis_fp, delta_fp), out_fp) + assert not basis_fp.close() and not delta_fp.close() - If rp_out is None, put output in rp_basis. Will use TempFile - out_tf it is specified. If delta_compressed is true, the - delta file will be decompressed before processing with rdiff. +def write_via_tempfile(fp, rp): + """Write fileobj fp to rp by writing to tempfile and renaming""" + tf = TempFile.new(rp) + tf.write_from_fileobj(fp) + tf.rename(rp) - """ - if not rp_out: rp_out = rp_basis - if not out_tf: out_tf = TempFile.new(rp_out) - def init(): - rp_basis.conn.Rdiff.patch_local(rp_basis, rp_delta, - out_tf, delta_compressed) - out_tf.setdata() - return robust.make_tf_robustaction(init, out_tf, rp_out) - -def patch_local(rp_basis, rp_delta, outrp, delta_compressed = None): - """Patch routine that must be run on rp_basis.conn +def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None): + """Patch routine that must be run locally, writes to outrp - This is because librsync may need to seek() around in rp_basis, - and so needs a real file. Other rpaths can be remote. 
+ This should be run local to rp_basis because it needs to be a real + file (librsync may need to seek around in it). If outrp is None, + patch rp_basis instead. """ assert rp_basis.conn is Globals.local_connection @@ -94,32 +72,19 @@ def patch_local(rp_basis, rp_delta, outrp, delta_compressed = None): sigfile = librsync.SigFile(rp_basis.open("rb")) patchfile = librsync.PatchedFile(rp_basis.open("rb"), deltafile) - outrp.write_from_fileobj(patchfile) - -def patch_with_attribs_action(rp_basis, rp_delta, rp_out = None): - """Like patch_action, but also transfers attributs from rp_delta""" - if not rp_out: rp_out = rp_basis - tf = TempFile.new(rp_out) - return robust.chain_nested(patch_action(rp_basis, rp_delta, rp_out, tf), - robust.copy_attribs_action(rp_delta, tf)) - -def copy_action(rpin, rpout): - """Use rdiff to copy rpin to rpout, conserving bandwidth""" - if not rpin.isreg() or not rpout.isreg() or rpin.conn is rpout.conn: - # rdiff not applicable, fallback to regular copying - return robust.copy_action(rpin, rpout) - - Log("Rdiff copying %s to %s" % (rpin.path, rpout.path), 6) - out_tf = TempFile.new(rpout) - def init(): rpout.conn.Rdiff.copy_local(rpin, rpout, out_tf) - return robust.make_tf_robustaction(init, out_tf, rpout) - -def copy_local(rpin, rpout, rpnew): + + if outrp: outrp.write_from_fileobj(patchfile) + else: write_via_tempfile(patchfile, rp_basis) + +def copy_local(rpin, rpout, rpnew = None): """Write rpnew == rpin using rpout as basis. 
rpout and rpnew local""" - assert rpnew.conn is rpout.conn is Globals.local_connection + assert rpout.conn is Globals.local_connection sigfile = librsync.SigFile(rpout.open("rb")) deltafile = rpin.conn.librsync.DeltaFile(sigfile, rpin.open("rb")) - rpnew.write_from_fileobj(librsync.PatchedFile(rpout.open("rb"), deltafile)) + patched_file = librsync.PatchedFile(rpout.open("rb"), deltafile) + + if rpnew: rpnew.write_from_fileobj(patched_file) + else: write_via_tempfile(patched_file, rpout) diff --git a/rdiff-backup/rdiff_backup/Security.py b/rdiff-backup/rdiff_backup/Security.py index 9760041..a10330c 100644 --- a/rdiff-backup/rdiff_backup/Security.py +++ b/rdiff-backup/rdiff_backup/Security.py @@ -112,6 +112,7 @@ def set_allowed_requests(sec_level): "Globals.get_dict_val", "log.Log.open_logfile_allconn", "log.Log.close_logfile_allconn", + "Log.log_to_file", "SetConnections.add_redirected_conn", "RedirectedRun", "sys.stdout.write"] @@ -123,20 +124,21 @@ def set_allowed_requests(sec_level): "os.listdir", "Time.setcurtime_local", "robust.Resume.ResumeCheck", - "highlevel.HLSourceStruct.split_initial_dsiter", - "highlevel.HLSourceStruct.get_diffs_and_finalize", + "backup.SourceStruct.split_initial_dsiter", + "backup.SourceStruct.get_diffs_and_finalize", "rpath.gzip_open_local_read", "rpath.open_local_read"]) if sec_level == "update-only": allowed_requests.extend( ["Log.open_logfile_local", "Log.close_logfile_local", "Log.close_logfile_allconn", "Log.log_to_file", + "log.Log.log_to_file", "robust.SaveState.init_filenames", "robust.SaveState.touch_last_file", - "highlevel.HLDestinationStruct.get_sigs", - "highlevel.HLDestinationStruct.patch_w_datadir_writes", - "highlevel.HLDestinationStruct.patch_and_finalize", - "highlevel.HLDestinationStruct.patch_increment_and_finalize", + "backup.DestinationStruct.get_sigs", + "backup.DestinationStruct.patch_w_datadir_writes", + "backup.DestinationStruct.patch_and_finalize", + "backup.DestinationStruct.patch_increment_and_finalize", 
"Main.backup_touch_curmirror_local", "Globals.ITRB.increment_stat"]) if Globals.server: @@ -148,8 +150,8 @@ def set_allowed_requests(sec_level): "FilenameMapping.set_init_quote_vals_local", "Globals.postset_regexp_local", "Globals.set_select", - "highlevel.HLSourceStruct.set_session_info", - "highlevel.HLDestinationStruct.set_session_info"]) + "backup.SourceStruct.set_session_info", + "backup.DestinationStruct.set_session_info"]) def vet_request(request, arglist): """Examine request for security violations""" diff --git a/rdiff-backup/rdiff_backup/SetConnections.py b/rdiff-backup/rdiff_backup/SetConnections.py index 495aa87..f1c54fe 100644 --- a/rdiff-backup/rdiff_backup/SetConnections.py +++ b/rdiff-backup/rdiff_backup/SetConnections.py @@ -201,6 +201,7 @@ def BackupInitConnections(reading_conn, writing_conn): writing_conn.Globals.set("isbackup_writer", 1) UpdateGlobal("backup_reader", reading_conn) UpdateGlobal("backup_writer", writing_conn) + if writing_conn.os.getuid() == 0: UpdateGlobal('change_ownership', 1) def CloseConnections(): """Close all connections. 
Run by client""" diff --git a/rdiff-backup/rdiff_backup/Time.py b/rdiff-backup/rdiff_backup/Time.py index bc8fb1b..ea85ca4 100644 --- a/rdiff-backup/rdiff_backup/Time.py +++ b/rdiff-backup/rdiff_backup/Time.py @@ -40,7 +40,7 @@ def setcurtime(curtime = None): """Sets the current time in curtime and curtimestr on all systems""" t = curtime or time.time() for conn in Globals.connections: - conn.Time.setcurtime_local(t) + conn.Time.setcurtime_local(long(t)) def setcurtime_local(timeinseconds): """Only set the current time locally""" diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py index fc75099..f776988 100644 --- a/rdiff-backup/rdiff_backup/backup.py +++ b/rdiff-backup/rdiff_backup/backup.py @@ -17,38 +17,39 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA -"""High level functions for mirroring, mirror & inc, etc.""" +"""High level functions for mirroring and mirror+incrementing""" from __future__ import generators -import Globals, MiscStats, metadata, rorpiter, TempFile, Hardlink, \ - robust, increment, rpath, lazy, static, log, selection, Time, Rdiff - +import Globals, metadata, rorpiter, TempFile, Hardlink, robust, increment, \ + rpath, static, log, selection, Time, Rdiff, statistics def Mirror(src_rpath, dest_rpath): """Turn dest_rpath into a copy of src_rpath""" - SourceS = src_rpath.conn.highlevel.HLSourceStruct - DestS = dest_rpath.conn.highlevel.HLDestinationStruct + SourceS = src_rpath.conn.backup.SourceStruct + DestS = dest_rpath.conn.backup.DestinationStruct + DestS.init_statistics() source_rpiter = SourceS.get_source_select() - dest_sigiter = DestS.process_source_get_sigs(dest_rpath, - source_rpiter, 0) + dest_sigiter = DestS.process_source_get_sigs(dest_rpath, source_rpiter, 0) source_diffiter = SourceS.get_diffs(src_rpath, dest_sigiter) DestS.patch(dest_rpath, source_diffiter) + DestS.write_statistics() def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath): """Mirror + put 
increments in tree based at inc_rpath""" - SourceS = src_rpath.conn.highlevel.HLSourceStruct - DestS = dest_rpath.conn.highlevel.HLDestinationStruct + SourceS = src_rpath.conn.backup.SourceStruct + DestS = dest_rpath.conn.backup.DestinationStruct + DestS.init_statistics() source_rpiter = SourceS.get_source_select() - dest_sigiter = DestS.process_source_get_sigs(dest_rpath, - source_rpiter, 1) + dest_sigiter = DestS.process_source_get_sigs(dest_rpath, source_rpiter, 1) source_diffiter = SourceS.get_diffs(src_rpath, dest_sigiter) DestS.patch_and_increment(dest_rpath, source_diffiter, inc_rpath) + DestS.write_statistics() -class HLSourceStruct: - """Hold info used by HL on the source side""" +class SourceStruct: + """Hold info used on source side when backing up""" source_select = None # will be set to source Select iterator def set_source_select(cls, rpath, tuplelist, *filelists): """Initialize select object using tuplelist @@ -69,23 +70,36 @@ class HLSourceStruct: def get_diffs(cls, baserp, dest_sigiter): """Return diffs of any files with signature in dest_sigiter""" - for dest_sig in dest_sigiter: + def get_one_diff(dest_sig): src_rp = baserp.new_index(dest_sig.index) diff_rorp = src_rp.getRORPath() - if dest_sig.isflaglinked(): diff_rorp.flaglinked() + if dest_sig.isflaglinked(): + diff_rorp.flaglinked(dest_sig.get_link_flag()) elif dest_sig.isreg() and src_rp.isreg(): diff_rorp.setfile(Rdiff.get_delta_sigrp(dest_sig, src_rp)) diff_rorp.set_attached_filetype('diff') else: diff_rorp.set_attached_filetype('snapshot') if src_rp.isreg(): diff_rorp.setfile(src_rp.open("rb")) - yield diff_rorp + return diff_rorp + + for dest_sig in dest_sigiter: + diff = robust.check_common_error(None, get_one_diff, [dest_sig]) + if diff: yield diff -static.MakeClass(HLSourceStruct) +static.MakeClass(SourceStruct) -class HLDestinationStruct: - """Hold info used by HL on the destination side""" +class DestinationStruct: + """Hold info used by destination side when backing up""" + def 
init_statistics(cls): + """Set cls.stats to StatFileObj object""" + cls.statfileobj = statistics.init_statfileobj() + + def write_statistics(cls): + """Write statistics file""" + statistics.write_active_statfileobj() + def get_dest_select(cls, rpath, use_metadata = 1): """Return destination select rorpath iterator @@ -95,7 +109,7 @@ class HLDestinationStruct: """ if use_metadata: metadata_iter = metadata.GetMetadata_at_time(Globals.rbdir, - Time.curtime) + Time.prevtime) if metadata_iter: return metadata_iter log.Log("Warning: Metadata file not found.\n" "Metadata will be read from filesystem.", 2) @@ -107,7 +121,7 @@ class HLDestinationStruct: def dest_iter_filter(cls, dest_iter): """Destination rorps pass through this - record stats""" for dest_rorp in dest_iter: - # XXX Statistics process + cls.statfileobj.add_dest_file(dest_rorp) Hardlink.add_rorp(dest_rorp, source = 0) yield dest_rorp @@ -115,9 +129,9 @@ class HLDestinationStruct: """Source rorps pass through this - record stats, write metadata""" metadata.OpenMetadata() for src_rorp in source_iter: + cls.statfileobj.add_source_file(src_rorp) Hardlink.add_rorp(src_rorp, source = 1) metadata.WriteMetadata(src_rorp) - #XXXX Statistics process yield src_rorp metadata.CloseMetadata() @@ -133,66 +147,156 @@ class HLDestinationStruct: source_iter = cls.src_iter_filter(source_iter) dest_iter = cls.dest_iter_filter(cls.get_dest_select(baserp, for_increment)) - for index in rorpiter.get_dissimilar_indicies(source_iter, dest_iter): + for index in rorpiter.get_dissimilar_indicies(source_iter, dest_iter, + cls.statfileobj): dest_rp = baserp.new_index(index) dest_sig = dest_rp.getRORPath() if Globals.preserve_hardlinks and Hardlink.islinked(dest_rp): - dest_sig.flaglinked() + dest_sig.flaglinked(Hardlink.get_link_index(dest_rp)) elif dest_rp.isreg(): dest_sig.setfile(Rdiff.get_signature(dest_rp)) yield dest_sig - def patch(cls, dest_rpath, source_diffiter): + def patch(cls, dest_rpath, source_diffiter, start_index = ()): 
"""Patch dest_rpath with an rorpiter of diffs""" - ITR = rorpiter.IterTreeReducer(increment.PatchITRB, [dest_rpath]) + ITR = rorpiter.IterTreeReducer(PatchITRB, [dest_rpath]) for diff in rorpiter.FillInIter(source_diffiter, dest_rpath): + log.Log("Processing changed file " + diff.get_indexpath(), 5) ITR(diff.index, diff) ITR.Finish() dest_rpath.setdata() def patch_and_increment(cls, dest_rpath, source_diffiter, inc_rpath): """Patch dest_rpath with rorpiter of diffs and write increments""" - ITR = rorpiter.IterTreeReducer(increment.IncrementITRB, - [dest_rpath, inc_rpath]) + ITR = rorpiter.IterTreeReducer(IncrementITRB, [dest_rpath, inc_rpath]) for diff in rorpiter.FillInIter(source_diffiter, dest_rpath): + log.Log("Processing changed file " + diff.get_indexpath(), 5) ITR(diff.index, diff) ITR.Finish() dest_rpath.setdata() - def patch_increment_and_finalize(cls, dest_rpath, diffs, inc_rpath): - """Apply diffs, write increment if necessary, and finalize""" - collated = rorpiter.CollateIterators(diffs, cls.initial_dsiter2) - #finalizer, ITR = cls.get_finalizer(), cls.get_ITR(inc_rpath) - finalizer, ITR = None, cls.get_ITR(inc_rpath) - MiscStats.open_dir_stats_file() - dsrp, finished_dsrp = None, None - - try: - for indexed_tuple in collated: - log.Log(lambda: "Processing %s" % str(indexed_tuple), 7) - diff_rorp, dsrp = indexed_tuple - index = indexed_tuple.index - if not dsrp: dsrp = cls.get_dsrp(dest_rpath, index) - if diff_rorp and diff_rorp.isplaceholder(): diff_rorp = None - ITR(index, diff_rorp, dsrp) - #finalizer(index, dsrp) - finished_dsrp = dsrp - ITR.Finish() - #finalizer.Finish() - except: cls.handle_last_error(finished_dsrp, finalizer, ITR) - - if Globals.preserve_hardlinks: Hardlink.final_writedata() - MiscStats.close_dir_stats_file() - MiscStats.write_session_statistics(ITR.root_branch) - - def handle_last_error(cls, dsrp, finalizer, ITR): - """If catch fatal error, try to checkpoint before exiting""" - log.Log.exception(1, 2) - 
robust.TracebackArchive.log() - #SaveState.checkpoint(ITR, finalizer, dsrp, 1) - #if Globals.preserve_hardlinks: Hardlink.final_checkpoint(Globals.rbdir) - #SaveState.touch_last_file_definitive() - raise - -static.MakeClass(HLDestinationStruct) +static.MakeClass(DestinationStruct) + + +class PatchITRB(rorpiter.ITRBranch): + """Patch an rpath with the given diff iters (use with IterTreeReducer) + + The main complication here involves directories. We have to + finish processing the directory after what's in the directory, as + the directory may have inappropriate permissions to alter the + contents or the dir's mtime could change as we change the + contents. + + """ + def __init__(self, basis_root_rp): + """Set basis_root_rp, the base of the tree to be incremented""" + self.basis_root_rp = basis_root_rp + assert basis_root_rp.conn is Globals.local_connection + self.statfileobj = (statistics.get_active_statfileobj() or + statistics.StatFileObj()) + self.dir_replacement, self.dir_update = None, None + self.cached_rp = None + + def get_rp_from_root(self, index): + """Return RPath by adding index to self.basis_root_rp""" + if not self.cached_rp or self.cached_rp.index != index: + self.cached_rp = self.basis_root_rp.new_index(index) + return self.cached_rp + + def can_fast_process(self, index, diff_rorp): + """True if diff_rorp and mirror are not directories""" + rp = self.get_rp_from_root(index) + return not diff_rorp.isdir() and not rp.isdir() + + def fast_process(self, index, diff_rorp): + """Patch base_rp with diff_rorp (case where neither is directory)""" + rp = self.get_rp_from_root(index) + tf = TempFile.new(rp) + self.patch_to_temp(rp, diff_rorp, tf) + tf.rename(rp) + + def patch_to_temp(self, basis_rp, diff_rorp, new): + """Patch basis_rp, writing output in new, which doesn't exist yet""" + if diff_rorp.isflaglinked(): + Hardlink.link_rp(diff_rorp, new, self.basis_root_rp) + elif diff_rorp.get_attached_filetype() == 'snapshot': + rpath.copy(diff_rorp, new) + 
else: + assert diff_rorp.get_attached_filetype() == 'diff' + Rdiff.patch_local(basis_rp, diff_rorp, new) + if new.lstat(): rpath.copy_attribs(diff_rorp, new) + + def start_process(self, index, diff_rorp): + """Start processing directory - record information for later""" + base_rp = self.base_rp = self.get_rp_from_root(index) + assert diff_rorp.isdir() or base_rp.isdir() or not base_rp.index + if diff_rorp.isdir(): self.prepare_dir(diff_rorp, base_rp) + else: self.set_dir_replacement(diff_rorp, base_rp) + + def set_dir_replacement(self, diff_rorp, base_rp): + """Set self.dir_replacement, which holds data until done with dir + + This is used when base_rp is a dir, and diff_rorp is not. + + """ + assert diff_rorp.get_attached_filetype() == 'snapshot' + self.dir_replacement = TempFile.new(base_rp) + rpath.copy_with_attribs(diff_rorp, self.dir_replacement) + if base_rp.isdir(): base_rp.chmod(0700) + + def prepare_dir(self, diff_rorp, base_rp): + """Prepare base_rp to turn into a directory""" + self.dir_update = diff_rorp.getRORPath() # make copy in case changes + if not base_rp.isdir(): + if base_rp.lstat(): base_rp.delete() + base_rp.mkdir() + base_rp.chmod(0700) + + def end_process(self): + """Finish processing directory""" + if self.dir_update: + assert self.base_rp.isdir() + rpath.copy_attribs(self.dir_update, self.base_rp) + else: + assert self.dir_replacement + self.base_rp.rmdir() + self.dir_replacement.rename(self.base_rp) + + +class IncrementITRB(PatchITRB): + """Patch an rpath with the given diff iters and write increments + + Like PatchITRB, but this time also write increments. 
+ + """ + def __init__(self, basis_root_rp, inc_root_rp): + self.inc_root_rp = inc_root_rp + self.cached_incrp = None + PatchITRB.__init__(self, basis_root_rp) + + def get_incrp(self, index): + """Return inc RPath by adding index to self.basis_root_rp""" + if not self.cached_incrp or self.cached_incrp.index != index: + self.cached_incrp = self.inc_root_rp.new_index(index) + return self.cached_incrp + + def fast_process(self, index, diff_rorp): + """Patch base_rp with diff_rorp and write increment (neither is dir)""" + rp = self.get_rp_from_root(index) + tf = TempFile.new(rp) + self.patch_to_temp(rp, diff_rorp, tf) + increment.Increment(tf, rp, self.get_incrp(index)) + tf.rename(rp) + + def start_process(self, index, diff_rorp): + """Start processing directory""" + base_rp = self.base_rp = self.get_rp_from_root(index) + assert diff_rorp.isdir() or base_rp.isdir() + if diff_rorp.isdir(): + increment.Increment(diff_rorp, base_rp, self.get_incrp(index)) + self.prepare_dir(diff_rorp, base_rp) + else: + self.set_dir_replacement(diff_rorp, base_rp) + increment.Increment(self.dir_replacement, base_rp, + self.get_incrp(index)) diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py index dc4fb1e..5e23b91 100644 --- a/rdiff-backup/rdiff_backup/connection.py +++ b/rdiff-backup/rdiff_backup/connection.py @@ -96,7 +96,6 @@ class LowLevelPipeConnection(Connection): b - string q - quit signal t - TempFile - d - DSRPath R - RPath r - RORPath only c - PipeConnection object @@ -124,8 +123,6 @@ class LowLevelPipeConnection(Connection): elif isinstance(obj, connection.Connection):self._putconn(obj, req_num) elif isinstance(obj, TempFile.TempFile): self._puttempfile(obj, req_num) - elif isinstance(obj, destructive_stepping.DSRPath): - self._putdsrpath(obj, req_num) elif isinstance(obj, rpath.RPath): self._putrpath(obj, req_num) elif isinstance(obj, rpath.RORPath): self._putrorpath(obj, req_num) elif ((hasattr(obj, "read") or hasattr(obj, "write")) 
@@ -157,11 +154,6 @@ class LowLevelPipeConnection(Connection): tempfile.index, tempfile.data) self._write("t", cPickle.dumps(tf_repr, 1), req_num) - def _putdsrpath(self, dsrpath, req_num): - """Put DSRPath into pipe. See _putrpath""" - dsrpath_repr = (dsrpath.conn.conn_number, dsrpath.getstatedict()) - self._write("d", cPickle.dumps(dsrpath_repr, 1), req_num) - def _putrpath(self, rpath, req_num): """Put an rpath into the pipe @@ -246,7 +238,6 @@ class LowLevelPipeConnection(Connection): elif format_string == "t": result = self._gettempfile(data) elif format_string == "r": result = self._getrorpath(data) elif format_string == "R": result = self._getrpath(data) - elif format_string == "d": result = self._getdsrpath(data) else: assert format_string == "c", header_string result = Globals.connection_dict[int(data)] @@ -270,16 +261,6 @@ class LowLevelPipeConnection(Connection): return rpath.RPath(Globals.connection_dict[conn_number], base, index, data) - def _getdsrpath(self, raw_dsrpath_buf): - """Return DSRPath object indicated by buf""" - conn_number, state_dict = cPickle.loads(raw_dsrpath_buf) - empty_dsrp = destructive_stepping.DSRPath("bypass", - Globals.local_connection, None) - empty_dsrp.__setstate__(state_dict) - empty_dsrp.conn = Globals.connection_dict[conn_number] - empty_dsrp.file = None - return empty_dsrp - def _close(self): """Close the pipes associated with the connection""" self.outpipe.close() @@ -544,8 +525,8 @@ class VirtualFile: # put at bottom to reduce circularities. 
import Globals, Time, Rdiff, Hardlink, FilenameMapping, C, Security, \ Main, rorpiter, selection, increment, statistics, manage, lazy, \ - iterfile, rpath, robust, restore, manage, highlevel, connection, \ - TempFile, destructive_stepping, SetConnections + iterfile, rpath, robust, restore, manage, backup, connection, \ + TempFile, SetConnections, librsync from log import Log Globals.local_connection = LocalConnection() diff --git a/rdiff-backup/rdiff_backup/increment.py b/rdiff-backup/rdiff_backup/increment.py index a11dd6a..a9d5413 100644 --- a/rdiff-backup/rdiff_backup/increment.py +++ b/rdiff-backup/rdiff_backup/increment.py @@ -19,14 +19,11 @@ """Provides functions and *ITR classes, for writing increment files""" -import traceback -from log import Log -import Globals, Time, MiscStats, rorpiter, TempFile, robust, \ - statistics, rpath, static, lazy, Rdiff, Hardlink +import Globals, Time, rpath, Rdiff, log, statistics -def Increment_action(new, mirror, incpref): - """Main file incrementing function, returns robust.Action +def Increment(new, mirror, incpref): + """Main file incrementing function, returns inc file created new is the file on the active partition, mirror is the mirrored file from the last backup, @@ -35,70 +32,57 @@ def Increment_action(new, mirror, incpref): This function basically moves the information about the mirror file to incpref. - The returned robust.Action when executed should return the name - of the incfile, or None if none was created. 
- """ if not (new and new.lstat() or mirror.lstat()): - return robust.null_action # Files deleted in meantime, do nothing + return None # Files deleted in meantime, do nothing - Log("Incrementing mirror file " + mirror.path, 5) + log.Log("Incrementing mirror file " + mirror.path, 5) if ((new and new.isdir()) or mirror.isdir()) and not incpref.isdir(): incpref.mkdir() - if not mirror.lstat(): return makemissing_action(incpref) - elif mirror.isdir(): return makedir_action(mirror, incpref) + if not mirror.lstat(): incrp = makemissing(incpref) + elif mirror.isdir(): incrp = makedir(mirror, incpref) elif new.isreg() and mirror.isreg(): - return makediff_action(new, mirror, incpref) - else: return makesnapshot_action(mirror, incpref) - -def Increment(new, mirror, incpref): - return Increment_action(new, mirror, incpref).execute() + incrp = makediff(new, mirror, incpref) + else: incrp = makesnapshot(mirror, incpref) + statistics.process_increment(incrp) + return incrp -def makemissing_action(incpref): +def makemissing(incpref): """Signify that mirror file was missing""" - def final(init_val): - incrp = get_inc_ext(incpref, "missing") - incrp.touch() - return incrp - return robust.Action(None, final, None) + incrp = get_inc_ext(incpref, "missing") + incrp.touch() + return incrp + +def iscompressed(mirror): + """Return true if mirror's increments should be compressed""" + return (Globals.compression and + not Globals.no_compression_regexp.match(mirror.path)) -def makesnapshot_action(mirror, incpref): +def makesnapshot(mirror, incpref): """Copy mirror to incfile, since new is quite different""" - if (mirror.isreg() and Globals.compression and - not Globals.no_compression_regexp.match(mirror.path)): - snapshotrp = get_inc_ext(incpref, "snapshot.gz") - return robust.copy_with_attribs_action(mirror, snapshotrp, 1) - else: - snapshotrp = get_inc_ext(incpref, "snapshot") - return robust.copy_with_attribs_action(mirror, snapshotrp, None) + compress = iscompressed(mirror) + if 
compress: snapshotrp = get_inc_ext(incpref, "snapshot.gz") + else: snapshotrp = get_inc_ext(incpref, "snapshot") + rpath.copy_with_attribs(mirror, snapshotrp, compress) + return snapshotrp -def makediff_action(new, mirror, incpref): +def makediff(new, mirror, incpref): """Make incfile which is a diff new -> mirror""" - if (Globals.compression and - not Globals.no_compression_regexp.match(mirror.path)): - diff = get_inc_ext(incpref, "diff.gz") - compress = 1 - else: - diff = get_inc_ext(incpref, "diff") - compress = None + compress = iscompressed(mirror) + if compress: diff = get_inc_ext(incpref, "diff.gz") + else: diff = get_inc_ext(incpref, "diff") - diff_tf = TempFile.new(diff) - def init(): - Rdiff.write_delta(new, mirror, diff_tf, compress) - rpath.copy_attribs(mirror, diff_tf) - return diff - return robust.make_tf_robustaction(init, diff_tf, diff) + Rdiff.write_delta(new, mirror, diff, compress) + rpath.copy_attribs(mirror, diff) + return diff -def makedir_action(mirrordir, incpref): +def makedir(mirrordir, incpref): """Make file indicating directory mirrordir has changed""" dirsign = get_inc_ext(incpref, "dir") - tf = TempFile.new(dirsign) - def init(): - tf.touch() - rpath.copy_attribs(mirrordir, tf) - return dirsign - return robust.make_tf_robustaction(init, tf, dirsign) + dirsign.touch() + rpath.copy_attribs(mirrordir, dirsign) + return dirsign def get_inc(rp, time, typestr): """Return increment like rp but with time and typestr suffixes""" @@ -107,344 +91,22 @@ def get_inc(rp, time, typestr): incrp = rp.__class__(rp.conn, rp.base, rp.index[:-1] + (addtostr(rp.index[-1]),)) else: incrp = rp.__class__(rp.conn, addtostr(rp.base), rp.index) - if Globals.quoting_enabled: incrp.quote_path() return incrp -def get_inc_ext(rp, typestr): - """Return increment with specified type and correct time +def get_inc_ext(rp, typestr, inctime = None): + """Return increment with specified type and time t If the file exists, then probably a previous backup has been aborted. 
We then keep asking FindTime to get a time later than the one that already has an inc file. """ - inctime = 0 + if inctime is None: inctime = Time.prevtime while 1: - #inctime = robust.Resume.FindTime(rp.index, inctime) - inctime = Time.prevtime incrp = get_inc(rp, inctime, typestr) if not incrp.lstat(): break else: - assert 0, "Inc file already present" + inctime += 1 + log.Log("Warning, increment %s already exists" % (incrp.path,), 2) return incrp - -class IncrementITRB(statistics.ITRB): - """Patch and increment mirror directory - - This has to be an ITR because directories that have files in them - changed are flagged with an increment marker. There are four - possibilities as to the order: - - 1. Normal file -> Normal file: right away - 2. Directory -> Directory: wait until files in the directory - are processed, as we won't know whether to add a marker - until the end. - 3. Normal file -> Directory: right away, so later files will - have a directory to go into. - 4. Directory -> Normal file: Wait until the end, so we can - process all the files in the directory. 
- - """ - # Iff true, mirror file was a directory - mirror_isdirectory = None - # If set, what the directory on the mirror side will be replaced with - directory_replacement = None - # True iff there has been some change at this level or lower (used - # for marking directories to be flagged) - changed = None - # Holds the RPath of the created increment file, if any - incrp = None - - def __init__(self, inc_rpath): - """Set inc_rpath, an rpath of the base of the tree""" - self.inc_rpath = inc_rpath - statistics.ITRB.__init__(self) - - def start_process(self, index, diff_rorp, dsrp): - """Initial processing of file - - diff_rorp is the RORPath of the diff from the remote side, and - dsrp is the local file to be incremented - - """ - self.start_stats(dsrp) - incpref = self.inc_rpath.new_index(index) - if Globals.quoting_enabled: incpref.quote_path() - if dsrp.isdir(): - self.init_dir(dsrp, diff_rorp, incpref) - self.mirror_isdirectory = 1 - else: self.init_non_dir(dsrp, diff_rorp, incpref) - self.setvals(diff_rorp, dsrp, incpref) - - def override_changed(self): - """Set changed flag to true - - This is used only at the top level of a backup, to make sure - that a marker is created recording every backup session. - - """ - self.changed = 1 - - def setvals(self, diff_rorp, dsrp, incpref): - """Record given values in state dict since in directory - - We don't do these earlier in case of a problem inside the - init_* functions. Index isn't given because it is done by the - superclass. - - """ - self.diff_rorp = diff_rorp - self.dsrp = dsrp - self.incpref = incpref - - def init_dir(self, dsrp, diff_rorp, incpref): - """Process a directory (initial pass) - - If the directory is changing into a normal file, we need to - save the normal file data in a temp file, and then create the - real file once we are done with everything inside the - directory. 
- - """ - if not (incpref.lstat() and incpref.isdir()): incpref.mkdir() - if diff_rorp and diff_rorp.isreg() and diff_rorp.file: - tf = TempFile.new(dsrp) - def init(): - rpath.copy_with_attribs(diff_rorp, tf) - tf.set_attached_filetype(diff_rorp.get_attached_filetype()) - def error(exc, ran_init, init_val): tf.delete() - robust.Action(init, None, error).execute() - self.directory_replacement = tf - - def init_non_dir(self, dsrp, diff_rorp, incpref): - """Process a non directory file (initial pass)""" - if not diff_rorp: return # no diff, so no change necessary - if diff_rorp.isreg() and (dsrp.isreg() or diff_rorp.isflaglinked()): - # Write updated mirror to temp file so we can compute - # reverse diff locally - mirror_tf = TempFile.new(dsrp) - old_dsrp_tf = TempFile.new(dsrp) - def init_thunk(): - if diff_rorp.isflaglinked(): - Hardlink.link_rp(diff_rorp, mirror_tf, dsrp) - else: Rdiff.patch_with_attribs_action(dsrp, diff_rorp, - mirror_tf).execute() - self.incrp = Increment_action(mirror_tf, dsrp, - incpref).execute() - if dsrp.lstat(): rpath.rename(dsrp, old_dsrp_tf) - mirror_tf.rename(dsrp) - - def final(init_val): old_dsrp_tf.delete() - def error(exc, ran_init, init_val): - if ran_init: old_dsrp_tf.delete() # everything is fine - else: # restore to previous state - if old_dsrp_tf.lstat(): old_dsrp_tf.rename(dsrp) - if self.incrp: self.incrp.delete() - mirror_tf.delete() - - robust.Action(init_thunk, final, error).execute() - else: self.incrp = robust.chain( - Increment_action(diff_rorp, dsrp, incpref), - rorpiter.patchonce_action(None, dsrp, diff_rorp)).execute()[0] - - self.changed = 1 - - def end_process(self): - """Do final work when leaving a tree (directory)""" - diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref - if (self.mirror_isdirectory and (diff_rorp or self.changed) - or self.directory_replacement): - if self.directory_replacement: - tf = self.directory_replacement - self.incrp = robust.chain( - Increment_action(tf, dsrp, incpref), 
- rorpiter.patchonce_action(None, dsrp, tf)).execute()[0] - tf.delete() - else: - self.incrp = Increment(diff_rorp, dsrp, incpref) - if diff_rorp: - rorpiter.patchonce_action(None, dsrp, diff_rorp).execute() - - self.end_stats(diff_rorp, dsrp, self.incrp) - if self.mirror_isdirectory or dsrp.isdir(): - MiscStats.write_dir_stats_line(self, dsrp.index) - - def can_fast_process(self, index, diff_rorp, dsrp): - """True if there is no change in file and is just a leaf""" - return not diff_rorp and dsrp.isreg() - - def fast_process(self, index, diff_rorp, dsrp): - """Just update statistics""" - statistics.ITRB.fast_process(self, dsrp) - - def branch_process(self, branch): - """Update statistics, and the has_changed flag if change in branch""" - if Globals.sleep_ratio is not None: Time.sleep(Globals.sleep_ratio) - if branch.changed: self.changed = 1 - self.add_file_stats(branch) - - -class PatchITRB(statistics.ITRB): - """Patch an rpath with the given diff iters (use with IterTreeReducer) - - The main complication here involves directories. We have to - finish processing the directory after what's in the directory, as - the directory may have inappropriate permissions to alter the - contents or the dir's mtime could change as we change the - contents. 
- - """ - def __init__(self, basis_root_rp): - """Set basis_root_rp, the base of the tree to be incremented""" - self.basis_root_rp = basis_root_rp - assert basis_root_rp.conn is Globals.local_connection - #statistics.ITRB.__init__(self) - self.dir_replacement, self.dir_update = None, None - self.cached_rp = None - - def get_rp_from_root(self, index): - """Return RPath by adding index to self.basis_root_rp""" - if not self.cached_rp or self.cached_rp.index != index: - self.cached_rp = self.basis_root_rp.new_index(index) - return self.cached_rp - - def can_fast_process(self, index, diff_rorp): - """True if diff_rorp and mirror are not directories""" - rp = self.get_rp_from_root(index) - return not diff_rorp.isdir() and not rp.isdir() - - def fast_process(self, index, diff_rorp): - """Patch base_rp with diff_rorp (case where neither is directory)""" - rp = self.get_rp_from_root(index) - tf = TempFile.new(rp) - self.patch_to_temp(rp, diff_rorp, tf) - tf.rename(rp) - - def patch_to_temp(self, basis_rp, diff_rorp, new): - """Patch basis_rp, writing output in new, which doesn't exist yet""" - if diff_rorp.isflaglinked(): - Hardlink.link_rp(diff_rorp, new, self.basis_root_rp) - elif diff_rorp.get_attached_filetype() == 'snapshot': - rpath.copy(diff_rorp, new) - else: - assert diff_rorp.get_attached_filetype() == 'diff' - Rdiff.patch_local(basis_rp, diff_rorp, new) - if new.lstat(): rpath.copy_attribs(diff_rorp, new) - - def start_process(self, index, diff_rorp): - """Start processing directory - record information for later""" - base_rp = self.base_rp = self.get_rp_from_root(index) - assert diff_rorp.isdir() or base_rp.isdir() - if diff_rorp.isdir(): self.prepare_dir(diff_rorp, base_rp) - else: self.set_dir_replacement(diff_rorp, base_rp) - - def set_dir_replacement(self, diff_rorp, base_rp): - """Set self.dir_replacement, which holds data until done with dir - - This is used when base_rp is a dir, and diff_rorp is not. 
- - """ - assert diff_rorp.get_attached_filetype() == 'snapshot' - self.dir_replacement = TempFile.new(base_rp) - rpath.copy_with_attribs(diff_rorp, self.dir_replacement) - - def prepare_dir(self, diff_rorp, base_rp): - """Prepare base_rp to turn into a directory""" - self.dir_update = diff_rorp.getRORPath() # make copy in case changes - if not base_rp.isdir(): - if base_rp.lstat(): base_rp.delete() - base_rp.mkdir() - base_rp.chmod(0700) - - def end_process(self): - """Finish processing directory""" - if self.dir_update: - assert self.base_rp.isdir() - rpath.copy_attribs(self.dir_update, self.base_rp) - else: - assert self.dir_replacement and self.base_rp.isdir() - self.base_rp.rmdir() - self.dir_replacement.rename(self.base_rp) - - -class IncrementITRB(PatchITRB): - """Patch an rpath with the given diff iters and write increments - - Like PatchITRB, but this time also write increments. - - """ - def __init__(self, basis_root_rp, inc_root_rp): - self.inc_root_rp = inc_root_rp - self.cached_incrp = None - PatchITRB.__init__(self, basis_root_rp) - - def get_incrp(self, index): - """Return inc RPath by adding index to self.basis_root_rp""" - if not self.cached_incrp or self.cached_incrp.index != index: - self.cached_incrp = self.inc_root_rp.new_index(index) - return self.cached_incrp - - def fast_process(self, index, diff_rorp): - """Patch base_rp with diff_rorp and write increment (neither is dir)""" - rp = self.get_rp_from_root(index) - tf = TempFile.new(rp) - self.patch_to_temp(rp, diff_rorp, tf) - Increment(tf, rp, self.get_incrp(index)) - tf.rename(rp) - - def start_process(self, index, diff_rorp): - """Start processing directory""" - base_rp = self.base_rp = self.get_rp_from_root(index) - assert diff_rorp.isdir() or base_rp.isdir() - if diff_rorp.isdir(): - Increment(diff_rorp, base_rp, self.get_incrp(index)) - self.prepare_dir(diff_rorp, base_rp) - else: - self.set_dir_replacement(diff_rorp, base_rp) - Increment(self.dir_replacement, base_rp, 
self.get_incrp(index)) - - -class MirrorITRB(statistics.ITRB): - """Like IncrementITR, but only patch mirror directory, don't increment""" - def __init__(self, inc_rpath): - """Set inc_rpath, an rpath of the base of the inc tree""" - self.inc_rpath = inc_rpath - statistics.ITRB.__init__(self) - - def start_process(self, index, diff_rorp, mirror_dsrp): - """Initialize statistics and do actual writing to mirror""" - self.start_stats(mirror_dsrp) - if (diff_rorp and diff_rorp.isdir() or - not diff_rorp and mirror_dsrp.isdir()): - # mirror_dsrp will end up as directory, update attribs later - if not diff_rorp: diff_rorp = mirror_dsrp.get_rorpath() - if not mirror_dsrp.isdir(): - mirror_dsrp.delete() - mirror_dsrp.mkdir() - elif diff_rorp and not diff_rorp.isplaceholder(): - rorpiter.patchonce_action(None, mirror_dsrp, diff_rorp).execute() - - self.incpref = self.inc_rpath.new_index(index) - self.diff_rorp, self.mirror_dsrp = diff_rorp, mirror_dsrp - - def end_process(self): - """Update statistics when leaving""" - self.end_stats(self.diff_rorp, self.mirror_dsrp) - if self.mirror_dsrp.isdir(): - rpath.copy_attribs(self.diff_rorp, self.mirror_dsrp) - MiscStats.write_dir_stats_line(self, self.mirror_dsrp.index) - - def can_fast_process(self, index, diff_rorp, mirror_dsrp): - """True if there is no change in file and it is just a leaf""" - return not diff_rorp and mirror_dsrp.isreg() - - def fast_process(self, index, diff_rorp, mirror_dsrp): - """Just update statistics""" - statistics.ITRB.fast_process(self, mirror_dsrp) - - def branch_process(self, branch): - """Update statistics with subdirectory results""" - if Globals.sleep_ratio is not None: Time.sleep(Globals.sleep_ratio) - self.add_file_stats(branch) - diff --git a/rdiff-backup/rdiff_backup/manage.py b/rdiff-backup/rdiff_backup/manage.py index f974147..bb4259c 100644 --- a/rdiff-backup/rdiff_backup/manage.py +++ b/rdiff-backup/rdiff_backup/manage.py @@ -56,7 +56,7 @@ def describe_incs_parsable(incs, mirror_time, 
mirrorrp): 50000 regular <- last will be the current mirror """ - incpairs = [(Time.stringtotime(inc.getinctime()), inc) for inc in incs] + incpairs = [(inc.getinctime(), inc) for inc in incs] incpairs.sort() result = ["%s %s" % (time, get_inc_type(inc)) for time, inc in incpairs] result.append("%s %s" % (mirror_time, get_file_type(mirrorrp))) @@ -64,7 +64,7 @@ def describe_incs_parsable(incs, mirror_time, mirrorrp): def describe_incs_human(incs, mirror_time, mirrorrp): """Return a string describing all the the root increments""" - incpairs = [(Time.stringtotime(inc.getinctime()), inc) for inc in incs] + incpairs = [(inc.getinctime(), inc) for inc in incs] incpairs.sort() result = ["Found %d increments:" % len(incpairs)] @@ -95,8 +95,7 @@ def delete_earlier_than_local(baserp, time): yield sub_rp for rp in yield_files(baserp): - if ((rp.isincfile() and - Time.stringtotime(rp.getinctime()) < time) or + if ((rp.isincfile() and rp.getinctime() < time) or (rp.isdir() and not rp.listdir())): Log("Deleting increment file %s" % rp.path, 5) rp.delete() @@ -114,7 +113,7 @@ class IncObj: if not incrp.isincfile(): raise ManageException("%s is not an inc file" % incrp.path) self.incrp = incrp - self.time = Time.stringtotime(incrp.getinctime()) + self.time = incrp.getinctime() def getbaserp(self): """Return rp of the incrp without extensions""" diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index de66588..d4eff8c 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -56,7 +56,7 @@ field names and values. 
from __future__ import generators import re, gzip -import log, Globals, rpath, Time, robust +import log, Globals, rpath, Time, robust, increment class ParsingError(Exception): """This is raised when bad or unparsable data is received""" @@ -207,6 +207,7 @@ class rorp_extractor: log.Log("Error parsing metadata file: %s" % (e,), 2) if self.at_end: break self.buf = self.buf[next_pos:] + assert not self.close() def skip_to_index(self, index): """Scan through the file, set buffer to beginning of index record @@ -250,6 +251,7 @@ class rorp_extractor: yield rorp if self.at_end: break self.buf = self.buf[next_pos:] + assert not self.close() def close(self): """Return value of closing associated file""" @@ -264,9 +266,10 @@ def OpenMetadata(rp = None, compress = 1): assert not metadata_fileobj, "Metadata file already open" if rp: metadata_rp = rp else: - if compress: filename_base = "mirror_metadata.%s.data.gz" - else: filename_base = "mirror_metadata.%s.data" - metadata_rp = Globals.rbdir.append(filename_base % (Time.curtimestr,)) + if compress: typestr = 'data.gz' + else: typestr = 'data' + metadata_rp = Globals.rbdir.append("mirror_metadata.%s.%s" % + (Time.curtimestr, typestr)) metadata_fileobj = metadata_rp.open("wb", compress = compress) def WriteMetadata(rorp): @@ -307,8 +310,7 @@ def GetMetadata_at_time(rbdir, time, restrict_index = None, rblist = None): for rp in rblist: if (rp.isincfile() and rp.getinctype() == "data" and rp.getincbase_str() == "mirror_metadata"): - if Time.stringtotime(rp.getinctime()) == time: - return GetMetadata(rp, restrict_index) + if rp.getinctime() == time: return GetMetadata(rp, restrict_index) return None diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py index c3a2f6c..ff09eee 100644 --- a/rdiff-backup/rdiff_backup/restore.py +++ b/rdiff-backup/rdiff_backup/restore.py @@ -21,9 +21,8 @@ from __future__ import generators import tempfile, os -from log import Log -import Globals, Time, Rdiff, Hardlink, 
FilenameMapping, SetConnections, \ - rorpiter, selection, destructive_stepping, rpath, lazy +import Globals, Time, Rdiff, Hardlink, FilenameMapping, rorpiter, \ + selection, rpath, log, backup, static, robust, metadata # This should be set to selection.Select objects over the source and @@ -31,66 +30,24 @@ import Globals, Time, Rdiff, Hardlink, FilenameMapping, SetConnections, \ _select_source = None _select_mirror = None -class RestoreError(Exception): pass - -def Restore(inc_rpath, mirror, target, rest_time): - """Recursively restore inc_rpath and mirror to target at rest_time - - Like restore_recusive below, but with a more friendly - interface (it converts to DSRPaths if necessary, finds the inc - files with the appropriate base, and makes rid). +# This will be set to the time of the current mirror +_mirror_time = None +# This will be set to the exact time to restore to (not restore_to_time) +_rest_time = None - rest_time is the time in seconds to restore to; - inc_rpath should not be the name of an increment file, but the - increment file shorn of its suffixes and thus should have the - same index as mirror. 
+class RestoreError(Exception): pass - """ - if not isinstance(target, destructive_stepping.DSRPath): - target = destructive_stepping.DSRPath(None, target) - - mirror_time = get_mirror_time() - rest_time = get_rest_time(rest_time, mirror_time) - inc_list = get_inclist(inc_rpath) - rid = RestoreIncrementData(inc_rpath.index, inc_rpath, inc_list) - rid.sortincseq(rest_time, mirror_time) - check_hardlinks(rest_time) - restore_recursive(inc_rpath.index, mirror, rid, target, - rest_time, mirror_time) - -def get_mirror_time(): - """Return the time (in seconds) of latest mirror""" - current_mirror_incs = get_inclist(Globals.rbdir.append("current_mirror")) - if not current_mirror_incs: - Log.FatalError("Could not get time of current mirror") - elif len(current_mirror_incs) > 1: - Log("Warning, two different dates for current mirror found", 2) - return Time.stringtotime(current_mirror_incs[0].getinctime()) - -def get_rest_time(old_rest_time, mirror_time): - """If old_rest_time is between two increments, return older time - - There is a slightly tricky reason for doing this: The rest of - the code just ignores increments that are older than - rest_time. But sometimes we want to consider the very next - increment older than rest time, because rest_time will be - between two increments, and what was actually on the mirror - side will correspond to the older one. - - So here we assume all rdiff-backup events were recorded in - "increments" increments, and if its in-between we pick the - older one here. 
+def Restore(mirror_rp, inc_rpath, target, restore_to_time): + """Recursively restore mirror and inc_rpath to target at rest_time""" + MirrorS = mirror_rp.conn.restore.MirrorStruct + TargetS = target.conn.restore.TargetStruct - """ - base_incs = get_inclist(Globals.rbdir.append("increments")) - if not base_incs: return old_rest_time - inctimes = [Time.stringtotime(inc.getinctime()) for inc in base_incs] - inctimes.append(mirror_time) - older_times = filter(lambda time: time <= old_rest_time, inctimes) - if older_times: return max(older_times) - else: # restore time older than oldest increment, just return that - return min(inctimes) + MirrorS.set_mirror_and_rest_times(restore_to_time) + MirrorS.initialize_rf_cache(mirror_rp, inc_rpath) + target_iter = TargetS.get_initial_iter(target) + diff_iter = MirrorS.get_diffs(target_iter) + TargetS.patch(target, diff_iter) def get_inclist(inc_rpath): """Returns increments with given base""" @@ -113,179 +70,271 @@ def get_inclist(inc_rpath): inc_list.append(inc) return inc_list -def check_hardlinks(rest_time): - """Check for hard links and enable hard link support if found""" - if (Globals.preserve_hardlinks != 0 and - Hardlink.retrieve_final(rest_time)): - Log("Hard link information found, attempting to preserve " - "hard links.", 5) - SetConnections.UpdateGlobal('preserve_hardlinks', 1) - else: SetConnections.UpdateGlobal('preserve_hardlinks', None) +def ListChangedSince(mirror_rp, inc_rp, restore_to_time): + """List the changed files under mirror_rp since rest time""" + MirrorS = mirror_rp.conn.restore.MirrorStruct + MirrorS.set_mirror_and_rest_times(restore_to_time) + MirrorS.initialize_rf_cache(mirror_rp, inc_rp) + + cur_iter = MirrorS.get_mirror_rorp_iter(_mirror_time, 1) + old_iter = MirrorS.get_mirror_rorp_iter(_rest_time, 1) + collated = rorpiter.Collate2Iters(old_iter, cur_iter) + for old_rorp, cur_rorp in collated: + if not old_rorp: change = "new" + elif not cur_rorp: change = "deleted" + elif old_rorp == cur_rorp: 
continue +        else: change = "changed" +        path_desc = (old_rorp and old_rorp.get_indexpath() or +                     cur_rorp.get_indexpath()) +        print "%-7s %s" % (change, path_desc) + + +class MirrorStruct: + """Hold functions to be run on the mirror side""" + def set_mirror_and_rest_times(cls, restore_to_time): + """Set global variables _mirror_time and _rest_time on mirror conn""" + global _mirror_time, _rest_time + _mirror_time = cls.get_mirror_time() + _rest_time = cls.get_rest_time(restore_to_time) + + def get_mirror_time(cls): + """Return time (in seconds) of latest mirror""" + cur_mirror_incs = get_inclist(Globals.rbdir.append("current_mirror")) + if not cur_mirror_incs: + log.Log.FatalError("Could not get time of current mirror") + elif len(cur_mirror_incs) > 1: + log.Log("Warning, two different times for current mirror found", 2) + return cur_mirror_incs[0].getinctime() + + def get_rest_time(cls, restore_to_time): + """Return older time, if restore_to_time is in between two inc times + + There is a slightly tricky reason for doing this: The rest of the + code just ignores increments that are older than restore_to_time. + But sometimes we want to consider the very next increment older + than rest time, because rest_time will be between two increments, + and what was actually on the mirror side will correspond to the + older one. + + So here we assume all rdiff-backup events were recorded in + "increments" increments, and if it's in-between we pick the + older one here. -def restore_recursive(index, mirror, rid, target, time, mirror_time): - """Recursive restore function.
+ """ + global _rest_time + base_incs = get_inclist(Globals.rbdir.append("increments")) + if not base_incs: return _mirror_time + inctimes = [inc.getinctime() for inc in base_incs] + inctimes.append(_mirror_time) + older_times = filter(lambda time: time <= restore_to_time, inctimes) + if older_times: return max(older_times) + else: # restore time older than oldest increment, just return that + return min(inctimes) + + def initialize_rf_cache(cls, mirror_base, inc_base): + """Set cls.rf_cache to CachedRF object""" + inc_list = get_inclist(inc_base) + rf = RestoreFile(mirror_base, inc_base, get_inclist(inc_base)) + cls.mirror_base, cls.inc_base = mirror_base, inc_base + cls.root_rf = rf + cls.rf_cache = CachedRF(rf) + + def get_mirror_rorp_iter(cls, rest_time = None, require_metadata = None): + """Return iter of mirror rps at given restore time + + Usually we can use the metadata file, but if this is + unavailable, we may have to build it from scratch. - rid is a RestoreIncrementData object whose inclist is already - sortedincseq'd, and target is the dsrp to restore to. + """ + if rest_time is None: rest_time = _rest_time + metadata_iter = metadata.GetMetadata_at_time(Globals.rbdir, + rest_time, restrict_index = cls.mirror_base.index) + if metadata_iter: return metadata_iter + if require_metadata: log.Log.FatalError("Mirror metadata not found") + log.Log("Warning: Mirror metadata not found, " + "reading from directory", 2) + return cls.get_rorp_iter_from_rf(cls.root_rf) + + def get_rorp_iter_from_rf(cls, rf): + """Recursively yield mirror rorps from rf""" + rorp = rf.get_attribs() + yield rorp + if rorp.isdir(): + for sub_rf in rf.yield_sub_rfs(): + for rorp in yield_attribs(sub_rf): yield rorp + + def subtract_indicies(cls, index, rorp_iter): + """Subtract index from index of each rorp in rorp_iter + + subtract_indicies and add_indicies are necessary because we + may not be restoring from the root index. 
- Note that target may have a different index than mirror and - rid, because we may be restoring a file whose index is, say - ('foo','bar') to a target whose path does not contain - "foo/bar". + """ + if index == (): return rorp_iter + def get_iter(): + for rorp in rorp_iter: + assert rorp.index[:len(index)] == index, (rorp.index, index) + rorp.index = rorp.index[len(index):] + yield rorp + return get_iter() - """ - assert isinstance(target, destructive_stepping.DSRPath) - assert mirror.index == rid.index + def get_diffs(cls, target_iter): + """Given rorp iter of target files, return diffs - target_finalizer = rorpiter.IterTreeReducer( - rorpiter.DestructiveSteppingFinalizer, ()) - for rcd in yield_rcds(rid.index, mirror, rid, target, time, mirror_time): - rcd.RestoreFile() - #if rcd.mirror: mirror_finalizer(rcd.index, rcd.mirror) - target_finalizer(rcd.target.index, rcd.target) - target_finalizer.Finish() + Here the target_iter doesn't contain any actual data, just + attribute listings. Thus any diffs we generate will be + snapshots. 
-def yield_rcds(index, mirrorrp, rid, target, rest_time, mirror_time): - """Iterate RestoreCombinedData objects starting with given args + """ + mir_iter = cls.subtract_indicies(cls.mirror_base.index, + cls.get_mirror_rorp_iter()) + collated = rorpiter.Collate2Iters(mir_iter, target_iter) + return cls.get_diffs_from_collated(collated) + + def get_diffs_from_collated(cls, collated): + """Get diff iterator from collated""" + for mir_rorp, target_rorp in collated: + if Globals.preserve_hardlinks: + if mir_rorp: Hardlink.add_rorp(mir_rorp, source = 1) + if target_rorp: Hardlink.add_rorp(target_rorp, source = 0) + + if (not target_rorp or not mir_rorp or + not mir_rorp == target_rorp or + (Globals.preserve_hardlinks and not + Hardlink.rorp_eq(mir_rorp, target_rorp))): + yield cls.get_diff(mir_rorp, target_rorp) + + def get_diff(cls, mir_rorp, target_rorp): + """Get a diff for mir_rorp at time""" + if not mir_rorp: mir_rorp = rpath.RORPath(target_rorp.index) + elif Globals.preserve_hardlinks and Hardlink.islinked(mir_rorp): + mir_rorp.flaglinked(Hardlink.get_link_index(mir_rorp)) + elif mir_rorp.isreg(): + expanded_index = cls.mirror_base.index + mir_rorp.index + mir_rorp.setfile(cls.rf_cache.get_fp(expanded_index)) + mir_rorp.set_attached_filetype('snapshot') + return mir_rorp + +static.MakeClass(MirrorStruct) + + +class TargetStruct: + """Hold functions to be run on the target side when restoring""" + def get_initial_iter(cls, target): + """Return a selection object iterating the rorpaths in target""" + return selection.Select(target).set_iter() + + def patch(cls, target, diff_iter): + """Patch target with the diffs from the mirror side + + This function was already written for use when backing up, so + just use that. - rid is a RestoreCombinedData object. target is an rpath where - the created file should go. 
+ """ + backup.DestinationStruct.patch(target, diff_iter) - In this case the "mirror" directory is treated as the source, - and we are actually copying stuff onto what Select considers - the source directory. +static.MakeClass(TargetStruct) - """ - select_result = _select_mirror.Select(target) - if select_result == 0: return - - if mirrorrp and not _select_source.Select(mirrorrp): - mirrorrp = None - rcd = RestoreCombinedData(rid, mirrorrp, target) - - if mirrorrp and mirrorrp.isdir() or \ - rid and rid.inc_rpath and rid.inc_rpath.isdir(): - sub_rcds = yield_sub_rcds(index, mirrorrp, rid, - target, rest_time, mirror_time) - else: sub_rcds = None - - if select_result == 1: - yield rcd - if sub_rcds: - for sub_rcd in sub_rcds: yield sub_rcd - elif select_result == 2: - if sub_rcds: - try: first = sub_rcds.next() - except StopIteration: return # no tuples found inside, skip - yield rcd - yield first - for sub_rcd in sub_rcds: yield sub_rcd - -def yield_sub_rcds(index, mirrorrp, rid, target, rest_time, mirror_time): - """Yield collated tuples from inside given args""" - if not check_dir_exists(mirrorrp, rid): return - mirror_iter = yield_mirrorrps(mirrorrp) - rid_iter = yield_rids(rid, rest_time, mirror_time) - - for indexed_tup in rorpiter.CollateIterators(mirror_iter, rid_iter): - index = indexed_tup.index - new_mirrorrp, new_rid = indexed_tup - for rcd in yield_rcds(index, new_mirrorrp, new_rid, - target.append(index[-1]), rest_time, mirror_time): - yield rcd - -def check_dir_exists(mirrorrp, rid): - """Return true if target should be a directory""" - if rid and rid.inc_list: - # Incs say dir if last (earliest) one is a dir increment - return rid.inc_list[-1].getinctype() == "dir" - elif mirrorrp: return mirrorrp.isdir() # if no incs, copy mirror - else: return None - -def yield_mirrorrps(mirrorrp): - """Yield mirrorrps underneath given mirrorrp""" - if mirrorrp and mirrorrp.isdir(): - if Globals.quoting_enabled: - for rp in 
selection.get_quoted_dir_children(mirrorrp): - yield rp - else: - dirlist = mirrorrp.listdir() - dirlist.sort() - for filename in dirlist: yield mirrorrp.append(filename) -def yield_rids(rid, rest_time, mirror_time): - """Yield RestoreIncrementData objects within given rid dir +class CachedRF: + """Store RestoreFile objects until they are needed + + The code above would like to pretend it has random access to RFs, + making one for a particular index at will. However, in general + this involves listing and filtering a directory, which can get + expensive. - If the rid doesn't correspond to a directory, don't yield any - elements. If there are increments whose corresponding base - doesn't exist, the first element will be None. All the rpaths - involved correspond to files in the increment directory. + Thus, when a CachedRF retrieves a RestoreFile, it creates all the + RFs of that directory at the same time, and doesn't have to + recalculate. It assumes the indices will be in order, so the + cache is deleted if a later index is requested.
""" - if not rid or not rid.inc_rpath or not rid.inc_rpath.isdir(): return - rid_dict = {} # dictionary of basenames:rids - dirlist = rid.inc_rpath.listdir() - if Globals.quoting_enabled: - dirlist = [FilenameMapping.unquote(fn) for fn in dirlist] - - def affirm_dict_indexed(basename): - """Make sure the rid dictionary has given basename as key""" - if not rid_dict.has_key(basename): - rid_dict[basename] = RestoreIncrementData( - rid.index + (basename,), None, []) # init with empty rid - - def add_to_dict(filename): - """Add filename to the inc tuple dictionary""" - rp = rid.inc_rpath.append(filename) - if Globals.quoting_enabled: rp.quote_path() - if rp.isincfile() and rp.getinctype() != 'data': - basename = rp.getincbase_str() - affirm_dict_indexed(basename) - rid_dict[basename].inc_list.append(rp) - elif rp.isdir(): - affirm_dict_indexed(filename) - rid_dict[filename].inc_rpath = rp - - for filename in dirlist: add_to_dict(filename) - keys = rid_dict.keys() - keys.sort() - - # sortincseq now to avoid descending .missing directories later - for key in keys: - rid = rid_dict[key] - if rid.inc_rpath or rid.inc_list: - rid.sortincseq(rest_time, mirror_time) - yield rid - - -class RestoreIncrementData: - """Contains information about a specific index from the increments dir - - This is just a container class, used because it would be easier to - work with than an IndexedTuple. 
+ def __init__(self, root_rf): + """Initialize CachedRF, self.rf_list variable""" + self.root_rf = root_rf + self.rf_list = [] # list should be filled in index order + + def list_rfs_in_cache(self, index): + """Used for debugging, return indices of cached rfs for printing""" + s1 = "-------- Cached RF for %s -------" % (index,) + s2 = " ".join([str(rf.index) for rf in self.rf_list]) + s3 = "--------------------------" + return "\n".join((s1, s2, s3)) + + def get_rf(self, index): + """Return RestoreFile of given index""" + while 1: + if not self.rf_list: self.add_rfs(index) + rf = self.rf_list.pop(0) + if rf.index < index: continue + elif rf.index == index: return rf + self.rf_list.insert(0, rf) + self.add_rfs(index) + + def get_fp(self, index): + """Return the file object (for reading) of given index""" + return self.get_rf(index).get_restore_fp() + + def add_rfs(self, index): + """Given index, add the rfs in that same directory""" + if not index: return self.root_rf + parent_index = index[:-1] + temp_rf = RestoreFile(self.root_rf.mirror_rp.new_index(parent_index), + self.root_rf.inc_rp.new_index(parent_index), []) + new_rfs = list(temp_rf.yield_sub_rfs()) + assert new_rfs, "No RFs added for index %s" % index + self.rf_list[0:0] = new_rfs + +class RestoreFile: + """Hold data about a single mirror file and its related increments + + self.relevant_incs will be set to a list of increments that matter + for restoring a regular file.
If the patches are to mirror_rp, it + will be the first element in self.relevant_incs """ - def __init__(self, index, inc_rpath, inc_list): - self.index = index - self.inc_rpath = inc_rpath - self.inc_list = inc_list - - def sortincseq(self, rest_time, mirror_time): - """Sort self.inc_list sequence, throwing away irrelevant increments""" - if not self.inc_list or rest_time >= mirror_time: - self.inc_list = [] + def __init__(self, mirror_rp, inc_rp, inc_list): + assert mirror_rp.index == inc_rp.index, (mirror_rp, inc_rp) + self.index = mirror_rp.index + self.mirror_rp = mirror_rp + self.inc_rp, self.inc_list = inc_rp, inc_list + self.set_relevant_incs() + + def relevant_incs_string(self): + """Return printable string of relevant incs, used for debugging""" + l = ["---- Relevant incs for %s" % ("/".join(self.index),)] + l.extend(["%s %s %s" % (inc.getinctype(), inc.lstat(), inc.path) + for inc in self.relevant_incs]) + l.append("--------------------------------") + return "\n".join(l) + + def set_relevant_incs(self): + """Set self.relevant_incs to increments that matter for restoring + + relevant_incs is sorted newest first. If mirror_rp matters, + it will be (first) in relevant_incs.
+ + """ + self.mirror_rp.inc_type = 'snapshot' + self.mirror_rp.inc_compressed = 0 + if not self.inc_list or _rest_time >= _mirror_time: + self.relevant_incs = [self.mirror_rp] return - newer_incs = self.get_newer_incs(rest_time, mirror_time) + newer_incs = self.get_newer_incs() i = 0 while(i < len(newer_incs)): # Only diff type increments require later versions if newer_incs[i].getinctype() != "diff": break i = i+1 - self.inc_list = newer_incs[:i+1] - self.inc_list.reverse() # return in reversed order (latest first) + self.relevant_incs = newer_incs[:i+1] + if (not self.relevant_incs or + self.relevant_incs[-1].getinctype() == "diff"): + self.relevant_incs.append(self.mirror_rp) + self.relevant_incs.reverse() # return in reversed order - def get_newer_incs(self, rest_time, mirror_time): + def get_newer_incs(self): """Return list of newer incs sorted by time (increasing) Also discard increments older than rest_time (rest_time we are @@ -295,94 +344,119 @@ class RestoreIncrementData: """ incpairs = [] for inc in self.inc_list: - time = Time.stringtotime(inc.getinctime()) - if time >= rest_time: incpairs.append((time, inc)) + time = inc.getinctime() + if time >= _rest_time: incpairs.append((time, inc)) incpairs.sort() return [pair[1] for pair in incpairs] - -class RestoreCombinedData: - """Combine index information from increment and mirror directories + def get_attribs(self): + """Return RORP with restored attributes, but no data - This is similar to RestoreIncrementData but has mirror information - also. - - """ - def __init__(self, rid, mirror, target): - """Init - set values from one or both if they exist - - mirror and target are DSRPaths of the corresponding files in - the mirror and target directory respectively. rid is a - RestoreIncrementData as defined above + This should only be necessary if the metadata file is lost for + some reason. Otherwise the file provides all data. The size + will be wrong here, because the attribs may be taken from + diff. 
""" - if rid: - self.index = rid.index - self.inc_rpath = rid.inc_rpath - self.inc_list = rid.inc_list - if mirror: - self.mirror = mirror - assert mirror.index == self.index - else: self.mirror = None - elif mirror: - self.index = mirror.index - self.mirror = mirror - self.inc_list = [] - self.inc_rpath = None - else: assert None, "neither rid nor mirror given" - self.target = target - - def RestoreFile(self): - """Non-recursive restore function """ - if not self.inc_list and not (self.mirror and self.mirror.lstat()): - return # no increments were applicable - self.log() - - if self.restore_hardlink(): return - - if not self.inc_list or self.inc_list[0].getinctype() == "diff": - assert self.mirror and self.mirror.lstat(), \ - "No base to go with incs for %s" % self.target.path - rpath.copy_with_attribs(self.mirror, self.target) - for inc in self.inc_list: self.applyinc(inc, self.target) - - def log(self): - """Log current restore action""" - inc_string = ','.join([inc.path for inc in self.inc_list]) - Log("Restoring %s with increments %s to %s" % - (self.mirror and self.mirror.path, - inc_string, self.target.path), 5) - - def restore_hardlink(self): - """Hard link target and return true if hard linking appropriate""" - if (Globals.preserve_hardlinks and - Hardlink.restore_link(self.index, self.target)): - rpath.copy_attribs(self.inc_list and self.inc_list[-1] or - self.mirror, self.target) - return 1 - return None - - def applyinc(self, inc, target): - """Apply increment rp inc to targetrp target""" - Log("Applying increment %s to %s" % (inc.path, target.path), 6) - inctype = inc.getinctype() - if inctype == "diff": - if not target.lstat(): - raise RestoreError("Bad increment sequence at " + inc.path) - Rdiff.patch_action(target, inc, - delta_compressed = inc.isinccompressed() - ).execute() - elif inctype == "dir": - if not target.isdir(): - if target.lstat(): - raise RestoreError("File %s already exists" % target.path) - target.mkdir() - elif inctype == 
"missing": return - elif inctype == "snapshot": - if inc.isinccompressed(): - target.write_from_fileobj(inc.open("rb", compress = 1)) - else: rpath.copy(inc, target) - else: raise RestoreError("Unknown inctype %s" % inctype) - rpath.copy_attribs(inc, target) - + last_inc = self.relevant_incs[-1] + if last_inc.getinctype() == 'missing': return rpath.RORPath(self.index) + + rorp = last_inc.getRORPath() + rorp.index = self.index + if last_inc.getinctype() == 'dir': rorp.data['type'] = 'dir' + return rorp + + def get_restore_fp(self): + """Return file object of restored data""" + assert self.relevant_incs[-1].isreg(), "Not a regular file" + current_fp = self.get_first_fp() + for inc_diff in self.relevant_incs[1:]: + log.Log("Applying patch %s" % (inc_diff.get_indexpath(),), 7) + assert inc_diff.getinctype() == 'diff' + delta_fp = inc_diff.open("rb", inc_diff.isinccompressed()) + new_fp = tempfile.TemporaryFile() + Rdiff.write_patched_fp(current_fp, delta_fp, new_fp) + new_fp.seek(0) + current_fp = new_fp + return current_fp + + def get_first_fp(self): + """Return first file object from relevant inc list""" + first_inc = self.relevant_incs[0] + assert first_inc.getinctype() == 'snapshot' + if not first_inc.isinccompressed(): return first_inc.open("rb") + + # current_fp must be a real (uncompressed) file + current_fp = tempfile.TemporaryFile() + fp = first_inc.open("rb", compress = 1) + rpath.copyfileobj(fp, current_fp) + assert not fp.close() + current_fp.seek(0) + return current_fp + + def yield_sub_rfs(self): + """Return RestoreFiles under current RestoreFile (which is dir)""" + assert self.mirror_rp.isdir() or self.inc_rp.isdir() + mirror_iter = self.yield_mirrorrps(self.mirror_rp) + inc_pair_iter = self.yield_inc_complexes(self.inc_rp) + collated = rorpiter.Collate2Iters(mirror_iter, inc_pair_iter) + + for mirror_rp, inc_pair in collated: + if not inc_pair: + inc_rp = self.inc_rp.new_index(mirror_rp.index) + if Globals.quoting_enabled: inc_rp.quote_path() + inc_list 
= [] + else: inc_rp, inc_list = inc_pair + if not mirror_rp: + mirror_rp = self.mirror_rp.new_index(inc_rp.index) + if Globals.quoting_enabled: mirror_rp.quote_path() + yield RestoreFile(mirror_rp, inc_rp, inc_list) + + def yield_mirrorrps(self, mirrorrp): + """Yield mirrorrps underneath given mirrorrp""" + if mirrorrp and mirrorrp.isdir(): + if Globals.quoting_enabled: + for rp in selection.get_quoted_dir_children(mirrorrp): + if rp.index != ('rdiff-backup-data',): yield rp + else: + dirlist = mirrorrp.listdir() + dirlist.sort() + for filename in dirlist: + rp = mirrorrp.append(filename) + if rp.index != ('rdiff-backup-data',): yield rp + + def yield_inc_complexes(self, inc_rpath): + """Yield (sub_inc_rpath, inc_list) IndexedTuples from given inc_rpath + + Finds pairs under directory inc_rpath. sub_inc_rpath will just be + the prefix rp, while the rps in inc_list should actually exist. + """ + if not inc_rpath.isdir(): return + inc_dict = {} # dictionary of basenames:inc_lists + dirlist = robust.listrp(inc_rpath) + if Globals.quoting_enabled: + dirlist = [FilenameMapping.unquote(fn) for fn in dirlist] + + def affirm_dict_indexed(basename): + """Make sure the rid dictionary has given basename as key""" + if not inc_dict.has_key(basename): + sub_inc_rp = inc_rpath.append(basename) + if Globals.quoting_enabled: sub_inc_rp.quote_path() + inc_dict[basename] = rorpiter.IndexedTuple(sub_inc_rp.index, + (sub_inc_rp, [])) + + def add_to_dict(filename): + """Add filename to the inc tuple dictionary""" + rp = inc_rpath.append(filename) + if Globals.quoting_enabled: rp.quote_path() + if rp.isincfile() and rp.getinctype() != 'data': + basename = rp.getincbase_str() + affirm_dict_indexed(basename) + inc_dict[basename][1].append(rp) + elif rp.isdir(): affirm_dict_indexed(filename) + + for filename in dirlist: add_to_dict(filename) + keys = inc_dict.keys() + keys.sort() + for key in keys: yield inc_dict[key] diff --git a/rdiff-backup/rdiff_backup/robust.py 
b/rdiff-backup/rdiff_backup/robust.py index 67f32be..e43ceea 100644 --- a/rdiff-backup/rdiff_backup/robust.py +++ b/rdiff-backup/rdiff_backup/robust.py @@ -17,241 +17,9 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA -"""Prevent mirror from being corrupted; handle errors +"""Catch various exceptions given system call""" -Ideally no matter an instance of rdiff-backup gets aborted, no -information should get lost. The target directory should be left in a -coherent state, and later instances of rdiff-backup should clean -things up so there is no sign that anything ever got aborted or -failed. - -Thus, files should be updated in an atomic way as possible. Each file -should be updated (and the corresponding diff files written) or not, -and it should be clear which happened. In general, I don't think this -is possible, since the creation of the diff files and the changing of -updated files cannot be guarateed to happen together. It is possible, -I think, to record various information to files which would allow a -later process to figure out what the last operation was, but this -would add several file operations to the processing of each file, and -I don't think, would be a good tradeoff. - -The compromise reached here is that diff files should be created just -before the mirror files are updated, and each file update should be -done with a rename operation on a file in the same directory. -Furthermore, every once in a while, rdiff-backup will record which -file it just finished processing. If any fatal errors are caught, it -will also record the last processed file. Future instances may not -know exactly when the previous instance was aborted, but they will be -able to narrow down the possibilities. 
- -""" - -import os, time -from log import Log -import Time, librsync, errno, signal, cPickle, C, \ - Hardlink, TempFile, static, rpath, Globals - - -class Action: - """Represents a file operation to be accomplished later""" - def __init__(self, init_thunk, final_func, error_handler): - """Action initializer - - All the thunks are functions whose return value will be - ignored. init_thunk should not make any irreversible changes - but prepare for the writing of the important data. final_func - should be as short as possible and do the real work. - error_handler is run if there is an error in init_thunk or - final_func. Errors in init_thunk should be corrected by - error_handler as if nothing had been run in the first place. - - init_thunk takes no arguments. - - final_thunk takes the return value of init_thunk as its - argument, and its return value is returned by execute(). - - error_handler takes three arguments: the exception, a value - which is true just in case self.init_thunk ran correctly, and - a value which will be the return value of init_thunk if it ran - correctly. 
- - """ - self.init_thunk = init_thunk or self.default_init_thunk - self.final_func = final_func or self.default_final_func - self.error_handler = error_handler or self.default_error_handler - - def execute(self): - """Actually run the operation""" - ran_init_thunk = None - try: - init_val = self.init_thunk() - ran_init_thunk = 1 - return self.final_func(init_val) - except Exception, exc: # Catch all errors - Log.exception() - TracebackArchive.add() - if ran_init_thunk: self.error_handler(exc, 1, init_val) - else: self.error_handler(exc, None, None) - raise exc - - def default_init_thunk(self): return None - def default_final_func(self, init_val): return init_val - def default_error_handler(self, exc, ran_init, init_val): pass - - -null_action = Action(None, None, None) -def chain(*robust_action_list): - """Return chain tying together a number of robust actions - - The whole chain will be aborted if some error occurs in - initialization stage of any of the component actions. - - """ - ras_with_started_inits, init_return_vals = [], [] - def init(): - for ra in robust_action_list: - ras_with_started_inits.append(ra) - init_return_vals.append(ra.init_thunk()) - return init_return_vals - def final(init_return_vals): - final_vals = [] - for ra, init_val in zip(robust_action_list, init_return_vals): - final_vals.append(ra.final_func(init_val)) - return final_vals - def error(exc, ran_init, init_val): - for ra, init_val in zip(ras_with_started_inits, init_return_vals): - ra.error_handler(exc, 1, init_val) - for ra in ras_with_started_inits[len(init_return_vals):]: - ra.error_handler(exc, None, None) - return Action(init, final, error) - -def chain_nested(*robust_action_list): - """Like chain but final actions performed in reverse order""" - ras_with_started_inits, init_vals = [], [] - def init(): - for ra in robust_action_list: - ras_with_started_inits.append(ra) - init_vals.append(ra.init_thunk()) - return init_vals - def final(init_vals): - ras_and_inits = 
zip(robust_action_list, init_vals) - ras_and_inits.reverse() - final_vals = [] - for ra, init_val in ras_and_inits: - final_vals.append(ra.final_func(init_val)) - return final_vals - def error(exc, ran_init, init_val): - for ra, init_val in zip(ras_with_started_inits, init_vals): - ra.error_handler(exc, 1, init_val) - for ra in ras_with_started_inits[len(init_vals):]: - ra.error_handler(exc, None, None) - return Action(init, final, error) - -def make_tf_robustaction(init_thunk, tempfiles, final_renames = None): - """Shortcut Action creator when only tempfiles involved - - Often the robust action will just consist of some initial - stage, renaming tempfiles in the final stage, and deleting - them if there is an error. This function makes it easier to - create Actions of that type. - - """ - if isinstance(tempfiles, TempFile.TempFile): tempfiles = (tempfiles,) - if isinstance(final_renames, rpath.RPath): final_renames = (final_renames,) - if final_renames is None: final_renames = [None] * len(tempfiles) - assert len(tempfiles) == len(final_renames) - - def final(init_val): # rename tempfiles to final positions - for tempfile, destination in zip(tempfiles, final_renames): - if destination: - if destination.isdir(): # Cannot rename over directory - destination.delete() - tempfile.rename(destination) - return init_val - def error(exc, ran_init, init_val): - for tf in tempfiles: tf.delete() - return Action(init_thunk, final, error) - -def copy_action(rorpin, rpout): - """Return robust action copying rorpin to rpout - - The source can be a rorp or an rpath. Does not recurse. If - directories copied, then just exit (output directory not - overwritten). 
- - """ - tfl = [None] # Need some mutable state to hold tf value - def init(): - if not (rorpin.isdir() and rpout.isdir()): # already a dir - tfl[0] = tf = TempFile.new(rpout) - if rorpin.isreg(): tf.write_from_fileobj(rorpin.open("rb")) - else: rpath.copy(rorpin, tf) - return tf - else: return None - def final(tf): - if tf and tf.lstat(): - if rpout.isdir(): rpout.delete() - tf.rename(rpout) - return rpout - def error(exc, ran_init, init_val): - if tfl[0]: tfl[0].delete() - return Action(init, final, error) - -def copy_with_attribs_action(rorpin, rpout, compress = None): - """Like copy_action but also copy attributes""" - tfl = [None] # Need some mutable state for error handler - def init(): - if not (rorpin.isdir() and rpout.isdir()): # already a dir - tfl[0] = tf = TempFile.new(rpout) - if rorpin.isreg(): - tf.write_from_fileobj(rorpin.open("rb"), compress) - else: rpath.copy(rorpin, tf) - if tf.lstat(): # Some files, like sockets, won't be created - rpath.copy_attribs(rorpin, tf) - return tf - else: return None - def final(tf): - if rorpin.isdir() and rpout.isdir(): - rpath.copy_attribs(rorpin, rpout) - elif tf and tf.lstat(): - if rpout.isdir(): rpout.delete() # can't rename over dir - tf.rename(rpout) - return rpout - def error(exc, ran_init, init_val): - if tfl[0]: tfl[0].delete() - return Action(init, final, error) - -def copy_attribs_action(rorpin, rpout): - """Return action which just copies attributes - - Copying attributes is already pretty atomic, so just run - normal sequence. - - """ - def final(init_val): - rpath.copy_attribs(rorpin, rpout) - return rpout - return Action(None, final, None) - -def symlink_action(rpath, linktext): - """Return symlink action by moving one file over another""" - tf = TempFile.new(rpath) - def init(): tf.symlink(linktext) - return make_tf_robustaction(init, tf, rpath) - -def destructive_write_action(rp, s): - """Return action writing string s to rpath rp in robust way - - This will overwrite any data currently in rp. 
- - """ - tf = TempFile.new(rp) - def init(): - fp = tf.open("wb") - fp.write(s) - fp.close() - tf.setdata() - return make_tf_robustaction(init, tf, rp) +import librsync, errno, signal, C, static, rpath, Globals, log, statistics def check_common_error(error_handler, function, args = []): """Apply function to args, if error, run error_handler on exception @@ -264,19 +32,16 @@ def check_common_error(error_handler, function, args = []): except Exception, exc: TracebackArchive.add([function] + list(args)) if catch_error(exc): - Log.exception() + log.Log.exception() conn = Globals.backup_writer - if conn is not None: # increment error count - ITRB_exists = conn.Globals.is_not_None('ITRB') - if ITRB_exists: conn.Globals.ITRB.increment_stat('Errors') + if conn is not None: statistics.record_error() if error_handler: return error_handler(exc, *args) else: return - Log.exception(1, 2) + log.Log.exception(1, 2) raise def catch_error(exc): """Return true if exception exc should be caught""" - for exception_class in (rpath.SkipFileException, rpath.RPathException, librsync.librsyncError, C.UnknownFileTypeError): if isinstance(exc, exception_class): return 1 @@ -291,7 +56,7 @@ def catch_error(exc): def listrp(rp): """Like rp.listdir() but return [] if error, and sort results""" def error_handler(exc): - Log("Error listing directory %s" % rp.path, 2) + log.Log("Error listing directory %s" % rp.path, 2) return [] dir_listing = check_common_error(error_handler, rp.listdir) dir_listing.sort() @@ -322,284 +87,16 @@ class TracebackArchive: extra information to same traceback archive. 
""" - cls._traceback_strings.append(Log.exception_to_string(extra_args)) + cls._traceback_strings.append(log.Log.exception_to_string(extra_args)) if len(cls._traceback_strings) > 10: cls._traceback_strings = cls._traceback_strings[:10] def log(cls): """Print all exception information to log file""" if cls._traceback_strings: - Log("------------ Old traceback info -----------\n%s\n" - "-------------------------------------------" % - ("\n".join(cls._traceback_strings),), 3) + log.Log("------------ Old traceback info -----------\n%s\n" + "-------------------------------------------" % + ("\n".join(cls._traceback_strings),), 3) static.MakeClass(TracebackArchive) - -class SaveState: - """Save state in the middle of backups for resuming later""" - _last_file_sym = None # RPath of sym pointing to last file processed - _last_file_definitive_rp = None # Touch this if last file is really last - _last_checkpoint_time = 0 # time in seconds of last checkpoint - _checkpoint_rp = None # RPath of checkpoint data pickle - - def init_filenames(cls): - """Set rpaths of markers. Assume rbdir already set.""" - if not Globals.isbackup_writer: - return Globals.backup_writer.SaveState.init_filenames() - - assert Globals.local_connection is Globals.rbdir.conn, \ - (Globals.rbdir.conn, Globals.backup_writer) - - cls._last_file_sym = Globals.rbdir.append( - "last-file-incremented.%s.data" % Time.curtimestr) - cls._checkpoint_rp = Globals.rbdir.append( - "checkpoint-data.%s.data" % Time.curtimestr) - cls._last_file_definitive_rp = Globals.rbdir.append( - "last-file-definitive.%s.data" % Time.curtimestr) - - def touch_last_file(cls): - """Touch last file marker, indicating backup has begun""" - if not cls._last_file_sym.lstat(): cls._last_file_sym.touch() - - def touch_last_file_definitive(cls): - """Create last-file-definitive marker - - When a backup gets aborted, there may be time to indicate the - last file successfully processed, and this should be touched. 
- Sometimes when the abort is hard, there may be a last file - indicated, but further files since then have been processed, - in which case this shouldn't be touched. - - """ - cls._last_file_definitive_rp.touch() - - def record_last_file_action(cls, last_file_rorp): - """Action recording last file to be processed as symlink in rbdir - - last_file_rorp is None means that no file is known to have - been processed. - - """ - if last_file_rorp: - symtext = apply(os.path.join, - ('increments',) + last_file_rorp.index) - return symlink_action(cls._last_file_sym, symtext) - else: return Action(None, lambda init_val: cls.touch_last_file(), None) - - def checkpoint(cls, ITR, finalizer, last_file_rorp, override = None): - """Save states of tree reducer and finalizer during inc backup - - If override is true, checkpoint even if one isn't due. - - """ - if not override and not cls.checkpoint_needed(): return - assert cls._checkpoint_rp, "_checkpoint_rp not set yet" - - cls._last_checkpoint_time = time.time() - Log("Writing checkpoint time %s" % cls._last_checkpoint_time, 7) - state_string = cPickle.dumps((ITR, finalizer)) - chain(destructive_write_action(cls._checkpoint_rp, state_string), - cls.record_last_file_action(last_file_rorp)).execute() - - def checkpoint_needed(cls): - """Returns true if another checkpoint is called for""" - return (time.time() > cls._last_checkpoint_time + - Globals.checkpoint_interval) - - def checkpoint_remove(cls): - """Remove all checkpointing data after successful operation""" - for rp in Resume.get_relevant_rps(): rp.delete() - if Globals.preserve_hardlinks: Hardlink.remove_all_checkpoints() - -static.MakeClass(SaveState) - - -class ResumeException(Exception): - """Indicates some error has been encountered while trying to resume""" - pass - -class Resume: - """Check for old aborted backups and resume if necessary""" - _session_info_list = None # List of ResumeSessionInfo's, sorted by time - def FindTime(cls, index, later_than = 0): - """For a 
given index, find the appropriate time to use for inc - - If it is clear which time to use (because it is determined by - definitive records, or there are no aborted backup, etc.) then - just return the appropriate time. Otherwise, if an aborted - backup was last checkpointed before the index, assume that it - didn't get there, and go for the older time. If an inc file - is already present, the function will be rerun with later time - specified. - - """ - assert Globals.isbackup_writer - if Time.prevtime > later_than: return Time.prevtime # usual case - - for si in cls.get_sis_covering_index(index): - if si.time > later_than: return si.time - raise rpath.SkipFileException("Index %s already covered, skipping" % - str(index)) - - def get_sis_covering_index(cls, index): - """Return sorted list of SessionInfos which may cover index - - Aborted backup may be relevant unless index is lower and we - are sure that it didn't go further. - - """ - return filter(lambda session_info: - not ((session_info.last_index is None or - session_info.last_index < index) and - session_info.last_definitive), - cls._session_info_list) - - def SetSessionInfo(cls): - """Read data directory and initialize _session_info""" - assert Globals.isbackup_writer - silist = [] - rp_quad_dict = cls.group_rps_by_time(cls.get_relevant_rps()) - times = rp_quad_dict.keys() - times.sort() - for time in times: - try: silist.append(cls.quad_to_si(time, rp_quad_dict[time])) - except ResumeException: - Log("Bad resume information found, skipping", 2) - cls._session_info_list = silist - - def get_relevant_rps(cls): - """Return list of relevant rpaths in rbdata directory""" - relevant_bases = ['last-file-incremented', 'last-file-mirrored', - 'checkpoint-data', 'last-file-definitive'] - rps = map(Globals.rbdir.append, Globals.rbdir.listdir()) - return filter(lambda rp: rp.isincfile() - and rp.getincbase_str() in relevant_bases, rps) - - def group_rps_by_time(cls, rplist): - """Take list of rps return time dict 
{time: quadlist} - - Times in seconds are the keys, values are triples of rps - [last-file-incremented, last-file-mirrored, checkpoint-data, - last-is-definitive]. - - """ - result = {} - for rp in rplist: - time = Time.stringtotime(rp.getinctime()) - if result.has_key(time): quadlist = result[time] - else: quadlist = [None, None, None, None] - base_string = rp.getincbase_str() - if base_string == 'last-file-incremented': quadlist[0] = rp - elif base_string == 'last-file-mirrored': quadlist[1] = rp - elif base_string == 'last-file-definitive': quadlist[3] = 1 - else: - assert base_string == 'checkpoint-data' - quadlist[2] = rp - result[time] = quadlist - return result - - def quad_to_si(cls, time, quad): - """Take time, quadlist, return associated ResumeSessionInfo""" - increment_sym, mirror_sym, checkpoint_rp, last_definitive = quad - if increment_sym and mirror_sym: - raise ResumeException("both mirror and inc sym shouldn't exist") - ITR, finalizer = None, None - if increment_sym: - mirror = None - last_index = cls.sym_to_index(increment_sym) - if checkpoint_rp: - ITR, finalizer = cls.unpickle_checkpoint(checkpoint_rp) - elif mirror_sym: - mirror = 1 - last_index = cls.sym_to_index(mirror_sym) - if checkpoint_rp: - finalizer = cls.unpickle_checkpoint(checkpoint_rp) - else: raise ResumeException("Missing increment or mirror sym") - return ResumeSessionInfo(mirror, time, last_index, last_definitive, - finalizer, ITR) - - def sym_to_index(cls, sym_rp): - """Read last file sym rp, return last file index - - If sym_rp is not a sym at all, return None, indicating that no - file index was ever conclusively processed. - - """ - if not sym_rp.issym(): return None - link_components = sym_rp.readlink().split("/") - assert link_components[0] == 'increments' - return tuple(link_components[1:]) - - def unpickle_checkpoint(cls, checkpoint_rp): - """Read data from checkpoint_rp and return unpickled data - - Return value is pair (patch increment ITR, finalizer state). 
- - """ - fp = checkpoint_rp.open("rb") - data = fp.read() - fp.close() - try: result = cPickle.loads(data) - except Exception, exc: - raise ResumeException("Bad pickle at %s: %s" % - (checkpoint_rp.path, exc)) - return result - - def ResumeCheck(cls): - """Return relevant ResumeSessionInfo if there's one we should resume - - Also if find RSI to resume, reset current time to old resume - time. - - """ - cls.SetSessionInfo() - if not cls._session_info_list: - if Globals.resume == 1: - Log.FatalError("User specified resume, but no data on " - "previous backup found.") - else: return None - else: - si = cls._session_info_list[-1] - if (Globals.resume == 1 or - (time.time() <= (si.time + Globals.resume_window) and - not Globals.resume == 0)): - Log("Resuming aborted backup dated %s" % - Time.timetopretty(si.time), 2) - Time.setcurtime(si.time) - if Globals.preserve_hardlinks: - if (not si.last_definitive or not - Hardlink.retrieve_checkpoint(Globals.rbdir, si.time)): - Log("Hardlink information not successfully " - "recovered.", 2) - return si - else: - Log("Last backup dated %s was aborted, but we aren't " - "resuming it." 
% Time.timetopretty(si.time), 2) - return None - assert None - -static.MakeClass(Resume) - - -class ResumeSessionInfo: - """Hold information about a previously aborted session""" - def __init__(self, mirror, time, last_index, - last_definitive, finalizer = None, ITR = None): - """Class initializer - - time - starting time in seconds of backup - mirror - true if backup was a mirror, false if increment - last_index - Last confirmed index processed by backup, or None - last_definitive - True is we know last_index is really last - finalizer - the dsrp finalizer if available - ITR - For increment, ITM reducer (assume mirror if NA) - - """ - self.time = time - self.mirror = mirror - self.last_index = last_index - self.last_definitive = last_definitive - self.ITR, self.finalizer, = ITR, finalizer - - diff --git a/rdiff-backup/rdiff_backup/rorpiter.py b/rdiff-backup/rdiff_backup/rorpiter.py index f8ba83c..4392ce0 100644 --- a/rdiff-backup/rdiff_backup/rorpiter.py +++ b/rdiff-backup/rdiff_backup/rorpiter.py @@ -53,6 +53,12 @@ def FromRaw(raw_iter): rorp.setfile(getnext(raw_iter)) yield rorp +def getnext(iter): + """Return the next element of an iterator, raising error if none""" + try: next = iter.next() + except StopIteration: raise RORPIterException("Unexpected end to iter") + return next + def ToFile(rorp_iter): """Return file version of iterator""" return iterfile.FileWrappingIter(ToRaw(rorp_iter)) @@ -143,90 +149,23 @@ def Collate2Iters(riter1, riter2): yield (None, relem2) relem2 = None -def getnext(iter): - """Return the next element of an iterator, raising error if none""" - try: next = iter.next() - except StopIteration: raise RORPIterException("Unexpected end to iter") - return next - -def get_dissimilar_indicies(src_init_iter, dest_init_iter): +def get_dissimilar_indicies(src_init_iter, dest_init_iter, statfileobj = None): """Get dissimilar indicies given two rorpiters Returns an iterator which enumerates the indicies of the rorps - which are different on the 
source and destination ends. + which are different on the source and destination ends. If + statfileobj is given, call add_changed on each pair of different + indicies. """ collated = Collate2Iters(src_init_iter, dest_init_iter) for src_rorp, dest_rorp in collated: - if not src_rorp: yield dest_rorp.index - elif not dest_rorp: yield src_rorp.index - elif not src_rorp == dest_rorp: yield dest_rorp.index - elif (Globals.preserve_hardlinks and not - Hardlink.rorp_eq(src_rorp, dest_rorp)): yield dest_rorp.index - -def GetDiffIter(sig_iter, new_iter): - """Return delta iterator from sig_iter to new_iter - - The accompanying file for each will be a delta as produced by - rdiff, unless the destination file does not exist, in which - case it will be the file in its entirety. - - sig_iter may be composed of rorps, but new_iter should have - full RPaths. - - """ - collated_iter = CollateIterators(sig_iter, new_iter) - for rorp, rp in collated_iter: yield diffonce(rorp, rp) - -def diffonce(sig_rorp, new_rp): - """Return one diff rorp, based from signature rorp and orig rp""" - if sig_rorp and Globals.preserve_hardlinks and sig_rorp.isflaglinked(): - if new_rp: diff_rorp = new_rp.getRORPath() - else: diff_rorp = rpath.RORPath(sig_rorp.index) - diff_rorp.flaglinked() - return diff_rorp - elif sig_rorp and sig_rorp.isreg() and new_rp and new_rp.isreg(): - diff_rorp = new_rp.getRORPath() - #fp = sig_rorp.open("rb") - #print "---------------------", fp - #tmp_sig_rp = RPath(Globals.local_connection, "/tmp/sig") - #tmp_sig_rp.delete() - #tmp_sig_rp.write_from_fileobj(fp) - #diff_rorp.setfile(Rdiff.get_delta_sigfileobj(tmp_sig_rp.open("rb"), - # new_rp)) - diff_rorp.setfile(Rdiff.get_delta_sigfileobj(sig_rorp.open("rb"), - new_rp)) - diff_rorp.set_attached_filetype('diff') - return diff_rorp - else: - # Just send over originial if diff isn't appropriate - if sig_rorp: sig_rorp.close_if_necessary() - if not new_rp: return rpath.RORPath(sig_rorp.index) - elif new_rp.isreg(): - 
diff_rorp = new_rp.getRORPath(1) - diff_rorp.set_attached_filetype('snapshot') - return diff_rorp - else: return new_rp.getRORPath() - -def patchonce_action(base_rp, basisrp, diff_rorp): - """Return action patching basisrp using diff_rorp""" - assert diff_rorp, "Missing diff index %s" % basisrp.index - if not diff_rorp.lstat(): - return robust.Action(None, lambda init_val: basisrp.delete(), None) - - if Globals.preserve_hardlinks and diff_rorp.isflaglinked(): - if not basisrp: basisrp = base_rp.new_index(diff_rorp.index) - tf = TempFile.new(basisrp) - def init(): Hardlink.link_rp(diff_rorp, tf, basisrp) - return robust.make_tf_robustaction(init, tf, basisrp) - elif basisrp and basisrp.isreg() and diff_rorp.isreg(): - if diff_rorp.get_attached_filetype() != 'diff': - raise rpath.RPathException("File %s appears to have changed during" - " processing, skipping" % (basisrp.path,)) - return Rdiff.patch_with_attribs_action(basisrp, diff_rorp) - else: # Diff contains whole file, just copy it over - if not basisrp: basisrp = base_rp.new_index(diff_rorp.index) - return robust.copy_with_attribs_action(diff_rorp, basisrp) + if (src_rorp and dest_rorp and src_rorp == dest_rorp and + (not Globals.preserve_hardlinks or + Hardlink.rorp_eq(src_rorp, dest_rorp))): continue + if statfileobj: statfileobj.add_changed(src_rorp, dest_rorp) + if not dest_rorp: yield src_rorp.index + else: yield dest_rorp.index class IndexedTuple(UserList.UserList): @@ -277,12 +216,15 @@ def FillInIter(rpiter, rootrp): (2,5). This is used when we need to process directories before or after processing a file in that directory. + If start_index is given, start with start_index instead of (). + The indicies of rest of the rorps should also start with + start_index. 
+ """ # Handle first element as special case first_rp = rpiter.next() # StopIteration gets passed upwards cur_index = first_rp.index - for i in range(len(cur_index)): - yield rootrp.new_index(cur_index[:i]) + for i in range(len(cur_index)): yield rootrp.new_index(cur_index[:i]) yield first_rp del first_rp old_index = cur_index @@ -294,7 +236,6 @@ def FillInIter(rpiter, rootrp): for i in range(1, len(cur_index)): # i==0 case already handled if cur_index[:i] != old_index[:i]: filler_rp = rootrp.new_index(cur_index[:i]) - assert filler_rp.isdir(), "This shouldn't be possible" yield filler_rp yield rp old_index = cur_index @@ -318,6 +259,7 @@ class IterTreeReducer: self.index = None self.root_branch = branch_class(*branch_args) self.branches = [self.root_branch] + self.root_fast_processed = None def finish_branches(self, index): """Run Finish() on all branches index has passed @@ -355,6 +297,7 @@ class IterTreeReducer: def Finish(self): """Call at end of sequence to tie everything up""" + if self.index is None or self.root_fast_processed: return while 1: to_be_finished = self.branches.pop() to_be_finished.call_end_proc() @@ -375,7 +318,10 @@ class IterTreeReducer: index = args[0] if self.index is None: self.root_branch.base_index = index - self.process_w_branch(self.root_branch, args) + if self.root_branch.can_fast_process(*args): + self.root_branch.fast_process(*args) + self.root_fast_processed = 1 + else: self.process_w_branch(self.root_branch, args) self.index = index return 1 @@ -389,7 +335,8 @@ class IterTreeReducer: last_branch = self.branches[-1] if last_branch.start_successful: if last_branch.can_fast_process(*args): - last_branch.fast_process(*args) + robust.check_common_error(last_branch.on_error, + last_branch.fast_process, args) else: branch = self.add_branch(index) self.process_w_branch(branch, args) @@ -452,7 +399,7 @@ class ITRBranch: def log_prev_error(self, index): """Call function if no pending exception""" - log.Log("Skipping %s because of previous 
error" % - (os.path.join(*index),), 2) + log.Log("Skipping %s because of previous error" % \ + (index and os.path.join(*index) or '()',), 2) diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index d0b1156..49b9d2a 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -35,7 +35,7 @@ are dealing with are local or remote. """ -import os, stat, re, sys, shutil, gzip, socket, time, shutil +import os, stat, re, sys, shutil, gzip, socket, time import Globals, FilenameMapping, Time, static, log @@ -81,7 +81,7 @@ def move(rpin, rpout): copy(rpin, rpout) rpin.delete() -def copy(rpin, rpout): +def copy(rpin, rpout, compress = 0): """Copy RPath rpin to rpout. Works for symlinks, dirs, etc.""" log.Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6) if not rpin.lstat(): @@ -93,7 +93,7 @@ def copy(rpin, rpout): rpout.delete() # easier to write that compare else: return - if rpin.isreg(): copy_reg_file(rpin, rpout) + if rpin.isreg(): copy_reg_file(rpin, rpout, compress) elif rpin.isdir(): rpout.mkdir() elif rpin.issym(): rpout.symlink(rpin.readlink()) elif rpin.ischardev(): @@ -106,15 +106,16 @@ def copy(rpin, rpout): elif rpin.issock(): rpout.mksock() else: raise RPathException("File %s has unknown type" % rpin.path) -def copy_reg_file(rpin, rpout): +def copy_reg_file(rpin, rpout, compress = 0): """Copy regular file rpin to rpout, possibly avoiding connection""" try: - if rpout.conn is rpin.conn: - rpout.conn.shutil.copyfile(rpin.path, rpout.path) + if (rpout.conn is rpin.conn and + rpout.conn is not Globals.local_connection): + rpout.conn.rpath.copy_reg_file(rpin.path, rpout.path, compress) rpout.setdata() return except AttributeError: pass - rpout.write_from_fileobj(rpin.open("rb")) + rpout.write_from_fileobj(rpin.open("rb"), compress = compress) def cmp(rpin, rpout): """True if rpin has the same data as rpout @@ -179,9 +180,9 @@ def cmp_attribs(rp1, rp2): (rp1.path, rp2.path, result), 7) return 
result -def copy_with_attribs(rpin, rpout): +def copy_with_attribs(rpin, rpout, compress = 0): """Copy file and then copy over attributes""" - copy(rpin, rpout) + copy(rpin, rpout, compress) if rpin.lstat(): copy_attribs(rpin, rpout) def quick_cmp_with_attribs(rp1, rp2): @@ -278,9 +279,9 @@ class RORPath: self.data[key] != other.data[key]): return None return 1 - def equal_verbose(self, other): + def equal_verbose(self, other, check_index = 1): """Like __eq__, but log more information. Useful when testing""" - if self.index != other.index: + if check_index and self.index != other.index: log.Log("Index %s != index %s" % (self.index, other.index), 2) return None @@ -372,6 +373,10 @@ class RORPath: """Return permission block of file""" return self.data['perms'] + def hassize(self): + """True if rpath has a size parameter""" + return self.data.has_key('size') + def getsize(self): """Return length of file in bytes""" return self.data['size'] @@ -398,7 +403,8 @@ class RORPath: def getnumlinks(self): """Number of places inode is linked to""" - return self.data['nlink'] + try: return self.data['nlink'] + except KeyError: return 1 def readlink(self): """Wrapper around os.readlink()""" @@ -446,9 +452,13 @@ class RORPath: """ return self.data.has_key('linked') - def flaglinked(self): + def get_link_flag(self): + """Return previous index that a file is hard linked to""" + return self.data['linked'] + + def flaglinked(self, index): """Signal that rorp is a signature/diff for a hardlink file""" - self.data['linked'] = 1 + self.data['linked'] = index def open(self, mode): """Return file type object if any was given using self.setfile""" @@ -742,7 +752,6 @@ class RPath(RORPath): def append_path(self, ext, new_index = ()): """Like append, but add ext to path instead of to index""" - assert not self.index # doesn't make sense if index isn't () return self.__class__(self.conn, "/".join((self.base, ext)), new_index) def new_index(self, index): @@ -822,8 +831,8 @@ class RPath(RORPath): 
return self.inc_type def getinctime(self): - """Return timestring of an increment file""" - return self.inc_timestr + """Return time in seconds of an increment file""" + return Time.stringtotime(self.inc_timestr) def getincbase(self): """Return the base filename of an increment file in rp form""" @@ -862,22 +871,4 @@ class RPathFileHook: self.closing_thunk() return result - -# Import these late to avoid circular dependencies -#import FilenameMapping -#from lazy import * -#from selection import * -#from highlevel import * - -#class RpathDeleter(ITRBranch): -# """Delete a directory. Called by RPath.delete()""" -# def start_process(self, index, rp): -# self.rp = rp -# -# def end_process(self): -# if self.rp.isdir(): self.rp.rmdir() -# else: self.rp.delete() -# -# def can_fast_process(self, index, rp): return not rp.isdir() -# def fast_process(self, index, rp): rp.delete() diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py index 4ca6863..d58735b 100644 --- a/rdiff-backup/rdiff_backup/selection.py +++ b/rdiff-backup/rdiff_backup/selection.py @@ -94,29 +94,20 @@ class Select: self.prefix = self.rpath.path self.quoting_on = Globals.quoting_enabled and quoted_filenames - def set_iter(self, starting_index = None, iterate_parents = None, - sel_func = None): + def set_iter(self, iterate_parents = None, sel_func = None): """Initialize more variables, get ready to iterate - Will iterate indicies greater than starting_index. If - iterate_parents is true, will also include parents of - starting_index in iteration. Selection function sel_func is - called on each rpath and is usually self.Select. Returns self - just for convenience. + Selection function sel_func is called on each rpath and is + usually self.Select. Returns self just for convenience. 
""" if not sel_func: sel_func = self.Select self.rpath.setdata() # this may have changed since Select init - if starting_index is not None: - self.starting_index = starting_index - self.iter = self.iterate_starting_from(self.rpath, - self.iterate_starting_from, sel_func) - elif self.quoting_on: + if self.quoting_on: self.iter = self.Iterate(self.rpath, self.Iterate, sel_func) else: self.iter = self.Iterate_fast(self.rpath, sel_func) # only iterate parents if we are not starting from beginning - self.iterate_parents = starting_index is not None and iterate_parents self.next = self.iter.next self.__iter__ = lambda: self return self @@ -149,6 +140,7 @@ class Select: elif s == 2 and new_rpath.isdir(): yield (new_rpath, 1) yield rpath + if not rpath.isdir(): return diryield_stack = [diryield(rpath)] delayed_rp_stack = [] @@ -214,26 +206,6 @@ class Select: for rp in rec_func(new_rp, rec_func, sel_func): yield rp - def iterate_starting_from(self, rpath, rec_func, sel_func): - """Like Iterate, but only yield indicies > self.starting_index""" - if rpath.index > self.starting_index: # past starting_index - for rp in self.Iterate(rpath, self.Iterate, sel_func): - yield rp - elif (rpath.index == self.starting_index[:len(rpath.index)] - and rpath.isdir()): - # May encounter starting index on this branch - if self.iterate_parents: yield rpath - for rp in self.iterate_in_dir(rpath, self.iterate_starting_from, - sel_func): yield rp - -# def iterate_with_finalizer(self): -# """Like Iterate, but missing some options, and add finalizer""" -# finalize = IterTreeReducer(DestructiveSteppingFinalizer, ()) -# for rp in self: -# yield rp -# finalize(rp.index, rp)) -# finalize.Finish() - def Select(self, rp): """Run through the selection functions and return dominant val 0/1/2""" for sf in self.selection_functions: diff --git a/rdiff-backup/rdiff_backup/statistics.py b/rdiff-backup/rdiff_backup/statistics.py index c1bf55b..068edda 100644 --- a/rdiff-backup/rdiff_backup/statistics.py +++ 
b/rdiff-backup/rdiff_backup/statistics.py @@ -19,8 +19,8 @@ """Generate and process aggregated backup information""" -import re, os -import Globals, TempFile, robust, Time, rorpiter +import re, os, time +import Globals, TempFile, robust, Time, rorpiter, increment class StatsException(Exception): pass @@ -73,6 +73,10 @@ class StatsObj: """Add 1 to value of attribute""" self.__dict__[attr] += 1 + def add_to_stat(self, attr, value): + """Add value to given attribute""" + self.__dict__[attr] += value + def get_total_dest_size_change(self): """Return total destination size change @@ -215,12 +219,9 @@ class StatsObj: def write_stats_to_rp(self, rp): """Write statistics string to given rpath""" - tf = TempFile.new(rp) - def init_thunk(): - fp = tf.open("w") - fp.write(self.get_stats_string()) - fp.close() - robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute() + fp = rp.open("wb") + fp.write(self.get_stats_string()) + assert not fp.close() def read_stats_from_rp(self, rp): """Set statistics from rpath, return self for convenience""" @@ -263,81 +264,81 @@ class StatsObj: return s -class ITRB(rorpiter.ITRBranch, StatsObj): - """Keep track of per directory statistics - - This is subclassed by the mirroring and incrementing ITRs. - - """ - def __init__(self): - """StatsITR initializer - zero out statistics""" - attr_dict = self.__dict__ - for attr in StatsObj.stat_file_attrs: attr_dict[attr] = 0 - self.ElapsedTime = self.Filename = None - - def start_stats(self, mirror_dsrp): - """Record status of mirror dsrp +class StatFileObj(StatsObj): + """Build on StatsObj, add functions for processing files""" + def __init__(self, start_time = None): + """StatFileObj initializer - zero out file attributes""" + StatsObj.__init__(self) + for attr in self.stat_file_attrs: self.set_stat(attr, 0) + if start_time is None: start_time = Time.curtime + self.StartTime = start_time + self.Errors = 0 - This is called before the mirror is processed so we remember - the old state. 
- - """ - if mirror_dsrp.lstat(): - self.mirror_base_exists = 1 - self.mirror_base_size = self.stats_getsize(mirror_dsrp) - else: self.mirror_base_exists = None - - def stats_getsize(self, rp): - """Return size of rp, with error checking""" - try: return rp.getsize() - except KeyError: return 0 - - def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None): - """Set various statistics after mirror processed""" - if mirror_dsrp.lstat(): - source_size = self.stats_getsize(mirror_dsrp) - self.SourceFiles += 1 - self.SourceFileSize += source_size - if self.mirror_base_exists: - self.MirrorFiles += 1 - self.MirrorFileSize += self.mirror_base_size - if diff_rorp: # otherwise no change - self.ChangedFiles += 1 - self.ChangedSourceSize += source_size - self.ChangedMirrorSize += self.mirror_base_size - self.stats_incr_incfiles(inc_rp) - else: # new file was created - self.NewFiles += 1 - self.NewFileSize += source_size - self.stats_incr_incfiles(inc_rp) - else: - if self.mirror_base_exists: # file was deleted from mirror - self.MirrorFiles += 1 - self.MirrorFileSize += self.mirror_base_size - self.DeletedFiles += 1 - self.DeletedFileSize += self.mirror_base_size - self.stats_incr_incfiles(inc_rp) - - def fast_process(self, mirror_rorp): - """Use when there is no change from source to mirror""" - source_size = self.stats_getsize(mirror_rorp) + def add_source_file(self, src_rorp): + """Add stats of source file""" self.SourceFiles += 1 - self.MirrorFiles += 1 - self.SourceFileSize += source_size - self.MirrorFileSize += source_size - - def stats_incr_incfiles(self, inc_rp): - """Increment IncrementFile statistics""" - if inc_rp: - self.IncrementFiles += 1 - self.IncrementFileSize += self.stats_getsize(inc_rp) - - def add_file_stats(self, branch): - """Add all file statistics from branch to current totals""" - for attr in self.stat_file_attrs: - self.__dict__[attr] += branch.__dict__[attr] - - - - + if src_rorp.isreg(): self.SourceFileSize += src_rorp.getsize() + def 
add_dest_file(self, dest_rorp): + """Add stats of destination size""" + self.MirrorFiles += 1 + if dest_rorp.isreg(): self.MirrorFileSize += dest_rorp.getsize() + + def add_changed(self, src_rorp, dest_rorp): + """Update stats when src_rorp changes to dest_rorp""" + if src_rorp and src_rorp.lstat() and dest_rorp and dest_rorp.lstat(): + self.ChangedFiles += 1 + if src_rorp.isreg(): self.ChangedSourceSize += src_rorp.getsize() + if dest_rorp.isreg(): self.ChangedMirrorSize += dest_rorp.getsize() + elif src_rorp and src_rorp.lstat(): + self.NewFiles += 1 + if src_rorp.isreg(): self.NewFileSize += src_rorp.getsize() + elif dest_rorp and dest_rorp.lstat(): + self.DeletedFiles += 1 + if dest_rorp.isreg(): self.DeletedFileSize += dest_rorp.getsize() + + def add_increment(self, inc_rorp): + """Update stats with increment rorp""" + self.IncrementFiles += 1 + if inc_rorp.isreg(): self.IncrementFileSize += inc_rorp.getsize() + + def add_error(self): + """Increment error stat by 1""" + self.Errors += 1 + + def finish(self, end_time = None): + """Record end time and set other stats""" + if end_time is None: end_time = time.time() + self.EndTime = end_time + + +_active_statfileobj = None +def init_statfileobj(): + """Return new stat file object, record as active stat object""" + global _active_statfileobj + assert not _active_statfileobj, _active_statfileobj + _active_statfileobj = StatFileObj() + return _active_statfileobj + +def get_active_statfileobj(): + """Return active stat file object if it exists""" + if _active_statfileobj: return _active_statfileobj + else: return None + +def record_error(): + """Record error on active statfileobj, if there is one""" + if _active_statfileobj: _active_statfileobj.add_error() + +def process_increment(inc_rorp): + """Add statistics of increment rp incrp if there is active statfile""" + if _active_statfileobj: _active_statfileobj.add_increment(inc_rorp) + +def write_active_statfileobj(): + """Write active StatFileObj object to session 
statistics file""" + global _active_statfileobj + assert _active_statfileobj + rp_base = Globals.rbdir.append("session_statistics") + session_stats_rp = increment.get_inc_ext(rp_base, 'data', Time.curtime) + _active_statfileobj.finish() + _active_statfileobj.write_stats_to_rp(session_stats_rp) + _active_statfileobj = None diff --git a/rdiff-backup/testing/benchmark.py b/rdiff-backup/testing/benchmark.py new file mode 100644 index 0000000..ccd9f0b --- /dev/null +++ b/rdiff-backup/testing/benchmark.py @@ -0,0 +1,141 @@ +import sys, time +from commontest import * +from rdiff_backup import rpath, Globals + +"""benchmark.py + +When possible, use 'rdiff-backup' from the shell, which allows using +different versions of rdiff-backup by altering the PYTHONPATH. We +just use clock time, so this isn't exact at all. + +""" + +output_local = 1 +output_desc = "testfiles/output" +new_pythonpath = None + +def run_cmd(cmd): + """Run the given cmd, return the amount of time it took""" + if new_pythonpath: full_cmd = "PYTHONPATH=%s %s" % (new_pythonpath, cmd) + else: full_cmd = cmd + print "Running command '%s'" % (full_cmd,) + t = time.time() + assert not os.system(full_cmd) + return time.time() - t + +def create_many_files(dirname, s, count = 1000): + """Create many short files in the dirname directory + + There will be count files in the directory, and each file will + contain the string s. 
+ + """ + Myrm("testfiles/many_out") + dir_rp = rpath.RPath(Globals.local_connection, dirname) + dir_rp.mkdir() + for i in xrange(count): + rp = dir_rp.append(str(i)) + fp = rp.open("wb") + fp.write(s) + assert not fp.close() + +def create_nested(dirname, s, depth, branch_factor = 10): + """Create many short files in branching directory""" + def write(rp): + fp = rp.open("wb") + fp.write(s) + assert not fp.close() + + def helper(rp, depth): + rp.mkdir() + sub_rps = map(lambda i: rp.append(str(i)), range(branch_factor)) + if depth == 1: map(write, sub_rps) + else: map(lambda rp: helper(rp, depth-1), sub_rps) + + Myrm("testfiles/nested_out") + helper(rpath.RPath(Globals.local_connection, dirname), depth) + +def benchmark(backup_cmd, restore_cmd, desc, update_func = None): + """Print benchmark using backup_cmd and restore_cmd + + If update_func is given, run it and then do backup a third time. + + """ + print "Initially backing up %s: %ss" % (desc, run_cmd(backup_cmd)) + print "Updating %s, no change: %ss" % (desc, run_cmd(backup_cmd)) + + if update_func: + update_func() + print "Updating %s, all changed: %ss" % (desc, run_cmd(backup_cmd)) + + Myrm("testfiles/rest_out") + print "Restoring %s to empty dir: %ss" % (desc, run_cmd(restore_cmd)) + print "Restoring %s to unchanged dir: %ss" % (desc, run_cmd(restore_cmd)) + +def many_files(): + """Time backup and restore of 2000 files""" + count = 2000 + create_many_files("testfiles/many_out", "a", count) + backup_cmd = "rdiff-backup testfiles/many_out " + output_desc + restore_cmd = "rdiff-backup --force -r now %s testfiles/rest_out" % \ + (output_desc,) + update_func = lambda: create_many_files("testfiles/many_out", "e", count) + benchmark(backup_cmd, restore_cmd, "2000 1-byte files", update_func) + +def many_files_rsync(): + """Test rsync benchmark""" + count = 2000 + create_many_files("testfiles/many_out", "a", count) + rsync_command = ("rsync -e ssh -aH --delete testfiles/many_out " + + output_desc) + print "Initial 
rsync: %ss" % (run_cmd(rsync_command),) + print "rsync update: %ss" % (run_cmd(rsync_command),) + + create_many_files("testfiles/many_out", "e", count) + print "Update changed rsync: %ss" % (run_cmd(rsync_command),) + +def nested_files(): + """Time backup and restore of 10000 nested files""" + depth = 4 + create_nested("testfiles/nested_out", "a", depth) + backup_cmd = "rdiff-backup testfiles/nested_out " + output_desc + restore_cmd = "rdiff-backup --force -r now %s testfiles/rest_out" % \ + (output_desc,) + update_func = lambda: create_nested("testfiles/nested_out", "e", depth) + benchmark(backup_cmd, restore_cmd, "10000 1-byte nested files", + update_func) + +def nested_files_rsync(): + """Test rsync on nested files""" + depth = 4 + create_nested("testfiles/nested_out", "a", depth) + rsync_command = ("rsync -e ssh -aH --delete testfiles/nested_out " + + output_desc) + print "Initial rsync: %ss" % (run_cmd(rsync_command),) + print "rsync update: %ss" % (run_cmd(rsync_command),) + + create_nested("testfiles/nested_out", "e", depth) + print "Update changed rsync: %ss" % (run_cmd(rsync_command),) + +if len(sys.argv) < 2 or len(sys.argv) > 3: + print "Syntax: benchmark.py benchmark_func [output_description]" + print + print "Where output_description defaults to 'testfiles/output'." + print "Currently benchmark_func includes:" + print "'many_files', 'many_files_rsync', and, 'nested_files'." 
+ sys.exit(1) + +if len(sys.argv) == 3: + output_desc = sys.argv[2] + if ":" in output_desc: output_local = None + +if output_local: + assert not rpath.RPath(Globals.local_connection, output_desc).lstat(), \ + "Outfile file %s exists, try deleting it first" % (output_desc,) + +if os.environ.has_key('BENCHMARKPYPATH'): + new_pythonpath = os.environ['BENCHMARKPYPATH'] + +function_name = sys.argv[1] +print "Running ", function_name +eval(sys.argv[1])() diff --git a/rdiff-backup/testing/commontest.py b/rdiff-backup/testing/commontest.py index 19b2c60..dae825a 100644 --- a/rdiff-backup/testing/commontest.py +++ b/rdiff-backup/testing/commontest.py @@ -3,7 +3,7 @@ import os, sys from rdiff_backup.log import Log from rdiff_backup.rpath import RPath from rdiff_backup import Globals, Hardlink, SetConnections, Main, \ - selection, highlevel, lazy, Time, rpath + selection, lazy, Time, rpath SourceDir = "../src" AbsCurdir = os.getcwd() # Absolute path name of current directory @@ -13,6 +13,9 @@ __no_execute__ = 1 # Keeps the actual rdiff-backup program from running def Myrm(dirstring): """Run myrm on given directory string""" + root_rp = rpath.RPath(Globals.local_connection, dirstring) + for rp in selection.Select(root_rp).set_iter(): + if rp.isdir(): rp.chmod(0700) # otherwise may not be able to remove assert not os.system("rm -rf %s" % (dirstring,)) def Make(): @@ -21,6 +24,13 @@ def Make(): os.system("python ./Make") os.chdir(AbsCurdir) +def MakeOutputDir(): + """Initialize the testfiles/output directory""" + Myrm("testfiles/output") + rp = rpath.RPath(Globals.local_connection, "testfiles/output") + rp.mkdir() + return rp + def rdiff_backup(source_local, dest_local, src_dir, dest_dir, current_time = None, extra_options = ""): """Run rdiff-backup with the given options @@ -121,6 +131,7 @@ def InternalRestore(mirror_local, dest_local, mirror_dir, dest_dir, time): the testing directory and will be modified for remote trials. 
""" + Main.force = 1 remote_schema = '%s' #_reset_connections() if not mirror_local: diff --git a/rdiff-backup/testing/finaltest.py b/rdiff-backup/testing/finaltest.py index ba1128e..ae48462 100644 --- a/rdiff-backup/testing/finaltest.py +++ b/rdiff-backup/testing/finaltest.py @@ -1,11 +1,11 @@ import unittest, os, re, sys, time from commontest import * -from rdiff_backup import Globals, log, rpath +from rdiff_backup import Globals, log, rpath, robust """Regression tests""" Globals.exclude_mirror_regexps = [re.compile(".*/rdiff-backup-data")] -log.Log.setverbosity(7) +log.Log.setverbosity(3) lc = Globals.local_connection @@ -43,7 +43,7 @@ class PathSetter(unittest.TestCase): def reset_schema(self): self.rb_schema = SourceDir + \ - "/../rdiff-backup -v3 --remote-schema './chdir-wrapper2 %s' " + "/../rdiff-backup -v7 --remote-schema './chdir-wrapper2 %s' " def refresh(self, *rp_list): """Reread data for the given rps""" @@ -168,8 +168,9 @@ class PathSetter(unittest.TestCase): def getinc_paths(self, basename, directory): """Return increment.______.dir paths""" - incfiles = filter(lambda s: s.startswith(basename), - os.listdir(directory)) + dirrp = rpath.RPath(Globals.local_connection, directory) + incfiles = [filename for filename in robust.listrp(dirrp) + if filename.startswith(basename)] incfiles.sort() incrps = map(lambda f: rpath.RPath(lc, directory+"/"+f), incfiles) return map(lambda x: x.path, filter(rpath.RPath.isincfile, incrps)) @@ -196,26 +197,15 @@ class Final(PathSetter): self.set_connections(None, None, "test2/tmp", "../../") self.runtest() -# def testMirroringLocal(self): -# """Run mirroring only everything remote""" -# self.delete_tmpdirs() -# self.set_connections(None, None, None, None) -# self.exec_rb_extra_args(10000, "-m", -# "testfiles/various_file_types", -# "testfiles/output") -# assert CompareRecursive(Local.vftrp, Local.rpout, exclude_rbdir = None) - -# def testMirroringRemote(self): -# """Run mirroring only everything remote""" -# 
self.delete_tmpdirs() -# self.set_connections("test1/", "../", "test2/tmp/", "../../") -# self.exec_rb_extra_args(10000, "-m", -# "testfiles/various_file_types", -# "testfiles/output") -# assert CompareRecursive(Local.vftrp, Local.rpout, exclude_rbdir = None) + def testProcLocal(self): + """Test initial backup of /proc locally""" + Myrm("testfiles/procoutput") + self.set_connections(None, None, None, None) + self.exec_rb(None, '../../../../../../proc', 'testfiles/procoutput') def testProcRemote(self): """Test mirroring proc""" + Myrm("testfiles/procoutput") self.set_connections(None, None, "test2/tmp/", "../../") self.exec_rb(None, '../../../../../../proc', 'testfiles/procoutput') @@ -352,44 +342,5 @@ testfiles/increment2/changed_dir""") self.assertRaises(OSError, os.lstat, 'testfiles/restoretarget1/executable2') - -class FinalCorrupt(PathSetter): - def testBackupOverlay(self): - """Test backing up onto a directory already backed up for that time - - This will test to see if rdiff-backup will ignore files who - already have an increment where it wants to put something. - Just make sure rdiff-backup doesn't exit with an error. 
- - """ - self.delete_tmpdirs() - assert not os.system("cp -a testfiles/corruptbackup testfiles/output") - self.set_connections(None, None, None, None) - self.exec_rb(None, 'testfiles/corruptbackup_source', - 'testfiles/output') - - def testBackupOverlayRemote(self): - """Like above but destination is remote""" - self.delete_tmpdirs() - assert not os.system("cp -a testfiles/corruptbackup testfiles/output") - self.set_connections(None, None, "test1/", '../') - self.exec_rb(None, 'testfiles/corruptbackup_source', - 'testfiles/output') - - def testCheckpointData(self): - """Destination directory has bad checkpoint data, no sym""" - self.delete_tmpdirs() - assert not os.system("cp -a testfiles/corrupt_dest1 testfiles/output") - self.set_connections(None, None, None, None) - self.exec_rb(None, 'testfiles/various_file_types', 'testfiles/output') - - def testCheckpointData2(self): - """Destination directory has bad checkpoint data, with sym""" - self.delete_tmpdirs() - assert not os.system("cp -a testfiles/corrupt_dest2 testfiles/output") - self.set_connections(None, None, None, None) - self.exec_rb(None, 'testfiles/various_file_types', 'testfiles/output') - - if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/hardlinktest.py b/rdiff-backup/testing/hardlinktest.py index 2b70ed4..a675e8a 100644 --- a/rdiff-backup/testing/hardlinktest.py +++ b/rdiff-backup/testing/hardlinktest.py @@ -1,5 +1,4 @@ - -import os, unittest +import os, unittest, time from commontest import * from rdiff_backup import Globals, Hardlink, selection, rpath @@ -30,48 +29,6 @@ class HardlinkTest(unittest.TestCase): assert not CompareRecursive(self.hardlink_dir1, self.hardlink_dir2, compare_hardlinks = 1) - def testCheckpointing(self): - """Test saving and recovering of various dictionaries""" - d1 = {1:1} - d2 = {2:2} - d3 = {3:3} - d4 = {} - - Hardlink._src_inode_indicies = d1 - Hardlink._src_index_indicies = d2 - Hardlink._dest_inode_indicies = d3 - 
Hardlink._dest_index_indicies = d4 - - self.reset_output() - Time.setcurtime(12345) - Globals.isbackup_writer = 1 - Hardlink.final_checkpoint(self.outputrp) - - reset_hardlink_dicts() - assert Hardlink.retrieve_checkpoint(self.outputrp, 12345) - assert Hardlink._src_inode_indicies == d1, \ - Hardlink._src_inode_indicies - assert Hardlink._src_index_indicies == d2, \ - Hardlink._src_index_indicies - assert Hardlink._dest_inode_indicies == d3, \ - Hardlink._dest_inode_indicies - assert Hardlink._dest_index_indicies == d4, \ - Hardlink._dest_index_indicies - - def testFinalwrite(self): - """Test writing of the final database""" - Globals.isbackup_writer = 1 - Time.setcurtime(123456) - Globals.rbdir = self.outputrp - finald = Hardlink._src_index_indicies = {'hello':'world'} - - self.reset_output() - Hardlink.final_writedata() - - Hardlink._src_index_indicies = None - assert Hardlink.retrieve_final(123456) - assert Hardlink._src_index_indicies == finald - def testBuildingDict(self): """See if the partial inode dictionary is correct""" Globals.preserve_hardlinks = 1 @@ -143,6 +100,74 @@ class HardlinkTest(unittest.TestCase): BackupRestoreSeries(None, None, dirlist, compare_hardlinks=1) BackupRestoreSeries(1, 1, dirlist, compare_hardlinks=1) + def testInnerRestore(self): + """Restore part of a dir, see if hard links preserved""" + MakeOutputDir() + output = rpath.RPath(Globals.local_connection, + "testfiles/output") + + # Now set up directories out_hardlink1 and out_hardlink2 + hlout1 = rpath.RPath(Globals.local_connection, + "testfiles/out_hardlink1") + if hlout1.lstat(): hlout1.delete() + hlout1.mkdir() + hlout1_sub = hlout1.append("subdir") + hlout1_sub.mkdir() + hl1_1 = hlout1_sub.append("hardlink1") + hl1_2 = hlout1_sub.append("hardlink2") + hl1_3 = hlout1_sub.append("hardlink3") + hl1_4 = hlout1_sub.append("hardlink4") + # 1 and 2 are hard linked, as are 3 and 4 + hl1_1.touch() + hl1_2.hardlink(hl1_1.path) + hl1_3.touch() + hl1_4.hardlink(hl1_3.path) + + hlout2 = 
rpath.RPath(Globals.local_connection, + "testfiles/out_hardlink2") + if hlout2.lstat(): hlout2.delete() + assert not os.system("cp -a testfiles/out_hardlink1 " + "testfiles/out_hardlink2") + hlout2_sub = hlout2.append("subdir") + hl2_1 = hlout2_sub.append("hardlink1") + hl2_2 = hlout2_sub.append("hardlink2") + hl2_3 = hlout2_sub.append("hardlink3") + hl2_4 = hlout2_sub.append("hardlink4") + # Now 2 and 3 are hard linked, also 1 and 4 + rpath.copy_with_attribs(hl1_1, hl2_1) + rpath.copy_with_attribs(hl1_2, hl2_2) + hl2_3.delete() + hl2_3.hardlink(hl2_2.path) + hl2_4.delete() + hl2_4.hardlink(hl2_1.path) + rpath.copy_attribs(hlout1_sub, hlout2_sub) + + InternalBackup(1, 1, hlout1.path, output.path) + time.sleep(1) + InternalBackup(1, 1, hlout2.path, output.path) + + out2 = rpath.RPath(Globals.local_connection, "testfiles/out2") + hlout1 = out2.append("hardlink1") + hlout2 = out2.append("hardlink2") + hlout3 = out2.append("hardlink3") + hlout4 = out2.append("hardlink4") + + if out2.lstat(): out2.delete() + InternalRestore(1, 1, "testfiles/output/subdir", "testfiles/out2", 1) + out2.setdata() + for rp in [hlout1, hlout2, hlout3, hlout4]: rp.setdata() + assert hlout1.getinode() == hlout2.getinode() + assert hlout3.getinode() == hlout4.getinode() + assert hlout1.getinode() != hlout3.getinode() + + if out2.lstat(): out2.delete() + InternalRestore(1, 1, "testfiles/output/subdir", "testfiles/out2", + int(time.time())) + out2.setdata() + for rp in [hlout1, hlout2, hlout3, hlout4]: rp.setdata() + assert hlout1.getinode() == hlout4.getinode() + assert hlout2.getinode() == hlout3.getinode() + assert hlout1.getinode() != hlout2.getinode() if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/incrementtest.py b/rdiff-backup/testing/incrementtest.py index 0aa52ea..165c55f 100644 --- a/rdiff-backup/testing/incrementtest.py +++ b/rdiff-backup/testing/incrementtest.py @@ -1,7 +1,6 @@ import unittest, os, re, time from commontest import * -from rdiff_backup 
import log, rpath, restore, increment, Time, \ - Rdiff, statistics +from rdiff_backup import log, rpath, increment, Time, Rdiff, statistics lc = Globals.local_connection Globals.change_source_perms = 1 @@ -21,14 +20,14 @@ dir = getrp(".") sym = getrp("symbolic_link") nothing = getrp("nothing") -target = rpath.RPath(lc, "testfiles/out") -out2 = rpath.RPath(lc, "testfiles/out2") -out_gz = rpath.RPath(lc, "testfiles/out.gz") +target = rpath.RPath(lc, "testfiles/output/out") +out2 = rpath.RPath(lc, "testfiles/output/out2") +out_gz = rpath.RPath(lc, "testfiles/output/out.gz") Time.setprevtime(999424113) prevtimestr = "2001-09-02T02:48:33-07:00" -t_pref = "testfiles/out.2001-09-02T02:48:33-07:00" -t_diff = "testfiles/out.2001-09-02T02:48:33-07:00.diff" +t_pref = "testfiles/output/out.2001-09-02T02:48:33-07:00" +t_diff = "testfiles/output/out.2001-09-02T02:48:33-07:00.diff" Globals.no_compression_regexp = \ re.compile(Globals.no_compression_regexp_string, re.I) @@ -37,11 +36,12 @@ class inctest(unittest.TestCase): """Test the incrementRP function""" def setUp(self): Globals.set('isbackup_writer',1) + MakeOutputDir() def check_time(self, rp): """Make sure that rp is an inc file, and time is Time.prevtime""" assert rp.isincfile(), rp - t = Time.stringtotime(rp.getinctime()) + t = rp.getinctime() assert t == Time.prevtime, (t, Time.prevtime) def testreg(self): @@ -114,7 +114,7 @@ class inctest(unittest.TestCase): rp = increment.Increment(rf, rf2, target) self.check_time(rp) assert rpath.cmp_attribs(rp, rf2) - Rdiff.patch_action(rf, rp, out2).execute() + Rdiff.patch_local(rf, rp, out2) assert rpath.cmp(rf2, out2) rp.delete() out2.delete() @@ -125,7 +125,7 @@ class inctest(unittest.TestCase): rp = increment.Increment(rf, rf2, target) self.check_time(rp) assert rpath.cmp_attribs(rp, rf2) - Rdiff.patch_action(rf, rp, out2, delta_compressed = 1).execute() + Rdiff.patch_local(rf, rp, out2, delta_compressed = 1) assert rpath.cmp(rf2, out2) rp.delete() out2.delete() @@ -139,86 
+139,10 @@ class inctest(unittest.TestCase): rp = increment.Increment(rf, out_gz, target) self.check_time(rp) assert rpath.cmp_attribs(rp, out_gz) - Rdiff.patch_action(rf, rp, out2).execute() + Rdiff.patch_local(rf, rp, out2) assert rpath.cmp(out_gz, out2) rp.delete() out2.delete() out_gz.delete() -class inctest2(unittest.TestCase): - """Like inctest but contains more elaborate tests""" - def stats_check_initial(self, s): - """Make sure stats object s compatible with initial mirroring - - A lot of the off by one stuff is because the root directory - exists in the below examples. - - """ - assert s.MirrorFiles == 1 or s.MirrorFiles == 0 - assert s.MirrorFileSize < 20000 - assert s.NewFiles <= s.SourceFiles <= s.NewFiles + 1 - assert s.NewFileSize <= s.SourceFileSize <= s.NewFileSize + 20000 - assert s.ChangedFiles == 1 or s.ChangedFiles == 0 - assert s.ChangedSourceSize < 20000 - assert s.ChangedMirrorSize < 20000 - assert s.DeletedFiles == s.DeletedFileSize == 0 - assert s.IncrementFileSize == 0 - - def testStatistics(self): - """Test the writing of statistics - - The file sizes are approximate because the size of directories - could change with different file systems... - - """ - Globals.compression = 1 - Myrm("testfiles/output") - InternalBackup(1, 1, "testfiles/stattest1", "testfiles/output") - InternalBackup(1, 1, "testfiles/stattest2", "testfiles/output", - time.time()+1) - - rbdir = rpath.RPath(Globals.local_connection, - "testfiles/output/rdiff-backup-data") - - #incs = Restore.get_inclist(rbdir.append("subdir"). 
- # append("directory_statistics")) - #assert len(incs) == 2 - #s1 = StatsObj().read_stats_from_rp(incs[0]) # initial mirror stats - #assert s1.SourceFiles == 2 - #assert 400000 < s1.SourceFileSize < 420000 - #self.stats_check_initial(s1) - - #subdir_stats = StatsObj().read_stats_from_rp(incs[1]) # increment stats - #assert subdir_stats.SourceFiles == 2 - #assert 400000 < subdir_stats.SourceFileSize < 420000 - #assert subdir_stats.MirrorFiles == 2 - #assert 400000 < subdir_stats.MirrorFileSize < 420000 - #assert subdir_stats.NewFiles == subdir_stats.NewFileSize == 0 - #assert subdir_stats.DeletedFiles == subdir_stats.DeletedFileSize == 0 - #assert subdir_stats.ChangedFiles == 2 - #assert 400000 < subdir_stats.ChangedSourceSize < 420000 - #assert 400000 < subdir_stats.ChangedMirrorSize < 420000 - #assert 10 < subdir_stats.IncrementFileSize < 20000 - - incs = restore.get_inclist(rbdir.append("session_statistics")) - assert len(incs) == 2 - s2 = statistics.StatsObj().read_stats_from_rp(incs[0]) - assert s2.SourceFiles == 7 - assert 700000 < s2.SourceFileSize < 750000 - self.stats_check_initial(s2) - - root_stats = statistics.StatsObj().read_stats_from_rp(incs[1]) - assert root_stats.SourceFiles == 7, root_stats.SourceFiles - assert 550000 < root_stats.SourceFileSize < 570000 - assert root_stats.MirrorFiles == 7 - assert 700000 < root_stats.MirrorFileSize < 750000 - assert root_stats.NewFiles == 1 - assert root_stats.NewFileSize == 0 - assert root_stats.DeletedFiles == 1 - assert root_stats.DeletedFileSize == 200000 - assert 3 <= root_stats.ChangedFiles <= 4, root_stats.ChangedFiles - assert 450000 < root_stats.ChangedSourceSize < 470000 - assert 400000 < root_stats.ChangedMirrorSize < 420000 - assert 10 < root_stats.IncrementFileSize < 30000 - if __name__ == '__main__': unittest.main() diff --git a/rdiff-backup/testing/metadatatest.py b/rdiff-backup/testing/metadatatest.py index 570dd79..7b6a91a 100644 --- a/rdiff-backup/testing/metadatatest.py +++ 
b/rdiff-backup/testing/metadatatest.py @@ -1,7 +1,6 @@ import unittest, os, cStringIO, time from rdiff_backup.metadata import * -from rdiff_backup import rpath, connection, Globals, selection, \ - destructive_stepping +from rdiff_backup import rpath, connection, Globals, selection tempdir = rpath.RPath(Globals.local_connection, "testfiles/output") diff --git a/rdiff-backup/testing/rdifftest.py b/rdiff-backup/testing/rdifftest.py index 999f1ac..6079f1a 100644 --- a/rdiff-backup/testing/rdifftest.py +++ b/rdiff-backup/testing/rdifftest.py @@ -50,7 +50,7 @@ class RdiffTest(unittest.TestCase): self.delta.write_from_fileobj(Rdiff.get_delta_sigrp(self.signature, self.new)) assert self.delta.lstat() - Rdiff.patch_action(self.basis, self.delta, self.output).execute() + Rdiff.patch_local(self.basis, self.delta, self.output) assert rpath.cmp(self.new, self.output) map(rpath.RPath.delete, rplist) @@ -74,14 +74,14 @@ class RdiffTest(unittest.TestCase): os.system("mv %s %s" % (self.delta.path + ".gz", self.delta.path)) self.delta.setdata() - Rdiff.patch_action(self.basis, self.delta, self.output, - delta_compressed = 1).execute() + Rdiff.patch_local(self.basis, self.delta, self.output, + delta_compressed = 1) assert rpath.cmp(self.new, self.output) map(rpath.RPath.delete, rplist) def testWriteDelta(self): """Test write delta feature of rdiff""" - self.delta.delete() + if self.delta.lstat(): self.delta.delete() rplist = [self.basis, self.new, self.delta, self.output] MakeRandomFile(self.basis.path) MakeRandomFile(self.new.path) @@ -90,7 +90,7 @@ class RdiffTest(unittest.TestCase): Rdiff.write_delta(self.basis, self.new, self.delta) assert self.delta.lstat() - Rdiff.patch_action(self.basis, self.delta, self.output).execute() + Rdiff.patch_local(self.basis, self.delta, self.output) assert rpath.cmp(self.new, self.output) map(rpath.RPath.delete, rplist) @@ -109,7 +109,7 @@ class RdiffTest(unittest.TestCase): os.system("gunzip " + delta_gz.path) delta_gz.setdata() 
self.delta.setdata() - Rdiff.patch_action(self.basis, self.delta, self.output).execute() + Rdiff.patch_local(self.basis, self.delta, self.output) assert rpath.cmp(self.new, self.output) map(rpath.RPath.delete, rplist) @@ -128,7 +128,7 @@ class RdiffTest(unittest.TestCase): self.delta.write_from_fileobj(Rdiff.get_delta_sigrp(self.signature, self.new)) assert self.delta.lstat() - Rdiff.patch_action(self.basis, self.delta).execute() + Rdiff.patch_local(self.basis, self.delta) assert rpath.cmp(self.basis, self.new) map(rpath.RPath.delete, rplist) @@ -141,31 +141,10 @@ class RdiffTest(unittest.TestCase): MakeRandomFile(self.basis.path) MakeRandomFile(self.new.path) map(rpath.RPath.setdata, rplist) - Rdiff.copy_action(self.basis, self.new).execute() + Rdiff.copy_local(self.basis, self.new) assert rpath.cmp(self.basis, self.new) map(rpath.RPath.delete, rplist) - def testPatchWithAttribs(self): - """Using rdiff to copy two files with attributes""" - rplist = [self.basis, self.new, self.delta] - for rp in rplist: - if rp.lstat(): rp.delete() - - MakeRandomFile(self.basis.path) - MakeRandomFile(self.new.path) - self.new.chmod(0401) - map(rpath.RPath.setdata, rplist) - Rdiff.write_delta(self.basis, self.new, self.delta) - rpath.copy_attribs(self.new, self.delta) - assert self.delta.getperms() == 0401 - - assert not self.basis == self.new - Rdiff.patch_with_attribs_action(self.basis, self.delta).execute() - if not self.basis == self.new: - print self.basis, self.new - assert 0 - map(rpath.RPath.delete, rplist) - if __name__ == '__main__': unittest.main() diff --git a/rdiff-backup/testing/regressiontest.py b/rdiff-backup/testing/regressiontest.py index 57b57ab..5c55986 100644 --- a/rdiff-backup/testing/regressiontest.py +++ b/rdiff-backup/testing/regressiontest.py @@ -1,6 +1,6 @@ import unittest, os from commontest import * -from rdiff_backup import Globals, SetConnections, log, rpath +from rdiff_backup import Globals, SetConnections, log, rpath, backup """Regression tests @@ 
-12,14 +12,14 @@ testfiles Globals.set('change_source_perms', 1) Globals.counter = 0 -log.Log.setverbosity(7) +log.Log.setverbosity(3) + +def get_local_rp(extension): + return rpath.RPath(Globals.local_connection, "testfiles/" + extension) class Local: """This is just a place to put increments relative to the local connection""" - def get_local_rp(extension): - return rpath.RPath(Globals.local_connection, "testfiles/" + extension) - inc1rp = get_local_rp('increment1') inc2rp = get_local_rp('increment2') inc3rp = get_local_rp('increment3') @@ -152,98 +152,52 @@ class IncrementTest1(unittest.TestCase): """Increment/Restore when both directories are remote""" BackupRestoreSeries(None, None, self.dirlist) + def testNoWrite(self): + """Test backup/restore on dirs without write permissions""" + def write_string(rp, s = ""): + """Write string s to file""" + fp = rp.open("wb") + fp.write(s) + assert not fp.close() -class IncrementTest2(PathSetter): - def OldtestRecoveryLocal(self): - """Test to see if rdiff-backup can continue with bad increment""" - assert not os.system("rm -rf testfiles/recovery_out_backup") - self.setPathnames(None, None, None, None) - Time.setprevtime(1006136450) - Time.setcurtime() - Globals.add_regexp('.*rdiff-backup-data', 1) - os.system('cp -a testfiles/recovery_out testfiles/recovery_out_backup') - recovery_in = self.get_src_rp('testfiles/recovery') - recovery_out = self.get_dest_rp('testfiles/recovery_out_backup') - recovery_inc = self.get_dest_rp('testfiles/recovery_out_backup/' - 'rdiff-backup-data/increments') - highlevel.Mirror_and_increment(recovery_in, recovery_out, recovery_inc) - # Should probably check integrity of increments, but for now - # allow if it doesn't during the Mirror_and_increment - - def OldtestRecoveryRemote(self): - """Test Recovery with both connections remote""" - assert not os.system('rm -rf testfiles/recovery_out_backup') - self.setPathnames('test1', '../', 'test2/tmp', '../../') - Time.setprevtime(1006136450) - 
Time.setcurtime() - Globals.add_regexp('.*rdiff-backup-data', 1) - os.system('cp -a testfiles/recovery_out testfiles/recovery_out_backup') - recovery_in = self.get_src_rp('testfiles/recovery') - recovery_out = self.get_dest_rp('testfiles/recovery_out_backup') - recovery_inc = self.get_dest_rp('testfiles/recovery_out_backup/' - 'rdiff-backup-data/increments') - highlevel.Mirror_and_increment(recovery_in, recovery_out, recovery_inc) - # Should probably check integrity of increments, but for now - # allow if it doesn't during the Mirror_and_increment - - def runtest(self): - """After setting connections, etc., run actual test using this""" - Time.setcurtime() - - Main.backup_set_select(Local.inc1rp) - highlevel.Mirror(self.inc1rp, self.rpout) - assert CompareRecursive(Local.inc1rp, Local.rpout) - - Time.setcurtime() - Time.setprevtime(999500000) - Main.backup_set_select(self.inc2rp) - highlevel.Mirror_and_increment(self.inc2rp, self.rpout, self.rpout_inc) - assert CompareRecursive(Local.inc2rp, Local.rpout) + def make_subdirs(): + """Make testfiles/no_write_out and testfiles/no_write_out2""" + nw_out1 = get_local_rp("no_write_out") + nw_out1.mkdir() - Time.setcurtime() - Time.setprevtime(999510000) - Main.backup_set_select(self.inc3rp) - highlevel.Mirror_and_increment(self.inc3rp, self.rpout, self.rpout_inc) - assert CompareRecursive(Local.inc3rp, Local.rpout) + nw_out1_1 = get_local_rp("no_write_out/1") + write_string(nw_out1_1) + nw_out1_1.chmod(0) - Time.setcurtime() - Time.setprevtime(999520000) - Main.backup_set_select(self.inc4rp) - highlevel.Mirror_and_increment(self.inc4rp, self.rpout, self.rpout_inc) - assert CompareRecursive(Local.inc4rp, Local.rpout) - + nw_out1_2 = get_local_rp("no_write_out/2") + write_string(nw_out1_2, 'e') + nw_out1_1.chmod(0400) - print "Restoring to self.inc4" - highlevel.Restore(999530000, self.rpout, self.get_inctup(), - self.rpout4) - assert CompareRecursive(Local.inc4rp, Local.rpout4) + nw1_sub = 
get_local_rp("no_write_out/subdir") + nw1_sub.mkdir() - print "Restoring to self.inc3" - highlevel.Restore(999520000, self.rpout, self.get_inctup(), - self.rpout3) - assert CompareRecursive(Local.inc3rp, Local.rpout3) + nw_out1_sub1 = get_local_rp("no_write_out/subdir/1") + write_string(nw_out1_sub1, 'f') + nw1_sub.chmod(0500) + nw_out1.chmod(0500) - print "Restoring to self.inc2" - highlevel.Restore(999510000, self.rpout, self.get_inctup(), - self.rpout2) - assert CompareRecursive(Local.inc2rp, Local.rpout2) + nw_out2 = get_local_rp("no_write_out2") + nw_out2.mkdir() - print "Restoring to self.inc1" - highlevel.Restore(999500000, self.rpout, self.get_inctup(), - self.rpout1) - assert CompareRecursive(Local.inc1rp, Local.rpout1) + nw_out2_1 = get_local_rp("no_write_out2/1") + write_string(nw_out2_1, 'g') - def get_inctup(self): - """Return inc tuples as expected by Restore.RestoreRecursive + nw_out2_2 = get_local_rp("no_write_out2/2") + write_string(nw_out2_2, 'aeu') + nw_out1.chmod(0500) - Assumes output increment directory is - testfiles/output_inc._____. 
- - """ - filenames = filter(lambda x: x.startswith("output_inc."), - Local.prefix.listdir()) - rplist = map(lambda x: Local.prefix.append(x), filenames) - return IndexedTuple((), (Local.prefix.append("output_inc"), rplist)) + Myrm("testfiles/no_write_out") + Myrm("testfiles/no_write_out2") + Myrm("testfiles/output") + make_subdirs() + BackupRestoreSeries(1, 1, ['testfiles/no_write_out', + 'testfiles/no_write_out2', + 'testfiles/empty']) class MirrorTest(PathSetter): @@ -317,7 +271,7 @@ class MirrorTest(PathSetter): Globals.change_ownership = 1 self.refresh(self.rootfiles, self.rootfiles_out, Local.rootfiles, Local.rootfiles_out) # add uid/gid info - highlevel.Mirror(self.rootfiles, self.rootfiles_out) + backup.Mirror(self.rootfiles, self.rootfiles_out) assert CompareRecursive(Local.rootfiles, Local.rootfiles_out) Globals.change_ownership = None self.refresh(self.rootfiles, self.rootfiles_out, @@ -330,29 +284,13 @@ class MirrorTest(PathSetter): conn.Globals.set('change_ownership', 1) self.refresh(self.rootfiles, self.rootfiles_out, Local.rootfiles, Local.rootfiles_out) # add uid/gid info - highlevel.Mirror(self.rootfiles, self.rootfiles_out) + backup.Mirror(self.rootfiles, self.rootfiles_out) assert CompareRecursive(Local.rootfiles, Local.rootfiles_out) for coon in Globals.connections: conn.Globals.set('change_ownership', None) self.refresh(self.rootfiles, self.rootfiles_out, Local.rootfiles, Local.rootfiles_out) # remove that info - def testRoot2Local(self): - """Make sure we can backup a directory we don't own""" - self.setPathnames(None, None, None, None) - Globals.change_ownership = Globals.change_source_perms = None - self.refresh(self.rootfiles2, self.rootfiles_out2, - Local.rootfiles2, Local.rootfiles_out2) # add uid/gid info - self.Mirror(self.rootfiles2, self.rootfiles_out2) - assert CompareRecursive(Local.rootfiles2, Local.rootfiles_out2) - self.refresh(self.rootfiles2, self.rootfiles_out2, - Local.rootfiles2, Local.rootfiles_out2) # remove that info - 
self.Mirror(self.rootfiles21, self.rootfiles_out2) - assert CompareRecursive(Local.rootfiles21, Local.rootfiles_out2) - self.refresh(self.rootfiles21, self.rootfiles_out2, - Local.rootfiles21, Local.rootfiles_out2) # remove that info - Globals.change_source_perms = 1 - def deleteoutput(self): assert not os.system("rm -rf testfiles/output*") self.rbdir = self.rpout.append('rdiff-backup-data') @@ -395,12 +333,12 @@ class MirrorTest(PathSetter): assert CompareRecursive(Local.inc2rp, Local.rpout) def Mirror(self, rpin, rpout): - """Like highlevel.Mirror, but run misc_setup first""" + """Like backup.Mirror, but setup first, cleanup later""" Main.force = 1 Main.misc_setup([rpin, rpout]) Main.backup_set_select(rpin) Main.backup_init_dirs(rpin, rpout) - highlevel.Mirror(rpin, rpout) + backup.Mirror(rpin, rpout) Log.close_logfile() Hardlink.clear_dictionaries() diff --git a/rdiff-backup/testing/restoretest.py b/rdiff-backup/testing/restoretest.py index 9cb6ebb..df99300 100644 --- a/rdiff-backup/testing/restoretest.py +++ b/rdiff-backup/testing/restoretest.py @@ -1,120 +1,163 @@ import unittest from commontest import * -from rdiff_backup import log, restore, Globals, rpath +from rdiff_backup import log, restore, Globals, rpath, TempFile Log.setverbosity(3) - - lc = Globals.local_connection +tempdir = rpath.RPath(Globals.local_connection, "testfiles/output") +restore_base_rp = rpath.RPath(Globals.local_connection, + "testfiles/restoretest") +restore_base_filenames = restore_base_rp.listdir() +mirror_time = 1041109438 # just some late time + +class RestoreFileComparer: + """Holds a file to be restored and tests against it + + Each object has a restore file and a dictionary of times -> + rpaths. When the restore file is restored to one of the given + times, the resulting file should be the same as the related rpath. 
+ + """ + def __init__(self, rf): + self.rf = rf + self.time_rp_dict = {} + + def add_rpath(self, rp, t): + """Add rp, which represents what rf should be at given time t""" + assert not self.time_rp_dict.has_key(t) + self.time_rp_dict[t] = rp + + def compare_at_time(self, t): + """Restore file, make sure it is the same at time t""" + log.Log("Checking result at time %s" % (t,), 7) + tf = TempFile.new(tempdir.append("foo")) + restore._mirror_time = mirror_time + restore._rest_time = t + self.rf.set_relevant_incs() + out_rorpath = self.rf.get_attribs().getRORPath() + correct_result = self.time_rp_dict[t] + + if out_rorpath.isreg(): + out_rorpath.setfile(self.rf.get_restore_fp()) + rpath.copy_with_attribs(out_rorpath, tf) + assert tf.equal_verbose(correct_result, check_index = 0), \ + "%s, %s" % (tf, correct_result) + if tf.isreg(): + assert rpath.cmpfileobj(tf.open("rb"), correct_result.open("rb")) + if tf.lstat(): tf.delete() + + def compare_all(self): + """Check restore results for all available times""" + for t in self.time_rp_dict.keys(): self.compare_at_time(t) + class RestoreTest(unittest.TestCase): """Test Restore class""" - prefix = "testfiles/restoretest/" - def maketesttuples(self, basename): - """Make testing tuples from available files starting with prefix - - tuples is a sorted (oldest to newest) list of pairs (rp1, rp2) - where rp1 is an increment file and rp2 is the same but without - the final extension. incs is a list of all increment files. 
- - """ - dirlist = os.listdir(self.prefix) - dirlist.sort() - baselist = filter(lambda f: f.startswith(basename), dirlist) - rps = map(lambda f: rpath.RPath(lc, self.prefix+f), baselist) - incs = filter(lambda rp: rp.isincfile(), rps) - tuples = map(lambda rp: (rp, rpath.RPath(lc, "%s.%s" % - (rp.getincbase().path, - rp.getinctime()))), - incs) - return tuples, incs - - def restoreonefiletest(self, basename): - tuples, incs = self.maketesttuples(basename) - rpbase = rpath.RPath(lc, self.prefix + basename) - rptarget = rpath.RPath(lc, "testfiles/outfile") - Hardlink.initialize_dictionaries() - - for pair in tuples: - print "Processing file " + pair[0].path - if rptarget.lstat(): rptarget.delete() - rest_time = Time.stringtotime(pair[0].getinctime()) - rid = restore.RestoreIncrementData((), rpbase, incs) - rid.sortincseq(rest_time, 10000000000) # pick some really late time - rcd = restore.RestoreCombinedData(rid, rpbase, rptarget) - rcd.RestoreFile() - #sorted_incs = Restore.sortincseq(rest_time, incs) - #Restore.RestoreFile(rest_time, rpbase, (), sorted_incs, rptarget) - rptarget.setdata() - if not rptarget.lstat(): assert not pair[1].lstat() - elif not pair[1].lstat(): assert not rptarget.lstat() - else: - assert rpath.cmp(rptarget, pair[1]), \ - "%s %s" % (rptarget.path, pair[1].path) - assert rpath.cmp_attribs(rptarget, pair[1]), \ - "%s %s" % (rptarget.path, pair[1].path) - rptarget.delete() - - def testsortincseq(self): - """Test the Restore.sortincseq function - - This test just makes sure that it comes up with the right - number of increments for each base name - given a list of - increments, we should eventually get sorted sequences that - end in each one (each one will be the last increment once). 
- - """ - for basename in ['ocaml', 'mf']: - tuples, unused = self.maketesttuples(basename) - incs = [tuple[0] for tuple in tuples] - - # Now we need a time newer than any inc - mirror_time = Time.stringtotime(incs[-1].getinctime()) + 10000 - - for inc, incbase in tuples: - assert inc.isincfile() - inctime = Time.stringtotime(inc.getinctime()) - rid1 = restore.RestoreIncrementData(basename, incbase, incs) - rid1.sortincseq(inctime, mirror_time) - assert rid1.inc_list, rid1.inc_list - # oldest increment should be exactly inctime - ridtime = Time.stringtotime(rid1.inc_list[-1].getinctime()) - assert ridtime == inctime, (ridtime, inctime) - - - def testRestorefiles(self): - """Testing restoration of files one at a time""" - map(self.restoreonefiletest, ["ocaml", "mf"]) - - def testRestoreDir(self): - """Test restoring from a real backup set - - Run makerestoretest3 if this doesn't work. - - """ - Myrm("testfiles/output") - InternalRestore(1, 1, "testfiles/restoretest3", - "testfiles/output", 20000) - - src_rp = rpath.RPath(Globals.local_connection, "testfiles/increment2") - restore_rp = rpath.RPath(Globals.local_connection, "testfiles/output") - assert CompareRecursive(src_rp, restore_rp) - - def testRestoreCorrupt(self): - """Test restoring a partially corrupt archive - - The problem here is that a directory is missing from what is - to be restored, but because the previous backup was aborted in - the middle, some of the files in that directory weren't marked - as .missing. 
+ def get_rfcs(self): + """Return available RestoreFileCompararer objects""" + base_rf = restore.RestoreFile(restore_base_rp, restore_base_rp, []) + rfs = base_rf.yield_sub_rfs() + rfcs = [] + for rf in rfs: + if rf.mirror_rp.dirsplit()[1] in ["dir"]: + log.Log("skipping 'dir'", 5) + continue + + rfc = RestoreFileComparer(rf) + for inc in rf.inc_list: + test_time = inc.getinctime() + rfc.add_rpath(self.get_correct(rf.mirror_rp, test_time), + test_time) + rfc.add_rpath(rf.mirror_rp, mirror_time) + rfcs.append(rfc) + return rfcs + + def get_correct(self, mirror_rp, test_time): + """Return correct version with base mirror_rp at time test_time""" + assert -1 < test_time < 2000000000, test_time + dirname, basename = mirror_rp.dirsplit() + for filename in restore_base_filenames: + comps = filename.split(".") + base = ".".join(comps[:-1]) + t = Time.stringtotime(comps[-1]) + if t == test_time and basename == base: + return restore_base_rp.append(filename) + # Correct rp must be empty + return restore_base_rp.append("%s.%s" % + (basename, Time.timetostring(test_time))) + + def testRestoreSingle(self): + """Test restoring files one at at a time""" + MakeOutputDir() + for rfc in self.get_rfcs(): + if rfc.rf.inc_rp.isincfile(): continue + log.Log("Comparing %s" % (rfc.rf.inc_rp.path,), 5) + rfc.compare_all() + + def testBothLocal(self): + """Test directory restore everything local""" + self.restore_dir_test(1,1) + + def testMirrorRemote(self): + """Test directory restore mirror is remote""" + self.restore_dir_test(0, 1) + + def testDestRemote(self): + """Test directory restore destination is remote""" + self.restore_dir_test(1, 0) + + def testBothRemote(self): + """Test directory restore everything is remote""" + self.restore_dir_test(0, 0) + + def restore_dir_test(self, mirror_local, dest_local): + """Run whole dir tests + + If any of the above tests don't work, try rerunning + makerestoretest3. 
""" Myrm("testfiles/output") - InternalRestore(1, 1, "testfiles/restoretest4", "testfiles/output", - 10000) - assert os.lstat("testfiles/output") - self.assertRaises(OSError, os.lstat, "testfiles/output/tmp") - self.assertRaises(OSError, os.lstat, "testfiles/output/rdiff-backup") + target_rp = rpath.RPath(Globals.local_connection, "testfiles/output") + mirror_rp = rpath.RPath(Globals.local_connection, + "testfiles/restoretest3") + inc1_rp = rpath.RPath(Globals.local_connection, + "testfiles/increment1") + inc2_rp = rpath.RPath(Globals.local_connection, + "testfiles/increment2") + inc3_rp = rpath.RPath(Globals.local_connection, + "testfiles/increment3") + inc4_rp = rpath.RPath(Globals.local_connection, + "testfiles/increment4") + + InternalRestore(mirror_local, dest_local, "testfiles/restoretest3", + "testfiles/output", 45000) + assert CompareRecursive(inc4_rp, target_rp) + InternalRestore(mirror_local, dest_local, "testfiles/restoretest3", + "testfiles/output", 35000) + assert CompareRecursive(inc3_rp, target_rp, compare_hardlinks = 0) + InternalRestore(mirror_local, dest_local, "testfiles/restoretest3", + "testfiles/output", 25000) + assert CompareRecursive(inc2_rp, target_rp, compare_hardlinks = 0) + InternalRestore(mirror_local, dest_local, "testfiles/restoretest3", + "testfiles/output", 5000) + assert CompareRecursive(inc1_rp, target_rp, compare_hardlinks = 0) + +# def testRestoreCorrupt(self): +# """Test restoring a partially corrupt archive +# +# The problem here is that a directory is missing from what is +# to be restored, but because the previous backup was aborted in +# the middle, some of the files in that directory weren't marked +# as .missing. 
+# +# """ +# Myrm("testfiles/output") +# InternalRestore(1, 1, "testfiles/restoretest4", "testfiles/output", +# 10000) +# assert os.lstat("testfiles/output") +# self.assertRaises(OSError, os.lstat, "testfiles/output/tmp") +# self.assertRaises(OSError, os.lstat, "testfiles/output/rdiff-backup") def testRestoreNoincs(self): """Test restoring a directory with no increments, just mirror""" diff --git a/rdiff-backup/testing/robusttest.py b/rdiff-backup/testing/robusttest.py index 8c6d51c..6b9e356 100644 --- a/rdiff-backup/testing/robusttest.py +++ b/rdiff-backup/testing/robusttest.py @@ -1,32 +1,11 @@ + import os, unittest from commontest import * from rdiff_backup import rpath, robust, TempFile, Globals - - -class TestRobustAction(unittest.TestCase): - """Test some robust actions""" - def testCopyWithAttribs(self): - """Test copy with attribs action""" - rpin = rpath.RPath(Globals.local_connection, "./testfiles/robust/in") - fp = open("./testfiles/robust/in", "wb") - fp.write("hello there") - fp.close() - os.chmod("./testfiles/robust/in", 0604) - rpin.setdata() - assert rpin.isreg() and rpin.getperms() % 01000 == 0604 - - rpout = rpath.RPath(Globals.local_connection, "./testfiles/robust/out") - robust.copy_with_attribs_action(rpin, rpout).execute() - if not rpout == rpin: - print rpout, rpin - assert 0 - - rpout.delete() - rpin.delete() class TempFileTest(unittest.TestCase): - """Test creation and management of tempfiles""" + """Test creation and management of tempfiles in TempFile module""" rp_base = rpath.RPath(Globals.local_connection, "./testfiles/robust/testfile_base") def testBasic(self): @@ -61,26 +40,19 @@ class TempFileTest(unittest.TestCase): assert destination.lstat() destination.delete() - -class SaveStateTest(unittest.TestCase): - """Test SaveState class""" - data_dir = rpath.RPath(Globals.local_connection, "testfiles/robust") - def testSymlinking(self): - """Test recording last file with symlink""" - last_rorp = rpath.RORPath(('usr', 'local', 'bin', 
'ls')) - Globals.rbdir = self.data_dir - Time.setcurtime() - SetConnections.BackupInitConnections(Globals.local_connection, - Globals.local_connection) - robust.SaveState.init_filenames() - robust.SaveState.record_last_file_action(last_rorp).execute() - - sym_rp = rpath.RPath(Globals.local_connection, - "testfiles/robust/last-file-incremented.%s.data" % - Time.curtimestr) - assert sym_rp.issym() - assert sym_rp.readlink() == "increments/usr/local/bin/ls" - sym_rp.delete() - +class RobustTest(unittest.TestCase): + """Test robust module""" + def test_check_common_error(self): + """Test capturing errors""" + def cause_catchable_error(a): + os.lstat("aoenuthaoeu/aosutnhcg.4fpr,38p") + def cause_uncatchable_error(): + ansoethusaotneuhsaotneuhsaontehuaou + result = robust.check_common_error(None, cause_catchable_error, [1]) + assert result is None, result + try: robust.check_common_error(None, cause_uncatchable_error) + except NameError: pass + else: assert 0, "Key error not raised" + if __name__ == '__main__': unittest.main() diff --git a/rdiff-backup/testing/roottest.py b/rdiff-backup/testing/roottest.py index fbeaaa1..81292b2 100644 --- a/rdiff-backup/testing/roottest.py +++ b/rdiff-backup/testing/roottest.py @@ -1,7 +1,6 @@ import unittest, os from commontest import * -from rdiff_backup.log import * -from rdiff_backup import Globals +from rdiff_backup import Globals, log """Root tests @@ -11,7 +10,11 @@ that are meant to be run as root. 
Globals.set('change_source_perms', None) Globals.counter = 0 -Log.setverbosity(4) +log.Log.setverbosity(4) + +def Run(cmd): + print "Running: ", cmd + assert not os.system(cmd) class RootTest(unittest.TestCase): dirlist1 = ["testfiles/root", "testfiles/various_file_types", "testfiles/increment4"] @@ -21,8 +24,63 @@ class RootTest(unittest.TestCase): def testLocal2(self): BackupRestoreSeries(1, 1, self.dirlist2) def testRemote(self): BackupRestoreSeries(None, None, self.dirlist1) - def tearDown(self): - os.system(MiscDir + "/myrm testfiles/output testfiles/rest_out") +class NonRoot(unittest.TestCase): + """Test backing up as non-root user + + Test backing up a directory with files of different userids and + with device files in it, as a non-root user. When restoring as + root, everything should be restored normally. + + """ + user = 'ben' + def make_root_dir(self): + """Make directory createable only by root""" + rp = rpath.RPath(Globals.local_connection, "testfiles/root_out") + if rp.lstat(): Myrm(rp.path) + rp.mkdir() + rp1 = rp.append("1") + rp1.touch() + rp2 = rp.append("2") + rp2.touch() + rp2.chown(1, 1) + rp3 = rp.append("3") + rp3.touch() + rp3.chown(2, 2) + rp4 = rp.append("dev") + rp4.makedev('c', 4, 28) + return rp + + def test_non_root(self): + """Main non-root -> root test""" + Myrm("testfiles/output") + input_rp = self.make_root_dir() + Globals.change_ownership = 1 + output_rp = rpath.RPath(Globals.local_connection, "testfiles/output") + restore_rp = rpath.RPath(Globals.local_connection, + "testfiles/rest_out") + empty_rp = rpath.RPath(Globals.local_connection, "testfiles/empty") + + backup_cmd = "rdiff-backup %s %s" % (input_rp.path, output_rp.path) + Run("su %s -c '%s'" % (self.user, backup_cmd)) + + Myrm("testfiles/rest_out") + restore_cmd = "rdiff-backup -r now %s %s" % (output_rp.path, + restore_rp.path,) + Run(restore_cmd) + assert CompareRecursive(input_rp, restore_rp) + + backup_cmd = "rdiff-backup %s %s" % (empty_rp.path, output_rp.path) + 
Run("su %s -c '%s'" % (self.user, backup_cmd)) + + Myrm("testfiles/rest_out") + Run(restore_cmd) + assert CompareRecursive(empty_rp, restore_rp) + Myrm("testfiles/rest_out") + restore_cmd = "rdiff-backup -r 1 %s %s" % (output_rp.path, + restore_rp.path,) + Run(restore_cmd) + assert CompareRecursive(input_rp, restore_rp) + if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/rorpitertest.py b/rdiff-backup/testing/rorpitertest.py index ec786c2..f43a085 100644 --- a/rdiff-backup/testing/rorpitertest.py +++ b/rdiff-backup/testing/rorpitertest.py @@ -52,25 +52,6 @@ class RORPIterTest(unittest.TestCase): iter([]))) - def testCombinedPatching(self): - """Combined signature, patch, and diff operations""" - if self.output.lstat(): - Myrm(self.output.path) - self.output.setdata() - - def turninto(final_rp): - sigfile = rorpiter.ToFile(rorpiter.GetSignatureIter(self.output)) - diff_file = rorpiter.ToFile(rorpiter.GetDiffIter( - rorpiter.FromFile(sigfile), rorpiter.IterateRPaths(final_rp))) - rorpiter.PatchIter(self.output, rorpiter.FromFile(diff_file)) - - turninto(self.inc1rp) - rpath.copy_attribs(self.inc1rp, self.output) # Update time - assert self.compare_no_times(self.inc1rp, self.output) - turninto(self.inc2rp) - rpath.copy_attribs(self.inc2rp, self.output) - assert self.compare_no_times(self.inc2rp, self.output) - def compare_no_times(self, src_rp, dest_rp): """Compare but disregard directories attributes""" def equal(src_rorp, dest_rorp): diff --git a/rdiff-backup/testing/selectiontest.py b/rdiff-backup/testing/selectiontest.py index 8fa970d..2e0cd78 100644 --- a/rdiff-backup/testing/selectiontest.py +++ b/rdiff-backup/testing/selectiontest.py @@ -417,19 +417,19 @@ testfiles/select**/2 ("--exclude", "/")], [(), ("home",)]) - def testParseStartingFrom(self): - """Test parse, this time starting from inside""" - self.root = rpath.RPath(Globals.local_connection, "testfiles/select") - self.Select = Select(self.root) - 
self.Select.ParseArgs([("--include", "testfiles/select/1/1"), - ("--exclude", "**")], []) - self.Select.set_iter(('1', '1')) - assert lazy.Iter.equal(lazy.Iter.map(lambda dsrp: dsrp.index, - self.Select), - iter([("1", '1', '1'), - ('1', '1', '2'), - ('1', '1', '3')]), - verbose = 1) +# def testParseStartingFrom(self): +# """Test parse, this time starting from inside""" +# self.root = rpath.RPath(Globals.local_connection, "testfiles/select") +# self.Select = Select(self.root) +# self.Select.ParseArgs([("--include", "testfiles/select/1/1"), +# ("--exclude", "**")], []) +# self.Select.set_iter(('1', '1')) +# assert lazy.Iter.equal(lazy.Iter.map(lambda dsrp: dsrp.index, +# self.Select), +# iter([("1", '1', '1'), +# ('1', '1', '2'), +# ('1', '1', '3')]), +# verbose = 1) if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/statisticstest.py b/rdiff-backup/testing/statisticstest.py index 7e8f02c..85a1b68 100644 --- a/rdiff-backup/testing/statisticstest.py +++ b/rdiff-backup/testing/statisticstest.py @@ -1,6 +1,6 @@ -import unittest +import unittest, time from commontest import * -from rdiff_backup import statistics, rpath +from rdiff_backup import statistics, rpath, restore class StatsObjTest(unittest.TestCase): """Test StatsObj class""" @@ -29,7 +29,7 @@ class StatsObjTest(unittest.TestCase): self.set_obj(s) assert s.get_stat('SourceFiles') == 1 - s1 = statistics.ITRB() + s1 = statistics.StatFileObj() assert s1.get_stat('SourceFiles') == 0 def test_get_stats_string(self): @@ -40,10 +40,12 @@ class StatsObjTest(unittest.TestCase): self.set_obj(s) stats_string = s.get_stats_string() - assert stats_string == \ -"""StartTime 11.00 (Wed Dec 31 16:00:11 1969) -EndTime 12.00 (Wed Dec 31 16:00:12 1969) -ElapsedTime 1.00 (1 second) + ss_list = stats_string.split("\n") + tail = "\n".join(ss_list[2:]) # Time varies by time zone, don't check +#"""StartTime 11.00 (Wed Dec 31 16:00:11 1969) +#EndTime 12.00 (Wed Dec 31 16:00:12 1969)" + assert tail == \ 
+"""ElapsedTime 1.00 (1 second) SourceFiles 1 SourceFileSize 2 (2 bytes) MirrorFiles 13 @@ -143,4 +145,81 @@ TotalDestinationSizeChange 7 (7 bytes) assert s3.SourceFiles == 75 +class IncStatTest(unittest.TestCase): + """Test statistics as produced by actual backup""" + def stats_check_initial(self, s): + """Make sure stats object s compatible with initial mirroring + + A lot of the off by one stuff is because the root directory + exists in the below examples. + + """ + assert s.MirrorFiles == 1 or s.MirrorFiles == 0 + assert s.MirrorFileSize < 20000 + assert s.NewFiles <= s.SourceFiles <= s.NewFiles + 1 + assert s.NewFileSize <= s.SourceFileSize <= s.NewFileSize + 20000 + assert s.ChangedFiles == 1 or s.ChangedFiles == 0 + assert s.ChangedSourceSize < 20000 + assert s.ChangedMirrorSize < 20000 + assert s.DeletedFiles == s.DeletedFileSize == 0 + assert s.IncrementFileSize == 0 + + def testStatistics(self): + """Test the writing of statistics + + The file sizes are approximate because the size of directories + could change with different file systems... + + """ + Globals.compression = 1 + Myrm("testfiles/output") + InternalBackup(1, 1, "testfiles/stattest1", "testfiles/output") + InternalBackup(1, 1, "testfiles/stattest2", "testfiles/output", + time.time()+1) + + rbdir = rpath.RPath(Globals.local_connection, + "testfiles/output/rdiff-backup-data") + + #incs = Restore.get_inclist(rbdir.append("subdir"). 
+ # append("directory_statistics")) + #assert len(incs) == 2 + #s1 = StatsObj().read_stats_from_rp(incs[0]) # initial mirror stats + #assert s1.SourceFiles == 2 + #assert 400000 < s1.SourceFileSize < 420000 + #self.stats_check_initial(s1) + + #subdir_stats = StatsObj().read_stats_from_rp(incs[1]) # increment stats + #assert subdir_stats.SourceFiles == 2 + #assert 400000 < subdir_stats.SourceFileSize < 420000 + #assert subdir_stats.MirrorFiles == 2 + #assert 400000 < subdir_stats.MirrorFileSize < 420000 + #assert subdir_stats.NewFiles == subdir_stats.NewFileSize == 0 + #assert subdir_stats.DeletedFiles == subdir_stats.DeletedFileSize == 0 + #assert subdir_stats.ChangedFiles == 2 + #assert 400000 < subdir_stats.ChangedSourceSize < 420000 + #assert 400000 < subdir_stats.ChangedMirrorSize < 420000 + #assert 10 < subdir_stats.IncrementFileSize < 20000 + + incs = restore.get_inclist(rbdir.append("session_statistics")) + assert len(incs) == 2 + s2 = statistics.StatsObj().read_stats_from_rp(incs[0]) + assert s2.SourceFiles == 7 + assert 700000 <= s2.SourceFileSize < 750000 + self.stats_check_initial(s2) + + root_stats = statistics.StatsObj().read_stats_from_rp(incs[1]) + assert root_stats.SourceFiles == 7, root_stats.SourceFiles + assert 550000 <= root_stats.SourceFileSize < 570000 + assert root_stats.MirrorFiles == 7 + assert 700000 <= root_stats.MirrorFileSize < 750000 + assert root_stats.NewFiles == 1 + assert root_stats.NewFileSize == 0 + assert root_stats.DeletedFiles == 1 + assert root_stats.DeletedFileSize == 200000 + assert 3 <= root_stats.ChangedFiles <= 4, root_stats.ChangedFiles + assert 450000 <= root_stats.ChangedSourceSize < 470000 + assert 400000 <= root_stats.ChangedMirrorSize < 420000, \ + root_stats.ChangedMirrorSize + assert 10 < root_stats.IncrementFileSize < 30000 + if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/timetest.py b/rdiff-backup/testing/timetest.py index 97286a8..367a4f9 100644 --- 
a/rdiff-backup/testing/timetest.py +++ b/rdiff-backup/testing/timetest.py @@ -6,7 +6,7 @@ class TimeTest(unittest.TestCase): def testConversion(self): """test timetostring and stringtotime""" Time.setcurtime() - assert type(Time.curtime) is types.FloatType + assert type(Time.curtime) is types.FloatType or types.LongType assert type(Time.curtimestr) is types.StringType assert (Time.cmp(int(Time.curtime), Time.curtimestr) == 0 or Time.cmp(int(Time.curtime) + 1, Time.curtimestr) == 0) -- cgit v1.2.1