From a5c03feacbbd9361eb3e2abe367b75529c83459b Mon Sep 17 00:00:00 2001 From: bescoto Date: Mon, 31 Oct 2005 04:53:31 +0000 Subject: Added various compare options like --compare-full and --compare-hash git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@664 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/Main.py | 59 +++++---- rdiff-backup/rdiff_backup/Security.py | 14 ++- rdiff-backup/rdiff_backup/backup.py | 6 +- rdiff-backup/rdiff_backup/compare.py | 213 ++++++++++++++++++++++++++++++++ rdiff-backup/rdiff_backup/connection.py | 7 +- rdiff-backup/rdiff_backup/hash.py | 15 +++ rdiff-backup/rdiff_backup/iterfile.py | 71 ++++++----- rdiff-backup/rdiff_backup/regress.py | 4 +- rdiff-backup/rdiff_backup/restore.py | 65 +++------- rdiff-backup/rdiff_backup/rpath.py | 10 -- 10 files changed, 340 insertions(+), 124 deletions(-) create mode 100644 rdiff-backup/rdiff_backup/compare.py (limited to 'rdiff-backup/rdiff_backup') diff --git a/rdiff-backup/rdiff_backup/Main.py b/rdiff-backup/rdiff_backup/Main.py index 0158d38..0f8a060 100644 --- a/rdiff-backup/rdiff_backup/Main.py +++ b/rdiff-backup/rdiff_backup/Main.py @@ -1,4 +1,4 @@ -# Copyright 2002, 2003, 2004 Ben Escoto +# Copyright 2002, 2003, 2004, 2005 Ben Escoto # # This file is part of rdiff-backup. # @@ -24,7 +24,7 @@ import getopt, sys, re, os, cStringIO from log import Log, LoggerError, ErrorLog import Globals, Time, SetConnections, selection, robust, rpath, \ manage, backup, connection, restore, FilenameMapping, \ - Security, Hardlink, regress, C, fs_abilities, statistics + Security, Hardlink, regress, C, fs_abilities, statistics, compare action = None @@ -59,9 +59,10 @@ def parse_cmdlineoptions(arglist): try: optlist, args = getopt.getopt(arglist, "blr:sv:V", ["backup-mode", "calculate-average", "check-destination-dir", - "compare", "compare-at-time=", "create-full-path", - "current-time=", "exclude=", "exclude-device-files", - "exclude-fifos", "exclude-filelist=", + "compare", "compare-at-time=", "compare-hash", + "compare-hash-at-time=", "compare-full", "compare-full-at-time=", + "create-full-path", "current-time=", "exclude=", + "exclude-device-files", "exclude-fifos", "exclude-filelist=", "exclude-symbolic-links", "exclude-sockets", "exclude-filelist-stdin", "exclude-globbing-filelist=", "exclude-globbing-filelist-stdin", "exclude-mirror=", @@ -91,10 +92,12 @@ def parse_cmdlineoptions(arglist): elif opt == "--calculate-average": action = "calculate-average" elif opt == "--carbonfile": Globals.set("carbonfile_active", 1) elif opt == "--check-destination-dir": action = "check-destination-dir" - elif opt == "--compare" or opt == "--compare-at-time": - action = "compare" - if opt == "--compare": restore_timestr = "now" - else: restore_timestr = arg + elif opt in ("--compare", "--compare-at-time", + "--compare-hash", "--compare-hash-at-time", + "--compare-full", "--compare-full-at-time"): + if opt[-8:] == "-at-time": restore_timestr, opt = arg, opt[:-8] + else: restore_timestr = "now" + action = opt[2:] elif opt == "--create-full-path": create_full_path = 1 elif opt == "--current-time": Globals.set_integer('current_time', arg) @@ -200,7 +203,8 @@ def check_action(): 1: ['list-increments', 'list-increment-sizes', 'remove-older-than', 'list-at-time', 'list-changed-since', 'check-destination-dir'], - 2: ['backup', 'restore', 'restore-as-of', 'compare']} + 2: ['backup', 'restore', 'restore-as-of', + 'compare', 'compare-hash', 'compare-full']} l = len(args) if l == 0 and action not in arg_action_dict[l]: commandline_error("No arguments given") @@ -263,7 +267,7 @@ def take_action(rps): elif action == "backup": Backup(rps[0], rps[1]) elif action == "calculate-average": CalculateAverage(rps) elif action == "check-destination-dir": CheckDest(rps[0]) - elif action == "compare": Compare(*rps) + elif action.startswith("compare"): Compare(action, rps[0], rps[1]) elif action == "list-at-time": ListAtTime(rps[0]) elif action == "list-changed-since": ListChangedSince(rps[0]) elif action == "list-increments": ListIncrements(rps[0]) @@ -592,7 +596,7 @@ def restore_set_root(rpin): def ListIncrements(rp): """Print out a summary of the increments and their times""" - rp = require_root_set(rp) + rp = require_root_set(rp, 1) restore_check_backup_dir(restore_root) mirror_rp = restore_root.new_index(restore_index) inc_rpath = Globals.rbdir.append_path('increments', restore_index) @@ -602,24 +606,25 @@ def ListIncrements(rp): print manage.describe_incs_parsable(incs, mirror_time, mirror_rp) else: print manage.describe_incs_human(incs, mirror_time, mirror_rp) -def require_root_set(rp): +def require_root_set(rp, read_only): """Make sure rp is or is in a valid rdiff-backup dest directory. - Also initializes fs_abilities and quoting and return quoted rp if - necessary. + Also initializes fs_abilities (read or read/write) and quoting and + return quoted rp if necessary. """ if not restore_set_root(rp): Log.FatalError(("Bad directory %s.\n" % (rp.path,)) + "It doesn't appear to be an rdiff-backup destination dir") - Globals.rbdir.conn.fs_abilities.single_set_globals(Globals.rbdir) + Globals.rbdir.conn.fs_abilities.single_set_globals(Globals.rbdir, + read_only) if Globals.chars_to_quote: return restore_init_quoting(rp) else: return rp def ListIncrementSizes(rp): """Print out a summary of the increments """ - rp = require_root_set(rp) + rp = require_root_set(rp, 1) print manage.ListIncrementSizes(restore_root, restore_index) @@ -634,7 +639,7 @@ def CalculateAverage(rps): def RemoveOlderThan(rootrp): """Remove all increment files older than a certain time""" - rootrp = require_root_set(rootrp) + rootrp = require_root_set(rootrp, 0) rot_require_rbdir_base(rootrp) try: time = Time.genstrtotime(remove_older_than_string) except Time.TimeException, exc: Log.FatalError(str(exc)) @@ -670,7 +675,7 @@ def rot_require_rbdir_base(rootrp): def ListChangedSince(rp): """List all the files under rp that have changed since restoretime""" - rp = require_root_set(rp) + rp = require_root_set(rp, 1) try: rest_time = Time.genstrtotime(restore_timestr) except Time.TimeException, exc: Log.FatalError(str(exc)) mirror_rp = restore_root.new_index(restore_index) @@ -682,7 +687,7 @@ def ListChangedSince(rp): def ListAtTime(rp): """List files in archive under rp that are present at restoretime""" - rp = require_root_set(rp) + rp = require_root_set(rp, 1) try: rest_time = Time.genstrtotime(restore_timestr) except Time.TimeException, exc: Log.FatalError(str(exc)) mirror_rp = restore_root.new_index(restore_index) @@ -691,7 +696,7 @@ def ListAtTime(rp): print rorp.get_indexpath() -def Compare(src_rp, dest_rp, compare_time = None): +def Compare(compare_type, src_rp, dest_rp, compare_time = None): """Compare metadata in src_rp with metadata of backup session Prints to stdout whenever a file in the src_rp directory has @@ -702,16 +707,20 @@ def Compare(src_rp, dest_rp, compare_time = None): """ global return_val - dest_rp = require_root_set(dest_rp) + dest_rp = require_root_set(dest_rp, 1) if not compare_time: try: compare_time = Time.genstrtotime(restore_timestr) except Time.TimeException, exc: Log.FatalError(str(exc)) mirror_rp = restore_root.new_index(restore_index) - inc_rp = mirror_rp.append_path("increments", restore_index) + inc_rp = Globals.rbdir.append_path("increments", restore_index) backup_set_select(src_rp) # Sets source rorp iterator - src_iter = src_rp.conn.backup.SourceStruct.get_source_select() - return_val = restore.Compare(src_iter, mirror_rp, inc_rp, compare_time) + if compare_type == "compare": compare_func = compare.Compare + elif compare_type == "compare-hash": compare_func = compare.Compare_hash + else: + assert compare_type == "compare-full", compare_type + compare_func = compare.Compare_full + return_val = compare_func(src_rp, mirror_rp, inc_rp, compare_time) def CheckDest(dest_rp): diff --git a/rdiff-backup/rdiff_backup/Security.py b/rdiff-backup/rdiff_backup/Security.py index 53a081c..1e06d46 100644 --- a/rdiff-backup/rdiff_backup/Security.py +++ b/rdiff-backup/rdiff_backup/Security.py @@ -113,8 +113,9 @@ def set_security_level(action, cmdpairs): sec_level = "all" rdir = getpath(cp2) elif action in ["test-server", "list-increments", 'list-increment-sizes', - "list-at-time", "list-changed-since", - "calculate-average", "remove-older-than", "compare"]: + "list-at-time", "list-changed-since", + "calculate-average", "remove-older-than", "compare", + "compare-hash", "compare-full"]: sec_level = "minimal" rdir = tempfile.gettempdir() else: assert 0, "Unknown action %s" % action @@ -151,7 +152,14 @@ def set_allowed_requests(sec_level): "restore.ListAtTime", "backup.SourceStruct.get_source_select", "backup.SourceStruct.set_source_select", - "backup.SourceStruct.get_diffs"]) + "backup.SourceStruct.get_diffs", + "compare.RepoSide.init_and_get_iter", + "compare.RepoSide.close_rf_cache", + "compare.RepoSide.attach_files", + "compare.DataSide.get_source_select", + "compare.DataSide.compare_fast", + "compare.DataSide.compare_hash", + "compare.DataSide.compare_full"]) if sec_level == "update-only" or sec_level == "all": l.extend(["log.Log.open_logfile_local", "log.Log.close_logfile_local", "log.ErrorLog.open", "log.ErrorLog.isopen", diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py index 0854371..6731b31 100644 --- a/rdiff-backup/rdiff_backup/backup.py +++ b/rdiff-backup/rdiff_backup/backup.py @@ -101,8 +101,8 @@ class SourceStruct: diff_rorp.set_attached_filetype('snapshot') for dest_sig in dest_sigiter: - if dest_sig is iterfile.RORPIterFlushRepeat: - yield iterfile.RORPIterFlush # Flush buffer when get_sigs does + if dest_sig is iterfile.MiscIterFlushRepeat: + yield iterfile.MiscIterFlush # Flush buffer when get_sigs does continue src_rp = (source_rps.get(dest_sig.index) or rpath.RORPath(dest_sig.index)) @@ -172,7 +172,7 @@ class DestinationStruct: if (Globals.backup_reader is not Globals.backup_writer and num_rorps_skipped > flush_threshold): num_rorps_skipped = 0 - yield iterfile.RORPIterFlushRepeat + yield iterfile.MiscIterFlushRepeat else: index = src_rorp and src_rorp.index or dest_rorp.index sig = cls.get_one_sig(dest_base_rpath, index, diff --git a/rdiff-backup/rdiff_backup/compare.py b/rdiff-backup/rdiff_backup/compare.py new file mode 100644 index 0000000..9ceff12 --- /dev/null +++ b/rdiff-backup/rdiff_backup/compare.py @@ -0,0 +1,213 @@ +# Copyright 2002, 2003, 2004, 2005 Ben Escoto +# +# This file is part of rdiff-backup. +# +# rdiff-backup is free software; you can redistribute it and/or modify +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# rdiff-backup is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with rdiff-backup; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA + +"""Perform various kinds of comparisons. + +For instance, full-file compare, compare by hash, and metadata-only +compare. This uses elements of the backup and restore modules. + +""" + +import Globals, restore, rorpiter, log, backup, static, rpath, hash, robust + +def Compare(src_rp, mirror_rp, inc_rp, compare_time): + """Compares metadata in src_rp dir with metadata in mirror_rp at time""" + repo_side = mirror_rp.conn.compare.RepoSide + data_side = src_rp.conn.compare.DataSide + + repo_iter = repo_side.init_and_get_iter(mirror_rp, inc_rp, compare_time) + return_val = print_reports(data_side.compare_fast(repo_iter)) + repo_side.close_rf_cache() + return return_val + +def Compare_hash(src_rp, mirror_rp, inc_rp, compare_time): + """Compare files at src_rp with repo at compare_time + + Note metadata differences, but also check to see if file data is + different. If two regular files have the same size, hash the + source and compare to the hash presumably already present in repo. + + """ + repo_side = mirror_rp.conn.compare.RepoSide + data_side = src_rp.conn.compare.DataSide + + repo_iter = repo_side.init_and_get_iter(mirror_rp, inc_rp, compare_time) + return_val = print_reports(data_side.compare_hash(repo_iter)) + repo_side.close_rf_cache() + return return_val + +def Compare_full(src_rp, mirror_rp, inc_rp, compare_time): + """Compare full data of files at src_rp with repo at compare_time + + Like Compare_hash, but do not rely on hashes, instead copy full + data over. + + """ + repo_side = mirror_rp.conn.compare.RepoSide + data_side = src_rp.conn.compare.DataSide + + src_iter = data_side.get_source_select() + attached_repo_iter = repo_side.attach_files(src_iter, mirror_rp, + inc_rp, compare_time) + report_iter = data_side.compare_full(src_rp, attached_repo_iter) + return_val = print_reports(report_iter) + repo_side.close_rf_cache() + return return_val + +def print_reports(report_iter): + """Given an iter of CompareReport objects, print them to screen""" + assert not Globals.server + changed_files_found = 0 + for report in report_iter: + changed_files_found = 1 + indexpath = report.index and "/".join(report.index) or "." + print "%s: %s" % (report.reason, indexpath) + + if not changed_files_found: + log.Log("No changes found. Directory matches archive data.", 2) + return changed_files_found + +def get_basic_report(src_rp, repo_rorp, comp_data_func = None): + """Compare src_rp and repo_rorp, return CompareReport + + comp_data_func should be a function that accepts (src_rp, + repo_rorp) as arguments, and return 1 if they have the same data, + 0 otherwise. If comp_data_func is false, don't compare file data, + only metadata. + + """ + if src_rp: index = src_rp.index + else: index = repo_rorp.index + if not repo_rorp or not repo_rorp.lstat(): + return CompareReport(index, "new") + elif not src_rp or not src_rp.lstat(): + return CompareReport(index, "deleted") + elif comp_data_func and src_rp.isreg() and repo_rorp.isreg(): + if src_rp == repo_rorp: meta_changed = 0 + else: meta_changed = 1 + data_changed = comp_data_func(src_rp, repo_rorp) + + if not meta_changed and not data_changed: return None + if meta_changed: meta_string = "metadata changed, " + else: meta_string = "metadata the same, " + if data_changed: data_string = "data changed" + else: data_string = "data the same" + return CompareReport(index, meta_string + data_string) + elif src_rp == repo_rorp: return None + else: return CompareReport(index, "changed") + + +class RepoSide(restore.MirrorStruct): + """On the repository side, comparing is like restoring""" + def init_and_get_iter(cls, mirror_rp, inc_rp, compare_time): + """Return rorp iter at given compare time""" + cls.set_mirror_and_rest_times(compare_time) + cls.initialize_rf_cache(mirror_rp, inc_rp) + return cls.subtract_indicies(cls.mirror_base.index, + cls.get_mirror_rorp_iter()) + + def attach_files(cls, src_iter, mirror_rp, inc_rp, compare_time): + """Attach data to all the files that need checking + + Return an iterator of repo rorps that includes all the files + that may have changed, and has the fileobj set on all rorps + that need it. + + """ + repo_iter = cls.init_and_get_iter(mirror_rp, inc_rp, compare_time) + base_index = cls.mirror_base.index + for src_rp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter): + index = src_rp and src_rp.index or mir_rorp.index + if src_rp and mir_rorp: + if not src_rp.isreg() and src_rp == mir_rorp: + continue # They must be equal, nothing else to check + if (src_rp.isreg() and mir_rorp.isreg() and + src_rp.getsize() == mir_rorp.getsize()): + mir_rorp.setfile(cls.rf_cache.get_fp(base_index + index)) + mir_rorp.set_attached_filetype('snapshot') + + if mir_rorp: yield mir_rorp + else: yield rpath.RORPath(index) # indicate deleted mir_rorp + +static.MakeClass(RepoSide) + + +class DataSide(backup.SourceStruct): + """On the side that has the current data, compare is like backing up""" + def compare_fast(cls, repo_iter): + """Compare rorps (metadata only) quickly, return report iter""" + src_iter = cls.get_source_select() + for src_rorp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter): + report = get_basic_report(src_rorp, mir_rorp) + if report: yield report + + def compare_hash(cls, repo_iter): + """Like above, but also compare sha1 sums of any regular files""" + def hashs_changed(src_rp, mir_rorp): + """Return 0 if their data hashes same, 1 otherwise""" + if not mir_rorp.has_sha1(): + log.Log("Warning: Metadata file has no digest for %s, " + "unable to compare." % (index,), 2) + return 0 + elif (src_rp.getsize() == mir_rorp.getsize() and + hash.compute_sha1(src_rp) == mir_rorp.get_sha1()): + return 0 + return 1 + + src_iter = cls.get_source_select() + for src_rp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter): + report = get_basic_report(src_rp, mir_rorp, hashs_changed) + if report: yield report + + def compare_full(cls, src_root, repo_iter): + """Given repo iter with full data attached, return report iter""" + def error_handler(exc, src_rp, repo_rorp): + log.Log("Error reading file %s" % (src_rp.path,), 2) + return 0 # They aren't the same if we get an error + + def data_changed(src_rp, repo_rorp): + """Return 0 if full compare of data matches, 1 otherwise""" + if src_rp.getsize() != repo_rorp.getsize(): return 1 + return not robust.check_common_error(error_handler, + rpath.cmpfileobj, (src_rp.open("rb"), repo_rorp.open("rb"))) + + for repo_rorp in repo_iter: + src_rp = src_root.new_index(repo_rorp.index) + report = get_basic_report(src_rp, repo_rorp, data_changed) + if report: yield report + +static.MakeClass(DataSide) + + +class CompareReport: + """When two files don't match, this tells you how they don't match + + This is necessary because the system that is doing the actual + comparing may not be the one printing out the reports. For speed + the compare information can be pipelined back to the client + connection as an iter of CompareReports. + + """ + # self.file is added so that CompareReports can masquerate as + # RORPaths when in an iterator, and thus get pipelined. + file = None + + def __init__(self, index, reason): + self.index = index + self.reason = reason diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py index e92402f..99b8f72 100644 --- a/rdiff-backup/rdiff_backup/connection.py +++ b/rdiff-backup/rdiff_backup/connection.py @@ -153,8 +153,7 @@ class LowLevelPipeConnection(Connection): def _putiter(self, iterator, req_num): """Put an iterator through the pipe""" self._write("i", - str(VirtualFile.new(iterfile.RORPIterToFile(iterator))), - req_num) + str(VirtualFile.new(iterfile.MiscIterToFile(iterator))), req_num) def _putrpath(self, rpath, req_num): """Put an rpath into the pipe @@ -241,7 +240,7 @@ class LowLevelPipeConnection(Connection): elif format_string == "b": result = data elif format_string == "f": result = VirtualFile(self, int(data)) elif format_string == "i": - result = iterfile.FileToRORPIter(VirtualFile(self, int(data))) + result = iterfile.FileToMiscIter(VirtualFile(self, int(data))) elif format_string == "r": result = self._getrorpath(data) elif format_string == "R": result = self._getrpath(data) elif format_string == "Q": result = self._getqrpath(data) @@ -535,7 +534,7 @@ import Globals, Time, Rdiff, Hardlink, FilenameMapping, C, Security, \ Main, rorpiter, selection, increment, statistics, manage, lazy, \ iterfile, rpath, robust, restore, manage, backup, connection, \ TempFile, SetConnections, librsync, log, regress, fs_abilities, \ - eas_acls, user_group + eas_acls, user_group, compare Globals.local_connection = LocalConnection() Globals.connections.append(Globals.local_connection) diff --git a/rdiff-backup/rdiff_backup/hash.py b/rdiff-backup/rdiff_backup/hash.py index 4fcbdab..3e7306f 100644 --- a/rdiff-backup/rdiff_backup/hash.py +++ b/rdiff-backup/rdiff_backup/hash.py @@ -20,6 +20,7 @@ """Contains a file wrapper that returns a hash on close""" import sha +import Globals class FileWrapper: """Wrapper around a file-like object @@ -51,3 +52,17 @@ class Report: def __init__(self, close_val, sha1_digest): assert not close_val # For now just assume inner file closes correctly self.sha1_digest = sha1_digest + + +def compute_sha1(rp, compressed = 0): + """Return the hex sha1 hash of given rpath""" + assert rp.conn is Globals.local_connection # inefficient not to do locally + blocksize = Globals.blocksize + fp = FileWrapper(rp.open("r", compressed)) + while 1: + if not fp.read(blocksize): break + digest = fp.close().sha1_digest + rp.set_sha1(digest) + return digest + + diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py index 0ae998e..608f251 100644 --- a/rdiff-backup/rdiff_backup/iterfile.py +++ b/rdiff-backup/rdiff_backup/iterfile.py @@ -41,14 +41,14 @@ class UnwrapFile: """Return pair (type, data) next in line on the file type is a single character which is either - "o" for object, + "o" for an object, "f" for file, "c" for a continution of a file, "e" for an exception, or None if no more data can be read. Data is either the file's data, if type is "c" or "f", or the - actual object if the type is "o" or "e". + actual object if the type is "o", "e", or "r" """ header = self.file.read(8) @@ -57,8 +57,10 @@ class UnwrapFile: assert None, "Header %s is only %d bytes" % (header, len(header)) type, length = header[0], C.str2long(header[1:]) buf = self.file.read(length) - if type == "o" or type == "e": return type, cPickle.loads(buf) - else: return type, buf + if type in ("o", "e"): return type, cPickle.loads(buf) + else: + assert type in ("f", "c") + return type, buf class IterWrappingFile(UnwrapFile): @@ -213,7 +215,7 @@ class FileWrappingIter: self.currently_in_file.read, [Globals.blocksize]) if buf == "" or buf is None: - assert not self.currently_in_file.close() + self.currently_in_file.close() self.currently_in_file = None if buf is None: # error occurred above, encode exception prefix_letter = "e" @@ -238,33 +240,37 @@ class FileWrappingIter: def close(self): self.closed = 1 -class RORPIterFlush: - """Used to signal that a RORPIterToFile should flush buffer""" +class MiscIterFlush: + """Used to signal that a MiscIterToFile should flush buffer""" pass -class RORPIterFlushRepeat(RORPIterFlush): - """Flush, but then cause RORPIter to yield this same object +class MiscIterFlushRepeat(MiscIterFlush): + """Flush, but then cause Misc Iter to yield this same object - Thus if we put together a pipeline of these, one RORPIterContFlush + Thus if we put together a pipeline of these, one MiscIterFlushRepeat can cause all the segments to flush in sequence. """ pass -class RORPIterToFile(FileWrappingIter): - """Take a RORPIter and give it a file-ish interface +class MiscIterToFile(FileWrappingIter): + """Take an iter and give it a file-ish interface + + This expands on the FileWrappingIter by understanding how to + process RORPaths with file objects attached. It adds a new + character "r" to mark these. This is how we send signatures and diffs across the line. As sending each one separately via a read() call would result in a lot of latency, the read()'s are buffered - a read() call with no arguments will return a variable length string (possibly empty). - To flush the RORPIterToFile, have the iterator yield a - RORPIterFlush class. + To flush the MiscIterToFile, have the iterator yield a + MiscIterFlush class. """ def __init__(self, rpiter, max_buffer_bytes = None, max_buffer_rps = None): - """RORPIterToFile initializer + """MiscIterToFile initializer max_buffer_bytes is the maximum size of the buffer in bytes. max_buffer_rps is the maximum size of the buffer in rorps. @@ -313,17 +319,18 @@ class RORPIterToFile(FileWrappingIter): if hasattr(currentobj, "read") and hasattr(currentobj, "close"): self.currently_in_file = currentobj self.addfromfile("f") - elif (type(currentobj) is types.ClassType and - issubclass(currentobj, iterfile.RORPIterFlush)): - if currentobj is iterfile.RORPIterFlushRepeat: - self.add_flush_repeater() + elif currentobj is iterfile.MiscIterFlush: return None + elif currentobj is iterfile.MiscIterFlushRepeat: + self.add_misc(currentobj) return None - else: self.addrorp(currentobj) + elif isinstance(currentobj, rpath.RORPath): + self.addrorp(currentobj) + else: self.add_misc(currentobj) return 1 - def add_flush_repeater(self): - """Add a RORPIterFlushRepeat object to the buffer""" - pickle = cPickle.dumps(iterfile.RORPIterFlushRepeat, 1) + def add_misc(self, obj): + """Add an arbitrary pickleable object to the buffer""" + pickle = cPickle.dumps(obj, 1) self.array_buf.fromstring("o") self.array_buf.fromstring(C.long2str(long(len(pickle)))) self.array_buf.fromstring(pickle) @@ -336,7 +343,7 @@ class RORPIterToFile(FileWrappingIter): else: pickle = cPickle.dumps((rorp.index, rorp.data, 0), 1) self.rorps_in_buffer += 1 - self.array_buf.fromstring("o") + self.array_buf.fromstring("r") self.array_buf.fromstring(C.long2str(long(len(pickle)))) self.array_buf.fromstring(pickle) @@ -348,8 +355,8 @@ class RORPIterToFile(FileWrappingIter): def close(self): self.closed = 1 -class FileToRORPIter(IterWrappingFile): - """Take a RORPIterToFile and turn it back into a RORPIter""" +class FileToMiscIter(IterWrappingFile): + """Take a MiscIterToFile and turn it back into a iterator""" def __init__(self, file): IterWrappingFile.__init__(self, file) self.buf = "" @@ -363,9 +370,8 @@ class FileToRORPIter(IterWrappingFile): type = None while not type: type, data = self._get() if type == "z": raise StopIteration - elif type == "o": - if data is iterfile.RORPIterFlushRepeat: return data - else: return self.get_rorp(data) + elif type == "r": return self.get_rorp(data) + elif type == "o": return data else: raise IterFileException("Bad file type %s" % (type,)) def get_rorp(self, pickled_tuple): @@ -401,20 +407,21 @@ class FileToRORPIter(IterWrappingFile): if not self.buf: self.buf += self.file.read() if not self.buf: return None, None - assert len(self.buf) >= 8, "Unexpected end of RORPIter file" + assert len(self.buf) >= 8, "Unexpected end of MiscIter file" type, length = self.buf[0], C.str2long(self.buf[1:8]) data = self.buf[8:8+length] self.buf = self.buf[8+length:] - if type == "o" or type == "e": return type, cPickle.loads(data) + if type in "oer": return type, cPickle.loads(data) else: return type, data class ErrorFile: - """File-like that just raises error (used by FileToRORPIter above)""" + """File-like that just raises error (used by FileToMiscIter above)""" def __init__(self, exc): """Initialize new ErrorFile. exc is the exception to raise on read""" self.exc = exc def read(self, l=-1): raise self.exc def close(self): return None + import iterfile diff --git a/rdiff-backup/rdiff_backup/regress.py b/rdiff-backup/rdiff_backup/regress.py index f366e96..5635e77 100644 --- a/rdiff-backup/rdiff_backup/regress.py +++ b/rdiff-backup/rdiff_backup/regress.py @@ -97,8 +97,8 @@ def set_restore_times(): backup time. _mirror_time is the unsuccessful backup time. """ - restore._mirror_time = unsuccessful_backup_time - restore._rest_time = regress_time + restore.MirrorStruct._mirror_time = unsuccessful_backup_time + restore.MirrorStruct._rest_time = regress_time def remove_rbdir_increments(): """Delete the increments in the rdiff-backup-data directory diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py index 58dfc2b..26de579 100644 --- a/rdiff-backup/rdiff_backup/restore.py +++ b/rdiff-backup/rdiff_backup/restore.py @@ -25,12 +25,6 @@ import Globals, Time, Rdiff, Hardlink, rorpiter, selection, rpath, \ log, static, robust, metadata, statistics, TempFile, eas_acls -# This will be set to the time of the current mirror -_mirror_time = None -# This will be set to the exact time to restore to (not restore_to_time) -_rest_time = None - - class RestoreError(Exception): pass def Restore(mirror_rp, inc_rpath, target, restore_to_time): @@ -72,8 +66,8 @@ def ListChangedSince(mirror_rp, inc_rp, restore_to_time): MirrorStruct.set_mirror_and_rest_times(restore_to_time) MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp) - old_iter = MirrorStruct.get_mirror_rorp_iter(_rest_time, 1) - cur_iter = MirrorStruct.get_mirror_rorp_iter(_mirror_time, 1) + old_iter = MirrorStruct.get_mirror_rorp_iter(MirrorStruct._rest_time, 1) + cur_iter = MirrorStruct.get_mirror_rorp_iter(MirrorStruct._mirror_time, 1) collated = rorpiter.Collate2Iters(old_iter, cur_iter) for old_rorp, cur_rorp in collated: if not old_rorp: change = "new" @@ -94,43 +88,23 @@ def ListAtTime(mirror_rp, inc_rp, time): assert mirror_rp.conn is Globals.local_connection, "Run locally only" MirrorStruct.set_mirror_and_rest_times(time) MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp) - old_iter = MirrorStruct.get_mirror_rorp_iter(_rest_time, 1) + old_iter = MirrorStruct.get_mirror_rorp_iter() for rorp in old_iter: yield rorp -def Compare(src_iter, mirror_rp, inc_rp, compare_time): - """Compares metadata in src_rp dir with metadata in mirror_rp at time""" - MirrorStruct.set_mirror_and_rest_times(compare_time) - MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp) - - mir_iter = MirrorStruct.get_mirror_rorp_iter(compare_time, 1) - collated = rorpiter.Collate2Iters(src_iter, mir_iter) - changed_files_found = 0 - for src_rorp, mir_rorp in collated: - if not mir_rorp: change = "new" - elif not src_rorp: change = "deleted" - elif src_rorp == mir_rorp: continue - else: change = "changed" - changed_files_found = 1 - path_desc = (src_rorp and src_rorp.get_indexpath() or - mir_rorp.get_indexpath()) - log.Log("%-7s %s" % (change, path_desc), 2) - if change == "changed": # Log more description of difference - assert not src_rorp.equal_verbose_auto(mir_rorp, 3) - - if not changed_files_found: - log.Log("No changes found. Directory matches archive data.", 2) - MirrorStruct.close_rf_cache() - return changed_files_found - class MirrorStruct: """Hold functions to be run on the mirror side""" - _select = None # If selection command line arguments given, use Select here + # If selection command line arguments given, use Select here + _select = None + # This will be set to the time of the current mirror + _mirror_time = None + # This will be set to the exact time to restore to (not restore_to_time) + _rest_time = None + def set_mirror_and_rest_times(cls, restore_to_time): - """Set global variabels _mirror_time and _rest_time on mirror conn""" - global _mirror_time, _rest_time - _mirror_time = cls.get_mirror_time() - _rest_time = cls.get_rest_time(restore_to_time) + """Set class variabels _mirror_time and _rest_time on mirror conn""" + MirrorStruct._mirror_time = cls.get_mirror_time() + MirrorStruct._rest_time = cls.get_rest_time(restore_to_time) def get_mirror_time(cls): """Return time (in seconds) of latest mirror""" @@ -169,8 +143,8 @@ class MirrorStruct: """ # use dictionary to remove dups - if not _mirror_time: d = {cls.get_mirror_time(): None} - else: d = {_mirror_time: None} + if not cls._mirror_time: d = {cls.get_mirror_time(): None} + else: d = {cls._mirror_time: None} if not rp or not rp.index: rp = Globals.rbdir.append("increments") for inc in get_inclist(rp): d[inc.getinctime()] = None for inc in get_inclist(Globals.rbdir.append("mirror_metadata")): @@ -201,7 +175,7 @@ class MirrorStruct: unwanted files from the metadata_iter. """ - if rest_time is None: rest_time = _rest_time + if rest_time is None: rest_time = cls._rest_time rorp_iter = eas_acls.GetCombinedMetadataIter( Globals.rbdir, rest_time, restrict_index = cls.mirror_base.index, @@ -371,7 +345,7 @@ class CachedRF: rf = self.get_rf(index) if not rf: log.Log("""Error: Unable to retrieve data for file %s! -The cause is probably data loss from the destination directory.""" % +The cause is probably data loss from the backup repository.""" % (index and "/".join(index) or '.',), 2) return cStringIO.StringIO('') return self.get_rf(index).get_restore_fp() @@ -434,7 +408,8 @@ class RestoreFile: """ self.mirror_rp.inc_type = 'snapshot' self.mirror_rp.inc_compressed = 0 - if not self.inc_list or _rest_time >= _mirror_time: + if (not self.inc_list or + MirrorStruct._rest_time >= MirrorStruct._mirror_time): self.relevant_incs = [self.mirror_rp] return @@ -461,7 +436,7 @@ class RestoreFile: incpairs = [] for inc in self.inc_list: time = inc.getinctime() - if time >= _rest_time: incpairs.append((time, inc)) + if time >= MirrorStruct._rest_time: incpairs.append((time, inc)) incpairs.sort() return [pair[1] for pair in incpairs] diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index dac4899..753712f 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -1128,16 +1128,6 @@ class RPath(RORPath): self.fsync(fp) if Globals.fsync_directories: self.get_parent_rp().fsync() - def sync_delete(self): - """Delete self with sync to guarantee completion - - On some filesystems (like linux's ext2), we must sync both the - file and the directory to make sure. - - """ - if self.lstat() and not self.issym(): self.fsync_local(self.delete) - if Globals.fsync_directories: self.get_parent_rp().fsync() - def get_data(self): """Open file as a regular file, read data, close, return data""" fp = self.open("rb") -- cgit v1.2.1