summary | refs | log | tree | commit | diff
path: root/rdiff-backup/rdiff_backup
diff options
context:
space:
mode:
author bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2005-10-31 04:53:31 +0000
committer bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2005-10-31 04:53:31 +0000
commit a5c03feacbbd9361eb3e2abe367b75529c83459b (patch)
tree bcb8f86bf8cfc61bd771b6e45d557bdda7604e80 /rdiff-backup/rdiff_backup
parent a2705f514b471e2b74c98a0cde588863e3ff22c6 (diff)
download rdiff-backup-a5c03feacbbd9361eb3e2abe367b75529c83459b.tar.gz
Added various compare options like --compare-full and --compare-hash
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@664 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup')
-rw-r--r-- rdiff-backup/rdiff_backup/Main.py 59
-rw-r--r-- rdiff-backup/rdiff_backup/Security.py 14
-rw-r--r-- rdiff-backup/rdiff_backup/backup.py 6
-rw-r--r-- rdiff-backup/rdiff_backup/compare.py 213
-rw-r--r-- rdiff-backup/rdiff_backup/connection.py 7
-rw-r--r-- rdiff-backup/rdiff_backup/hash.py 15
-rw-r--r-- rdiff-backup/rdiff_backup/iterfile.py 71
-rw-r--r-- rdiff-backup/rdiff_backup/regress.py 4
-rw-r--r-- rdiff-backup/rdiff_backup/restore.py 65
-rw-r--r-- rdiff-backup/rdiff_backup/rpath.py 10
10 files changed, 340 insertions, 124 deletions
diff --git a/rdiff-backup/rdiff_backup/Main.py b/rdiff-backup/rdiff_backup/Main.py
index 0158d38..0f8a060 100644
--- a/rdiff-backup/rdiff_backup/Main.py
+++ b/rdiff-backup/rdiff_backup/Main.py
@@ -1,4 +1,4 @@
-# Copyright 2002, 2003, 2004 Ben Escoto
+# Copyright 2002, 2003, 2004, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
@@ -24,7 +24,7 @@ import getopt, sys, re, os, cStringIO
from log import Log, LoggerError, ErrorLog
import Globals, Time, SetConnections, selection, robust, rpath, \
manage, backup, connection, restore, FilenameMapping, \
- Security, Hardlink, regress, C, fs_abilities, statistics
+ Security, Hardlink, regress, C, fs_abilities, statistics, compare
action = None
@@ -59,9 +59,10 @@ def parse_cmdlineoptions(arglist):
try: optlist, args = getopt.getopt(arglist, "blr:sv:V",
["backup-mode", "calculate-average", "check-destination-dir",
- "compare", "compare-at-time=", "create-full-path",
- "current-time=", "exclude=", "exclude-device-files",
- "exclude-fifos", "exclude-filelist=",
+ "compare", "compare-at-time=", "compare-hash",
+ "compare-hash-at-time=", "compare-full", "compare-full-at-time=",
+ "create-full-path", "current-time=", "exclude=",
+ "exclude-device-files", "exclude-fifos", "exclude-filelist=",
"exclude-symbolic-links", "exclude-sockets",
"exclude-filelist-stdin", "exclude-globbing-filelist=",
"exclude-globbing-filelist-stdin", "exclude-mirror=",
@@ -91,10 +92,12 @@ def parse_cmdlineoptions(arglist):
elif opt == "--calculate-average": action = "calculate-average"
elif opt == "--carbonfile": Globals.set("carbonfile_active", 1)
elif opt == "--check-destination-dir": action = "check-destination-dir"
- elif opt == "--compare" or opt == "--compare-at-time":
- action = "compare"
- if opt == "--compare": restore_timestr = "now"
- else: restore_timestr = arg
+ elif opt in ("--compare", "--compare-at-time",
+ "--compare-hash", "--compare-hash-at-time",
+ "--compare-full", "--compare-full-at-time"):
+ if opt[-8:] == "-at-time": restore_timestr, opt = arg, opt[:-8]
+ else: restore_timestr = "now"
+ action = opt[2:]
elif opt == "--create-full-path": create_full_path = 1
elif opt == "--current-time":
Globals.set_integer('current_time', arg)
@@ -200,7 +203,8 @@ def check_action():
1: ['list-increments', 'list-increment-sizes',
'remove-older-than', 'list-at-time',
'list-changed-since', 'check-destination-dir'],
- 2: ['backup', 'restore', 'restore-as-of', 'compare']}
+ 2: ['backup', 'restore', 'restore-as-of',
+ 'compare', 'compare-hash', 'compare-full']}
l = len(args)
if l == 0 and action not in arg_action_dict[l]:
commandline_error("No arguments given")
@@ -263,7 +267,7 @@ def take_action(rps):
elif action == "backup": Backup(rps[0], rps[1])
elif action == "calculate-average": CalculateAverage(rps)
elif action == "check-destination-dir": CheckDest(rps[0])
- elif action == "compare": Compare(*rps)
+ elif action.startswith("compare"): Compare(action, rps[0], rps[1])
elif action == "list-at-time": ListAtTime(rps[0])
elif action == "list-changed-since": ListChangedSince(rps[0])
elif action == "list-increments": ListIncrements(rps[0])
@@ -592,7 +596,7 @@ def restore_set_root(rpin):
def ListIncrements(rp):
"""Print out a summary of the increments and their times"""
- rp = require_root_set(rp)
+ rp = require_root_set(rp, 1)
restore_check_backup_dir(restore_root)
mirror_rp = restore_root.new_index(restore_index)
inc_rpath = Globals.rbdir.append_path('increments', restore_index)
@@ -602,24 +606,25 @@ def ListIncrements(rp):
print manage.describe_incs_parsable(incs, mirror_time, mirror_rp)
else: print manage.describe_incs_human(incs, mirror_time, mirror_rp)
-def require_root_set(rp):
+def require_root_set(rp, read_only):
"""Make sure rp is or is in a valid rdiff-backup dest directory.
- Also initializes fs_abilities and quoting and return quoted rp if
- necessary.
+ Also initializes fs_abilities (read or read/write) and quoting and
+ return quoted rp if necessary.
"""
if not restore_set_root(rp):
Log.FatalError(("Bad directory %s.\n" % (rp.path,)) +
"It doesn't appear to be an rdiff-backup destination dir")
- Globals.rbdir.conn.fs_abilities.single_set_globals(Globals.rbdir)
+ Globals.rbdir.conn.fs_abilities.single_set_globals(Globals.rbdir,
+ read_only)
if Globals.chars_to_quote: return restore_init_quoting(rp)
else: return rp
def ListIncrementSizes(rp):
"""Print out a summary of the increments """
- rp = require_root_set(rp)
+ rp = require_root_set(rp, 1)
print manage.ListIncrementSizes(restore_root, restore_index)
@@ -634,7 +639,7 @@ def CalculateAverage(rps):
def RemoveOlderThan(rootrp):
"""Remove all increment files older than a certain time"""
- rootrp = require_root_set(rootrp)
+ rootrp = require_root_set(rootrp, 0)
rot_require_rbdir_base(rootrp)
try: time = Time.genstrtotime(remove_older_than_string)
except Time.TimeException, exc: Log.FatalError(str(exc))
@@ -670,7 +675,7 @@ def rot_require_rbdir_base(rootrp):
def ListChangedSince(rp):
"""List all the files under rp that have changed since restoretime"""
- rp = require_root_set(rp)
+ rp = require_root_set(rp, 1)
try: rest_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_rp = restore_root.new_index(restore_index)
@@ -682,7 +687,7 @@ def ListChangedSince(rp):
def ListAtTime(rp):
"""List files in archive under rp that are present at restoretime"""
- rp = require_root_set(rp)
+ rp = require_root_set(rp, 1)
try: rest_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_rp = restore_root.new_index(restore_index)
@@ -691,7 +696,7 @@ def ListAtTime(rp):
print rorp.get_indexpath()
-def Compare(src_rp, dest_rp, compare_time = None):
+def Compare(compare_type, src_rp, dest_rp, compare_time = None):
"""Compare metadata in src_rp with metadata of backup session
Prints to stdout whenever a file in the src_rp directory has
@@ -702,16 +707,20 @@ def Compare(src_rp, dest_rp, compare_time = None):
"""
global return_val
- dest_rp = require_root_set(dest_rp)
+ dest_rp = require_root_set(dest_rp, 1)
if not compare_time:
try: compare_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_rp = restore_root.new_index(restore_index)
- inc_rp = mirror_rp.append_path("increments", restore_index)
+ inc_rp = Globals.rbdir.append_path("increments", restore_index)
backup_set_select(src_rp) # Sets source rorp iterator
- src_iter = src_rp.conn.backup.SourceStruct.get_source_select()
- return_val = restore.Compare(src_iter, mirror_rp, inc_rp, compare_time)
+ if compare_type == "compare": compare_func = compare.Compare
+ elif compare_type == "compare-hash": compare_func = compare.Compare_hash
+ else:
+ assert compare_type == "compare-full", compare_type
+ compare_func = compare.Compare_full
+ return_val = compare_func(src_rp, mirror_rp, inc_rp, compare_time)
def CheckDest(dest_rp):
diff --git a/rdiff-backup/rdiff_backup/Security.py b/rdiff-backup/rdiff_backup/Security.py
index 53a081c..1e06d46 100644
--- a/rdiff-backup/rdiff_backup/Security.py
+++ b/rdiff-backup/rdiff_backup/Security.py
@@ -113,8 +113,9 @@ def set_security_level(action, cmdpairs):
sec_level = "all"
rdir = getpath(cp2)
elif action in ["test-server", "list-increments", 'list-increment-sizes',
- "list-at-time", "list-changed-since",
- "calculate-average", "remove-older-than", "compare"]:
+ "list-at-time", "list-changed-since",
+ "calculate-average", "remove-older-than", "compare",
+ "compare-hash", "compare-full"]:
sec_level = "minimal"
rdir = tempfile.gettempdir()
else: assert 0, "Unknown action %s" % action
@@ -151,7 +152,14 @@ def set_allowed_requests(sec_level):
"restore.ListAtTime",
"backup.SourceStruct.get_source_select",
"backup.SourceStruct.set_source_select",
- "backup.SourceStruct.get_diffs"])
+ "backup.SourceStruct.get_diffs",
+ "compare.RepoSide.init_and_get_iter",
+ "compare.RepoSide.close_rf_cache",
+ "compare.RepoSide.attach_files",
+ "compare.DataSide.get_source_select",
+ "compare.DataSide.compare_fast",
+ "compare.DataSide.compare_hash",
+ "compare.DataSide.compare_full"])
if sec_level == "update-only" or sec_level == "all":
l.extend(["log.Log.open_logfile_local", "log.Log.close_logfile_local",
"log.ErrorLog.open", "log.ErrorLog.isopen",
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index 0854371..6731b31 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -101,8 +101,8 @@ class SourceStruct:
diff_rorp.set_attached_filetype('snapshot')
for dest_sig in dest_sigiter:
- if dest_sig is iterfile.RORPIterFlushRepeat:
- yield iterfile.RORPIterFlush # Flush buffer when get_sigs does
+ if dest_sig is iterfile.MiscIterFlushRepeat:
+ yield iterfile.MiscIterFlush # Flush buffer when get_sigs does
continue
src_rp = (source_rps.get(dest_sig.index) or
rpath.RORPath(dest_sig.index))
@@ -172,7 +172,7 @@ class DestinationStruct:
if (Globals.backup_reader is not Globals.backup_writer and
num_rorps_skipped > flush_threshold):
num_rorps_skipped = 0
- yield iterfile.RORPIterFlushRepeat
+ yield iterfile.MiscIterFlushRepeat
else:
index = src_rorp and src_rorp.index or dest_rorp.index
sig = cls.get_one_sig(dest_base_rpath, index,
diff --git a/rdiff-backup/rdiff_backup/compare.py b/rdiff-backup/rdiff_backup/compare.py
new file mode 100644
index 0000000..9ceff12
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/compare.py
@@ -0,0 +1,213 @@
+# Copyright 2002, 2003, 2004, 2005 Ben Escoto
+#
+# This file is part of rdiff-backup.
+#
+# rdiff-backup is free software; you can redistribute it and/or modify
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# rdiff-backup is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with rdiff-backup; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+"""Perform various kinds of comparisons.
+
+For instance, full-file compare, compare by hash, and metadata-only
+compare. This uses elements of the backup and restore modules.
+
+"""
+
+import Globals, restore, rorpiter, log, backup, static, rpath, hash, robust
+
+def Compare(src_rp, mirror_rp, inc_rp, compare_time):
+ """Compares metadata in src_rp dir with metadata in mirror_rp at time"""
+ repo_side = mirror_rp.conn.compare.RepoSide
+ data_side = src_rp.conn.compare.DataSide
+
+ repo_iter = repo_side.init_and_get_iter(mirror_rp, inc_rp, compare_time)
+ return_val = print_reports(data_side.compare_fast(repo_iter))
+ repo_side.close_rf_cache()
+ return return_val
+
+def Compare_hash(src_rp, mirror_rp, inc_rp, compare_time):
+ """Compare files at src_rp with repo at compare_time
+
+ Note metadata differences, but also check to see if file data is
+ different. If two regular files have the same size, hash the
+ source and compare to the hash presumably already present in repo.
+
+ """
+ repo_side = mirror_rp.conn.compare.RepoSide
+ data_side = src_rp.conn.compare.DataSide
+
+ repo_iter = repo_side.init_and_get_iter(mirror_rp, inc_rp, compare_time)
+ return_val = print_reports(data_side.compare_hash(repo_iter))
+ repo_side.close_rf_cache()
+ return return_val
+
+def Compare_full(src_rp, mirror_rp, inc_rp, compare_time):
+ """Compare full data of files at src_rp with repo at compare_time
+
+ Like Compare_hash, but do not rely on hashes, instead copy full
+ data over.
+
+ """
+ repo_side = mirror_rp.conn.compare.RepoSide
+ data_side = src_rp.conn.compare.DataSide
+
+ src_iter = data_side.get_source_select()
+ attached_repo_iter = repo_side.attach_files(src_iter, mirror_rp,
+ inc_rp, compare_time)
+ report_iter = data_side.compare_full(src_rp, attached_repo_iter)
+ return_val = print_reports(report_iter)
+ repo_side.close_rf_cache()
+ return return_val
+
+def print_reports(report_iter):
+ """Given an iter of CompareReport objects, print them to screen"""
+ assert not Globals.server
+ changed_files_found = 0
+ for report in report_iter:
+ changed_files_found = 1
+ indexpath = report.index and "/".join(report.index) or "."
+ print "%s: %s" % (report.reason, indexpath)
+
+ if not changed_files_found:
+ log.Log("No changes found. Directory matches archive data.", 2)
+ return changed_files_found
+
+def get_basic_report(src_rp, repo_rorp, comp_data_func = None):
+ """Compare src_rp and repo_rorp, return CompareReport
+
+ comp_data_func should be a function that accepts (src_rp,
+ repo_rorp) as arguments, and return 1 if they have the same data,
+ 0 otherwise. If comp_data_func is false, don't compare file data,
+ only metadata.
+
+ """
+ if src_rp: index = src_rp.index
+ else: index = repo_rorp.index
+ if not repo_rorp or not repo_rorp.lstat():
+ return CompareReport(index, "new")
+ elif not src_rp or not src_rp.lstat():
+ return CompareReport(index, "deleted")
+ elif comp_data_func and src_rp.isreg() and repo_rorp.isreg():
+ if src_rp == repo_rorp: meta_changed = 0
+ else: meta_changed = 1
+ data_changed = comp_data_func(src_rp, repo_rorp)
+
+ if not meta_changed and not data_changed: return None
+ if meta_changed: meta_string = "metadata changed, "
+ else: meta_string = "metadata the same, "
+ if data_changed: data_string = "data changed"
+ else: data_string = "data the same"
+ return CompareReport(index, meta_string + data_string)
+ elif src_rp == repo_rorp: return None
+ else: return CompareReport(index, "changed")
+
+
+class RepoSide(restore.MirrorStruct):
+ """On the repository side, comparing is like restoring"""
+ def init_and_get_iter(cls, mirror_rp, inc_rp, compare_time):
+ """Return rorp iter at given compare time"""
+ cls.set_mirror_and_rest_times(compare_time)
+ cls.initialize_rf_cache(mirror_rp, inc_rp)
+ return cls.subtract_indicies(cls.mirror_base.index,
+ cls.get_mirror_rorp_iter())
+
+ def attach_files(cls, src_iter, mirror_rp, inc_rp, compare_time):
+ """Attach data to all the files that need checking
+
+ Return an iterator of repo rorps that includes all the files
+ that may have changed, and has the fileobj set on all rorps
+ that need it.
+
+ """
+ repo_iter = cls.init_and_get_iter(mirror_rp, inc_rp, compare_time)
+ base_index = cls.mirror_base.index
+ for src_rp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
+ index = src_rp and src_rp.index or mir_rorp.index
+ if src_rp and mir_rorp:
+ if not src_rp.isreg() and src_rp == mir_rorp:
+ continue # They must be equal, nothing else to check
+ if (src_rp.isreg() and mir_rorp.isreg() and
+ src_rp.getsize() == mir_rorp.getsize()):
+ mir_rorp.setfile(cls.rf_cache.get_fp(base_index + index))
+ mir_rorp.set_attached_filetype('snapshot')
+
+ if mir_rorp: yield mir_rorp
+ else: yield rpath.RORPath(index) # indicate deleted mir_rorp
+
+static.MakeClass(RepoSide)
+
+
+class DataSide(backup.SourceStruct):
+ """On the side that has the current data, compare is like backing up"""
+ def compare_fast(cls, repo_iter):
+ """Compare rorps (metadata only) quickly, return report iter"""
+ src_iter = cls.get_source_select()
+ for src_rorp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
+ report = get_basic_report(src_rorp, mir_rorp)
+ if report: yield report
+
+ def compare_hash(cls, repo_iter):
+ """Like above, but also compare sha1 sums of any regular files"""
+ def hashs_changed(src_rp, mir_rorp):
+ """Return 0 if their data hashes same, 1 otherwise"""
+ if not mir_rorp.has_sha1():
+ log.Log("Warning: Metadata file has no digest for %s, "
+ "unable to compare." % (index,), 2)
+ return 0
+ elif (src_rp.getsize() == mir_rorp.getsize() and
+ hash.compute_sha1(src_rp) == mir_rorp.get_sha1()):
+ return 0
+ return 1
+
+ src_iter = cls.get_source_select()
+ for src_rp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
+ report = get_basic_report(src_rp, mir_rorp, hashs_changed)
+ if report: yield report
+
+ def compare_full(cls, src_root, repo_iter):
+ """Given repo iter with full data attached, return report iter"""
+ def error_handler(exc, src_rp, repo_rorp):
+ log.Log("Error reading file %s" % (src_rp.path,), 2)
+ return 0 # They aren't the same if we get an error
+
+ def data_changed(src_rp, repo_rorp):
+ """Return 0 if full compare of data matches, 1 otherwise"""
+ if src_rp.getsize() != repo_rorp.getsize(): return 1
+ return not robust.check_common_error(error_handler,
+ rpath.cmpfileobj, (src_rp.open("rb"), repo_rorp.open("rb")))
+
+ for repo_rorp in repo_iter:
+ src_rp = src_root.new_index(repo_rorp.index)
+ report = get_basic_report(src_rp, repo_rorp, data_changed)
+ if report: yield report
+
+static.MakeClass(DataSide)
+
+
+class CompareReport:
+ """When two files don't match, this tells you how they don't match
+
+ This is necessary because the system that is doing the actual
+ comparing may not be the one printing out the reports. For speed
+ the compare information can be pipelined back to the client
+ connection as an iter of CompareReports.
+
+ """
+ # self.file is added so that CompareReports can masquerade as
+ # RORPaths when in an iterator, and thus get pipelined.
+ file = None
+
+ def __init__(self, index, reason):
+ self.index = index
+ self.reason = reason
diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py
index e92402f..99b8f72 100644
--- a/rdiff-backup/rdiff_backup/connection.py
+++ b/rdiff-backup/rdiff_backup/connection.py
@@ -153,8 +153,7 @@ class LowLevelPipeConnection(Connection):
def _putiter(self, iterator, req_num):
"""Put an iterator through the pipe"""
self._write("i",
- str(VirtualFile.new(iterfile.RORPIterToFile(iterator))),
- req_num)
+ str(VirtualFile.new(iterfile.MiscIterToFile(iterator))), req_num)
def _putrpath(self, rpath, req_num):
"""Put an rpath into the pipe
@@ -241,7 +240,7 @@ class LowLevelPipeConnection(Connection):
elif format_string == "b": result = data
elif format_string == "f": result = VirtualFile(self, int(data))
elif format_string == "i":
- result = iterfile.FileToRORPIter(VirtualFile(self, int(data)))
+ result = iterfile.FileToMiscIter(VirtualFile(self, int(data)))
elif format_string == "r": result = self._getrorpath(data)
elif format_string == "R": result = self._getrpath(data)
elif format_string == "Q": result = self._getqrpath(data)
@@ -535,7 +534,7 @@ import Globals, Time, Rdiff, Hardlink, FilenameMapping, C, Security, \
Main, rorpiter, selection, increment, statistics, manage, lazy, \
iterfile, rpath, robust, restore, manage, backup, connection, \
TempFile, SetConnections, librsync, log, regress, fs_abilities, \
- eas_acls, user_group
+ eas_acls, user_group, compare
Globals.local_connection = LocalConnection()
Globals.connections.append(Globals.local_connection)
diff --git a/rdiff-backup/rdiff_backup/hash.py b/rdiff-backup/rdiff_backup/hash.py
index 4fcbdab..3e7306f 100644
--- a/rdiff-backup/rdiff_backup/hash.py
+++ b/rdiff-backup/rdiff_backup/hash.py
@@ -20,6 +20,7 @@
"""Contains a file wrapper that returns a hash on close"""
import sha
+import Globals
class FileWrapper:
"""Wrapper around a file-like object
@@ -51,3 +52,17 @@ class Report:
def __init__(self, close_val, sha1_digest):
assert not close_val # For now just assume inner file closes correctly
self.sha1_digest = sha1_digest
+
+
+def compute_sha1(rp, compressed = 0):
+ """Return the hex sha1 hash of given rpath"""
+ assert rp.conn is Globals.local_connection # inefficient not to do locally
+ blocksize = Globals.blocksize
+ fp = FileWrapper(rp.open("r", compressed))
+ while 1:
+ if not fp.read(blocksize): break
+ digest = fp.close().sha1_digest
+ rp.set_sha1(digest)
+ return digest
+
+
diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py
index 0ae998e..608f251 100644
--- a/rdiff-backup/rdiff_backup/iterfile.py
+++ b/rdiff-backup/rdiff_backup/iterfile.py
@@ -41,14 +41,14 @@ class UnwrapFile:
"""Return pair (type, data) next in line on the file
type is a single character which is either
- "o" for object,
+ "o" for an object,
"f" for file,
"c" for a continution of a file,
"e" for an exception, or
None if no more data can be read.
Data is either the file's data, if type is "c" or "f", or the
- actual object if the type is "o" or "e".
+ actual object if the type is "o", "e", or "r"
"""
header = self.file.read(8)
@@ -57,8 +57,10 @@ class UnwrapFile:
assert None, "Header %s is only %d bytes" % (header, len(header))
type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
- if type == "o" or type == "e": return type, cPickle.loads(buf)
- else: return type, buf
+ if type in ("o", "e"): return type, cPickle.loads(buf)
+ else:
+ assert type in ("f", "c")
+ return type, buf
class IterWrappingFile(UnwrapFile):
@@ -213,7 +215,7 @@ class FileWrappingIter:
self.currently_in_file.read,
[Globals.blocksize])
if buf == "" or buf is None:
- assert not self.currently_in_file.close()
+ self.currently_in_file.close()
self.currently_in_file = None
if buf is None: # error occurred above, encode exception
prefix_letter = "e"
@@ -238,33 +240,37 @@ class FileWrappingIter:
def close(self): self.closed = 1
-class RORPIterFlush:
- """Used to signal that a RORPIterToFile should flush buffer"""
+class MiscIterFlush:
+ """Used to signal that a MiscIterToFile should flush buffer"""
pass
-class RORPIterFlushRepeat(RORPIterFlush):
- """Flush, but then cause RORPIter to yield this same object
+class MiscIterFlushRepeat(MiscIterFlush):
+ """Flush, but then cause Misc Iter to yield this same object
- Thus if we put together a pipeline of these, one RORPIterContFlush
+ Thus if we put together a pipeline of these, one MiscIterFlushRepeat
can cause all the segments to flush in sequence.
"""
pass
-class RORPIterToFile(FileWrappingIter):
- """Take a RORPIter and give it a file-ish interface
+class MiscIterToFile(FileWrappingIter):
+ """Take an iter and give it a file-ish interface
+
+ This expands on the FileWrappingIter by understanding how to
+ process RORPaths with file objects attached. It adds a new
+ character "r" to mark these.
This is how we send signatures and diffs across the line. As
sending each one separately via a read() call would result in a
lot of latency, the read()'s are buffered - a read() call with no
arguments will return a variable length string (possibly empty).
- To flush the RORPIterToFile, have the iterator yield a
- RORPIterFlush class.
+ To flush the MiscIterToFile, have the iterator yield a
+ MiscIterFlush class.
"""
def __init__(self, rpiter, max_buffer_bytes = None, max_buffer_rps = None):
- """RORPIterToFile initializer
+ """MiscIterToFile initializer
max_buffer_bytes is the maximum size of the buffer in bytes.
max_buffer_rps is the maximum size of the buffer in rorps.
@@ -313,17 +319,18 @@ class RORPIterToFile(FileWrappingIter):
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
self.addfromfile("f")
- elif (type(currentobj) is types.ClassType and
- issubclass(currentobj, iterfile.RORPIterFlush)):
- if currentobj is iterfile.RORPIterFlushRepeat:
- self.add_flush_repeater()
+ elif currentobj is iterfile.MiscIterFlush: return None
+ elif currentobj is iterfile.MiscIterFlushRepeat:
+ self.add_misc(currentobj)
return None
- else: self.addrorp(currentobj)
+ elif isinstance(currentobj, rpath.RORPath):
+ self.addrorp(currentobj)
+ else: self.add_misc(currentobj)
return 1
- def add_flush_repeater(self):
- """Add a RORPIterFlushRepeat object to the buffer"""
- pickle = cPickle.dumps(iterfile.RORPIterFlushRepeat, 1)
+ def add_misc(self, obj):
+ """Add an arbitrary pickleable object to the buffer"""
+ pickle = cPickle.dumps(obj, 1)
self.array_buf.fromstring("o")
self.array_buf.fromstring(C.long2str(long(len(pickle))))
self.array_buf.fromstring(pickle)
@@ -336,7 +343,7 @@ class RORPIterToFile(FileWrappingIter):
else:
pickle = cPickle.dumps((rorp.index, rorp.data, 0), 1)
self.rorps_in_buffer += 1
- self.array_buf.fromstring("o")
+ self.array_buf.fromstring("r")
self.array_buf.fromstring(C.long2str(long(len(pickle))))
self.array_buf.fromstring(pickle)
@@ -348,8 +355,8 @@ class RORPIterToFile(FileWrappingIter):
def close(self): self.closed = 1
-class FileToRORPIter(IterWrappingFile):
- """Take a RORPIterToFile and turn it back into a RORPIter"""
+class FileToMiscIter(IterWrappingFile):
+ """Take a MiscIterToFile and turn it back into an iterator"""
def __init__(self, file):
IterWrappingFile.__init__(self, file)
self.buf = ""
@@ -363,9 +370,8 @@ class FileToRORPIter(IterWrappingFile):
type = None
while not type: type, data = self._get()
if type == "z": raise StopIteration
- elif type == "o":
- if data is iterfile.RORPIterFlushRepeat: return data
- else: return self.get_rorp(data)
+ elif type == "r": return self.get_rorp(data)
+ elif type == "o": return data
else: raise IterFileException("Bad file type %s" % (type,))
def get_rorp(self, pickled_tuple):
@@ -401,20 +407,21 @@ class FileToRORPIter(IterWrappingFile):
if not self.buf: self.buf += self.file.read()
if not self.buf: return None, None
- assert len(self.buf) >= 8, "Unexpected end of RORPIter file"
+ assert len(self.buf) >= 8, "Unexpected end of MiscIter file"
type, length = self.buf[0], C.str2long(self.buf[1:8])
data = self.buf[8:8+length]
self.buf = self.buf[8+length:]
- if type == "o" or type == "e": return type, cPickle.loads(data)
+ if type in "oer": return type, cPickle.loads(data)
else: return type, data
class ErrorFile:
- """File-like that just raises error (used by FileToRORPIter above)"""
+ """File-like that just raises error (used by FileToMiscIter above)"""
def __init__(self, exc):
"""Initialize new ErrorFile. exc is the exception to raise on read"""
self.exc = exc
def read(self, l=-1): raise self.exc
def close(self): return None
+
import iterfile
diff --git a/rdiff-backup/rdiff_backup/regress.py b/rdiff-backup/rdiff_backup/regress.py
index f366e96..5635e77 100644
--- a/rdiff-backup/rdiff_backup/regress.py
+++ b/rdiff-backup/rdiff_backup/regress.py
@@ -97,8 +97,8 @@ def set_restore_times():
backup time. _mirror_time is the unsuccessful backup time.
"""
- restore._mirror_time = unsuccessful_backup_time
- restore._rest_time = regress_time
+ restore.MirrorStruct._mirror_time = unsuccessful_backup_time
+ restore.MirrorStruct._rest_time = regress_time
def remove_rbdir_increments():
"""Delete the increments in the rdiff-backup-data directory
diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py
index 58dfc2b..26de579 100644
--- a/rdiff-backup/rdiff_backup/restore.py
+++ b/rdiff-backup/rdiff_backup/restore.py
@@ -25,12 +25,6 @@ import Globals, Time, Rdiff, Hardlink, rorpiter, selection, rpath, \
log, static, robust, metadata, statistics, TempFile, eas_acls
-# This will be set to the time of the current mirror
-_mirror_time = None
-# This will be set to the exact time to restore to (not restore_to_time)
-_rest_time = None
-
-
class RestoreError(Exception): pass
def Restore(mirror_rp, inc_rpath, target, restore_to_time):
@@ -72,8 +66,8 @@ def ListChangedSince(mirror_rp, inc_rp, restore_to_time):
MirrorStruct.set_mirror_and_rest_times(restore_to_time)
MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)
- old_iter = MirrorStruct.get_mirror_rorp_iter(_rest_time, 1)
- cur_iter = MirrorStruct.get_mirror_rorp_iter(_mirror_time, 1)
+ old_iter = MirrorStruct.get_mirror_rorp_iter(MirrorStruct._rest_time, 1)
+ cur_iter = MirrorStruct.get_mirror_rorp_iter(MirrorStruct._mirror_time, 1)
collated = rorpiter.Collate2Iters(old_iter, cur_iter)
for old_rorp, cur_rorp in collated:
if not old_rorp: change = "new"
@@ -94,43 +88,23 @@ def ListAtTime(mirror_rp, inc_rp, time):
assert mirror_rp.conn is Globals.local_connection, "Run locally only"
MirrorStruct.set_mirror_and_rest_times(time)
MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)
- old_iter = MirrorStruct.get_mirror_rorp_iter(_rest_time, 1)
+ old_iter = MirrorStruct.get_mirror_rorp_iter()
for rorp in old_iter: yield rorp
-def Compare(src_iter, mirror_rp, inc_rp, compare_time):
- """Compares metadata in src_rp dir with metadata in mirror_rp at time"""
- MirrorStruct.set_mirror_and_rest_times(compare_time)
- MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)
-
- mir_iter = MirrorStruct.get_mirror_rorp_iter(compare_time, 1)
- collated = rorpiter.Collate2Iters(src_iter, mir_iter)
- changed_files_found = 0
- for src_rorp, mir_rorp in collated:
- if not mir_rorp: change = "new"
- elif not src_rorp: change = "deleted"
- elif src_rorp == mir_rorp: continue
- else: change = "changed"
- changed_files_found = 1
- path_desc = (src_rorp and src_rorp.get_indexpath() or
- mir_rorp.get_indexpath())
- log.Log("%-7s %s" % (change, path_desc), 2)
- if change == "changed": # Log more description of difference
- assert not src_rorp.equal_verbose_auto(mir_rorp, 3)
-
- if not changed_files_found:
- log.Log("No changes found. Directory matches archive data.", 2)
- MirrorStruct.close_rf_cache()
- return changed_files_found
-
class MirrorStruct:
"""Hold functions to be run on the mirror side"""
- _select = None # If selection command line arguments given, use Select here
+ # If selection command line arguments given, use Select here
+ _select = None
+ # This will be set to the time of the current mirror
+ _mirror_time = None
+ # This will be set to the exact time to restore to (not restore_to_time)
+ _rest_time = None
+
def set_mirror_and_rest_times(cls, restore_to_time):
- """Set global variabels _mirror_time and _rest_time on mirror conn"""
- global _mirror_time, _rest_time
- _mirror_time = cls.get_mirror_time()
- _rest_time = cls.get_rest_time(restore_to_time)
+ """Set class variables _mirror_time and _rest_time on mirror conn"""
+ MirrorStruct._mirror_time = cls.get_mirror_time()
+ MirrorStruct._rest_time = cls.get_rest_time(restore_to_time)
def get_mirror_time(cls):
"""Return time (in seconds) of latest mirror"""
@@ -169,8 +143,8 @@ class MirrorStruct:
"""
# use dictionary to remove dups
- if not _mirror_time: d = {cls.get_mirror_time(): None}
- else: d = {_mirror_time: None}
+ if not cls._mirror_time: d = {cls.get_mirror_time(): None}
+ else: d = {cls._mirror_time: None}
if not rp or not rp.index: rp = Globals.rbdir.append("increments")
for inc in get_inclist(rp): d[inc.getinctime()] = None
for inc in get_inclist(Globals.rbdir.append("mirror_metadata")):
@@ -201,7 +175,7 @@ class MirrorStruct:
unwanted files from the metadata_iter.
"""
- if rest_time is None: rest_time = _rest_time
+ if rest_time is None: rest_time = cls._rest_time
rorp_iter = eas_acls.GetCombinedMetadataIter(
Globals.rbdir, rest_time, restrict_index = cls.mirror_base.index,
@@ -371,7 +345,7 @@ class CachedRF:
rf = self.get_rf(index)
if not rf:
log.Log("""Error: Unable to retrieve data for file %s!
-The cause is probably data loss from the destination directory.""" %
+The cause is probably data loss from the backup repository.""" %
(index and "/".join(index) or '.',), 2)
return cStringIO.StringIO('')
return self.get_rf(index).get_restore_fp()
@@ -434,7 +408,8 @@ class RestoreFile:
"""
self.mirror_rp.inc_type = 'snapshot'
self.mirror_rp.inc_compressed = 0
- if not self.inc_list or _rest_time >= _mirror_time:
+ if (not self.inc_list or
+ MirrorStruct._rest_time >= MirrorStruct._mirror_time):
self.relevant_incs = [self.mirror_rp]
return
@@ -461,7 +436,7 @@ class RestoreFile:
incpairs = []
for inc in self.inc_list:
time = inc.getinctime()
- if time >= _rest_time: incpairs.append((time, inc))
+ if time >= MirrorStruct._rest_time: incpairs.append((time, inc))
incpairs.sort()
return [pair[1] for pair in incpairs]
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index dac4899..753712f 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -1128,16 +1128,6 @@ class RPath(RORPath):
self.fsync(fp)
if Globals.fsync_directories: self.get_parent_rp().fsync()
- def sync_delete(self):
- """Delete self with sync to guarantee completion
-
- On some filesystems (like linux's ext2), we must sync both the
- file and the directory to make sure.
-
- """
- if self.lstat() and not self.issym(): self.fsync_local(self.delete)
- if Globals.fsync_directories: self.get_parent_rp().fsync()
-
def get_data(self):
"""Open file as a regular file, read data, close, return data"""
fp = self.open("rb")