From 9613406fbab1949f66fe7858590cab990c7b4b25 Mon Sep 17 00:00:00 2001 From: bescoto Date: Tue, 11 Feb 2003 07:37:12 +0000 Subject: First pass at integrating regress code At this point most of the tests work, but there are still problems with the finaltest error tests on /proc, and with some selection options. The regress code is totally unchecked, and regresstest.py is unwritten. git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@277 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/Globals.py | 2 +- rdiff-backup/rdiff_backup/Hardlink.py | 2 +- rdiff-backup/rdiff_backup/Main.py | 141 +++++++++++++----- rdiff-backup/rdiff_backup/Rdiff.py | 2 +- rdiff-backup/rdiff_backup/TempFile.py | 59 +++----- rdiff-backup/rdiff_backup/backup.py | 6 +- rdiff-backup/rdiff_backup/cmodule.c | 11 ++ rdiff-backup/rdiff_backup/connection.py | 16 -- rdiff-backup/rdiff_backup/journal.py | 119 +++++++++------ rdiff-backup/rdiff_backup/metadata.py | 5 +- rdiff-backup/rdiff_backup/regress.py | 249 ++++++++++++++++++++++++++++++++ rdiff-backup/rdiff_backup/restore.py | 2 +- rdiff-backup/rdiff_backup/rorpiter.py | 2 +- rdiff-backup/rdiff_backup/rpath.py | 26 +++- rdiff-backup/rdiff_backup/statistics.py | 2 +- rdiff-backup/testing/journaltest.py | 18 ++- rdiff-backup/testing/regresstest.py | 11 ++ rdiff-backup/testing/timetest.py | 14 -- 18 files changed, 516 insertions(+), 171 deletions(-) create mode 100644 rdiff-backup/rdiff_backup/regress.py create mode 100644 rdiff-backup/testing/regresstest.py diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py index a06e246..97c42de 100644 --- a/rdiff-backup/rdiff_backup/Globals.py +++ b/rdiff-backup/rdiff_backup/Globals.py @@ -99,7 +99,7 @@ client_conn = None # list. changed_settings = [] -# The RPath of the rdiff-backup-data directory. +# The RPath or QuotedRPath of the rdiff-backup-data directory. rbdir = None # quoting_enabled is true if we should quote certain characters in diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py index 0eefe42..f9836fb 100644 --- a/rdiff-backup/rdiff_backup/Hardlink.py +++ b/rdiff-backup/rdiff_backup/Hardlink.py @@ -32,7 +32,7 @@ source side should only transmit inode information. from __future__ import generators import cPickle -import Globals, Time, TempFile, rpath, log, robust +import Globals, Time, rpath, log, robust # In all of these lists of indicies are the values. The keys in # _inode_ ones are (inode, devloc) pairs. 
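The Main.py changes below implement a simple crash-detection protocol: an incremental session touches a second current_mirror.<time>.data marker before its first write and removes the older marker only after a final sync, so finding two markers at startup means the previous session was interrupted. A minimal sketch of the detection step in plain Python, using os calls rather than rdiff-backup's RPath layer (the function names and the data_dir argument are illustrative, not part of the rdiff-backup API):

    import os

    def count_mirror_markers(data_dir):
        # Count current_mirror.<time>.data marker files in an
        # rdiff-backup-data directory; None if the directory is missing.
        if not os.path.isdir(data_dir): return None
        return len([name for name in os.listdir(data_dir)
                    if name.startswith("current_mirror.")])

    def needs_regress(data_dir):
        # Same convention as checkdest_need_check() below:
        # None = not a backup destination, 0 = consistent, 1 = regress needed
        count = count_mirror_markers(data_dir)
        if not count: return None    # no markers: not a destination dir
        elif count == 1: return 0    # one marker: last session completed
        else: return 1               # two markers: session was interrupted

Under this scheme a backup, restore, or remove-older-than run can refuse to proceed until a directory reporting 1 has been regressed.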
diff --git a/rdiff-backup/rdiff_backup/Main.py b/rdiff-backup/rdiff_backup/Main.py index bfcc906..b9120fb 100644 --- a/rdiff-backup/rdiff_backup/Main.py +++ b/rdiff-backup/rdiff_backup/Main.py @@ -24,7 +24,7 @@ import getopt, sys, re, os from log import Log, LoggerError import Globals, Time, SetConnections, selection, robust, rpath, \ manage, backup, connection, restore, FilenameMapping, \ - Security, Hardlink + Security, Hardlink, regress, C action = None @@ -44,12 +44,12 @@ def parse_cmdlineoptions(arglist): try: optlist, args = getopt.getopt(arglist, "blr:sv:V", ["backup-mode", "calculate-average", "chars-to-quote=", - "current-time=", "exclude=", "exclude-device-files", - "exclude-filelist=", "exclude-filelist-stdin", - "exclude-globbing-filelist=", "exclude-mirror=", - "exclude-other-filesystems", "exclude-regexp=", - "exclude-special-files", "force", "include=", - "include-filelist=", "include-filelist-stdin", + "check-destination-dir", "current-time=", "exclude=", + "exclude-device-files", "exclude-filelist=", + "exclude-filelist-stdin", "exclude-globbing-filelist=", + "exclude-mirror=", "exclude-other-filesystems", + "exclude-regexp=", "exclude-special-files", "force", + "include=", "include-filelist=", "include-filelist-stdin", "include-globbing-filelist=", "include-regexp=", "list-changed-since=", "list-increments", "no-compression", "no-compression-regexp=", "no-hard-links", "null-separator", @@ -66,6 +66,7 @@ def parse_cmdlineoptions(arglist): for opt, arg in optlist: if opt == "-b" or opt == "--backup-mode": action = "backup" elif opt == "--calculate-average": action = "calculate-average" + elif opt == "--check-destination-dir": action = "check-destination-dir" elif opt == "--chars-to-quote": Globals.set('chars_to_quote', arg) Globals.set('quoting_enabled', 1) @@ -176,7 +177,8 @@ def set_action(): commandline_error("Two arguments are required (source, destination).") if l == 2 and (action == "list-increments" or action == "remove-older-than" or - action == "list-changed-since"): + action == "list-changed-since" or + action == "check-destination-dir"): commandline_error("Only use one argument, " "the root of the backup directory") if l > 2 and action != "calculate-average": @@ -211,6 +213,7 @@ def take_action(rps): elif action == "list-increments": ListIncrements(rps[0]) elif action == "remove-older-than": RemoveOlderThan(rps[0]) elif action == "calculate-average": CalculateAverage(rps) + elif action == "check-destination-dir": CheckDest(rps[0]) else: raise AssertionError("Unknown action " + action) def cleanup(): @@ -239,10 +242,13 @@ def Backup(rpin, rpout): backup_set_select(rpin) backup_init_dirs(rpin, rpout) if prevtime: + rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout) Time.setprevtime(prevtime) backup.Mirror_and_increment(rpin, rpout, incdir) - else: backup.Mirror(rpin, rpout) - rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout) + rpout.conn.Main.backup_remove_curmirror_local() + else: + backup.Mirror(rpin, rpout) + rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout) def backup_set_select(rpin): """Create Select objects on source connection""" @@ -266,6 +272,7 @@ def backup_init_dirs(rpin, rpout): datadir = rpout.append_path("rdiff-backup-data") SetConnections.UpdateGlobal('rbdir', datadir) + checkdest_if_necessary(rpout) incdir = datadir.append_path("increments") prevtime = backup_get_mirrortime() @@ -305,39 +312,45 @@ def backup_warn_if_infinite_regress(rpin, rpout): source directory '%s'. This could cause an infinite regress. 
You may need to use the --exclude option.""" % (rpout.path, rpin.path), 2) -def backup_get_mirrorrps(): - """Return list of current_mirror rps""" - datadir = Globals.rbdir - if not datadir.isdir(): return [] - mirrorrps = [datadir.append(fn) for fn in datadir.listdir() - if fn.startswith("current_mirror.")] - return filter(lambda rp: rp.isincfile(), mirrorrps) - def backup_get_mirrortime(): """Return time in seconds of previous mirror, or None if cannot""" - mirrorrps = backup_get_mirrorrps() - if not mirrorrps: return None - if len(mirrorrps) > 1: - Log( -"""Warning: duplicate current_mirror files found. Perhaps something -went wrong during your last backup? Using """ + mirrorrps[-1].path, 2) - - return mirrorrps[-1].getinctime() + incbase = Globals.rbdir.append_path("current_mirror") + mirror_rps = restore.get_inclist(incbase) + assert len(mirror_rps) <= 1, \ + "Found %s current_mirror rps, expected <=1" % (len(mirror_rps),) + if mirror_rps: return mirror_rps[0].getinctime() + else: return None def backup_touch_curmirror_local(rpin, rpout): """Make a file like current_mirror.time.data to record time - Also updates rpout so mod times don't get messed up. This should - be run on the destination connection. + When doing an incremental backup, this should happen before any + other writes, and the file should be removed after all writes. + That way, if there are two current_mirror files, we know the + previous session aborted. + + When doing the initial full backup, the file can be created after + everything else is in place. """ - datadir = Globals.rbdir - map(rpath.RPath.delete, backup_get_mirrorrps()) - mirrorrp = datadir.append("current_mirror.%s.%s" % (Time.curtimestr, - "data")) + mirrorrp = Globals.rbdir.append("current_mirror.%s.%s" % (Time.curtimestr, + "data")) Log("Touching mirror marker %s" % mirrorrp.path, 6) mirrorrp.touch() - rpath.copy_attribs(rpin, rpout) + mirrorrp.fsync_with_dir() + +def backup_remove_curmirror_local(): + """Remove the older of the current_mirror files. Use at end of session""" + assert Globals.rbdir.conn is Globals.local_connection + curmir_incs = restore.get_inclist(Globals.rbdir.append("current_mirror")) + assert len(curmir_incs) == 2 + if curmir_incs[0].getinctime() < curmir_incs[1].getinctime(): + older_inc = curmir_incs[0] + else: older_inc = curmir_incs[1] + + C.sync() # Make sure everything is written before curmirror is removed + older_inc.sync_delete() + def Restore(src_rp, dest_rp = None): """Main restoring function @@ -366,6 +379,7 @@ def restore_common(rpin, target, time): if target.conn.os.getuid() == 0: SetConnections.UpdateGlobal('change_ownership', 1) mirror_root, index = restore_get_root(rpin) + restore_check_backup_dir(mirror_root) mirror = mirror_root.new_index(index) inc_rpath = datadir.append_path('increments', index) restore_init_select(mirror_root, target) @@ -404,6 +418,17 @@ Try restoring from an increment file (the filenames look like "specify --force to overwrite." % rpout.path) return rpin, rpout +def restore_check_backup_dir(rpin): + """Make sure backup dir root rpin is in consistent state""" + result = checkdest_need_check(rpin) + if result is None: + Log.FatalError("%s does not appear to be an rdiff-backup directory." + % (rpin.path,)) + elif result == 1: Log.FatalError( + "Previous backup to %s seems to have failed. " + "Rerun rdiff-backup with --check-destination-dir option to revert " + "directory to state before unsuccessful session."
% (rpin.path,)) + def restore_init_select(rpin, rpout): """Initialize Select @@ -465,6 +490,7 @@ def restore_get_root(rpin): def ListIncrements(rp): """Print out a summary of the increments and their times""" mirror_root, index = restore_get_root(rp) + restore_check_backup_dir(mirror_root) mirror_rp = mirror_root.new_index(index) inc_rpath = Globals.rbdir.append_path('increments', index) incs = restore.get_inclist(inc_rpath) @@ -484,11 +510,7 @@ def CalculateAverage(rps): def RemoveOlderThan(rootrp): """Remove all increment files older than a certain time""" - datadir = rootrp.append_path("rdiff-backup-data") - if not datadir.lstat() or not datadir.isdir(): - Log.FatalError("Unable to open rdiff-backup-data dir %s" % - (datadir.path,)) - + rom_check_dir(rootrp) try: time = Time.genstrtotime(remove_older_than_string) except Time.TimeException, exc: Log.FatalError(str(exc)) timep = Time.timetopretty(time) @@ -512,13 +534,56 @@ def RemoveOlderThan(rootrp): else: Log("Deleting increments at times:\n" + inc_pretty_time, 3) manage.delete_earlier_than(datadir, time) +def rom_check_dir(rootrp): + """Check destination dir before RemoveOlderThan""" + SetConnections.UpdateGlobal('rbdir', + rootrp.append_path("rdiff-backup-data")) + if not Globals.rbdir.isdir(): + Log.FatalError("Unable to open rdiff-backup-data dir %s" % + (Globals.rbdir.path,)) + checkdest_if_necessary(rootrp) + def ListChangedSince(rp): """List all the files under rp that have changed since restoretime""" try: rest_time = Time.genstrtotime(restore_timestr) except Time.TimeException, exc: Log.FatalError(str(exc)) mirror_root, index = restore_get_root(rp) + restore_check_backup_dir(mirror_root) mirror_rp = mirror_root.new_index(index) inc_rp = mirror_rp.append_path("increments", index) restore.ListChangedSince(mirror_rp, inc_rp, rest_time) + +def CheckDest(dest_rp): + """Check the destination directory, regressing it if necessary""" + need_check = checkdest_need_check(dest_rp) + if need_check is None: + Log.FatalError("No destination dir found at %s" % (dest_rp.path,)) + elif need_check == 0: + Log.FatalError("Destination dir %s does not need checking" % + (dest_rp.path,)) + regress.Regress(dest_rp) + +def checkdest_need_check(dest_rp): + """Return None if no dest dir found, 1 if dest dir needs check, 0 o/w""" + assert dest_rp.conn is Globals.rbdir.conn + if not dest_rp.isdir() or not Globals.rbdir.isdir(): return None + curmirroot = Globals.rbdir.append("current_mirror") + curmir_incs = restore.get_inclist(curmirroot) + if not curmir_incs: return None + elif len(curmir_incs) == 1: return 0 + else: + assert len(curmir_incs) == 2, "Found too many current_mirror incs!" + return 1 + +def checkdest_if_necessary(dest_rp): + """Check the destination dir if necessary. + + This should be run before an incremental backup.
+ + """ + need_check = checkdest_need_check(dest_rp) + if need_check == 1: + Log("Previous backup seems to have failed, checking now.", 2) + regress.Regress(dest_rp) diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py index 7821141..a14bd32 100644 --- a/rdiff-backup/rdiff_backup/Rdiff.py +++ b/rdiff-backup/rdiff_backup/Rdiff.py @@ -56,7 +56,7 @@ def write_via_tempfile(fp, rp): """Write fileobj fp to rp by writing to tempfile and renaming""" tf = TempFile.new(rp) tf.write_from_fileobj(fp) - tf.rename(rp) + rpath.rename(tf, rp) def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None): """Patch routine that must be run locally, writes to outrp diff --git a/rdiff-backup/rdiff_backup/TempFile.py b/rdiff-backup/rdiff_backup/TempFile.py index 2b18920..824d480 100644 --- a/rdiff-backup/rdiff_backup/TempFile.py +++ b/rdiff-backup/rdiff_backup/TempFile.py @@ -17,56 +17,37 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA -"""Manage temp files""" +"""Manage temp files + +Earlier this had routines for keeping track of existing tempfiles. +Now we just use normal rpaths instead of the TempFile class. + +""" import os import Globals, rpath -# This is a connection-specific list of temp files, to be cleaned -# up before rdiff-backup exits. -_tempfiles = [] - # To make collisions less likely, this gets put in the file name # and incremented whenever a new file is requested. _tfindex = 0 -def new(rp_base, same_dir = 1): - """Return new tempfile that isn't in use. - - If same_dir, tempfile will be in same directory as rp_base. - Otherwise, use tempfile module to get filename. - - """ - conn = rp_base.conn - if conn is not Globals.local_connection: - return conn.TempFile.new(rp_base, same_dir) - - def find_unused(conn, dir): - """Find an unused tempfile with connection conn in directory dir""" - global _tfindex, tempfiles - while 1: - if _tfindex > 100000000: - Log("Resetting index", 2) - _tfindex = 0 - tf = TempFile(conn, os.path.join(dir, - "rdiff-backup.tmp.%d" % _tfindex)) - _tfindex = _tfindex+1 - if not tf.lstat(): return tf +def new(rp_base): + """Return new tempfile that isn't in use in same dir as rp_base""" + return new_in_dir(rp_base.get_parent_rp()) - if same_dir: tf = find_unused(conn, rp_base.dirsplit()[0]) - else: tf = TempFile(conn, tempfile.mktemp()) - _tempfiles.append(tf) - return tf +def new_in_dir(dir_rp): + """Return new temp rpath in directory dir_rp""" + global _tfindex + assert dir_rp.conn is Globals.local_connection + while 1: + if _tfindex > 100000000: + Log("Warning: Resetting tempfile index", 2) + _tfindex = 0 + tf = dir_rp.append('rdiff-backup.tmp.%d' % _tfindex) + _tfindex = _tfindex+1 + if not tf.lstat(): return tf -def remove_listing(tempfile): - """Remove listing of tempfile""" - if Globals.local_connection is not tempfile.conn: - tempfile.conn.TempFile.remove_listing(tempfile) - elif tempfile in _tempfiles: _tempfiles.remove(tempfile) -def delete_all(): - """Delete all remaining tempfiles""" - for tf in _tempfiles[:]: tf.delete() class TempFile(rpath.RPath): diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py index f776988..a4b9bff 100644 --- a/rdiff-backup/rdiff_backup/backup.py +++ b/rdiff-backup/rdiff_backup/backup.py @@ -213,7 +213,7 @@ class PatchITRB(rorpiter.ITRBranch): rp = self.get_rp_from_root(index) tf = TempFile.new(rp) self.patch_to_temp(rp, diff_rorp, tf) - tf.rename(rp) + rpath.rename(tf, rp) def patch_to_temp(self, basis_rp, diff_rorp, new): 
"""Patch basis_rp, writing output in new, which doesn't exist yet""" @@ -260,7 +260,7 @@ class PatchITRB(rorpiter.ITRBranch): else: assert self.dir_replacement self.base_rp.rmdir() - self.dir_replacement.rename(self.base_rp) + rpath.rename(self.dir_replacement, self.base_rp) class IncrementITRB(PatchITRB): @@ -286,7 +286,7 @@ class IncrementITRB(PatchITRB): tf = TempFile.new(rp) self.patch_to_temp(rp, diff_rorp, tf) increment.Increment(tf, rp, self.get_incrp(index)) - tf.rename(rp) + rpath.rename(tf, rp) def start_process(self, index, diff_rorp): """Start processing directory""" diff --git a/rdiff-backup/rdiff_backup/cmodule.c b/rdiff-backup/rdiff_backup/cmodule.c index b9e3e3e..6673ee5 100644 --- a/rdiff-backup/rdiff_backup/cmodule.c +++ b/rdiff-backup/rdiff_backup/cmodule.c @@ -36,6 +36,7 @@ static PyObject *UnknownFileTypeError; static PyObject *c_make_file_dict(PyObject *self, PyObject *args); static PyObject *long2str(PyObject *self, PyObject *args); static PyObject *str2long(PyObject *self, PyObject *args); +static PyObject *my_sync(PyObject *self); /* Turn a stat structure into a python dictionary. The preprocessor @@ -179,6 +180,15 @@ static PyObject *long2str(self, args) } +/* Run sync() and return None */ +static PyObject *my_sync(self) + PyObject *self; +{ + sync(); + return Py_BuildValue(""); +} + + /* Reverse of above; convert 7 byte string into python long */ static PyObject *str2long(self, args) PyObject *self; @@ -201,6 +211,7 @@ static PyMethodDef CMethods[] = { "Make dictionary from file stat"}, {"long2str", long2str, METH_VARARGS, "Convert python long to 7 byte string"}, {"str2long", str2long, METH_VARARGS, "Convert 7 byte string to python long"}, + {"sync", my_sync, METH_VARARGS, "sync buffers to disk"}, {NULL, NULL, 0, NULL} }; diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py index 5e23b91..90b8ea3 100644 --- a/rdiff-backup/rdiff_backup/connection.py +++ b/rdiff-backup/rdiff_backup/connection.py @@ -95,7 +95,6 @@ class LowLevelPipeConnection(Connection): f - file object b - string q - quit signal - t - TempFile R - RPath r - RORPath only c - PipeConnection object @@ -121,8 +120,6 @@ class LowLevelPipeConnection(Connection): Log.conn("sending", obj, req_num) if type(obj) is types.StringType: self._putbuf(obj, req_num) elif isinstance(obj, connection.Connection):self._putconn(obj, req_num) - elif isinstance(obj, TempFile.TempFile): - self._puttempfile(obj, req_num) elif isinstance(obj, rpath.RPath): self._putrpath(obj, req_num) elif isinstance(obj, rpath.RORPath): self._putrorpath(obj, req_num) elif ((hasattr(obj, "read") or hasattr(obj, "write")) @@ -148,12 +145,6 @@ class LowLevelPipeConnection(Connection): self._write("i", str(VirtualFile.new(rorpiter.ToFile(iterator))), req_num) - def _puttempfile(self, tempfile, req_num): - """Put a tempfile into pipe. 
See _putrpath""" - tf_repr = (tempfile.conn.conn_number, tempfile.base, - tempfile.index, tempfile.data) - self._write("t", cPickle.dumps(tf_repr, 1), req_num) - def _putrpath(self, rpath, req_num): """Put an rpath into the pipe @@ -235,7 +226,6 @@ class LowLevelPipeConnection(Connection): elif format_string == "i": result = rorpiter.FromFile(iterfile.BufferedRead( VirtualFile(self, int(data)))) - elif format_string == "t": result = self._gettempfile(data) elif format_string == "r": result = self._getrorpath(data) elif format_string == "R": result = self._getrpath(data) else: @@ -249,12 +239,6 @@ class LowLevelPipeConnection(Connection): index, data = cPickle.loads(raw_rorpath_buf) return rpath.RORPath(index, data) - def _gettempfile(self, raw_tf_buf): - """Return TempFile object indicated by raw_tf_buf""" - conn_number, base, index, data = cPickle.loads(raw_tf_buf) - return TempFile.TempFile(Globals.connection_dict[conn_number], - base, index, data) - def _getrpath(self, raw_rpath_buf): """Return RPath object indicated by raw_rpath_buf""" conn_number, base, index, data = cPickle.loads(raw_rpath_buf) diff --git a/rdiff-backup/rdiff_backup/journal.py b/rdiff-backup/rdiff_backup/journal.py index d79875e..53b3ca7 100644 --- a/rdiff-backup/rdiff_backup/journal.py +++ b/rdiff-backup/rdiff_backup/journal.py @@ -17,6 +17,9 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA +# UPDATE: I have decided not to use journaling and to use the regress +# code exclusively. This code is left here for posterity. + """Application level journaling for better error recovery This module has routines for maintaining a "journal" to keep track of @@ -49,12 +52,15 @@ Two caveats: """ -import Globals, log, rpath, cPickle, TempFile +import Globals, log, rpath, cPickle, TempFile, os, restore # Holds an rpath of the journal directory, a file object, and then journal_dir_rp = None journal_dir_fp = None +# Set to time in seconds of previous aborted backup +unsuccessful_backup_time = None + def open_journal(): """Make sure the journal dir exists (creating it if necessary)""" global journal_dir_rp, journal_dir_fp @@ -74,7 +80,12 @@ def close_journal(): journal_dir_rp = journal_dir_fp = None def sync_journal(): - """fsync the journal directory""" + """fsync the journal directory. + + Note that fsync'ing a particular entry file may also be required + to guarantee writes have been committed. + + """ journal_dir_rp.fsync(journal_dir_fp) def recover_journal(): @@ -94,74 +105,91 @@ def get_entries_from_journal(): else: entry_list.append(e) return entry_list -def write_entry(test_filename, test_filename_type, - increment_filename, temp_filename): +def write_entry(index, temp_index, testfile_option, testfile_type): """Write new entry given variables into journal, return entry""" e = Entry() - e.test_filename = test_filename - e.test_filename_type = test_filename_type - e.increment_filename = increment_filename - e.temp_filename = temp_filename + e.index = index + e.temp_index = temp_index + e.testfile_option = testfile_option + e.testfile_type = testfile_type e.write() return e -def remove_entry(entry_rp): - """Remove the entry in entry_rp from the journal""" - entry_rp.delete() - sync_journal() - class Entry: """A single journal entry, describing one transaction Although called a journal entry, this is less a description of - what is going happen than a short recipe of what to do if + what is going to happen than a short recipe of how to recover if something goes wrong.
Currently the recipe needs to be very simple and is determined by - the four variables test_filename, test_filename_type, - increment_filename, and temp_filename. See the recover() method - for details. + the four variables index, temp_index, testfile_option, + testfile_type. See the recover() method for details. """ - test_filename = None - test_filename_type = None # None is a valid value for this variable - increment_filename = None - temp_filename = None + index = None + temp_index = None + testfile_option = None + testfile_type = None # None is a valid value for this variable - # This holds the rpath in the journal dir that holds self + # This points to the rpath in the journal dir that holds this entry entry_rp = None def recover(self): """Recover the current journal entry - See if test_filename matches test_filename_type. If so, - delete increment_filename. Delete temp_filename regardless. + self.testfile_option has 3 possibilities: + 1 - testfile is mirror file + 2 - testfile is increment file + 3 - testfile is temp file + + In each case, see if the type of the testfile matches + testfile_type. If so, delete the increment file. Delete the + temp file regardless. + + We express things in terms of indices because we need paths + relative to a fixed directory (like Globals.dest_root). + It's OK to recover the same entry multiple times. """ - assert self.test_filename and self.temp_filename - test_rp = rpath.RPath(Globals.local_connection, self.test_filename) - temp_rp = rpath.RPath(Globals.local_connection, self.temp_filename) - inc_rp = rpath.RPath(Globals.local_connection, self.increment_filename) - if test_rp.lstat() == self.test_filename_type: - if inc_rp.lstat(): - inc_rp.delete() - inc_rp.get_parent_rp().fsync() - if temp_rp.lstat(): - temp_rp.delete() - temp_rp.get_parent_rp().fsync() + assert self.index is not None and self.temp_index is not None + mirror_rp = Globals.dest_root.new_index(self.index) + if self.temp_index: + temp_rp = Globals.dest_root.new_index(self.temp_index) + else: temp_rp = None + inc_rp = self.get_inc() + + assert 1 <= self.testfile_option <= 3 + if self.testfile_option == 1: test_rp = mirror_rp + elif self.testfile_option == 2: test_rp = inc_rp + else: test_rp = temp_rp + + if test_rp and test_rp.lstat() == self.testfile_type: + if inc_rp and inc_rp.lstat(): inc_rp.sync_delete() + if temp_rp and temp_rp.lstat(): temp_rp.sync_delete() + + def get_inc(self): + """Return inc_rpath, if any, corresponding to self.index""" + incroot = Globals.rbdir.append_path("increments") + incbase = incroot.new_index(self.index) + inclist = restore.get_inclist(incbase) + inclist = filter(lambda inc: + inc.getinctime() == unsuccessful_backup_time, inclist) + assert len(inclist) <= 1 + if inclist: return inclist[0] + else: return None def to_string(self): """Return string form of entry""" - return cPickle.dumps({'test_filename': self.test_filename, - 'test_filename_type': self.test_filename_type, - 'increment_filename': self.increment_filename, - 'temp_filename': self.temp_filename}) + return cPickle.dumps({'index': self.index, + 'testfile_option': self.testfile_option, + 'testfile_type': self.testfile_type, + 'temp_index': self.temp_index}) def write(self): """Write the current entry into the journal""" - entry_rp = TempFile.new(journal_dir_rp.append("foo")) + entry_rp = TempFile.new_in_dir(journal_dir_rp) fp = entry_rp.open("wb") fp.write(self.to_string()) entry_rp.fsync(fp) @@ -174,10 +202,10 @@ class Entry: try: -
self.test_filename = val_dict['test_filename'] - self.test_filename_type = val_dict['test_filename_type'] - self.increment_filename = val_dict['increment_filename'] - self.temp_filename = val_dict['temp_filename'] + self.index = val_dict['index'] + self.testfile_type = val_dict['testfile_type'] + self.testfile_option = val_dict['testfile_option'] + self.temp_index = val_dict['temp_index'] except TypeError, KeyError: return 0 return 1 @@ -191,5 +219,4 @@ class Entry: def delete(self): """Remove entry from the journal. self.entry_rp must be set""" - self.entry_rp.delete() - sync_journal() + self.entry_rp.sync_delete() diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index d4eff8c..ec86168 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -55,7 +55,7 @@ field names and values. """ from __future__ import generators -import re, gzip +import re, gzip, os import log, Globals, rpath, Time, robust, increment class ParsingError(Exception): @@ -280,6 +280,9 @@ def WriteMetadata(rorp): def CloseMetadata(): """Close the metadata file""" global metadata_rp, metadata_fileobj + try: fileno = metadata_fileobj.fileno() # will not work if GzipFile + except AttributeError: fileno = metadata_fileobj.fileobj.fileno() + os.fsync(fileno) result = metadata_fileobj.close() metadata_fileobj = None metadata_rp.setdata() diff --git a/rdiff-backup/rdiff_backup/regress.py b/rdiff-backup/rdiff_backup/regress.py new file mode 100644 index 0000000..db64b0a --- /dev/null +++ b/rdiff-backup/rdiff_backup/regress.py @@ -0,0 +1,249 @@ +# Copyright 2002 Ben Escoto +# +# This file is part of rdiff-backup. +# +# rdiff-backup is free software; you can redistribute it and/or modify +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# rdiff-backup is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with rdiff-backup; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +"""Code for reverting the rdiff-backup directory to its previous state +This module is used after an aborted session, when the rdiff-backup +destination directory may be in an in-between state. In this situation we +need to bring back the directory as it was after the last successful +backup. The basic strategy is to restore all the attributes from the +metadata file (which we assume is intact) and delete the extra +increments. For regular files we examine the mirror file and use the +increment file to get the old data if the mirror file is out of date. +Currently this does not recover hard links. This may make the regressed +directory take up more disk space, but hard links can still be +recovered.
+ +""" + +from __future__ import generators +import Globals, restore, log, rorpiter, journal, TempFile + +# regress_time should be set to the time we want to regress back to +# (usually the time of the last successful backup) +regress_time = None + +# This should be set to the latest unsuccessful backup time +unsuccessful_backup_time = None + + +class RegressException(Exception): + """Raised on any exception in regress process""" + pass + + +def Regress(mirror_rp): + """Bring mirror and inc directory back to regress_to_time + + Also affects the rdiff-backup-data directory, so Globals.rbdir + should be set. Regress should only work one step at a time + (i.e. don't "regress" through two separate backup sets. This + function should be run locally to the rdiff-backup-data directory. + + """ + inc_rpath = Globals.rbdir.append_path("increments") + assert mirror_rp.index == () and inc_rpath.index == () + assert mirror_rp.isdir() and inc_rpath.isdir() + assert mirror_rp.conn is inc_rpath.conn is Globals.local_connection + set_regress_time() + set_restore_times() + +def set_regress_time(): + """Set global regress_time to previous sucessful backup + + If there are two current_mirror increments, then the last one + corresponds to a backup session that failed. + + """ + global regress_time, unsuccessful_backup_time + curmir_incs = restore.get_inclist(Globals.rbdir.append("current_mirror")) + assert len(curmir_incs) == 2, \ + "Found %s current_mirror flags, expected 2" % len(curmir_incs) + inctimes = [inc.getinctime() for inc in curmir_incs] + inctimes.sort() + regress_time = inctimes[0] + unsucessful_backup_time = inctimes[-1] + log.Log("Regressing to " + Time.timetopretty(regress_time), 5) + +def set_restore_times(): + """Set _rest_time and _mirror_time in the restore module + + _rest_time (restore time) corresponds to the last successful + backup time. _mirror_time is the unsuccessful backup time. + + """ + restore._mirror_time = unsuccessful_backup_time + restore._rest_time = regress_time + +def iterate_raw_rfs(mirror_rp, inc_rp): + """Iterate all RegressFile objects in mirror/inc directory""" + root_rf = RegressFile(mirror_rp, inc_rp, restore.get_inclist(inc_rp)) + def helper(rf): + yield rf + if rf.mirror_rp.isdir() or rf.inc_rp.isdir(): + for sub_rf in rf.yield_sub_rfs(): + for sub_sub_rf in helper(sub_rf): + yield sub_sub_rf + return helper(root_rf) + +def yield_metadata(): + """Iterate rorps from metadata file, if any are available""" + metadata_iter = metadata.GetMetadata_at_time(Globals.rbdir, regress_time) + if metadata_iter: return metadata_iter + log.Log.FatalError("No metadata for time %s found, cannot regress" + % Time.timetopretty(regress_time)) + +def iterate_meta_rfs(mirror_rp, inc_rp): + """Yield RegressFile objects with extra metadata information added + + Each RegressFile will have an extra object variable .metadata_rorp + which will contain the metadata attributes of the mirror file at + regress_time. + + """ + raw_rfs = iterate_raw_rfs(mirror_rp, inc_rp) + collated = rorpiter.Collate2Iters(raw_rfs, yield_metadata()) + for raw_rf, metadata_rorp in collated: + raw_rf.set_metadata_rorp(metadata_rorp) + yield raw_rf + + +class RegressFile(restore.RestoreFile): + """Like RestoreFile but with metadata + + Hold mirror_rp and related incs, but also put metadata info for + the mirror file at regress time in self.metadata_rorp. + self.metadata_rorp is not set in this class. 
+ + """ + def __init__(self, mirror_rp, inc_rp, inc_list): + restore.RestoreFile._init__(self, mirror_rp, inc_rp, inclist) + assert len(self.relevant_incs) <= 2, "Too many incs" + if len(self.relevant_incs) == 2: + self.regress_inc = self.relevant.incs[-1] + else: self.regress_inc = None + + def set_metadata_rorp(self, metadata_rorp): + """Set self.metadata_rorp, creating empty if given None""" + if metadata_rorp: self.metadata_rorp = metadata_rorp + else: self.metadata_rorp = rpath.RORPath(self.index) + + def isdir(self): + """Return true if regress needs before/after processing""" + return ((self.metadata_rorp and self.metadata_rorp.isdir()) or + (self.mirror_rp and self.mirror_rp.isdir())) + + +class RegressITRB(rorpiter.ITRBranch): + """Turn back state of dest directory (use with IterTreeReducer) + + The arguments to the ITR will be RegressFiles. There are two main + assumptions this procedure makes (besides those mentioned above): + + 1. The mirror_rp and the metadata_rorp cmp_attribs correctly iff + they contain the same data. If this is the case, then the inc + file is unnecessary and we can delete it. + + 2. If the don't match, then applying the inc file will + successfully get us back to the previous state. + + Since the metadata file is required, the two above really only + matter for regular files. + + """ + def __init__(self): + """Just initialize some variables to None""" + self.rf = None # will hold RegressFile applying to a directory + + def can_fast_process(self, index, rf): + """True if none of the rps is a directory""" + return not rf.mirror_rp.isdir() and not rf.metadata_rorp.isdir() + + def fast_process(self, index, rf): + """Process when nothing is a directory""" + if not rpath.cmp_attribs(rf.metadata_rorp, rf.mirror_rp): + if rf.metadata_rorp.isreg(): self.restore_orig_regfile(rf) + else: + if rf.mirror_rp.lstat(): rf.mirror_rp.delete() + rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp) + if rf.regress_inc: rf.regress_inc.delete() + + def restore_orig_regfile(self, rf): + """Restore original regular file + + This is the trickiest case for avoiding information loss, + because we don't want to delete the increment before the + mirror is fully written. 
+ + """ + assert rf.metadata_rorp.isreg() + if rf.mirror_rp.isreg(): + tf = TempFile.new(rf.mirror_rp) + tf.write_from_fileobj(rf.get_restore_fp()) + rpath.copy_attribs(rf.metadata_rorp, tf) + tf.fsync_with_dir() # make sure tf fully written before move + rpath.rename(tf, rf.mirror_rp) # move is atomic + else: + if rf.mirror_rp.lstat(): rf.mirror_rp.delete() + rf.mirror_rp.write_from_fileobj(rf.get_restore_fp()) + rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp) + rf.mirror_rp.fsync_with_dir() # require move before inc delete + + def start_process(self, index, rf): + """Start processing directory""" + if rf.metadata_rorp.isdir(): + # make sure mirror is a readable dir + if not rf.mirror_rp.isdir(): + if rf.mirror_rp.lstat(): rf.mirror_rp.delete() + rf.mirror_rp.mkdir() + if not rf.mirror_rp.hasfullperms(): rf.mirror_rp.chmod(0700) + self.rf = rf + + def end_process(self): + """Finish processing a directory""" + rf = self.rf + if rf.metadata_rorp.isdir(): + if rf.mirror_rp.isdir(): + if not rpath.cmp_attribs(rf.metadata_rorp, rf.mirror_rp): + rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp) + else: + rf.mirror_rp.delete() + rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp) + else: # replacing a dir with some other kind of file + assert rf.mirror_rp.isdir() + if rf.metadata_rorp.isreg(): self.restore_orig_regfile(rf) + else: + rf.mirror_rp.delete() + rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp) + if rf.regress_inc: rf.regress_inc.delete() + + def on_error(self, exc, *args): + """This is run on any exception, raises RegressException + + RegressException should be fatal. We don't want to tolerate + the kinds of errors we would when backing up. + + """ + if args and args[0] and isinstance(args[0], tuple): + filename = os.path.join(*args[0]) + elif self.index: filename = os.path.join(*self.index) + else: filename = "." 
+ log.Log("Error '%s' processing %s" % (exc, filename), 2) + raise RegressException("Error during Regress") diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py index 3ff4ce8..53b64ea 100644 --- a/rdiff-backup/rdiff_backup/restore.py +++ b/rdiff-backup/rdiff_backup/restore.py @@ -401,7 +401,7 @@ class RestoreFile: else: inc_rp, inc_list = inc_pair if not mirror_rp: mirror_rp = self.mirror_rp.new_index(inc_rp.index) - yield RestoreFile(mirror_rp, inc_rp, inc_list) + yield self.__class__(mirror_rp, inc_rp, inc_list) def yield_mirrorrps(self, mirrorrp): """Yield mirrorrps underneath given mirrorrp""" diff --git a/rdiff-backup/rdiff_backup/rorpiter.py b/rdiff-backup/rdiff_backup/rorpiter.py index 4392ce0..3027fd1 100644 --- a/rdiff-backup/rdiff_backup/rorpiter.py +++ b/rdiff-backup/rdiff_backup/rorpiter.py @@ -31,7 +31,7 @@ files), where files is the number of files attached (usually 1 or from __future__ import generators import os, tempfile, UserList, types import librsync, Globals, Rdiff, Hardlink, robust, log, static, \ - rpath, iterfile, TempFile + rpath, iterfile class RORPIterException(Exception): pass diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 0b18e19..48e98ed 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -208,10 +208,14 @@ def rename(rp_source, rp_dest): (rp_source.path, rp_dest.path), 7) if not rp_source.lstat(): rp_dest.delete() else: - rp_source.conn.os.rename(rp_source.path, rp_dest.path) + if rp_dest.lstat() and rp_source.getinode() == rp_dest.getinode(): + # You can't rename one hard linked file over another + rp_source.delete() + else: rp_source.conn.os.rename(rp_source.path, rp_dest.path) rp_dest.data = rp_source.data rp_source.data = {'type': None} + def tupled_lstat(filename): """Like os.lstat, but return only a tuple, or None if os.error @@ -872,6 +876,25 @@ class RPath(RORPath): assert not fp.close() else: os.fsync(fp.fileno()) + def fsync_with_dir(self, fp = None): + """fsync self and directory self is under""" + self.fsync(fp) + self.get_parent_rp().fsync() + + def sync_delete(self): + """Delete self with sync to guarantee completion + + On some filesystems (like linux's ext2), we must sync both the + file and the directory to make sure. 
+ + """ + if self.lstat() and not self.issym(): + fp = self.open("rb") + self.delete() + os.fsync(fp.fileno()) + assert not fp.close() + self.get_parent_rp().fsync() + def get_data(self): """Open file as a regular file, read data, close, return data""" fp = self.open("rb") @@ -895,4 +918,3 @@ class RPathFileHook: self.closing_thunk() return result - diff --git a/rdiff-backup/rdiff_backup/statistics.py b/rdiff-backup/rdiff_backup/statistics.py index 068edda..f344472 100644 --- a/rdiff-backup/rdiff_backup/statistics.py +++ b/rdiff-backup/rdiff_backup/statistics.py @@ -20,7 +20,7 @@ """Generate and process aggregated backup information""" import re, os, time -import Globals, TempFile, robust, Time, rorpiter, increment +import Globals, robust, Time, rorpiter, increment class StatsException(Exception): pass diff --git a/rdiff-backup/testing/journaltest.py b/rdiff-backup/testing/journaltest.py index 76e638a..9db1ff2 100644 --- a/rdiff-backup/testing/journaltest.py +++ b/rdiff-backup/testing/journaltest.py @@ -6,20 +6,26 @@ class JournalTest(unittest.TestCase): def testBasic(self): """Test opening a journal, then reading, writing, and deleting""" MakeOutputDir() - Globals.rbdir = rpath.RPath(Globals.local_connection, - "testfiles/output") + Globals.dest_root = rpath.RPath(Globals.local_connection, + "testfiles/output") + Globals.rbdir = Globals.dest_root.append("rdiff-backup-data") + + Globals.rbdir.mkdir() journal.open_journal() assert len(journal.get_entries_from_journal()) == 0 # It's important that none of these files really exist - e1 = journal.write_entry("Hello48", "reg", "inc_file3917", "t39p") - e2 = journal.write_entry("2nd_euoeuo", None, "inc_file4832", "l389") + e1 = journal.write_entry(("Hello48",), ("temp_index", "foo"), + 2, "reg") + e2 = journal.write_entry(("2nd", "Entry", "now"), + ("temp_index",), 1, None) assert e1.entry_rp and e2.entry_rp l = journal.get_entries_from_journal() assert len(l) == 2 - first_filename = l[0].test_filename - assert first_filename == "Hello48" or first_filename == "2nd_euoeuo" + first_index = l[0].index + assert (first_index == ("Hello48",) or + first_index == ("2nd", "Entry", "now")) # Now test recovering journal, and make sure everything deleted journal.recover_journal() diff --git a/rdiff-backup/testing/regresstest.py b/rdiff-backup/testing/regresstest.py new file mode 100644 index 0000000..8a24958 --- /dev/null +++ b/rdiff-backup/testing/regresstest.py @@ -0,0 +1,11 @@ +"""regresstest - test the regress module. 
Not to be confused with the +regression tests.""" + +import unittest +from commontest import * + +class RegressTest(unittest.TestCase): + XXX + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/timetest.py b/rdiff-backup/testing/timetest.py index 367a4f9..f7c28e1 100644 --- a/rdiff-backup/testing/timetest.py +++ b/rdiff-backup/testing/timetest.py @@ -32,20 +32,6 @@ class TimeTest(unittest.TestCase): assert cmp("2001-09-01T12:00:00-08:00", "2001-09-01T12:00:00-07:00") == 1 - def testCmp_separator(self): - """Like testCmp but with new separator""" - Globals.time_separator = "_" - cmp = Time.cmp - assert cmp(1,2) == -1 - assert cmp(2,2) == 0 - assert cmp(5,1) == 1 - assert cmp("2001-09-01T21_49_04Z", "2001-08-01T21_49_04Z") == 1 - assert cmp("2001-09-01T04_49_04+03_23", "2001-09-01T21_49_04Z") == -1 - assert cmp("2001-09-01T12_00_00Z", "2001-09-01T04_00_00-08_00") == 0 - assert cmp("2001-09-01T12_00_00-08_00", - "2001-09-01T12_00_00-07_00") == 1 - Globals.time_separator = ":" - def testStringtotime(self): """Test converting string to time""" timesec = int(time.time()) -- cgit v1.2.1
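The new rpath.RPath.fsync_with_dir() and sync_delete() methods, the fsync added to metadata.CloseMetadata(), and the C-level sync() all rest on the same POSIX durability rule: creating, renaming, or unlinking a file is only guaranteed to survive a crash once the parent directory itself has been fsync'ed. A self-contained sketch of that pattern with plain os calls (the helper names are illustrative, not rdiff-backup's API; data is assumed to be a byte string):

    import os

    def fsync_dir_of(path):
        # fsync the directory holding path, committing its entry for path
        dfd = os.open(os.path.dirname(path) or ".", os.O_RDONLY)
        try: os.fsync(dfd)
        finally: os.close(dfd)

    def durable_create(path, data):
        # Write a new file so both its contents and its name survive
        # a crash: fsync the file first, then its parent directory.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT, 0o600)
        try:
            os.write(fd, data)
            os.fsync(fd)
        finally:
            os.close(fd)
        fsync_dir_of(path)

    def durable_delete(path):
        # Unlink a file and fsync the parent so the removal is on disk
        os.unlink(path)
        fsync_dir_of(path)

This ordering is why backup_touch_curmirror_local() calls fsync_with_dir() on the new marker, and why the older marker is removed with sync_delete() only after C.sync(): the two-marker protocol is only trustworthy if the markers reach the disk in the order the code writes them.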