diff options
author | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2003-02-04 08:40:04 +0000 |
---|---|---|
committer | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2003-02-04 08:40:04 +0000 |
commit | 62f19f8a31706a1ae65eeccba4155456e879c7d9 (patch) | |
tree | 157ce8c29b890c19fdc7d6f223ecb65fd1df0f51 | |
parent | 64976e6afefff455ee9515218942a65c6ff5eede (diff) | |
download | rdiff-backup-62f19f8a31706a1ae65eeccba4155456e879c7d9.tar.gz |
Initial checkin of journal code
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@274 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r-- | rdiff-backup/rdiff_backup/journal.py | 195 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/rpath.py | 28 | ||||
-rw-r--r-- | rdiff-backup/testing/journaltest.py | 29 |
3 files changed, 252 insertions, 0 deletions
diff --git a/rdiff-backup/rdiff_backup/journal.py b/rdiff-backup/rdiff_backup/journal.py
new file mode 100644
index 0000000..d79875e
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/journal.py
@@ -0,0 +1,195 @@
+# Copyright 2002 Ben Escoto
+#
+# This file is part of rdiff-backup.
+#
+# rdiff-backup is free software; you can redistribute it and/or modify
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# rdiff-backup is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with rdiff-backup; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+"""Application level journaling for better error recovery
+
+This module has routines for maintaining a "journal" to keep track of
+writes to an rdiff-backup destination directory. This is necessary
+because otherwise data could be lost if the program is abruptly
+stopped (say due to a computer crash). For instance, various temp files
+could be left on the mirror drive. Or it may not be clear whether an
+increment file has been fully written.
+
+To keep this from happening, various writes may be journaled (a write
+corresponds to the updating of a single file). To do this, a separate
+file in the journal directory is created, and the necessary
+information is written to it. When the transaction is finished, that
+journal entry file will be deleted. If there is a crash, the next
+time rdiff-backup is run, it will see the journal file, and process
+it, bringing the rdiff-backup destination directory back into a
+consistent state.
+
+Two caveats:
+
+1) The journal is only meant to be used in conjunction with a
+   regression to the state before the backup was attempted. If the
+   failed session is not "cleaned" out right after the journal is
+   recovered, something bad could happen.
+
+2) This journal will only be effective if the actual hardware and OS
+   are working. If disk failures are causing data loss, or if a crash
+   causes your filesystem to be corrupted, rdiff-backup could lose
+   data despite all this journal stuff.
+
+"""
+
+import Globals, log, rpath, cPickle, TempFile
+
+# Hold an rpath of the journal directory and a file object open on it
+journal_dir_rp = None
+journal_dir_fp = None
+
+def open_journal():
+    """Make sure the journal dir exists (creating it if necessary)"""
+    global journal_dir_rp, journal_dir_fp
+    assert journal_dir_rp is journal_dir_fp is None
+    journal_dir_rp = Globals.rbdir.append("journal")
+    if not journal_dir_rp.lstat():
+        log.Log("Creating journal directory %s" % (journal_dir_rp.path,), 5)
+        journal_dir_rp.mkdir()
+    assert journal_dir_rp.isdir()
+    journal_dir_fp = journal_dir_rp.open("rb")
+
+def close_journal():
+    """Close the journal at the end of a session"""
+    global journal_dir_rp, journal_dir_fp
+    assert not journal_dir_rp.listdir()
+    assert not journal_dir_fp.close()
+    journal_dir_rp = journal_dir_fp = None
+
+def sync_journal():
+    """fsync the journal directory"""
+    journal_dir_rp.fsync(journal_dir_fp)
+
+def recover_journal():
+    """Read the journal and recover each of the events"""
+    for entry in get_entries_from_journal():
+        entry.recover()
+        entry.delete()
+
+def get_entries_from_journal():
+    """Return list of entries in the journal (deletes bad entries)"""
+    entry_list = []
+    for filename in journal_dir_rp.listdir():
+        entry_rp = journal_dir_rp.append(filename)
+        e = Entry()
+        success = e.init_from_rp(entry_rp)
+        if not success: entry_rp.delete()
+        else: entry_list.append(e)
+    return entry_list
+
+def write_entry(test_filename, test_filename_type,
+                increment_filename, temp_filename):
+    """Write new entry given variables into journal, return entry"""
+    e = Entry()
+    e.test_filename = test_filename
+    e.test_filename_type = test_filename_type
+    e.increment_filename = increment_filename
+    e.temp_filename = temp_filename
+    e.write()
+    return e
+
+def remove_entry(entry_rp):
+    """Remove the entry in entry_rp from the journal"""
+    entry_rp.delete()
+    sync_journal()
+
+
+class Entry:
+    """A single journal entry, describing one transaction
+
+    Although called a journal entry, this is less a description of
+    what is going to happen than a short recipe of what to do if
+    something goes wrong.
+
+    Currently the recipe needs to be very simple and is determined by
+    the four variables test_filename, test_filename_type,
+    increment_filename, and temp_filename. See the recover() method
+    for details.
+
+    """
+    test_filename = None
+    test_filename_type = None # None is a valid value for this variable
+    increment_filename = None
+    temp_filename = None
+
+    # This holds the rpath in the journal dir that holds self
+    entry_rp = None
+
+    def recover(self):
+        """Recover the current journal entry
+
+        See if test_filename matches test_filename_type. If so,
+        delete increment_filename. Delete temp_filename regardless.
+        It's OK to recover the same entry multiple times.
+
+        """
+        assert self.test_filename and self.temp_filename
+        test_rp = rpath.RPath(Globals.local_connection, self.test_filename)
+        temp_rp = rpath.RPath(Globals.local_connection, self.temp_filename)
+        inc_rp = rpath.RPath(Globals.local_connection, self.increment_filename)
+        if test_rp.lstat() == self.test_filename_type:
+            if inc_rp.lstat():
+                inc_rp.delete()
+                inc_rp.get_parent_rp().fsync()
+        if temp_rp.lstat():
+            temp_rp.delete()
+            temp_rp.get_parent_rp().fsync()
+
+    def to_string(self):
+        """Return string form of entry"""
+        return cPickle.dumps({'test_filename': self.test_filename,
+                              'test_filename_type': self.test_filename_type,
+                              'increment_filename': self.increment_filename,
+                              'temp_filename': self.temp_filename})
+
+    def write(self):
+        """Write the current entry into the journal"""
+        entry_rp = TempFile.new(journal_dir_rp.append("foo"))
+        fp = entry_rp.open("wb")
+        fp.write(self.to_string())
+        entry_rp.fsync(fp)
+        assert not fp.close()
+        sync_journal()
+        self.entry_rp = entry_rp
+
+    def init_from_string(self, s):
+        """Initialize values from string. Return 0 if problem."""
+        try: val_dict = cPickle.loads(s)
+        except (cPickle.UnpicklingError, EOFError): return 0 # empty/truncated
+        try:
+            self.test_filename = val_dict['test_filename']
+            self.test_filename_type = val_dict['test_filename_type']
+            self.increment_filename = val_dict['increment_filename']
+            self.temp_filename = val_dict['temp_filename']
+        except (TypeError, KeyError): return 0 # not a dict, or key missing
+        return 1
+
+    def init_from_rp(self, entry_rp):
+        """Initialize values from an rpath. Return 0 if problem"""
+        if not entry_rp.isreg(): return 0
+        success = self.init_from_string(entry_rp.get_data())
+        if not success: return 0
+        self.entry_rp = entry_rp
+        return 1
+
+    def delete(self):
+        """Remove entry from the journal. self.entry_rp must be set"""
+        self.entry_rp.delete()
+        sync_journal()
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index fb2c255..0b18e19 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -736,6 +736,14 @@ class RPath(RORPath):
         comps = normed.path.split("/")
         return "/".join(comps[:-1]), comps[-1]
 
+    def get_parent_rp(self):
+        """Return new RPath of directory self is in"""
+        if self.index:
+            return self.__class__(self.conn, self.base, self.index[:-1])
+        dirname = self.dirsplit()[0]
+        if dirname: return self.__class__(self.conn, dirname)
+        else: return self.__class__(self.conn, "/")
+
     def newpath(self, newpath, index = ()):
         """Return new RPath with the same connection but different path"""
         return self.__class__(self.conn, newpath, index)
@@ -851,6 +859,26 @@ class RPath(RORPath):
         else: raise RPathException
         self.setdata()
 
+    def fsync(self, fp = None):
+        """fsync the current file or directory
+
+        If fp is None, get the file descriptor by opening the file.
+        This can be useful for directories.
+
+        """
+        if not fp:
+            fp = self.open("rb")
+            os.fsync(fp.fileno())
+            assert not fp.close()
+        else: os.fsync(fp.fileno())
+
+    def get_data(self):
+        """Open file as a regular file, read data, close, return data"""
+        fp = self.open("rb")
+        s = fp.read()
+        assert not fp.close()
+        return s
+
 
 class RPathFileHook:
     """Look like a file, but add closing hook"""
diff --git a/rdiff-backup/testing/journaltest.py b/rdiff-backup/testing/journaltest.py
new file mode 100644
index 0000000..76e638a
--- /dev/null
+++ b/rdiff-backup/testing/journaltest.py
@@ -0,0 +1,29 @@
+from commontest import *
+import unittest
+from rdiff_backup import journal, Globals, rpath
+
+class JournalTest(unittest.TestCase):
+    def testBasic(self):
+        """Test opening a journal, then reading, writing, and deleting"""
+        MakeOutputDir()
+        Globals.rbdir = rpath.RPath(Globals.local_connection,
+                                    "testfiles/output")
+        journal.open_journal()
+        assert len(journal.get_entries_from_journal()) == 0
+
+        # It's important that none of these files really exist
+        e1 = journal.write_entry("Hello48", "reg", "inc_file3917", "t39p")
+        e2 = journal.write_entry("2nd_euoeuo", None, "inc_file4832", "l389")
+        assert e1.entry_rp and e2.entry_rp
+
+        l = journal.get_entries_from_journal()
+        assert len(l) == 2
+        first_filename = l[0].test_filename
+        assert first_filename == "Hello48" or first_filename == "2nd_euoeuo"
+
+        # Now test recovering journal, and make sure everything deleted
+        journal.recover_journal()
+        assert len(journal.get_entries_from_journal()) == 0
+
+
+if __name__ == "__main__": unittest.main()