author     bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>  2003-02-04 08:40:04 +0000
committer  bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>  2003-02-04 08:40:04 +0000
commit     62f19f8a31706a1ae65eeccba4155456e879c7d9 (patch)
tree       157ce8c29b890c19fdc7d6f223ecb65fd1df0f51
parent     64976e6afefff455ee9515218942a65c6ff5eede (diff)
download   rdiff-backup-62f19f8a31706a1ae65eeccba4155456e879c7d9.tar.gz
Initial checkin of journal code
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@274 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r--  rdiff-backup/rdiff_backup/journal.py  | 195
-rw-r--r--  rdiff-backup/rdiff_backup/rpath.py    |  28
-rw-r--r--  rdiff-backup/testing/journaltest.py   |  29
3 files changed, 252 insertions(+), 0 deletions(-)
diff --git a/rdiff-backup/rdiff_backup/journal.py b/rdiff-backup/rdiff_backup/journal.py
new file mode 100644
index 0000000..d79875e
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/journal.py
@@ -0,0 +1,195 @@
+# Copyright 2002 Ben Escoto
+#
+# This file is part of rdiff-backup.
+#
+# rdiff-backup is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# rdiff-backup is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with rdiff-backup; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+"""Application level journaling for better error recovery
+
+This module has routines for maintaining a "journal" to keep track of
+writes to an rdiff-backup destination directory. This is necessary
+because otherwise data could be lost if the program is abruptly
+stopped (say, by a computer crash). For instance, various temp files
+could be left on the mirror drive. Or it may not be clear whether an
+increment file has been fully written.
+
+To keep this from happening, various writes may be journaled (a write
+corresponds to the updating of a single file). To do this, a separate
+file in the journal directory is created, and the necessary
+information is written to it. When the transaction is finished, that
+journal entry file will be deleted. If there is a crash, the next
+time rdiff-backup is run, it will see the journal file, and process
+it, bringing the rdiff-backup destination directory back into a
+consistent state.
+
+Two caveats:
+
+1) The journal is only meant to be used in conjunction with a
+ regression to the state before the backup was attempted. If the
+ failed session is not "cleaned" out right after the journal is
+ recovered, something bad could happen.
+
+2) This journal will only be effective if the actual hardware and OS
+ are working. If disk failures are causing data loss, or if a crash
+ causes your filesystem to be corrupted, rdiff-backup could lose
+ data despite all this journal stuff.
+
+"""
+
+import Globals, log, rpath, cPickle, TempFile
+
+# Holds an rpath of the journal directory and an open file object for it
+journal_dir_rp = None
+journal_dir_fp = None
+
+def open_journal():
+ """Make sure the journal dir exists (creating it if necessary)"""
+ global journal_dir_rp, journal_dir_fp
+ assert journal_dir_rp is journal_dir_fp is None
+ journal_dir_rp = Globals.rbdir.append("journal")
+ if not journal_dir_rp.lstat():
+ log.Log("Creating journal directory %s" % (journal_dir_rp.path,), 5)
+ journal_dir_rp.mkdir()
+ assert journal_dir_rp.isdir()
+ journal_dir_fp = journal_dir_rp.open("rb")
+
+def close_journal():
+ """Close the journal at the end of a session"""
+ global journal_dir_rp, journal_dir_fp
+ assert not journal_dir_rp.listdir()
+ assert not journal_dir_fp.close()
+ journal_dir_rp = journal_dir_fp = None
+
+def sync_journal():
+ """fsync the journal directory"""
+ journal_dir_rp.fsync(journal_dir_fp)
+
+def recover_journal():
+ """Read the journal and recover each of the events"""
+ for entry in get_entries_from_journal():
+ entry.recover()
+ entry.delete()
+
+def get_entries_from_journal():
+ """Return list of entries in the journal (deletes bad entries)"""
+ entry_list = []
+ for filename in journal_dir_rp.listdir():
+ entry_rp = journal_dir_rp.append(filename)
+ e = Entry()
+ success = e.init_from_rp(entry_rp)
+ if not success: entry_rp.delete()
+ else: entry_list.append(e)
+ return entry_list
+
+def write_entry(test_filename, test_filename_type,
+ increment_filename, temp_filename):
+ """Write new entry given variables into journal, return entry"""
+ e = Entry()
+ e.test_filename = test_filename
+ e.test_filename_type = test_filename_type
+ e.increment_filename = increment_filename
+ e.temp_filename = temp_filename
+ e.write()
+ return e
+
+def remove_entry(entry_rp):
+ """Remove the entry in entry_rp from the journal"""
+ entry_rp.delete()
+ sync_journal()
+
+
+class Entry:
+ """A single journal entry, describing one transaction
+
+ Although called a journal entry, this is less a description of
+	what is going to happen than a short recipe of what to do if
+ something goes wrong.
+
+ Currently the recipe needs to be very simple and is determined by
+ the four variables test_filename, test_filename_type,
+ increment_filename, and temp_filename. See the recover() method
+ for details.
+
+ """
+ test_filename = None
+ test_filename_type = None # None is a valid value for this variable
+ increment_filename = None
+ temp_filename = None
+
+ # This holds the rpath in the journal dir that holds self
+ entry_rp = None
+
+ def recover(self):
+ """Recover the current journal entry
+
+ See if test_filename matches test_filename_type. If so,
+ delete increment_filename. Delete temp_filename regardless.
+ It's OK to recover the same entry multiple times.
+
+ """
+ assert self.test_filename and self.temp_filename
+ test_rp = rpath.RPath(Globals.local_connection, self.test_filename)
+ temp_rp = rpath.RPath(Globals.local_connection, self.temp_filename)
+ inc_rp = rpath.RPath(Globals.local_connection, self.increment_filename)
+ if test_rp.lstat() == self.test_filename_type:
+ if inc_rp.lstat():
+ inc_rp.delete()
+ inc_rp.get_parent_rp().fsync()
+ if temp_rp.lstat():
+ temp_rp.delete()
+ temp_rp.get_parent_rp().fsync()
+
+ def to_string(self):
+ """Return string form of entry"""
+ return cPickle.dumps({'test_filename': self.test_filename,
+ 'test_filename_type': self.test_filename_type,
+ 'increment_filename': self.increment_filename,
+ 'temp_filename': self.temp_filename})
+
+ def write(self):
+ """Write the current entry into the journal"""
+ entry_rp = TempFile.new(journal_dir_rp.append("foo"))
+ fp = entry_rp.open("wb")
+ fp.write(self.to_string())
+ entry_rp.fsync(fp)
+ assert not fp.close()
+ sync_journal()
+ self.entry_rp = entry_rp
+
+ def init_from_string(self, s):
+ """Initialize values from string. Return 0 if problem."""
+ try: val_dict = cPickle.loads(s)
+ except cPickle.UnpicklingError: return 0
+ try:
+ self.test_filename = val_dict['test_filename']
+ self.test_filename_type = val_dict['test_filename_type']
+ self.increment_filename = val_dict['increment_filename']
+ self.temp_filename = val_dict['temp_filename']
+	except (TypeError, KeyError): return 0
+ return 1
+
+ def init_from_rp(self, entry_rp):
+ """Initialize values from an rpath. Return 0 if problem"""
+ if not entry_rp.isreg(): return 0
+ success = self.init_from_string(entry_rp.get_data())
+ if not success: return 0
+ self.entry_rp = entry_rp
+ return 1
+
+ def delete(self):
+ """Remove entry from the journal. self.entry_rp must be set"""
+ self.entry_rp.delete()
+ sync_journal()
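The intended calling sequence, per the module docstring above, is: replay any
leftover entries at startup, then bracket each risky write with a journal entry.
A minimal sketch of that protocol follows; only the journal functions themselves
come from this patch, while do_risky_write() and the file names are hypothetical
stand-ins, and Globals.rbdir is assumed to point at the destination directory.

    from rdiff_backup import journal, Globals, rpath

    Globals.rbdir = rpath.RPath(Globals.local_connection, "testfiles/output")
    journal.open_journal()
    journal.recover_journal()      # clean up after any earlier crash

    # Record how to undo the write before touching the mirror ...
    entry = journal.write_entry("mirror/foo", "reg",
                                "increments/foo.2003-02-04.diff.gz",
                                "rdiff-backup.tmp.0")
    do_risky_write()               # hypothetical: the actual mirror update
    # ... and drop the entry once the transaction has completed.
    journal.remove_entry(entry.entry_rp)

    journal.close_journal()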
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index fb2c255..0b18e19 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -736,6 +736,14 @@ class RPath(RORPath):
comps = normed.path.split("/")
return "/".join(comps[:-1]), comps[-1]
+ def get_parent_rp(self):
+ """Return new RPath of directory self is in"""
+ if self.index:
+ return self.__class__(self.conn, self.base, self.index[:-1])
+ dirname = self.dirsplit()[0]
+ if dirname: return self.__class__(self.conn, dirname)
+ else: return self.__class__(self.conn, "/")
+
def newpath(self, newpath, index = ()):
"""Return new RPath with the same connection but different path"""
return self.__class__(self.conn, newpath, index)
@@ -851,6 +859,26 @@ class RPath(RORPath):
else: raise RPathException
self.setdata()
+ def fsync(self, fp = None):
+ """fsync the current file or directory
+
+	If fp is None, get the file descriptor by opening the file.
+ This can be useful for directories.
+
+ """
+ if not fp:
+ fp = self.open("rb")
+ os.fsync(fp.fileno())
+ assert not fp.close()
+ else: os.fsync(fp.fileno())
+
+ def get_data(self):
+ """Open file as a regular file, read data, close, return data"""
+ fp = self.open("rb")
+ s = fp.read()
+ assert not fp.close()
+ return s
+
class RPathFileHook:
"""Look like a file, but add closing hook"""
diff --git a/rdiff-backup/testing/journaltest.py b/rdiff-backup/testing/journaltest.py
new file mode 100644
index 0000000..76e638a
--- /dev/null
+++ b/rdiff-backup/testing/journaltest.py
@@ -0,0 +1,29 @@
+from commontest import *
+import unittest
+from rdiff_backup import journal, Globals, rpath
+
+class JournalTest(unittest.TestCase):
+ def testBasic(self):
+ """Test opening a journal, then reading, writing, and deleting"""
+ MakeOutputDir()
+ Globals.rbdir = rpath.RPath(Globals.local_connection,
+ "testfiles/output")
+ journal.open_journal()
+ assert len(journal.get_entries_from_journal()) == 0
+
+ # It's important that none of these files really exist
+ e1 = journal.write_entry("Hello48", "reg", "inc_file3917", "t39p")
+ e2 = journal.write_entry("2nd_euoeuo", None, "inc_file4832", "l389")
+ assert e1.entry_rp and e2.entry_rp
+
+ l = journal.get_entries_from_journal()
+ assert len(l) == 2
+ first_filename = l[0].test_filename
+ assert first_filename == "Hello48" or first_filename == "2nd_euoeuo"
+
+ # Now test recovering journal, and make sure everything deleted
+ journal.recover_journal()
+ assert len(journal.get_entries_from_journal()) == 0
+
+
+if __name__ == "__main__": unittest.main()
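For reference, a journal entry file on disk is just a pickled dictionary of the
four fields, so a round trip through Entry.to_string()/init_from_string() can be
exercised without touching the filesystem. A small sketch using only methods
added in this patch (the field values here are illustrative):

    from rdiff_backup import journal

    e = journal.Entry()
    e.test_filename = "mirror/foo"
    e.test_filename_type = "reg"
    e.increment_filename = "increments/foo.2003-02-04.diff.gz"
    e.temp_filename = "rdiff-backup.tmp.0"

    s = e.to_string()                  # cPickle.dumps of the four fields
    e2 = journal.Entry()
    assert e2.init_from_string(s) == 1
    assert e2.test_filename_type == "reg"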