summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-03-21 07:37:00 +0000
committerben <ben@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-03-21 07:37:00 +0000
commitfb059444cc325ff02b48e982060d29e38e1ee937 (patch)
treec89d65c5b202928a11b09c4db242fd049aca3010
parenta2e3c38d72877dd9142d802e76047e10cf490e19 (diff)
downloadrdiff-backup-fb059444cc325ff02b48e982060d29e38e1ee937.tar.gz
Initial version
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@8 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r--rdiff-backup/src/hardlink.py242
-rw-r--r--rdiff-backup/testing/hardlinktest.py145
2 files changed, 387 insertions, 0 deletions
diff --git a/rdiff-backup/src/hardlink.py b/rdiff-backup/src/hardlink.py
new file mode 100644
index 0000000..ee5248e
--- /dev/null
+++ b/rdiff-backup/src/hardlink.py
@@ -0,0 +1,242 @@
+execfile("rpath.py")
+
+#######################################################################
+#
+# hardlink - code for preserving and restoring hardlinks
+#
+# If the preserve_hardlinks option is selected, linked files in the
+# source directory will be linked in the mirror directory. Linked
+# files are treated like any other with respect to incrementing, but a
+# database of all links will be recorded at each session, so linked
+# files can still be restored from the increments.
+#
+
+class Hardlink:
+ """Hardlink class methods and data
+
+ All these functions are meant to be executed on the destination
+ side. The source side should only transmit inode information.
+
+ NOTE(review): every function below takes cls as its first
+ argument; the MakeClass(Hardlink) call at the bottom of this file
+ presumably rebinds them as class methods -- confirm against
+ MakeClass's definition (not visible here).
+
+ """
+ # Keys in the two _inode_ dictionaries are (inode, devloc) pairs
+ # (see get_inode_key); the values are lists of indices of the
+ # files sharing that inode.
+ _src_inode_indicies = {}
+ _dest_inode_indicies = {}
+
+ # The keys for these two are just indices. They share values
+ # (the very same list objects) with the earlier dictionaries.
+ _src_index_indicies = {}
+ _dest_index_indicies = {}
+
+ def get_inode_key(cls, rorp):
+ """Return rorp's key for _inode_ dictionaries: an (inode, devloc) pair"""
+ return (rorp.getinode(), rorp.getdevloc())
+
+ def get_indicies(cls, rorp, source):
+ """Return a list of similarly linked indices, using rorp's index
+
+ A true source selects the source-side table, otherwise the
+ dest-side table is used. Returns [] for an unrecorded index.
+
+ """
+ if source: dict = cls._src_index_indicies
+ else: dict = cls._dest_index_indicies
+ try: return dict[rorp.index]
+ except KeyError: return []
+
+ def add_rorp(cls, rorp, source):
+ """Process new rorp and update hard link dictionaries
+
+ First enter it into src_inode_indicies. If we have already
+ seen all the hard links, then we can delete the entry.
+ Everything must stay recorded in src_index_indicies though.
+
+ """
+ # Only regular files with more than one link are tracked.
+ if not rorp.isreg() or rorp.getnumlinks() < 2: return
+
+ if source: inode_dict, index_dict = (cls._src_inode_indicies,
+ cls._src_index_indicies)
+ else: inode_dict, index_dict = (cls._dest_inode_indicies,
+ cls._dest_index_indicies)
+
+ rp_inode_key = cls.get_inode_key(rorp)
+ if inode_dict.has_key(rp_inode_key):
+ index_list = inode_dict[rp_inode_key]
+ index_list.append(rorp.index)
+ # Once every link to this inode has been seen the inode
+ # entry is no longer needed; the shared list object remains
+ # reachable through index_dict.
+ if len(index_list) == rorp.getnumlinks():
+ del inode_dict[rp_inode_key]
+ else: # make new entry in both src dicts
+ index_list = [rorp.index]
+ inode_dict[rp_inode_key] = index_list
+ index_dict[rorp.index] = index_list
+
+ def add_rorp_iter(cls, iter, source):
+ """Return new rorp iterator like iter that cls.add_rorp's first
+
+ Implemented as a generator: each rorp is recorded via add_rorp
+ before being yielded through unchanged.
+
+ """
+ for rorp in iter:
+ cls.add_rorp(rorp, source)
+ yield rorp
+
+ def rorp_eq(cls, src_rorp, dest_rorp):
+ """Compare hardlinked for equality
+
+ Two files may otherwise seem equal but be hardlinked in
+ different ways. This function considers them equal enough if
+ they have been hardlinked correctly to the previously seen
+ indices.
+
+ Returns a true value (1) when equal enough, a false value
+ (None or 0-equivalent) otherwise.
+
+ """
+ assert src_rorp.index == dest_rorp.index
+ if (not src_rorp.isreg() or not dest_rorp.isreg() or
+ src_rorp.getnumlinks() == dest_rorp.getnumlinks() == 1):
+ return 1 # Hard links don't apply
+
+ src_index_list = cls.get_indicies(src_rorp, 1)
+ dest_index_list = cls.get_indicies(dest_rorp, None)
+
+ # If a list only has one element, then it is only hardlinked
+ # to itself so far, so that is not a genuine difference yet.
+ if not src_index_list or len(src_index_list) == 1:
+ return not dest_index_list or len(dest_index_list) == 1
+ if not dest_index_list or len(dest_index_list) == 1: return None
+
+ # Both index lists exist and are non-empty
+ return src_index_list == dest_index_list # they are always sorted
+
+ def islinked(cls, rorp):
+ """True if rorp's index is already linked to something on src side"""
+ return len(cls.get_indicies(rorp, 1)) >= 2
+
+ def restore_link(cls, mirror_rel_index, rpath):
+ """Restores a linked file by linking it
+
+ When restoring, all the hardlink data is already present, and
+ we can only link to something already written. Returns true
+ if succeeded in creating rpath, false if must restore rpath
+ normally.
+
+ """
+ full_index = mirror_rel_index + rpath.index
+ if not cls._src_index_indicies.has_key(full_index): return None
+ # Keep only the linked indices that fall under mirror_rel_index,
+ # stripped of that prefix so they are relative like rpath.index.
+ truncated_list = []
+ for index in cls._src_index_indicies[full_index]:
+ if index[:len(mirror_rel_index)] == mirror_rel_index:
+ truncated_list.append(index[len(mirror_rel_index):])
+
+ # Only link to an earlier index (the lists are sorted, so the
+ # first element is earliest); a later one isn't written yet.
+ if not truncated_list or truncated_list[0] >= rpath.index: return None
+ srclink = RPath(rpath.conn, rpath.base, truncated_list[0])
+ rpath.hardlink(srclink.path)
+ return 1
+
+ def link_rp(cls, src_rorp, dest_rpath, dest_root = None):
+ """Make dest_rpath into a link analogous to that of src_rorp
+
+ Links dest_rpath to the first (earliest) index recorded for
+ src_rorp's hardlink group on the source side.
+
+ """
+ if not dest_root: dest_root = dest_rpath # use base of dest_rpath
+ dest_link_rpath = RPath(dest_root.conn, dest_root.base,
+ cls.get_indicies(src_rorp, 1)[0])
+ dest_rpath.hardlink(dest_link_rpath.path)
+
+ def write_linkdict(cls, rpath, dict):
+ """Write link data to the rbdata dir
+
+ It is stored as a big pickled dictionary dated to match
+ the current hardlinks.
+
+ """
+ assert (Globals.isbackup_writer and
+ rpath.conn is Globals.local_connection)
+ # Dump into a temp file first; Robust.make_tf_robustaction
+ # (defined elsewhere) presumably moves it into place so a
+ # partial write cannot clobber rpath -- TODO confirm.
+ tf = TempFileManager.new(rpath)
+ def init():
+ fp = tf.open("wb")
+ cPickle.dump(dict, fp)
+ assert not fp.close()
+ Robust.make_tf_robustaction(init, (tf,), (rpath,)).execute()
+
+ def get_linkrp(cls, data_rpath, time, prefix):
+ """Return RPath of linkdata, or None if cannot find
+
+ Scans data_rpath's directory listing for a 'snapshot'
+ increment file whose base name equals prefix and whose
+ timestamp matches time.
+
+ """
+ for rp in map(data_rpath.append, data_rpath.listdir()):
+ if (rp.isincfile() and rp.getincbase_str() == prefix and
+ rp.getinctype() == 'snapshot' and
+ Time.stringtotime(rp.getinctime()) == time):
+ return rp
+ return None
+
+ def get_linkdata(cls, data_rpath, time, prefix = 'hardlink_data'):
+ """Return index dictionary written by write_linkdict at time
+
+ Returns None when no matching linkdata file is found.
+
+ """
+ rp = cls.get_linkrp(data_rpath, time, prefix)
+ if not rp: return None
+ fp = rp.open("rb")
+ index_dict = cPickle.load(fp)
+ assert not fp.close()
+ return index_dict
+
+ def final_writedata(cls):
+ """Write final checkpoint data to rbdir after successful backup
+
+ Only the source index dictionary is written, and only if it is
+ non-empty (i.e. some hard links were actually seen).
+
+ """
+ if cls._src_index_indicies:
+ Log("Writing hard link data", 6)
+ rp = Globals.rbdir.append("hardlink_data.%s.snapshot" %
+ Time.curtimestr)
+ cls.write_linkdict(rp, cls._src_index_indicies)
+
+ def retrieve_final(cls, time):
+ """Set source index dictionary from hardlink_data file if avail
+
+ Returns 1 on success, None if no data file was found for time.
+
+ """
+ hd = cls.get_linkdata(Globals.rbdir, time)
+ if hd is None: return None
+ cls._src_index_indicies = hd
+ return 1
+
+ def final_checkpoint(cls, data_rpath):
+ """Write contents of the four dictionaries to the data dir
+
+ If rdiff-backup receives a fatal error, it may still be able
+ to save the contents of the four hard link dictionaries.
+ Because these dictionaries may be big, they are not saved
+ periodically (every 20 seconds or so), but just at the end.
+
+ """
+ Log("Writing intermediate hard link data to disk", 2)
+ src_inode_rp = data_rpath.append("hardlink_source_inode_checkpoint."
+ "%s.snapshot" % Time.curtimestr)
+ src_index_rp = data_rpath.append("hardlink_source_index_checkpoint."
+ "%s.snapshot" % Time.curtimestr)
+ dest_inode_rp = data_rpath.append("hardlink_dest_inode_checkpoint."
+ "%s.snapshot" % Time.curtimestr)
+ dest_index_rp = data_rpath.append("hardlink_dest_index_checkpoint."
+ "%s.snapshot" % Time.curtimestr)
+ for (rp, dict) in ((src_inode_rp, cls._src_inode_indicies),
+ (src_index_rp, cls._src_index_indicies),
+ (dest_inode_rp, cls._dest_inode_indicies),
+ (dest_index_rp, cls._dest_index_indicies)):
+ cls.write_linkdict(rp, dict)
+
+ def retrieve_checkpoint(cls, data_rpath, time):
+ """Retrieve hardlink data from final checkpoint
+
+ Return true if the retrieval worked, false otherwise. All
+ four dictionaries must load successfully; otherwise none of
+ the class state is replaced.
+
+ """
+ try:
+ src_inode = cls.get_linkdata(data_rpath, time,
+ "hardlink_source_inode_checkpoint")
+ src_index = cls.get_linkdata(data_rpath, time,
+ "hardlink_source_index_checkpoint")
+ dest_inode = cls.get_linkdata(data_rpath, time,
+ "hardlink_dest_inode_checkpoint")
+ dest_index = cls.get_linkdata(data_rpath, time,
+ "hardlink_dest_index_checkpoint")
+ except cPickle.UnpicklingError:
+ Log("Unpickling Error", 2)
+ return None
+ if (src_inode is None or src_index is None or
+ dest_inode is None or dest_index is None): return None
+ cls._src_inode_indicies = src_inode
+ cls._src_index_indicies = src_index
+ cls._dest_inode_indicies = dest_inode
+ cls._dest_index_indicies = dest_index
+ return 1
+
+ def remove_all_checkpoints(cls):
+ """Remove all hardlink checkpoint information from directory
+
+ Deletes any 'snapshot' increment file in Globals.rbdir whose
+ base name matches one of the four checkpoint prefixes.
+
+ """
+ prefix_list = ["hardlink_source_inode_checkpoint",
+ "hardlink_source_index_checkpoint",
+ "hardlink_dest_inode_checkpoint",
+ "hardlink_dest_index_checkpoint"]
+ for rp in map(Globals.rbdir.append, Globals.rbdir.listdir()):
+ if (rp.isincfile() and rp.getincbase_str() in prefix_list and
+ rp.getinctype() == 'snapshot'):
+ rp.delete()
+
+# MakeClass is defined elsewhere (loaded via execfile); presumably it
+# converts the cls-first functions above into class methods.
+MakeClass(Hardlink)
diff --git a/rdiff-backup/testing/hardlinktest.py b/rdiff-backup/testing/hardlinktest.py
new file mode 100644
index 0000000..38cdfbe
--- /dev/null
+++ b/rdiff-backup/testing/hardlinktest.py
@@ -0,0 +1,145 @@
+import os, unittest
+execfile("commontest.py")
+rbexec("main.py")
+
+
+Log.setverbosity(6)
+
+class HardlinkTest(unittest.TestCase):
+ """Test cases for Hard links"""
+ # Fixture paths. RPath and Globals come from the rbexec'd main.py
+ # (see top of file); the testfiles/hardlinks directories are
+ # pre-built fixtures -- their exact layout is implied by the
+ # expected dictionaries in testCompletedDict below.
+ outputrp = RPath(Globals.local_connection, "testfiles/output")
+ hardlink_dir1 = RPath(Globals.local_connection, "testfiles/hardlinks/dir1")
+ hardlink_dir1copy = \
+ RPath(Globals.local_connection, "testfiles/hardlinks/dir1copy")
+ hardlink_dir2 = RPath(Globals.local_connection, "testfiles/hardlinks/dir2")
+ hardlink_dir3 = RPath(Globals.local_connection, "testfiles/hardlinks/dir3")
+
+ def reset_output(self):
+ """Erase and recreate testfiles/output directory"""
+ os.system(MiscDir+'/myrm testfiles/output')
+ self.outputrp.mkdir()
+
+ def testEquality(self):
+ """Test rorp_eq function in conjunction with CompareRecursive
+
+ dir1 and dir2 compare equal when hard links are ignored but
+ unequal when they are checked, so presumably they differ only
+ in hardlink structure -- confirm against the fixtures.
+
+ """
+ assert CompareRecursive(self.hardlink_dir1, self.hardlink_dir1copy)
+ assert CompareRecursive(self.hardlink_dir1, self.hardlink_dir2,
+ compare_hardlinks = None)
+ assert not CompareRecursive(self.hardlink_dir1, self.hardlink_dir2,
+ compare_hardlinks = 1)
+
+ def testCheckpointing(self):
+ """Test saving and recovering of various dictionaries
+
+ Round-trips the four Hardlink dictionaries through
+ final_checkpoint and retrieve_checkpoint.
+
+ """
+ d1 = {1:1}
+ d2 = {2:2}
+ d3 = {3:3}
+ d4 = {}
+
+ Hardlink._src_inode_indicies = d1
+ Hardlink._src_index_indicies = d2
+ Hardlink._dest_inode_indicies = d3
+ Hardlink._dest_index_indicies = d4
+
+ self.reset_output()
+ Time.setcurtime(12345)
+ # write_linkdict asserts Globals.isbackup_writer, so set it here.
+ Globals.isbackup_writer = 1
+ Hardlink.final_checkpoint(self.outputrp)
+
+ # reset_hardlink_dicts (from commontest) clears the class state
+ # so the retrieval below must really come from disk.
+ reset_hardlink_dicts()
+ assert Hardlink.retrieve_checkpoint(self.outputrp, 12345)
+ assert Hardlink._src_inode_indicies == d1, \
+ Hardlink._src_inode_indicies
+ assert Hardlink._src_index_indicies == d2, \
+ Hardlink._src_index_indicies
+ assert Hardlink._dest_inode_indicies == d3, \
+ Hardlink._dest_inode_indicies
+ assert Hardlink._dest_index_indicies == d4, \
+ Hardlink._dest_index_indicies
+
+ def testFinalwrite(self):
+ """Test writing of the final database
+
+ Round-trips _src_index_indicies through final_writedata and
+ retrieve_final.
+
+ """
+ Globals.isbackup_writer = 1
+ Time.setcurtime(123456)
+ Globals.rbdir = self.outputrp
+ finald = Hardlink._src_index_indicies = {'hello':'world'}
+
+ self.reset_output()
+ Hardlink.final_writedata()
+
+ Hardlink._src_index_indicies = None
+ assert Hardlink.retrieve_final(123456)
+ assert Hardlink._src_index_indicies == finald
+
+ def testBuildingDict(self):
+ """See if the partial inode dictionary is correct
+
+ Feeds dir3 through add_rorp on the source side; the inode and
+ index dictionaries should end up with the same three groups.
+
+ """
+ Globals.preserve_hardlinks = 1
+ reset_hardlink_dicts()
+ for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir3, 1):
+ Hardlink.add_rorp(dsrp, 1)
+
+ assert len(Hardlink._src_inode_indicies.keys()) == 3, \
+ Hardlink._src_inode_indicies
+ assert len(Hardlink._src_index_indicies.keys()) == 3, \
+ Hardlink._src_index_indicies
+ # Both dictionaries share the same list values (see add_rorp).
+ vals1 = Hardlink._src_inode_indicies.values()
+ vals2 = Hardlink._src_index_indicies.values()
+ vals1.sort()
+ vals2.sort()
+ assert vals1 == vals2
+
+ def testBuildingDict2(self):
+ """Same as testBuildingDict but test destination building"""
+ Globals.preserve_hardlinks = 1
+ reset_hardlink_dicts()
+ for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir3, None):
+ Hardlink.add_rorp(dsrp, None)
+
+ assert len(Hardlink._dest_inode_indicies.keys()) == 3, \
+ Hardlink._dest_inode_indicies
+ assert len(Hardlink._dest_index_indicies.keys()) == 3, \
+ Hardlink._dest_index_indicies
+ vals1 = Hardlink._dest_inode_indicies.values()
+ vals2 = Hardlink._dest_index_indicies.values()
+ vals1.sort()
+ vals2.sort()
+ assert vals1 == vals2
+
+ def testCompletedDict(self):
+ """See if the hardlink dictionaries are built correctly
+
+ After all links of every inode have been seen, the inode
+ dictionary should be empty (add_rorp deletes completed
+ entries) and the index dictionary should hold the full groups.
+
+ """
+ reset_hardlink_dicts()
+ for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir1, 1):
+ Hardlink.add_rorp(dsrp, 1)
+ assert Hardlink._src_inode_indicies == {}, \
+ Hardlink._src_inode_indicies
+
+ # dir1: files 1-3 form one hardlink group, files 4-6 another.
+ hll1 = [('file1',), ('file2',), ('file3',)]
+ hll2 = [('file4',), ('file5',), ('file6',)]
+ dict = {}
+ for index in hll1: dict[index] = hll1
+ for index in hll2: dict[index] = hll2
+ assert Hardlink._src_index_indicies == dict
+
+ reset_hardlink_dicts()
+ for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir2, 1):
+ Hardlink.add_rorp(dsrp, 1)
+ assert Hardlink._src_inode_indicies == {}, \
+ Hardlink._src_inode_indicies
+
+ # dir2 groups the same six files differently: 1/3/4 and 2/5/6.
+ hll1 = [('file1',), ('file3',), ('file4',)]
+ hll2 = [('file2',), ('file5',), ('file6',)]
+ dict = {}
+ for index in hll1: dict[index] = hll1
+ for index in hll2: dict[index] = hll2
+ assert Hardlink._src_index_indicies == dict
+
+ def testSeries(self):
+ """Test hardlink system by backing up and restoring a few dirs
+
+ BackupRestoreSeries (from commontest) performs end-to-end
+ backup/restore cycles; run once presumably local/local and
+ once remote/remote -- confirm its flag semantics.
+
+ """
+ dirlist = ['testfiles/hardlinks/dir1',
+ 'testfiles/hardlinks/dir2',
+ 'testfiles/hardlinks/dir3',
+ 'testfiles/various_file_types']
+ BackupRestoreSeries(None, None, dirlist, compare_hardlinks=1)
+ BackupRestoreSeries(1, 1, dirlist, compare_hardlinks=1)
+
+
+
+if __name__ == "__main__": unittest.main()