From fb059444cc325ff02b48e982060d29e38e1ee937 Mon Sep 17 00:00:00 2001
From: ben
Date: Thu, 21 Mar 2002 07:37:00 +0000
Subject: Initial version

git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@8 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
---
 rdiff-backup/src/hardlink.py         | 242 +++++++++++++++++++++++++++++++++++
 rdiff-backup/testing/hardlinktest.py | 145 +++++++++++++++++++++
 2 files changed, 387 insertions(+)
 create mode 100644 rdiff-backup/src/hardlink.py
 create mode 100644 rdiff-backup/testing/hardlinktest.py

diff --git a/rdiff-backup/src/hardlink.py b/rdiff-backup/src/hardlink.py
new file mode 100644
index 0000000..ee5248e
--- /dev/null
+++ b/rdiff-backup/src/hardlink.py
@@ -0,0 +1,242 @@
+execfile("rpath.py")
+
+#######################################################################
+#
+# hardlink - code for preserving and restoring hardlinks
+#
+# If the preserve_hardlinks option is selected, linked files in the
+# source directory will be linked in the mirror directory.  Linked
+# files are treated like any other with respect to incrementing, but a
+# database of all links will be recorded at each session, so linked
+# files can still be restored from the increments.
+#
+
+class Hardlink:
+    """Hardlink class methods and data
+
+    All these functions are meant to be executed on the destination
+    side.  The source side should only transmit inode information.
+
+    """
+    # In all of these, lists of indicies are the values.  The keys in
+    # _inode_ ones are (inode, devloc) pairs.
+    _src_inode_indicies = {}
+    _dest_inode_indicies = {}
+
+    # The keys for these two are just indicies.  They share values
+    # with the earlier dictionaries.
+    _src_index_indicies = {}
+    _dest_index_indicies = {}
+
+    def get_inode_key(cls, rorp):
+        """Return rorp's key for _inode_ dictionaries"""
+        return (rorp.getinode(), rorp.getdevloc())
+
+    def get_indicies(cls, rorp, source):
+        """Return a list of similarly linked indicies, using rorp's index"""
+        if source: dict = cls._src_index_indicies
+        else: dict = cls._dest_index_indicies
+        try: return dict[rorp.index]
+        except KeyError: return []
+
+    def add_rorp(cls, rorp, source):
+        """Process new rorp and update hard link dictionaries
+
+        First enter it into src_inode_indicies.  If we have already
+        seen all the hard links, then we can delete the entry.
+        Everything must stay recorded in src_index_indicies though.
+
+        """
+        if not rorp.isreg() or rorp.getnumlinks() < 2: return
+
+        if source: inode_dict, index_dict = (cls._src_inode_indicies,
+                                             cls._src_index_indicies)
+        else: inode_dict, index_dict = (cls._dest_inode_indicies,
+                                        cls._dest_index_indicies)
+
+        rp_inode_key = cls.get_inode_key(rorp)
+        if inode_dict.has_key(rp_inode_key):
+            index_list = inode_dict[rp_inode_key]
+            index_list.append(rorp.index)
+            if len(index_list) == rorp.getnumlinks():
+                del inode_dict[rp_inode_key]
+        else: # make new entry in both src dicts
+            index_list = [rorp.index]
+            inode_dict[rp_inode_key] = index_list
+            index_dict[rorp.index] = index_list
+
+    def add_rorp_iter(cls, iter, source):
+        """Return new rorp iterator like iter that cls.add_rorp's first"""
+        for rorp in iter:
+            cls.add_rorp(rorp, source)
+            yield rorp
+
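
# Illustration (a standalone sketch, not rdiff-backup API): the bookkeeping
# that add_rorp() performs above, mimicked with plain dicts and tuples in
# place of real rorp objects.  The (inode, devloc) key and the file indices
# below are made up for the example.
inode_dict = {}   # keyed by (inode, devloc); entry dropped once every link is seen
index_dict = {}   # keyed by index; keeps the shared list for the whole session

def add_link(index, inode_key, numlinks):
    # same branch structure as add_rorp() for a regular file with numlinks >= 2
    if inode_key in inode_dict:
        index_list = inode_dict[inode_key]
        index_list.append(index)
        if len(index_list) == numlinks: del inode_dict[inode_key]
    else:
        index_list = [index]
        inode_dict[inode_key] = index_list
        index_dict[index] = index_list

for idx in (('file1',), ('file2',), ('file3',)):
    add_link(idx, (1234, 0), 3)
assert inode_dict == {}         # all three links of the group were seen
assert index_dict[('file1',)] == [('file1',), ('file2',), ('file3',)]
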
+    def rorp_eq(cls, src_rorp, dest_rorp):
+        """Compare hardlinked files for equality
+
+        Two files may otherwise seem equal but be hardlinked in
+        different ways.  This function considers them equal enough if
+        they have been hardlinked correctly to the previously seen
+        indicies.
+
+        """
+        assert src_rorp.index == dest_rorp.index
+        if (not src_rorp.isreg() or not dest_rorp.isreg() or
+            src_rorp.getnumlinks() == dest_rorp.getnumlinks() == 1):
+            return 1 # Hard links don't apply
+
+        src_index_list = cls.get_indicies(src_rorp, 1)
+        dest_index_list = cls.get_indicies(dest_rorp, None)
+
+        # If a list only has one element, then it is only hardlinked
+        # to itself so far, so that is not a genuine difference yet.
+        if not src_index_list or len(src_index_list) == 1:
+            return not dest_index_list or len(dest_index_list) == 1
+        if not dest_index_list or len(dest_index_list) == 1: return None
+
+        # Both index lists exist and are non-empty
+        return src_index_list == dest_index_list # they are always sorted
+
+    def islinked(cls, rorp):
+        """True if rorp's index is already linked to something on src side"""
+        return len(cls.get_indicies(rorp, 1)) >= 2
+
+    def restore_link(cls, mirror_rel_index, rpath):
+        """Restores a linked file by linking it
+
+        When restoring, all the hardlink data is already present, and
+        we can only link to something already written.  Returns true
+        if succeeded in creating rpath, false if must restore rpath
+        normally.
+
+        """
+        full_index = mirror_rel_index + rpath.index
+        if not cls._src_index_indicies.has_key(full_index): return None
+        truncated_list = []
+        for index in cls._src_index_indicies[full_index]:
+            if index[:len(mirror_rel_index)] == mirror_rel_index:
+                truncated_list.append(index[len(mirror_rel_index):])
+
+        if not truncated_list or truncated_list[0] >= rpath.index: return None
+        srclink = RPath(rpath.conn, rpath.base, truncated_list[0])
+        rpath.hardlink(srclink.path)
+        return 1
+
+    def link_rp(cls, src_rorp, dest_rpath, dest_root = None):
+        """Make dest_rpath into a link analogous to that of src_rorp"""
+        if not dest_root: dest_root = dest_rpath # use base of dest_rpath
+        dest_link_rpath = RPath(dest_root.conn, dest_root.base,
+                                cls.get_indicies(src_rorp, 1)[0])
+        dest_rpath.hardlink(dest_link_rpath.path)
+
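
# Illustration (a standalone sketch, not rdiff-backup API): the index
# arithmetic behind restore_link() above.  Indices are tuples of path
# components; mirror_rel_index locates the restored subtree inside the
# mirror.  All values here are invented for the example.
mirror_rel_index = ('subdir',)
linked_group = [('elsewhere', 'c'), ('subdir', 'a'), ('subdir', 'b')]

def truncated(group, rel_index):
    # keep only group members under rel_index, with rel_index stripped off
    n = len(rel_index)
    return [idx[n:] for idx in group if idx[:n] == rel_index]

# ('elsewhere', 'c') lies outside the restore root, so it is filtered out.
assert truncated(linked_group, mirror_rel_index) == [('a',), ('b',)]
# Restoring ('a',): nothing earlier in its group is on disk yet, so it must
# be restored normally (truncated_list[0] >= rpath.index).
assert truncated(linked_group, mirror_rel_index)[0] >= ('a',)
# Restoring ('b',): ('a',) sorts before it and is already restored, so the
# file can simply be hard linked to it.
assert truncated(linked_group, mirror_rel_index)[0] < ('b',)
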
+    def write_linkdict(cls, rpath, dict):
+        """Write link data to the rbdata dir
+
+        It is stored as a big pickled dictionary dated to match
+        the current hardlinks.
+
+        """
+        assert (Globals.isbackup_writer and
+                rpath.conn is Globals.local_connection)
+        tf = TempFileManager.new(rpath)
+        def init():
+            fp = tf.open("wb")
+            cPickle.dump(dict, fp)
+            assert not fp.close()
+        Robust.make_tf_robustaction(init, (tf,), (rpath,)).execute()
+
+    def get_linkrp(cls, data_rpath, time, prefix):
+        """Return RPath of linkdata, or None if cannot find"""
+        for rp in map(data_rpath.append, data_rpath.listdir()):
+            if (rp.isincfile() and rp.getincbase_str() == prefix and
+                rp.getinctype() == 'snapshot' and
+                Time.stringtotime(rp.getinctime()) == time):
+                return rp
+        return None
+
+    def get_linkdata(cls, data_rpath, time, prefix = 'hardlink_data'):
+        """Return index dictionary written by write_linkdict at time"""
+        rp = cls.get_linkrp(data_rpath, time, prefix)
+        if not rp: return None
+        fp = rp.open("rb")
+        index_dict = cPickle.load(fp)
+        assert not fp.close()
+        return index_dict
+
+    def final_writedata(cls):
+        """Write final checkpoint data to rbdir after successful backup"""
+        if cls._src_index_indicies:
+            Log("Writing hard link data", 6)
+            rp = Globals.rbdir.append("hardlink_data.%s.snapshot" %
+                                      Time.curtimestr)
+            cls.write_linkdict(rp, cls._src_index_indicies)
+
+    def retrieve_final(cls, time):
+        """Set source index dictionary from hardlink_data file if avail"""
+        hd = cls.get_linkdata(Globals.rbdir, time)
+        if hd is None: return None
+        cls._src_index_indicies = hd
+        return 1
+
+    def final_checkpoint(cls, data_rpath):
+        """Write contents of the four dictionaries to the data dir
+
+        If rdiff-backup receives a fatal error, it may still be able
+        to save the contents of the four hard link dictionaries.
+        Because these dictionaries may be big, they are not saved
+        every 20 seconds or so, but just at the end.
+
+        """
+        Log("Writing intermediate hard link data to disk", 2)
+        src_inode_rp = data_rpath.append("hardlink_source_inode_checkpoint."
+                                         "%s.snapshot" % Time.curtimestr)
+        src_index_rp = data_rpath.append("hardlink_source_index_checkpoint."
+                                         "%s.snapshot" % Time.curtimestr)
+        dest_inode_rp = data_rpath.append("hardlink_dest_inode_checkpoint."
+                                          "%s.snapshot" % Time.curtimestr)
+        dest_index_rp = data_rpath.append("hardlink_dest_index_checkpoint."
+                                          "%s.snapshot" % Time.curtimestr)
+        for (rp, dict) in ((src_inode_rp, cls._src_inode_indicies),
+                           (src_index_rp, cls._src_index_indicies),
+                           (dest_inode_rp, cls._dest_inode_indicies),
+                           (dest_index_rp, cls._dest_index_indicies)):
+            cls.write_linkdict(rp, dict)
+
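
# Illustration (a standalone sketch): the checkpoint files written above are
# ordinary pickled dictionaries.  This mimics the write_linkdict()/
# get_linkdata() round trip without RPath, TempFileManager or the robust
# action machinery (the original uses cPickle; plain pickle behaves the same
# for this purpose).
import os, pickle, tempfile

link_dict = {('file1',): [('file1',), ('file2',)]}
fd, name = tempfile.mkstemp(suffix=".snapshot")
fp = os.fdopen(fd, "wb")
pickle.dump(link_dict, fp)
fp.close()

fp = open(name, "rb")
assert pickle.load(fp) == link_dict
fp.close()
os.remove(name)
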
+    def retrieve_checkpoint(cls, data_rpath, time):
+        """Retrieve hardlink data from final checkpoint
+
+        Return true if the retrieval worked, false otherwise.
+
+        """
+        try:
+            src_inode = cls.get_linkdata(data_rpath, time,
+                            "hardlink_source_inode_checkpoint")
+            src_index = cls.get_linkdata(data_rpath, time,
+                            "hardlink_source_index_checkpoint")
+            dest_inode = cls.get_linkdata(data_rpath, time,
+                            "hardlink_dest_inode_checkpoint")
+            dest_index = cls.get_linkdata(data_rpath, time,
+                            "hardlink_dest_index_checkpoint")
+        except cPickle.UnpicklingError:
+            Log("Unpickling Error", 2)
+            return None
+        if (src_inode is None or src_index is None or
+            dest_inode is None or dest_index is None): return None
+        cls._src_inode_indicies = src_inode
+        cls._src_index_indicies = src_index
+        cls._dest_inode_indicies = dest_inode
+        cls._dest_index_indicies = dest_index
+        return 1
+
+    def remove_all_checkpoints(cls):
+        """Remove all hardlink checkpoint information from directory"""
+        prefix_list = ["hardlink_source_inode_checkpoint",
+                       "hardlink_source_index_checkpoint",
+                       "hardlink_dest_inode_checkpoint",
+                       "hardlink_dest_index_checkpoint"]
+        for rp in map(Globals.rbdir.append, Globals.rbdir.listdir()):
+            if (rp.isincfile() and rp.getincbase_str() in prefix_list and
+                rp.getinctype() == 'snapshot'):
+                rp.delete()
+
+MakeClass(Hardlink)
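
The (inode, devloc) keys used throughout hardlink.py come from stat data that
the source side transmits. A minimal self-contained sketch, independent of
rdiff-backup, of how hard links can be detected and grouped from stat
information alone (it assumes a POSIX filesystem that supports hard links;
all paths are temporary):

import os, tempfile

tmpdir = tempfile.mkdtemp()
first = os.path.join(tmpdir, "file1")
second = os.path.join(tmpdir, "file2")
open(first, "w").close()
os.link(first, second)                  # second is now a hard link to first

groups = {}                             # (st_ino, st_dev) -> list of names
for name in sorted(os.listdir(tmpdir)):
    st = os.stat(os.path.join(tmpdir, name))
    groups.setdefault((st.st_ino, st.st_dev), []).append(name)

assert list(groups.values()) == [["file1", "file2"]]
assert os.stat(first).st_nlink == 2

os.remove(first); os.remove(second); os.rmdir(tmpdir)
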
diff --git a/rdiff-backup/testing/hardlinktest.py b/rdiff-backup/testing/hardlinktest.py
new file mode 100644
index 0000000..38cdfbe
--- /dev/null
+++ b/rdiff-backup/testing/hardlinktest.py
@@ -0,0 +1,145 @@
+import os, unittest
+execfile("commontest.py")
+rbexec("main.py")
+
+
+Log.setverbosity(6)
+
+class HardlinkTest(unittest.TestCase):
+    """Test cases for Hard links"""
+    outputrp = RPath(Globals.local_connection, "testfiles/output")
+    hardlink_dir1 = RPath(Globals.local_connection, "testfiles/hardlinks/dir1")
+    hardlink_dir1copy = \
+        RPath(Globals.local_connection, "testfiles/hardlinks/dir1copy")
+    hardlink_dir2 = RPath(Globals.local_connection, "testfiles/hardlinks/dir2")
+    hardlink_dir3 = RPath(Globals.local_connection, "testfiles/hardlinks/dir3")
+
+    def reset_output(self):
+        """Erase and recreate testfiles/output directory"""
+        os.system(MiscDir+'/myrm testfiles/output')
+        self.outputrp.mkdir()
+
+    def testEquality(self):
+        """Test rorp_eq function in conjunction with CompareRecursive"""
+        assert CompareRecursive(self.hardlink_dir1, self.hardlink_dir1copy)
+        assert CompareRecursive(self.hardlink_dir1, self.hardlink_dir2,
+                                compare_hardlinks = None)
+        assert not CompareRecursive(self.hardlink_dir1, self.hardlink_dir2,
+                                    compare_hardlinks = 1)
+
+    def testCheckpointing(self):
+        """Test saving and recovering of various dictionaries"""
+        d1 = {1:1}
+        d2 = {2:2}
+        d3 = {3:3}
+        d4 = {}
+
+        Hardlink._src_inode_indicies = d1
+        Hardlink._src_index_indicies = d2
+        Hardlink._dest_inode_indicies = d3
+        Hardlink._dest_index_indicies = d4
+
+        self.reset_output()
+        Time.setcurtime(12345)
+        Globals.isbackup_writer = 1
+        Hardlink.final_checkpoint(self.outputrp)
+
+        reset_hardlink_dicts()
+        assert Hardlink.retrieve_checkpoint(self.outputrp, 12345)
+        assert Hardlink._src_inode_indicies == d1, \
+               Hardlink._src_inode_indicies
+        assert Hardlink._src_index_indicies == d2, \
+               Hardlink._src_index_indicies
+        assert Hardlink._dest_inode_indicies == d3, \
+               Hardlink._dest_inode_indicies
+        assert Hardlink._dest_index_indicies == d4, \
+               Hardlink._dest_index_indicies
+
+    def testFinalwrite(self):
+        """Test writing of the final database"""
+        Globals.isbackup_writer = 1
+        Time.setcurtime(123456)
+        Globals.rbdir = self.outputrp
+        finald = Hardlink._src_index_indicies = {'hello':'world'}
+
+        self.reset_output()
+        Hardlink.final_writedata()
+
+        Hardlink._src_index_indicies = None
+        assert Hardlink.retrieve_final(123456)
+        assert Hardlink._src_index_indicies == finald
+
+    def testBuildingDict(self):
+        """See if the partial inode dictionary is correct"""
+        Globals.preserve_hardlinks = 1
+        reset_hardlink_dicts()
+        for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir3, 1):
+            Hardlink.add_rorp(dsrp, 1)
+
+        assert len(Hardlink._src_inode_indicies.keys()) == 3, \
+               Hardlink._src_inode_indicies
+        assert len(Hardlink._src_index_indicies.keys()) == 3, \
+               Hardlink._src_index_indicies
+        vals1 = Hardlink._src_inode_indicies.values()
+        vals2 = Hardlink._src_index_indicies.values()
+        vals1.sort()
+        vals2.sort()
+        assert vals1 == vals2
+
+    def testBuildingDict2(self):
+        """Same as testBuildingDict but test destination building"""
+        Globals.preserve_hardlinks = 1
+        reset_hardlink_dicts()
+        for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir3, None):
+            Hardlink.add_rorp(dsrp, None)
+
+        assert len(Hardlink._dest_inode_indicies.keys()) == 3, \
+               Hardlink._dest_inode_indicies
+        assert len(Hardlink._dest_index_indicies.keys()) == 3, \
+               Hardlink._dest_index_indicies
+        vals1 = Hardlink._dest_inode_indicies.values()
+        vals2 = Hardlink._dest_index_indicies.values()
+        vals1.sort()
+        vals2.sort()
+        assert vals1 == vals2
+
+    def testCompletedDict(self):
+        """See if the hardlink dictionaries are built correctly"""
+        reset_hardlink_dicts()
+        for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir1, 1):
+            Hardlink.add_rorp(dsrp, 1)
+        assert Hardlink._src_inode_indicies == {}, \
+               Hardlink._src_inode_indicies
+
+        hll1 = [('file1',), ('file2',), ('file3',)]
+        hll2 = [('file4',), ('file5',), ('file6',)]
+        dict = {}
+        for index in hll1: dict[index] = hll1
+        for index in hll2: dict[index] = hll2
+        assert Hardlink._src_index_indicies == dict
+
+        reset_hardlink_dicts()
+        for dsrp in DestructiveStepping.Iterate_from(self.hardlink_dir2, 1):
+            Hardlink.add_rorp(dsrp, 1)
+        assert Hardlink._src_inode_indicies == {}, \
+               Hardlink._src_inode_indicies
+
+        hll1 = [('file1',), ('file3',), ('file4',)]
+        hll2 = [('file2',), ('file5',), ('file6',)]
+        dict = {}
+        for index in hll1: dict[index] = hll1
+        for index in hll2: dict[index] = hll2
+        assert Hardlink._src_index_indicies == dict
+
+    def testSeries(self):
+        """Test hardlink system by backing up and restoring a few dirs"""
+        dirlist = ['testfiles/hardlinks/dir1',
+                   'testfiles/hardlinks/dir2',
+                   'testfiles/hardlinks/dir3',
+                   'testfiles/various_file_types']
+        BackupRestoreSeries(None, None, dirlist, compare_hardlinks=1)
+        BackupRestoreSeries(1, 1, dirlist, compare_hardlinks=1)
+
+
+
+if __name__ == "__main__": unittest.main()
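
The tests rely on pre-built directories under testfiles/hardlinks/ whose
layout is not created by this commit; it is only implied by the assertions in
testCompletedDict (for dir1: file1-file3 hardlinked together and file4-file6
hardlinked together). A hedged sketch of how such a layout could be
generated, built in a temporary directory rather than the real testfiles
path:

import os, tempfile

def make_link_group(dirname, names):
    # create names[0], then hard link every remaining name to it
    first = os.path.join(dirname, names[0])
    fp = open(first, "w")
    fp.write("hello\n")
    fp.close()
    for name in names[1:]:
        os.link(first, os.path.join(dirname, name))

root = tempfile.mkdtemp()               # stand-in for testfiles/hardlinks/dir1
make_link_group(root, ["file1", "file2", "file3"])
make_link_group(root, ["file4", "file5", "file6"])
assert os.stat(os.path.join(root, "file2")).st_nlink == 3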