From 5ccca65dec1ef00d3551493958af5ee27e7df212 Mon Sep 17 00:00:00 2001 From: bescoto Date: Sun, 12 Oct 2003 05:24:25 +0000 Subject: New hard link system should use less memory git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@468 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/Hardlink.py | 150 ++++++++++------------------------ 1 file changed, 45 insertions(+), 105 deletions(-) (limited to 'rdiff-backup/rdiff_backup/Hardlink.py') diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py index 453672e..81472b2 100644 --- a/rdiff-backup/rdiff_backup/Hardlink.py +++ b/rdiff-backup/rdiff_backup/Hardlink.py @@ -34,137 +34,80 @@ from __future__ import generators import cPickle import Globals, Time, rpath, log, robust -# In all of these lists of indicies are the values. The keys in -# _inode_ ones are (inode, devloc) pairs. -_src_inode_indicies = None -_dest_inode_indicies = None - -# The keys for these two are just indicies. They share values -# with the earlier dictionaries. -_src_index_indicies = None -_dest_index_indicies = None - -# When a linked file is restored, its path is added to this dict, -# so it can be found when later paths being restored are linked to -# it. -_restore_index_path = None +# The keys in this dictionary are (inode, devloc) pairs. The values +# are a pair (index, remaining_links, dest_key) where index is the +# rorp index of the first such linked file, remaining_links is the +# number of files hard linked to this one we may see, and key is +# either (dest_inode, dest_devloc) or None, and represents the +# hardlink info of the existing file on the destination. +_inode_index = None def initialize_dictionaries(): """Set all the hard link dictionaries to empty""" - global _src_inode_indicies, _dest_inode_indicies - global _src_index_indicies, _dest_index_indicies, _restore_index_path - _src_inode_indicies = {} - _dest_inode_indicies = {} - _src_index_indicies = {} - _dest_index_indicies = {} - _restore_index_path = {} + global _inode_index + _inode_index = {} def clear_dictionaries(): """Delete all dictionaries""" - global _src_inode_indicies, _dest_inode_indicies - global _src_index_indicies, _dest_index_indicies, _restore_index_path - _src_inode_indicies = _dest_inode_indicies = None - _src_index_indicies = _dest_index_indicies = _restore_index_path = None - + global _inode_index + _inode_index = None def get_inode_key(rorp): """Return rorp's key for _inode_ dictionaries""" return (rorp.getinode(), rorp.getdevloc()) -def get_indicies(rorp, source): - """Return a list of similarly linked indicies, using rorp's index""" - if source: dict = _src_index_indicies - else: dict = _dest_index_indicies - try: return dict[rorp.index] - except KeyError: return [] - -def add_rorp(rorp, source): - """Process new rorp and update hard link dictionaries - - First enter it into src_inode_indicies. If we have already - seen all the hard links, then we can delete the entry. - Everything must stay recorded in src_index_indicies though. - - """ +def add_rorp(rorp, dest_rorp = None): + """Process new rorp and update hard link dictionaries""" + if not rorp.isreg() or rorp.getnumlinks() < 2: return + rp_inode_key = get_inode_key(rorp) + if not _inode_index.has_key(rp_inode_key): + if not dest_rorp: dest_key = None + elif dest_rorp.getnumlinks() == 1: dest_key = "NA" + else: dest_key = get_inode_key(dest_rorp) + _inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key) + +def del_rorp(rorp): + """Remove rorp information from dictionary if seen all links""" if not rorp.isreg() or rorp.getnumlinks() < 2: return - - if source: - inode_dict, index_dict = _src_inode_indicies, _src_index_indicies - else: inode_dict, index_dict = _dest_inode_indicies, _dest_index_indicies - rp_inode_key = get_inode_key(rorp) - if inode_dict.has_key(rp_inode_key): - index_list = inode_dict[rp_inode_key] - index_list.append(rorp.index) - if len(index_list) == rorp.getnumlinks(): - del inode_dict[rp_inode_key] - else: # make new entry in both src dicts - index_list = [rorp.index] - inode_dict[rp_inode_key] = index_list - index_dict[rorp.index] = index_list - -def add_rorp_iter(iter, source): - """Return new rorp iterator like iter that add_rorp's first""" - for rorp in iter: - add_rorp(rorp, source) - yield rorp + val = _inode_index.get(rp_inode_key) + if not val: return + index, remaining, dest_key = val + if remaining == 1: del _inode_index[rp_inode_key] + else: _inode_index[rp_inode_key] = (index, remaining-1, dest_key) def rorp_eq(src_rorp, dest_rorp): """Compare hardlinked for equality - Two files may otherwise seem equal but be hardlinked in - different ways. This function considers them equal enough if - they have been hardlinked correctly to the previously seen - indicies. + Return false if dest_rorp is linked differently, which can happen + if dest is linked more than source, or if it is represented by a + different inode. """ if (not src_rorp.isreg() or not dest_rorp.isreg() or src_rorp.getnumlinks() == dest_rorp.getnumlinks() == 1): return 1 # Hard links don't apply - src_index_list = get_indicies(src_rorp, 1) - dest_index_list = get_indicies(dest_rorp, None) - - # If a list only has one element, then it is only hardlinked - # to itself so far, so that is not a genuine difference yet. - if not src_index_list or len(src_index_list) == 1: - return not dest_index_list or len(dest_index_list) == 1 - if not dest_index_list or len(dest_index_list) == 1: return None - - # Both index lists exist and are non-empty - return src_index_list == dest_index_list # they are always sorted + if src_rorp.getnumlinks() < dest_rorp.getnumlinks(): return 0 + src_key = get_inode_key(src_rorp) + index, remaining, dest_key = _inode_index[src_key] + if dest_key == "NA": + # Allow this to be ok for first comparison, but not any + # subsequent ones + _inode_index[src_key] = (index, remaining, None) + return 1 + return dest_key == get_inode_key(dest_rorp) def islinked(rorp): """True if rorp's index is already linked to something on src side""" - return len(get_indicies(rorp, 1)) >= 2 + if not rorp.getnumlinks() > 1: return 0 + dict_val = _inode_index.get(get_inode_key(rorp)) + if not dict_val: return 0 + return dict_val[0] != rorp.index # If equal, then rorp is first def get_link_index(rorp): """Return first index on target side rorp is already linked to""" - return get_indicies(rorp, 1)[0] - -def restore_link(index, rpath): - """Restores a linked file by linking it - - When restoring, all the hardlink data is already present, and - we can only link to something already written. In either - case, add to the _restore_index_path dict, so we know later - that the file is available for hard - linking. - - Returns true if succeeded in creating rpath, false if must - restore rpath normally. - - """ - if index not in _src_index_indicies: return None - for linked_index in _src_index_indicies[index]: - if linked_index in _restore_index_path: - srcpath = _restore_index_path[linked_index] - log.Log("Restoring %s by hard linking to %s" % - (rpath.path, srcpath), 6) - rpath.hardlink(srcpath) - return 1 - _restore_index_path[index] = rpath.path - return None + return _inode_index[get_inode_key(rorp)][0] def link_rp(diff_rorp, dest_rpath, dest_root = None): """Make dest_rpath into a link using link flag in diff_rorp""" @@ -172,6 +115,3 @@ def link_rp(diff_rorp, dest_rpath, dest_root = None): dest_link_rpath = dest_root.new_index(diff_rorp.get_link_flag()) dest_rpath.hardlink(dest_link_rpath.path) - - - -- cgit v1.2.1