summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2003-10-12 05:24:25 +0000
committerbescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2003-10-12 05:24:25 +0000
commit5ccca65dec1ef00d3551493958af5ee27e7df212 (patch)
treef4c311256bc8dd7722852b526f98908943eebd8b
parent34471974703bc788103b28f12abc37ad1e249623 (diff)
downloadrdiff-backup-5ccca65dec1ef00d3551493958af5ee27e7df212.tar.gz
New hard link system should use less memory
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@468 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r--rdiff-backup/CHANGELOG2
-rw-r--r--rdiff-backup/rdiff_backup/Hardlink.py150
-rw-r--r--rdiff-backup/rdiff_backup/backup.py13
-rw-r--r--rdiff-backup/rdiff_backup/restore.py12
-rw-r--r--rdiff-backup/rdiff_backup/rpath.py1
-rw-r--r--rdiff-backup/testing/commontest.py129
-rw-r--r--rdiff-backup/testing/hardlinktest.py58
7 files changed, 135 insertions, 230 deletions
diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG
index 7a7c0f1..6c714ef 100644
--- a/rdiff-backup/CHANGELOG
+++ b/rdiff-backup/CHANGELOG
@@ -18,6 +18,8 @@ that contain unreadable files and directories as long as they are
owned by that user. Bug report by Arkadiusz Miskiewicz. Hopefully
this is the last of the unreadable file bugs...
+Rewrote hard link tracking system. New way should use less memory.
+
New in v0.13.2 (2003/09/16)
---------------------------
diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py
index 453672e..81472b2 100644
--- a/rdiff-backup/rdiff_backup/Hardlink.py
+++ b/rdiff-backup/rdiff_backup/Hardlink.py
@@ -34,137 +34,80 @@ from __future__ import generators
import cPickle
import Globals, Time, rpath, log, robust
-# In all of these lists of indicies are the values. The keys in
-# _inode_ ones are (inode, devloc) pairs.
-_src_inode_indicies = None
-_dest_inode_indicies = None
-
-# The keys for these two are just indicies. They share values
-# with the earlier dictionaries.
-_src_index_indicies = None
-_dest_index_indicies = None
-
-# When a linked file is restored, its path is added to this dict,
-# so it can be found when later paths being restored are linked to
-# it.
-_restore_index_path = None
+# The keys in this dictionary are (inode, devloc) pairs. The values
+# are a pair (index, remaining_links, dest_key) where index is the
+# rorp index of the first such linked file, remaining_links is the
+# number of files hard linked to this one we may see, and key is
+# either (dest_inode, dest_devloc) or None, and represents the
+# hardlink info of the existing file on the destination.
+_inode_index = None
def initialize_dictionaries():
"""Set all the hard link dictionaries to empty"""
- global _src_inode_indicies, _dest_inode_indicies
- global _src_index_indicies, _dest_index_indicies, _restore_index_path
- _src_inode_indicies = {}
- _dest_inode_indicies = {}
- _src_index_indicies = {}
- _dest_index_indicies = {}
- _restore_index_path = {}
+ global _inode_index
+ _inode_index = {}
def clear_dictionaries():
"""Delete all dictionaries"""
- global _src_inode_indicies, _dest_inode_indicies
- global _src_index_indicies, _dest_index_indicies, _restore_index_path
- _src_inode_indicies = _dest_inode_indicies = None
- _src_index_indicies = _dest_index_indicies = _restore_index_path = None
-
+ global _inode_index
+ _inode_index = None
def get_inode_key(rorp):
"""Return rorp's key for _inode_ dictionaries"""
return (rorp.getinode(), rorp.getdevloc())
-def get_indicies(rorp, source):
- """Return a list of similarly linked indicies, using rorp's index"""
- if source: dict = _src_index_indicies
- else: dict = _dest_index_indicies
- try: return dict[rorp.index]
- except KeyError: return []
-
-def add_rorp(rorp, source):
- """Process new rorp and update hard link dictionaries
-
- First enter it into src_inode_indicies. If we have already
- seen all the hard links, then we can delete the entry.
- Everything must stay recorded in src_index_indicies though.
-
- """
+def add_rorp(rorp, dest_rorp = None):
+ """Process new rorp and update hard link dictionaries"""
+ if not rorp.isreg() or rorp.getnumlinks() < 2: return
+ rp_inode_key = get_inode_key(rorp)
+ if not _inode_index.has_key(rp_inode_key):
+ if not dest_rorp: dest_key = None
+ elif dest_rorp.getnumlinks() == 1: dest_key = "NA"
+ else: dest_key = get_inode_key(dest_rorp)
+ _inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key)
+
+def del_rorp(rorp):
+ """Remove rorp information from dictionary if seen all links"""
if not rorp.isreg() or rorp.getnumlinks() < 2: return
-
- if source:
- inode_dict, index_dict = _src_inode_indicies, _src_index_indicies
- else: inode_dict, index_dict = _dest_inode_indicies, _dest_index_indicies
-
rp_inode_key = get_inode_key(rorp)
- if inode_dict.has_key(rp_inode_key):
- index_list = inode_dict[rp_inode_key]
- index_list.append(rorp.index)
- if len(index_list) == rorp.getnumlinks():
- del inode_dict[rp_inode_key]
- else: # make new entry in both src dicts
- index_list = [rorp.index]
- inode_dict[rp_inode_key] = index_list
- index_dict[rorp.index] = index_list
-
-def add_rorp_iter(iter, source):
- """Return new rorp iterator like iter that add_rorp's first"""
- for rorp in iter:
- add_rorp(rorp, source)
- yield rorp
+ val = _inode_index.get(rp_inode_key)
+ if not val: return
+ index, remaining, dest_key = val
+ if remaining == 1: del _inode_index[rp_inode_key]
+ else: _inode_index[rp_inode_key] = (index, remaining-1, dest_key)
def rorp_eq(src_rorp, dest_rorp):
"""Compare hardlinked for equality
- Two files may otherwise seem equal but be hardlinked in
- different ways. This function considers them equal enough if
- they have been hardlinked correctly to the previously seen
- indicies.
+ Return false if dest_rorp is linked differently, which can happen
+ if dest is linked more than source, or if it is represented by a
+ different inode.
"""
if (not src_rorp.isreg() or not dest_rorp.isreg() or
src_rorp.getnumlinks() == dest_rorp.getnumlinks() == 1):
return 1 # Hard links don't apply
- src_index_list = get_indicies(src_rorp, 1)
- dest_index_list = get_indicies(dest_rorp, None)
-
- # If a list only has one element, then it is only hardlinked
- # to itself so far, so that is not a genuine difference yet.
- if not src_index_list or len(src_index_list) == 1:
- return not dest_index_list or len(dest_index_list) == 1
- if not dest_index_list or len(dest_index_list) == 1: return None
-
- # Both index lists exist and are non-empty
- return src_index_list == dest_index_list # they are always sorted
+ if src_rorp.getnumlinks() < dest_rorp.getnumlinks(): return 0
+ src_key = get_inode_key(src_rorp)
+ index, remaining, dest_key = _inode_index[src_key]
+ if dest_key == "NA":
+ # Allow this to be ok for first comparison, but not any
+ # subsequent ones
+ _inode_index[src_key] = (index, remaining, None)
+ return 1
+ return dest_key == get_inode_key(dest_rorp)
def islinked(rorp):
"""True if rorp's index is already linked to something on src side"""
- return len(get_indicies(rorp, 1)) >= 2
+ if not rorp.getnumlinks() > 1: return 0
+ dict_val = _inode_index.get(get_inode_key(rorp))
+ if not dict_val: return 0
+ return dict_val[0] != rorp.index # If equal, then rorp is first
def get_link_index(rorp):
"""Return first index on target side rorp is already linked to"""
- return get_indicies(rorp, 1)[0]
-
-def restore_link(index, rpath):
- """Restores a linked file by linking it
-
- When restoring, all the hardlink data is already present, and
- we can only link to something already written. In either
- case, add to the _restore_index_path dict, so we know later
- that the file is available for hard
- linking.
-
- Returns true if succeeded in creating rpath, false if must
- restore rpath normally.
-
- """
- if index not in _src_index_indicies: return None
- for linked_index in _src_index_indicies[index]:
- if linked_index in _restore_index_path:
- srcpath = _restore_index_path[linked_index]
- log.Log("Restoring %s by hard linking to %s" %
- (rpath.path, srcpath), 6)
- rpath.hardlink(srcpath)
- return 1
- _restore_index_path[index] = rpath.path
- return None
+ return _inode_index[get_inode_key(rorp)][0]
def link_rp(diff_rorp, dest_rpath, dest_root = None):
"""Make dest_rpath into a link using link flag in diff_rorp"""
@@ -172,6 +115,3 @@ def link_rp(diff_rorp, dest_rpath, dest_root = None):
dest_link_rpath = dest_root.new_index(diff_rorp.get_link_flag())
dest_rpath.hardlink(dest_link_rpath.path)
-
-
-
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index a89f21e..850e266 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -180,10 +180,10 @@ class DestinationStruct:
def get_one_sig(cls, dest_base_rpath, index, src_rorp, dest_rorp):
"""Return a signature given source and destination rorps"""
- if (Globals.preserve_hardlinks and
- Hardlink.islinked(src_rorp or dest_rorp)):
+ if (Globals.preserve_hardlinks and src_rorp and
+ Hardlink.islinked(src_rorp)):
dest_sig = rpath.RORPath(index)
- dest_sig.flaglinked(Hardlink.get_link_index(dest_sig))
+ dest_sig.flaglinked(Hardlink.get_link_index(src_rorp))
elif dest_rorp:
dest_sig = dest_rorp.getRORPath()
if dest_rorp.isreg():
@@ -312,8 +312,8 @@ class CacheCollatedPostProcess:
will be backed up correctly.
"""
- if source_rorp: Hardlink.add_rorp(source_rorp, source = 1)
- if dest_rorp: Hardlink.add_rorp(dest_rorp, source = 0)
+ if Globals.preserve_hardlinks and source_rorp:
+ Hardlink.add_rorp(source_rorp, dest_rorp)
if (dest_rorp and dest_rorp.isdir() and Globals.process_uid != 0
and dest_rorp.getperms() % 01000 < 0700):
self.unreadable_dir_init(source_rorp, dest_rorp)
@@ -355,6 +355,9 @@ class CacheCollatedPostProcess:
always false for un-changed files).
"""
+ if Globals.preserve_hardlinks and source_rorp:
+ Hardlink.del_rorp(source_rorp)
+
if not changed or success:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
if dest_rorp: self.statfileobj.add_dest_file(dest_rorp)
diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py
index 8d64c36..7a4f0bf 100644
--- a/rdiff-backup/rdiff_backup/restore.py
+++ b/rdiff-backup/rdiff_backup/restore.py
@@ -235,15 +235,17 @@ class MirrorStruct:
def get_diffs_from_collated(cls, collated):
"""Get diff iterator from collated"""
for mir_rorp, target_rorp in collated:
- if Globals.preserve_hardlinks:
- if mir_rorp: Hardlink.add_rorp(mir_rorp, source = 1)
- if target_rorp: Hardlink.add_rorp(target_rorp, source = 0)
-
+ if Globals.preserve_hardlinks and mir_rorp:
+ Hardlink.add_rorp(mir_rorp, target_rorp)
if (not target_rorp or not mir_rorp or
not mir_rorp == target_rorp or
(Globals.preserve_hardlinks and not
Hardlink.rorp_eq(mir_rorp, target_rorp))):
- yield cls.get_diff(mir_rorp, target_rorp)
+ diff = cls.get_diff(mir_rorp, target_rorp)
+ else: diff = None
+ if Globals.preserve_hardlinks and mir_rorp:
+ Hardlink.del_rorp(mir_rorp)
+ if diff: yield diff
def get_diff(cls, mir_rorp, target_rorp):
"""Get a diff for mir_rorp at time"""
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 1786dd8..b2f6f20 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -761,6 +761,7 @@ class RPath(RORPath):
def hardlink(self, linkpath):
"""Make self into a hardlink joined to linkpath"""
+ log.Log("Hard linking %s to %s" % (self.path, linkpath), 6)
self.conn.os.link(linkpath, self.path)
self.setdata()
diff --git a/rdiff-backup/testing/commontest.py b/rdiff-backup/testing/commontest.py
index e80d699..9d1ee81 100644
--- a/rdiff-backup/testing/commontest.py
+++ b/rdiff-backup/testing/commontest.py
@@ -3,7 +3,7 @@ import os, sys, code
from rdiff_backup.log import Log
from rdiff_backup.rpath import RPath
from rdiff_backup import Globals, Hardlink, SetConnections, Main, \
- selection, lazy, Time, rpath, eas_acls
+ selection, lazy, Time, rpath, eas_acls, rorpiter
RBBin = "../rdiff-backup"
SourceDir = "../rdiff_backup"
@@ -175,63 +175,78 @@ def CompareRecursive(src_rp, dest_rp, compare_hardlinks = 1,
specified.
"""
- if compare_hardlinks: reset_hardlink_dicts()
- src_rp.setdata()
- dest_rp.setdata()
-
- Log("Comparing %s and %s, hardlinks %s, eas %s, acls %s" %
- (src_rp.path, dest_rp.path, compare_hardlinks,
- compare_eas, compare_acls), 3)
- src_select = selection.Select(src_rp)
- dest_select = selection.Select(dest_rp)
-
- if ignore_tmp_files:
- # Ignoring temp files can be useful when we want to check the
- # correctness of a backup which aborted in the middle. In
- # these cases it is OK to have tmp files lying around.
- src_select.add_selection_func(src_select.regexp_get_sf(
- ".*rdiff-backup.tmp.[^/]+$", 0))
- dest_select.add_selection_func(dest_select.regexp_get_sf(
- ".*rdiff-backup.tmp.[^/]+$", 0))
-
- if exclude_rbdir:
- src_select.parse_rbdir_exclude()
- dest_select.parse_rbdir_exclude()
- else:
- # include rdiff-backup-data/increments
- src_select.add_selection_func(src_select.glob_get_tuple_sf(
- ('rdiff-backup-data', 'increments'), 1))
- dest_select.add_selection_func(dest_select.glob_get_tuple_sf(
- ('rdiff-backup-data', 'increments'), 1))
-
- # but exclude rdiff-backup-data
- src_select.add_selection_func(src_select.glob_get_tuple_sf(
- ('rdiff-backup-data',), 0))
- dest_select.add_selection_func(dest_select.glob_get_tuple_sf(
- ('rdiff-backup-data',), 0))
-
- dsiter1, dsiter2 = src_select.set_iter(), dest_select.set_iter()
-
- def hardlink_equal(src_rorp, dest_rorp):
+ def get_selection_functions():
+ """Return generators of files in source, dest"""
+ src_rp.setdata()
+ dest_rp.setdata()
+ src_select = selection.Select(src_rp)
+ dest_select = selection.Select(dest_rp)
+
+ if ignore_tmp_files:
+ # Ignoring temp files can be useful when we want to check the
+ # correctness of a backup which aborted in the middle. In
+ # these cases it is OK to have tmp files lying around.
+ src_select.add_selection_func(src_select.regexp_get_sf(
+ ".*rdiff-backup.tmp.[^/]+$", 0))
+ dest_select.add_selection_func(dest_select.regexp_get_sf(
+ ".*rdiff-backup.tmp.[^/]+$", 0))
+
+ if exclude_rbdir: # Exclude rdiff-backup-data directory
+ src_select.parse_rbdir_exclude()
+ dest_select.parse_rbdir_exclude()
+
+ return src_select.set_iter(), dest_select.set_iter()
+
+ def preprocess(src_rorp, dest_rorp):
+ """Initially process src and dest_rorp"""
+ if compare_hardlinks and src_rorp:
+ Hardlink.add_rorp(src_rorp, dest_rorp)
+
+ def postprocess(src_rorp, dest_rorp):
+ """After comparison, process src_rorp and dest_rorp"""
+ if compare_hardlinks and src_rorp:
+ Hardlink.del_rorp(src_rorp)
+
+ def equality_func(src_rorp, dest_rorp):
+ """Combined eq func returns true iff two files compare same"""
+ if not src_rorp:
+ Log("Source rorp missing: " + str(dest_rorp), 3)
+ return 0
+ if not dest_rorp:
+ Log("Dest rorp missing: " + str(src_rorp), 3)
+ return 0
if not src_rorp.equal_verbose(dest_rorp,
compare_ownership = compare_ownership):
- return None
- if not Hardlink.rorp_eq(src_rorp, dest_rorp):
+ return 0
+ if compare_hardlinks and not Hardlink.rorp_eq(src_rorp, dest_rorp):
+ Log("Hardlink compare failure", 3)
Log("%s: %s" % (src_rorp.index,
- Hardlink.get_indicies(src_rorp, 1)), 3)
+ Hardlink.get_inode_key(src_rorp)), 3)
Log("%s: %s" % (dest_rorp.index,
- Hardlink.get_indicies(dest_rorp, None)), 3)
- return None
+ Hardlink.get_inode_key(dest_rorp)), 3)
+ return 0
if compare_eas and not eas_acls.ea_compare_rps(src_rorp, dest_rorp):
Log("Different EAs in files %s and %s" %
(src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
- return None
+ return 0
if compare_acls and not eas_acls.acl_compare_rps(src_rorp, dest_rorp):
Log("Different ACLs in files %s and %s" %
(src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
- return None
+ return 0
return 1
+ Log("Comparing %s and %s, hardlinks %s, eas %s, acls %s" %
+ (src_rp.path, dest_rp.path, compare_hardlinks,
+ compare_eas, compare_acls), 3)
+ if compare_hardlinks: reset_hardlink_dicts()
+ src_iter, dest_iter = get_selection_functions()
+ for src_rorp, dest_rorp in rorpiter.Collate2Iters(src_iter, dest_iter):
+ preprocess(src_rorp, dest_rorp)
+ if not equality_func(src_rorp, dest_rorp): return 0
+ postprocess(src_rorp, dest_rorp)
+ return 1
+
+
def rbdir_equal(src_rorp, dest_rorp):
"""Like hardlink_equal, but make allowances for data directories"""
if not src_rorp.index and not dest_rorp.index: return 1
@@ -263,30 +278,10 @@ def CompareRecursive(src_rp, dest_rp, compare_hardlinks = 1,
Hardlink.get_indicies(dest_rorp, None)), 3)
return None
- if equality_func: result = lazy.Iter.equal(dsiter1, dsiter2,
- 1, equality_func)
- elif compare_hardlinks:
- dsiter1 = Hardlink.add_rorp_iter(dsiter1, 1)
- dsiter2 = Hardlink.add_rorp_iter(dsiter2, None)
- if exclude_rbdir:
- result = lazy.Iter.equal(dsiter1, dsiter2, 1, hardlink_equal)
- else: result = lazy.Iter.equal(dsiter1, dsiter2, 1, rbdir_equal)
- elif not exclude_rbdir:
- result = lazy.Iter.equal(dsiter1, dsiter2, 1, rbdir_equal)
- else: result = lazy.Iter.equal(dsiter1, dsiter2, 1,
- lambda x, y: x.equal_verbose(y, compare_ownership = compare_ownership))
-
- for i in dsiter1: pass # make sure all files processed anyway
- for i in dsiter2: pass
- return result
def reset_hardlink_dicts():
"""Clear the hardlink dictionaries"""
- Hardlink._src_inode_indicies = {}
- Hardlink._src_index_indicies = {}
- Hardlink._dest_inode_indicies = {}
- Hardlink._dest_index_indicies = {}
- Hardlink._restore_index_path = {}
+ Hardlink._inode_index = {}
def BackupRestoreSeries(source_local, dest_local, list_of_dirnames,
compare_hardlinks = 1,
diff --git a/rdiff-backup/testing/hardlinktest.py b/rdiff-backup/testing/hardlinktest.py
index 22dc31d..c3827f6 100644
--- a/rdiff-backup/testing/hardlinktest.py
+++ b/rdiff-backup/testing/hardlinktest.py
@@ -2,7 +2,7 @@ import os, unittest, time
from commontest import *
from rdiff_backup import Globals, Hardlink, selection, rpath
-Log.setverbosity(3)
+Log.setverbosity(6)
class HardlinkTest(unittest.TestCase):
"""Test cases for Hard links"""
@@ -34,62 +34,24 @@ class HardlinkTest(unittest.TestCase):
Globals.preserve_hardlinks = 1
reset_hardlink_dicts()
for dsrp in selection.Select(self.hardlink_dir3).set_iter():
- Hardlink.add_rorp(dsrp, 1)
+ Hardlink.add_rorp(dsrp)
- assert len(Hardlink._src_inode_indicies.keys()) == 3, \
- Hardlink._src_inode_indicies
- assert len(Hardlink._src_index_indicies.keys()) == 3, \
- Hardlink._src_index_indicies
- vals1 = Hardlink._src_inode_indicies.values()
- vals2 = Hardlink._src_index_indicies.values()
- vals1.sort()
- vals2.sort()
- assert vals1 == vals2
-
- def testBuildingDict2(self):
- """Same as testBuildingDict but test destination building"""
- Globals.preserve_hardlinks = 1
- reset_hardlink_dicts()
- for dsrp in selection.Select(self.hardlink_dir3).set_iter():
- Hardlink.add_rorp(dsrp, None)
-
- assert len(Hardlink._dest_inode_indicies.keys()) == 3, \
- Hardlink._dest_inode_indicies
- assert len(Hardlink._dest_index_indicies.keys()) == 3, \
- Hardlink._dest_index_indicies
- vals1 = Hardlink._dest_inode_indicies.values()
- vals2 = Hardlink._dest_index_indicies.values()
- vals1.sort()
- vals2.sort()
- assert vals1 == vals2
+ assert len(Hardlink._inode_index.keys()) == 3, \
+ Hardlink._inode_index
def testCompletedDict(self):
"""See if the hardlink dictionaries are built correctly"""
reset_hardlink_dicts()
for dsrp in selection.Select(self.hardlink_dir1).set_iter():
- Hardlink.add_rorp(dsrp, 1)
- assert Hardlink._src_inode_indicies == {}, \
- Hardlink._src_inode_indicies
-
- hll1 = [('file1',), ('file2',), ('file3',)]
- hll2 = [('file4',), ('file5',), ('file6',)]
- dict = {}
- for index in hll1: dict[index] = hll1
- for index in hll2: dict[index] = hll2
- assert Hardlink._src_index_indicies == dict
+ Hardlink.add_rorp(dsrp)
+ Hardlink.del_rorp(dsrp)
+ assert Hardlink._inode_index == {}, Hardlink._inode_index
reset_hardlink_dicts()
for dsrp in selection.Select(self.hardlink_dir2).set_iter():
- Hardlink.add_rorp(dsrp, 1)
- assert Hardlink._src_inode_indicies == {}, \
- Hardlink._src_inode_indicies
-
- hll1 = [('file1',), ('file3',), ('file4',)]
- hll2 = [('file2',), ('file5',), ('file6',)]
- dict = {}
- for index in hll1: dict[index] = hll1
- for index in hll2: dict[index] = hll2
- assert Hardlink._src_index_indicies == dict
+ Hardlink.add_rorp(dsrp)
+ Hardlink.del_rorp(dsrp)
+ assert Hardlink._inode_index == {}, Hardlink._inode_index
def testSeries(self):
"""Test hardlink system by backing up and restoring a few dirs"""