author	bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2005-12-24 20:02:05 +0000
committer	bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2005-12-24 20:02:05 +0000
commit	81ac975fe2bdcd95de1faed9a318f3867aa95059 (patch)
tree	a2498875818e50c257c33b4005a9ee046825a80c
parent	5c7697c75d65acf2cb319dabcb573e3c22ccac56 (diff)
Reduce hardlink memory usage
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@719 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r--	rdiff-backup/CHANGELOG	3
-rw-r--r--	rdiff-backup/rdiff-backup.1	7
-rw-r--r--	rdiff-backup/rdiff_backup/Hardlink.py	28
-rw-r--r--	rdiff-backup/rdiff_backup/backup.py	19
-rw-r--r--	rdiff-backup/testing/commontest.py	17
5 files changed, 44 insertions, 30 deletions
diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG
index 8c9aa16..4766486 100644
--- a/rdiff-backup/CHANGELOG
+++ b/rdiff-backup/CHANGELOG
@@ -14,6 +14,9 @@ Added supplementary rdiff-backup-statistics utility for parsing
rdiff-backup's statistics files (originally based off perl script by
Dean Gaudet).
+rdiff-backup should now use much less memory than v1.1.1-1.1.4 if you
+have lots of hard links.
+
New in v1.1.4 (2005/12/13)
--------------------------
diff --git a/rdiff-backup/rdiff-backup.1 b/rdiff-backup/rdiff-backup.1
index e785e98..e13a9f0 100644
--- a/rdiff-backup/rdiff-backup.1
+++ b/rdiff-backup/rdiff-backup.1
@@ -314,11 +314,8 @@ rdiff-backup-data directory. rdiff-backup will run slightly quicker
and take up a bit less space.
.TP
.BI --no-hard-links
-Don't replicate hard links on destination side. Note that because
-metadata is written to a separate file, hard link information will not
-be lost even if the --no-hard-links option is given (however, mirror
-files will not be linked). If many hard-linked files are present,
-this option can drastically decrease memory usage.
+Don't replicate hard links on destination side. If many hard-linked
+files are present, this option can drastically decrease memory usage.
.TP
.B --null-separator
Use nulls (\\0) instead of newlines (\\n) as line separators, which
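
The memory saving promised for --no-hard-links comes from the Globals.preserve_hardlinks guard that backup.py (further down in this patch) places around every Hardlink call. The sketch below is not code from the patch: treating --no-hard-links as "clear preserve_hardlinks", and all helper names here, are assumptions made for illustration only.

# Minimal sketch, assuming --no-hard-links simply clears the
# preserve_hardlinks flag (the option parser is not part of this patch).
class Globals:                          # stand-in for rdiff_backup.Globals
	preserve_hardlinks = 1

def apply_options(argv):
	if '--no-hard-links' in argv:
		Globals.preserve_hardlinks = 0

def pre_process(source_rorp, dest_rorp, add_rorp):
	# Mirrors the guard in CacheCollatedPostProcess.pre_process: with the
	# flag cleared, add_rorp is never called, so no per-inode state builds up.
	if Globals.preserve_hardlinks and source_rorp:
		add_rorp(source_rorp, dest_rorp)

apply_options(['--no-hard-links', 'source_dir', 'dest_dir'])
calls = []
pre_process(object(), None, lambda src, dest: calls.append((src, dest)))
assert calls == []                      # hard link bookkeeping skipped entirely
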
diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py
index 855c512..1dddbbb 100644
--- a/rdiff-backup/rdiff_backup/Hardlink.py
+++ b/rdiff-backup/rdiff_backup/Hardlink.py
@@ -31,15 +31,15 @@ source side should only transmit inode information.
"""
from __future__ import generators
-import cPickle
-import Globals, Time, rpath, log, robust, errno
+import Globals, Time, log, robust, errno
# The keys in this dictionary are (inode, devloc) pairs. The values
-# are a pair (index, remaining_links, dest_key) where index is the
-# rorp index of the first such linked file, remaining_links is the
-# number of files hard linked to this one we may see, and key is
+# are a pair (index, remaining_links, dest_key, sha1sum) where index
+# is the rorp index of the first such linked file, remaining_links is
+# the number of files hard linked to this one we may see, and key is
# either (dest_inode, dest_devloc) or None, and represents the
-# hardlink info of the existing file on the destination.
+# hardlink info of the existing file on the destination. Finally
+# sha1sum is the hash of the file if it exists, or None.
_inode_index = None
def initialize_dictionaries():
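
As a concrete illustration of the layout the rewritten comment describes, here is what a single entry might look like for a source file with three hard links whose counterpart already exists on the destination; the inode numbers, index tuple, and digest are invented for the example.

# Illustrative entry only; the key is (inode, devloc) of the source file and
# the value is the (index, remaining_links, dest_key, sha1sum) tuple above.
_inode_index = {
	(1632115, 0): (('home', 'ben', 'notes.txt'),    # index of first linked file seen
		3,                                          # hard links still expected
		(278551, 0),                                # (dest_inode, dest_devloc), "NA", or None
		'da39a3ee5e6b4b0d3255bfef95601890afd80709'),  # sha1 of the contents, or None
	}
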
@@ -64,7 +64,9 @@ def add_rorp(rorp, dest_rorp = None):
if not dest_rorp: dest_key = None
elif dest_rorp.getnumlinks() == 1: dest_key = "NA"
else: dest_key = get_inode_key(dest_rorp)
- _inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key)
+ digest = rorp.has_sha1() and rorp.get_sha1() or None
+ _inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(),
+ dest_key, digest)
return rp_inode_key
def del_rorp(rorp):
@@ -73,12 +75,12 @@ def del_rorp(rorp):
rp_inode_key = get_inode_key(rorp)
val = _inode_index.get(rp_inode_key)
if not val: return
- index, remaining, dest_key = val
+ index, remaining, dest_key, digest = val
if remaining == 1:
del _inode_index[rp_inode_key]
return 1
else:
- _inode_index[rp_inode_key] = (index, remaining-1, dest_key)
+ _inode_index[rp_inode_key] = (index, remaining-1, dest_key, digest)
return 0
def rorp_eq(src_rorp, dest_rorp):
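
The counting in del_rorp above is what keeps the table from growing without bound: an entry, cached digest included, is dropped as soon as the last expected link has been processed. A self-contained sketch of that lifecycle (keys, index, and digest are invented; nothing is imported from rdiff-backup):

_toy_index = {}

def toy_add(inode_key, index, numlinks, digest):
	# What add_rorp leaves behind for the first link of a group.
	_toy_index[inode_key] = (index, numlinks, None, digest)

def toy_del(inode_key):
	# Same counting as del_rorp: forget the entry after the last link.
	index, remaining, dest_key, digest = _toy_index[inode_key]
	if remaining == 1:
		del _toy_index[inode_key]
		return 1
	_toy_index[inode_key] = (index, remaining - 1, dest_key, digest)
	return 0

toy_add((99, 0), ('f1',), 3, 'deadbeef' * 5)
assert toy_del((99, 0)) == 0            # two more links still expected
assert toy_del((99, 0)) == 0
assert toy_del((99, 0)) == 1            # last link: entry and digest released
assert _toy_index == {}
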
@@ -95,11 +97,11 @@ def rorp_eq(src_rorp, dest_rorp):
if src_rorp.getnumlinks() < dest_rorp.getnumlinks(): return 0
src_key = get_inode_key(src_rorp)
- index, remaining, dest_key = _inode_index[src_key]
+ index, remaining, dest_key, digest = _inode_index[src_key]
if dest_key == "NA":
# Allow this to be ok for first comparison, but not any
# subsequent ones
- _inode_index[src_key] = (index, remaining, None)
+ _inode_index[src_key] = (index, remaining, None, None)
return 1
return dest_key == get_inode_key(dest_rorp)
@@ -114,6 +116,10 @@ def get_link_index(rorp):
"""Return first index on target side rorp is already linked to"""
return _inode_index[get_inode_key(rorp)][0]
+def get_sha1(rorp):
+ """Return sha1 digest of what rorp is linked to"""
+ return _inode_index[get_inode_key(rorp)][3]
+
def link_rp(diff_rorp, dest_rpath, dest_root = None):
"""Make dest_rpath into a link using link flag in diff_rorp"""
if not dest_root: dest_root = dest_rpath # use base of dest_rpath
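
The new get_sha1 accessor is what lets backup.py (next file in the patch) stop caching whole source rorps: the digest string parked by add_rorp is all that a later link in the group needs. A short self-contained sketch of that hand-off (the table contents are invented and nothing is imported from rdiff-backup):

digest = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
_toy_index = {(1632115, 0): (('f1',), 2, None, digest)}   # as left by add_rorp

def toy_get_sha1(inode_key):
	return _toy_index[inode_key][3]     # same lookup as Hardlink.get_sha1

# Processing the second link of the pair: its metadata can record the same
# hash without the first file's rorp ever having been kept in memory.
assert toy_get_sha1((1632115, 0)) == digest
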
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index 819ae91..181c918 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -295,11 +295,6 @@ class CacheCollatedPostProcess:
# after we're finished with them
self.dir_perms_list = []
- # A dictionary of {index: source_rorp}. We use this to
- # hold the digest of a hard linked file so it only needs to be
- # computed once.
- self.inode_digest_dict = {}
-
# Contains list of (index, (source_rorp, diff_rorp)) pairs for
# the parent directories of the last item in the cache.
self.parent_list = []
@@ -326,8 +321,7 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
- if Hardlink.add_rorp(source_rorp, dest_rorp):
- self.inode_digest_dict[source_rorp.index] = source_rorp
+ Hardlink.add_rorp(source_rorp, dest_rorp)
if (dest_rorp and dest_rorp.isdir() and Globals.process_uid != 0
and dest_rorp.getperms() % 01000 < 0700):
self.unreadable_dir_init(source_rorp, dest_rorp)
@@ -394,8 +388,7 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
- if Hardlink.del_rorp(source_rorp):
- del self.inode_digest_dict[source_rorp.index]
+ Hardlink.del_rorp(source_rorp)
if not changed or success:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
@@ -469,10 +462,10 @@ class CacheCollatedPostProcess:
def update_hardlink_hash(self, diff_rorp):
"""Tag associated source_rorp with same hash diff_rorp points to"""
- orig_rorp = self.inode_digest_dict[diff_rorp.get_link_flag()]
- if orig_rorp.has_sha1():
- new_source_rorp = self.get_source_rorp(diff_rorp.index)
- new_source_rorp.set_sha1(orig_rorp.get_sha1())
+ sha1sum = Hardlink.get_sha1(diff_rorp)
+ if not sha1sum: return
+ source_rorp = self.get_source_rorp(diff_rorp.index)
+ source_rorp.set_sha1(sha1sum)
def close(self):
"""Process the remaining elements in the cache"""
diff --git a/rdiff-backup/testing/commontest.py b/rdiff-backup/testing/commontest.py
index 04cdb93..e9b272e 100644
--- a/rdiff-backup/testing/commontest.py
+++ b/rdiff-backup/testing/commontest.py
@@ -386,4 +386,19 @@ def raise_interpreter(use_locals = None):
else: local_dict = globals()
code.InteractiveConsole(local_dict).interact()
-
+def getrefs(i, depth):
+ """Get the i'th object in memory, return objects that reference it"""
+ import sys, gc, types
+ o = sys.getobjects(i)[-1]
+ for d in range(depth):
+ for ref in gc.get_referrers(o):
+ if type(ref) in (types.ListType, types.DictType,
+ types.InstanceType):
+ if type(ref) is types.DictType and ref.has_key('copyright'):
+ continue
+ o = ref
+ break
+ else:
+ print "Max depth ", d
+ return o
+ return o
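
The new getrefs helper depends on sys.getobjects, which is only provided by interpreters compiled with Py_TRACE_REFS (a special debug build), so it is a developer aid for finding what keeps an object alive rather than part of the normal test run. A hypothetical session from the raise_interpreter() console (the object position and depth below are arbitrary examples):

# Hypothetical usage; requires a Py_TRACE_REFS build of Python, since only
# such builds provide sys.getobjects().
#
#     >>> import commontest
#     >>> holder = commontest.getrefs(3, 20)   # follow up to 20 referrer levels
#     >>> holder                               # the container keeping the object alive
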