From adc9d7a9a9cd90c8d078b1276cfdad98d0303d07 Mon Sep 17 00:00:00 2001 From: bescoto Date: Wed, 12 Feb 2003 07:28:33 +0000 Subject: Fixed selection bug, renamed metadata files to ".snapshot" git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@278 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 4 ++++ rdiff-backup/TODO | 4 ++-- rdiff-backup/rdiff_backup/Globals.py | 5 +++++ rdiff-backup/rdiff_backup/backup.py | 17 ++++++++++----- rdiff-backup/rdiff_backup/metadata.py | 9 ++++---- rdiff-backup/rdiff_backup/rorpiter.py | 39 +++++++++++++++++++++++++++++++++++ rdiff-backup/testing/commontest.py | 2 +- rdiff-backup/testing/finaltest.py | 2 +- rdiff-backup/testing/rorpitertest.py | 35 +++++++++++++++++++++++++++++++ 9 files changed, 104 insertions(+), 13 deletions(-) diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG index f9a4cd1..57948b4 100644 --- a/rdiff-backup/CHANGELOG +++ b/rdiff-backup/CHANGELOG @@ -6,6 +6,10 @@ to Dave Steinberg for giving me an account on his system for testing. Re-enabled --windows-mode and filename quoting. +Fixed selection bug: In 0.11.1, files which were included in one +backup would be automatically included in the next. Now you can +include/exclude files session-by-session. + New in v0.11.1 (2002/12/31) --------------------------- diff --git a/rdiff-backup/TODO b/rdiff-backup/TODO index 17c689f..7145ea1 100644 --- a/rdiff-backup/TODO +++ b/rdiff-backup/TODO @@ -16,12 +16,12 @@ have changed between two times ---------[ Medium term ]--------------------------------------- +Add ACL support + Add --dry-run option (target for v1.1.x) Add # of increments option to --remove-older-than -Restore only changed files - Make argument shortcut for cstream Make --calculate-averages work with directory_statistics file. diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py index 97c42de..35c465b 100644 --- a/rdiff-backup/rdiff_backup/Globals.py +++ b/rdiff-backup/rdiff_backup/Globals.py @@ -37,6 +37,11 @@ blocksize = 32768 # values may save on connection overhead and latency. conn_bufsize = 98304 +# This is used in rorpiter.CacheIndexable. The number represents the +# number of rpaths which may be stuck in buffers when moving over a +# remote connection. +pipeline_max_length = int(conn_bufsize / 150) + # True if script is running as a server server = None diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py index a4b9bff..2f3d362 100644 --- a/rdiff-backup/rdiff_backup/backup.py +++ b/rdiff-backup/rdiff_backup/backup.py @@ -31,7 +31,7 @@ def Mirror(src_rpath, dest_rpath): DestS.init_statistics() source_rpiter = SourceS.get_source_select() dest_sigiter = DestS.process_source_get_sigs(dest_rpath, source_rpiter, 0) - source_diffiter = SourceS.get_diffs(src_rpath, dest_sigiter) + source_diffiter = SourceS.get_diffs(dest_sigiter) DestS.patch(dest_rpath, source_diffiter) DestS.write_statistics() @@ -43,7 +43,7 @@ def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath): DestS.init_statistics() source_rpiter = SourceS.get_source_select() dest_sigiter = DestS.process_source_get_sigs(dest_rpath, source_rpiter, 1) - source_diffiter = SourceS.get_diffs(src_rpath, dest_sigiter) + source_diffiter = SourceS.get_diffs(dest_sigiter) DestS.patch_and_increment(dest_rpath, source_diffiter, inc_rpath) DestS.write_statistics() @@ -59,19 +59,26 @@ class SourceStruct: connection. Otherwise we will get an error because a list containing files can't be pickled. + Also, cls.source_select needs to be cached so get_diffs below + can retrieve the necessary rps. + """ sel = selection.Select(rpath) sel.ParseArgs(tuplelist, filelists) - cls.source_select = sel.set_iter() + sel.set_iter() + cache_size = Globals.pipeline_max_length * 2 # 2 because to and from + cls.source_select = rorpiter.CacheIndexable(sel, cache_size) def get_source_select(cls): """Return source select iterator, set by set_source_select""" return cls.source_select - def get_diffs(cls, baserp, dest_sigiter): + def get_diffs(cls, dest_sigiter): """Return diffs of any files with signature in dest_sigiter""" + source_rps = cls.source_select def get_one_diff(dest_sig): - src_rp = baserp.new_index(dest_sig.index) + src_rp = (source_rps.get(dest_sig.index) or + rpath.RORPath(dest_sig.index)) diff_rorp = src_rp.getRORPath() if dest_sig.isflaglinked(): diff_rorp.flaglinked(dest_sig.get_link_flag()) diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index ec86168..8b4e96d 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -266,8 +266,8 @@ def OpenMetadata(rp = None, compress = 1): assert not metadata_fileobj, "Metadata file already open" if rp: metadata_rp = rp else: - if compress: typestr = 'data.gz' - else: typestr = 'data' + if compress: typestr = 'snapshot.gz' + else: typestr = 'snapshot' metadata_rp = Globals.rbdir.append("mirror_metadata.%s.%s" % (Time.curtimestr, typestr)) metadata_fileobj = metadata_rp.open("wb", compress = compress) @@ -293,7 +293,7 @@ def GetMetadata(rp, restrict_index = None, compressed = None): if compressed is None: if rp.isincfile(): compressed = rp.inc_compressed - assert rp.inc_type == "data", rp.inc_type + assert rp.inc_type == "data" or rp.inc_type == "snapshot" else: compressed = rp.get_indexpath().endswith(".gz") fileobj = rp.open("rb", compress = compressed) @@ -311,7 +311,8 @@ def GetMetadata_at_time(rbdir, time, restrict_index = None, rblist = None): if rblist is None: rblist = map(lambda x: rbdir.append(x), robust.listrp(rbdir)) for rp in rblist: - if (rp.isincfile() and rp.getinctype() == "data" and + if (rp.isincfile() and + (rp.getinctype() == "data" or rp.getinctype() == "snapshot") and rp.getincbase_str() == "mirror_metadata"): if rp.getinctime() == time: return GetMetadata(rp, restrict_index) return None diff --git a/rdiff-backup/rdiff_backup/rorpiter.py b/rdiff-backup/rdiff_backup/rorpiter.py index 3027fd1..f75d8e8 100644 --- a/rdiff-backup/rdiff_backup/rorpiter.py +++ b/rdiff-backup/rdiff_backup/rorpiter.py @@ -403,3 +403,42 @@ class ITRBranch: (index and os.path.join(*index) or '()',), 2) +class CacheIndexable: + """Cache last few indexed elements in iterator + + This class should be initialized with an iterator yielding + .index'd objects. It looks like it is just the same iterator as + the one that initialized it. Luckily, it does more, caching the + last few elements iterated, which can be retrieved using the + .get() method. + + If the index is not in the cache, return None. + + """ + def __init__(self, indexed_iter, cache_size = None): + """Make new CacheIndexable. Cache_size is max cache length""" + self.cache_size = cache_size + self.iter = indexed_iter + self.cache_dict = {} + self.cache_indicies = [] + + def next(self): + """Return next elem, add to cache. StopIteration passed upwards""" + next_elem = self.iter.next() + next_index = next_elem.index + self.cache_dict[next_index] = next_elem + self.cache_indicies.append(next_index) + + if len(self.cache_indicies) > self.cache_size: + del self.cache_dict[self.cache_indicies[0]] + del self.cache_indicies[0] + + return next_elem + + def __iter__(self): return self + + def get(self, index): + """Return element with index index from cache""" + try: return self.cache_dict[index] + except KeyError: return None + diff --git a/rdiff-backup/testing/commontest.py b/rdiff-backup/testing/commontest.py index 1e6fa82..57aad7b 100644 --- a/rdiff-backup/testing/commontest.py +++ b/rdiff-backup/testing/commontest.py @@ -157,7 +157,7 @@ def get_increment_rp(mirror_rp, time): for filename in data_rp.listdir(): rp = data_rp.append(filename) if rp.isincfile() and rp.getincbase_str() == "increments": - if Time.stringtotime(rp.getinctime()) == time: return rp + if rp.getinctime() == time: return rp return None # Couldn't find appropriate increment def _reset_connections(src_rp, dest_rp): diff --git a/rdiff-backup/testing/finaltest.py b/rdiff-backup/testing/finaltest.py index 60bc072..44a8a2a 100644 --- a/rdiff-backup/testing/finaltest.py +++ b/rdiff-backup/testing/finaltest.py @@ -249,7 +249,7 @@ class Final(PathSetter): class FinalSelection(PathSetter): """Test selection options""" - def run(cmd): + def run(self, cmd): print "Executing: ", cmd assert not os.system(cmd) diff --git a/rdiff-backup/testing/rorpitertest.py b/rdiff-backup/testing/rorpitertest.py index f43a085..35f5916 100644 --- a/rdiff-backup/testing/rorpitertest.py +++ b/rdiff-backup/testing/rorpitertest.py @@ -254,6 +254,41 @@ class TreeReducerTest(unittest.TestCase): assert itm2c.root_branch.total == 12, itm2c.root_branch.total +class CacheIndexableTest(unittest.TestCase): + def get_iter(self): + """Return iterator yielding indexed objects, add to dict d""" + for i in range(100): + it = rorpiter.IndexedTuple((i,), range(i)) + self.d[(i,)] = it + yield it + + def testCaching(self): + """Test basic properties of CacheIndexable object""" + self.d = {} + + ci = rorpiter.CacheIndexable(self.get_iter(), 3) + val0 = ci.next() + val1 = ci.next() + val2 = ci.next() + + assert ci.get((1,)) == self.d[(1,)] + assert ci.get((3,)) is None + + val3 = ci.next() + val4 = ci.next() + val5 = ci.next() + + assert ci.get((3,)) == self.d[(3,)] + assert ci.get((4,)) == self.d[(4,)] + assert ci.get((1,)) is None + + def testEqual(self): + """Make sure CI doesn't alter properties of underlying iter""" + self.d = {} + l1 = list(self.get_iter()) + l2 = list(rorpiter.CacheIndexable(iter(l1), 10)) + assert l1 == l2, (l1, l2) + if __name__ == "__main__": unittest.main() -- cgit v1.2.1