Fixed selection bug, renamed metadata files to ".snapshot"

git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@278 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
author: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2003-02-12 07:28:33 +0000
committer: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2003-02-12 07:28:33 +0000
commit: adc9d7a9a9cd90c8d078b1276cfdad98d0303d07 (patch)
tree: afa49da6e1ebc85a0d16aad52591e55bda0c70f2 /rdiff-backup/rdiff_backup
parent: 9613406fbab1949f66fe7858590cab990c7b4b25 (diff)
download: rdiff-backup-adc9d7a9a9cd90c8d078b1276cfdad98d0303d07.tar.gz
4 files changed, 61 insertions, 9 deletions
diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py
index 97c42de..35c465b 100644
--- a/rdiff-backup/rdiff_backup/Globals.py
+++ b/rdiff-backup/rdiff_backup/Globals.py
@@ -37,6 +37,11 @@ blocksize = 32768
 # values may save on connection overhead and latency.
 conn_bufsize = 98304
 
+# This is used in rorpiter.CacheIndexable.  The number represents the
+# number of rpaths which may be stuck in buffers when moving over a
+# remote connection.
+pipeline_max_length = int(conn_bufsize / 150)
+
 # True if script is running as a server
 server = None
 
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index a4b9bff..2f3d362 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -31,7 +31,7 @@ def Mirror(src_rpath, dest_rpath):
 	DestS.init_statistics()
 	source_rpiter = SourceS.get_source_select()
 	dest_sigiter = DestS.process_source_get_sigs(dest_rpath, source_rpiter, 0)
-	source_diffiter = SourceS.get_diffs(src_rpath, dest_sigiter)
+	source_diffiter = SourceS.get_diffs(dest_sigiter)
 	DestS.patch(dest_rpath, source_diffiter)
 	DestS.write_statistics()
 
@@ -43,7 +43,7 @@ def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath):
 	DestS.init_statistics()
 	source_rpiter = SourceS.get_source_select()
 	dest_sigiter = DestS.process_source_get_sigs(dest_rpath, source_rpiter, 1)
-	source_diffiter = SourceS.get_diffs(src_rpath, dest_sigiter)
+	source_diffiter = SourceS.get_diffs(dest_sigiter)
 	DestS.patch_and_increment(dest_rpath, source_diffiter, inc_rpath)
 	DestS.write_statistics()
 
@@ -59,19 +59,26 @@ class SourceStruct:
 		connection.  Otherwise we will get an error because a list
 		containing files can't be pickled.
 
+		Also, cls.source_select needs to be cached so get_diffs below
+		can retrieve the necessary rps.
+
 		"""
 		sel = selection.Select(rpath)
 		sel.ParseArgs(tuplelist, filelists)
-		cls.source_select = sel.set_iter()
+		sel.set_iter()
+		cache_size = Globals.pipeline_max_length * 2 # 2 because to and from
+		cls.source_select = rorpiter.CacheIndexable(sel, cache_size)
 
 	def get_source_select(cls):
 		"""Return source select iterator, set by set_source_select"""
 		return cls.source_select
 
-	def get_diffs(cls, baserp, dest_sigiter):
+	def get_diffs(cls, dest_sigiter):
 		"""Return diffs of any files with signature in dest_sigiter"""
+		source_rps = cls.source_select
 		def get_one_diff(dest_sig):
-			src_rp = baserp.new_index(dest_sig.index)
+			src_rp = (source_rps.get(dest_sig.index) or
+					  rpath.RORPath(dest_sig.index))
 			diff_rorp = src_rp.getRORPath()
 			if dest_sig.isflaglinked():
 				diff_rorp.flaglinked(dest_sig.get_link_flag())
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index ec86168..8b4e96d 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -266,8 +266,8 @@ def OpenMetadata(rp = None, compress = 1):
 	assert not metadata_fileobj, "Metadata file already open"
 	if rp: metadata_rp = rp
 	else:
-		if compress: typestr = 'data.gz'
-		else: typestr = 'data'
+		if compress: typestr = 'snapshot.gz'
+		else: typestr = 'snapshot'
 		metadata_rp = Globals.rbdir.append("mirror_metadata.%s.%s" %
 										   (Time.curtimestr, typestr))
 	metadata_fileobj = metadata_rp.open("wb", compress = compress)
@@ -293,7 +293,7 @@ def GetMetadata(rp, restrict_index = None, compressed = None):
 	if compressed is None:
 		if rp.isincfile():
 			compressed = rp.inc_compressed
-			assert rp.inc_type == "data", rp.inc_type
+			assert rp.inc_type == "data" or rp.inc_type == "snapshot"
 		else: compressed = rp.get_indexpath().endswith(".gz")
 
 	fileobj = rp.open("rb", compress = compressed)
@@ -311,7 +311,8 @@ def GetMetadata_at_time(rbdir, time, restrict_index = None, rblist = None):
 	if rblist is None: rblist = map(lambda x: rbdir.append(x),
 									robust.listrp(rbdir))
 	for rp in rblist:
-		if (rp.isincfile() and rp.getinctype() == "data" and
+		if (rp.isincfile() and
+			(rp.getinctype() == "data" or rp.getinctype() == "snapshot") and
 			rp.getincbase_str() == "mirror_metadata"):
 			if rp.getinctime() == time: return GetMetadata(rp, restrict_index)
 	return None
diff --git a/rdiff-backup/rdiff_backup/rorpiter.py b/rdiff-backup/rdiff_backup/rorpiter.py
index 3027fd1..f75d8e8 100644
--- a/rdiff-backup/rdiff_backup/rorpiter.py
+++ b/rdiff-backup/rdiff_backup/rorpiter.py
@@ -403,3 +403,42 @@ class ITRBranch:
 				(index and os.path.join(*index) or '()',), 2)
 
 
+class CacheIndexable:
+	"""Iterator wrapper that caches the most recently yielded elements
+
+	Initialize with an iterator yielding objects that have an .index
+	attribute.  Iterating over a CacheIndexable yields exactly the
+	elements of the wrapped iterator, in the same order.  In addition,
+	each element is remembered under its index as it is yielded, and
+	remembered elements can be looked up with the .get() method.
+
+	.get() returns None if the index is not in the cache.
+
+	"""
+	def __init__(self, indexed_iter, cache_size = None):
+		"""Wrap indexed_iter.  Cache_size is the max cache length"""
+		self.cache_size = cache_size # compared to len() in next(); presumably always an int -- TODO confirm
+		self.iter = indexed_iter
+		self.cache_dict = {} # maps index -> cached element
+		self.cache_indicies = [] # indices in the order they were cached, oldest first
+
+	def next(self):
+		"""Return next elem of wrapped iter, caching it.  StopIteration passed upwards"""
+		next_elem = self.iter.next()
+		next_index = next_elem.index
+		self.cache_dict[next_index] = next_elem
+		self.cache_indicies.append(next_index)
+
+		if len(self.cache_indicies) > self.cache_size: # over the limit, so drop the oldest entry
+			del self.cache_dict[self.cache_indicies[0]]
+			del self.cache_indicies[0]
+
+		return next_elem
+
+	def __iter__(self): return self
+
+	def get(self, index):
+		"""Return cached element with the given index, or None if not cached"""
+		try: return self.cache_dict[index]
+		except KeyError: return None
+
author	bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2003-02-12 07:28:33 +0000
committer	bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2003-02-12 07:28:33 +0000
commit	adc9d7a9a9cd90c8d078b1276cfdad98d0303d07 (patch)
tree	afa49da6e1ebc85a0d16aad52591e55bda0c70f2 /rdiff-backup/rdiff_backup
parent	9613406fbab1949f66fe7858590cab990c7b4b25 (diff)
download	rdiff-backup-adc9d7a9a9cd90c8d078b1276cfdad98d0303d07.tar.gz