Added iterator operations on metadata file

git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@244 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
author: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-12-13 20:49:17 +0000
committer: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-12-13 20:49:17 +0000
commit: 3b9298b8018c0df65ce601af092b6ce4a6a4d090 (patch)
tree: c976707f5b16c8a897830d1f7020c3f0b1d61273 /rdiff-backup/rdiff_backup
parent: c1a39e88df7ec66297635f519cb1f3fbd1b584f0 (diff)
download: rdiff-backup-3b9298b8018c0df65ce601af092b6ce4a6a4d090.tar.gz
2 files changed, 164 insertions, 2 deletions
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index b596f76..50a7704 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -54,7 +54,9 @@ field names and values.
 
 """
 
-import re, log, Globals, rpath
+from __future__ import generators
+import re, gzip
+from rdiff_backup import log, Globals, rpath, Time
 
 class ParsingError(Exception):
 	"""This is raised when bad or unparsable data is received"""
@@ -169,3 +171,139 @@ def unquote_path(quoted_string):
 		log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
 		return two_chars
 	return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string)
+
+
+def write_rorp_iter_to_file(rorp_iter, file):
+	"""Given iterator of RORPs, write records to (pre-opened) file object"""
+	for rorp in rorp_iter: file.write(RORP2Record(rorp))
+
+class rorp_extractor:
+	"""Controls iterating rorps from metadata file"""
+	def __init__(self, fileobj):
+		self.fileobj = fileobj # holds file object we are reading from
+		self.buf = "" # holds the next part of the file
+		self.record_boundary_regexp = re.compile("\\nFile")
+		self.at_end = 0 # True if we are at the end of the file
+		self.blocksize = 32 * 1024
+
+	def get_next_pos(self):
+		"""Return position of next record in buffer"""
+		while 1:
+			m = self.record_boundary_regexp.search(self.buf)
+			if m: return m.start(0)+1 # the +1 skips the newline
+			else: # add next block to the buffer, loop again
+				newbuf = self.fileobj.read(self.blocksize)
+				if not newbuf:
+					self.at_end = 1
+					return len(self.buf)
+				else: self.buf += newbuf
+
+	def iterate(self):
+		"""Return iterator over all records"""
+		while 1:
+			next_pos = self.get_next_pos()
+			try: yield Record2RORP(self.buf[:next_pos])
+			except ParsingError, e:
+				log.Log("Error parsing metadata file: %s" % (e,), 2)
+			if self.at_end: break
+			self.buf = self.buf[next_pos:]
+
+	def skip_to_index(self, index):
+		"""Scan through the file, set buffer to beginning of index record
+
+		Here we make sure that the buffer always ends in a newline, so
+		we will not be splitting lines in half.
+
+		"""
+		assert not self.buf or self.buf.endswith("\n")
+		if not index: indexpath = "."
+		else: indexpath = "/".join(index)
+		# Must double all backslashes, because they will be
+		# reinterpreted.  For instance, to search for index \n
+		# (newline), it will be \\n (backslash n) in the file, so the
+		# regular expression is "File \\\\n\\n" (File two backslash n
+		# backslash n)
+		double_quote = re.sub("\\\\", "\\\\\\\\", indexpath)
+		begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,))
+		while 1:
+			m = begin_re.search(self.buf)
+			if m:
+				self.buf = self.buf[m.start(2):]
+				return
+			self.buf = self.fileobj.read(self.blocksize)
+			self.buf += self.fileobj.readline()
+			if not self.buf:
+				self.at_end = 1
+				return
+
+	def iterate_starting_with(self, index):
+		"""Iterate records whose index starts with given index"""
+		self.skip_to_index(index)
+		if self.at_end: return
+		while 1:
+			next_pos = self.get_next_pos()
+			try: rorp = Record2RORP(self.buf[:next_pos])
+			except ParsingError, e:
+				log.Log("Error parsing metadata file: %s" % (e,), 2)
+			else:
+				if rorp.index[:len(index)] != index: break
+				yield rorp
+			if self.at_end: break
+			self.buf = self.buf[next_pos:]
+
+	def close(self):
+		"""Return value of closing associated file"""
+		return self.fileobj.close()
+
+
+metadata_rp = None
+metadata_fileobj = None
+def OpenMetadata(rp = None, compress = 1):
+	"""Open metadata file rp (default: new mirror_metadata file in rbdir) for writing"""
+	global metadata_rp, metadata_fileobj # CloseMetadata reads metadata_rp
+	assert not metadata_fileobj, "Metadata file already open"
+	if rp: metadata_rp = rp
+	else: metadata_rp = Globals.rbdir.append("mirror_metadata.%s.data.gz" %
+											 (Time.curtimestr,))
+	metadata_fileobj = metadata_rp.open("wb", compress = compress)
+
+def WriteMetadata(rorp):
+	"""Write metadata of rorp to file"""
+	global metadata_fileobj
+	metadata_fileobj.write(RORP2Record(rorp))
+
+def CloseMetadata():
+	"""Close the metadata file"""
+	global metadata_fileobj
+	result = metadata_fileobj.close()
+	metadata_fileobj = None
+	metadata_rp.setdata()
+	return result
+
+def GetMetadata(rp = None, restrict_index = None, compressed = None):
+	"""Return iterator of metadata from given metadata file rp"""
+	if compressed is None:
+		if rp.isincfile():
+			compressed = rp.inc_compressed
+			assert rp.inc_type == "data", rp.inc_type
+		else: compressed = rp.get_indexpath().endswith(".gz")
+
+	fileobj = rp.open("rb", compress = compressed)
+	if restrict_index is None: return rorp_extractor(fileobj).iterate()
+	else: return rorp_extractor(fileobj).iterate_starting_with(restrict_index)
+
+def GetMetadata_at_time(rpdir, time, restrict_index = None, rplist = None):
+	"""Scan through rpdir, finding metadata file at given time, iterate
+
+	If rplist is given, use that instead of listing rpdir.  Time here
+	is exact, we don't take the next one older or anything.  Returns
+	None if no matching metadata found.
+
+	"""
+	if rplist is None: rplist = map(lambda x: rpdir.append(x), rpdir.listdir())
+	for rp in rplist:
+		if (rp.isincfile() and rp.getinctype() == "data" and
+			rp.getincbase_str() == "mirror_metadata"):
+			if Time.stringtotime(rp.getinctime()) == time:
+				return GetMetadata(rp, restrict_index)
+	return None
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index cfc0688..9d54872 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -271,11 +271,35 @@ class RORPath(RPathStatic):
 				pass
 			elif key == 'atime' and not Globals.preserve_atime: pass
 			elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
-			elif key == 'size' and self.isdir(): pass
+			elif key == 'size' and not self.isreg():
+				pass # size only matters for regular files
 			elif (not other.data.has_key(key) or
 				  self.data[key] != other.data[key]): return None
 		return 1
 
+	def equal_verbose(self, other):
+		"""Like __eq__, but log more information.  Useful when testing"""
+		if self.index != other.index:
+			Log("Index %s != index %s" % (self.index, other.index), 2)
+			return None
+
+		for key in self.data.keys(): # compare dicts key by key
+			if ((key == 'uid' or key == 'gid') and
+				(not Globals.change_ownership or self.issym())):
+				# Don't compare gid/uid for symlinks or if not change_ownership
+				pass
+			elif key == 'atime' and not Globals.preserve_atime: pass
+			elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
+			elif key == 'size' and not self.isreg(): pass
+			elif (not other.data.has_key(key) or
+				  self.data[key] != other.data[key]):
+				if not other.data.has_key(key):
+					Log("Second is missing key %s" % (key,), 2)
+				else: Log("Value of %s differs: %s vs %s" %
+						  (key, self.data[key], other.data[key]), 2)
+				return None
+		return 1
+
 	def __ne__(self, other): return not self.__eq__(other)
 
 	def __str__(self):
author	bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2002-12-13 20:49:17 +0000
committer	bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>	2002-12-13 20:49:17 +0000
commit	3b9298b8018c0df65ce601af092b6ce4a6a4d090 (patch)
tree	c976707f5b16c8a897830d1f7020c3f0b1d61273 /rdiff-backup/rdiff_backup
parent	c1a39e88df7ec66297635f519cb1f3fbd1b584f0 (diff)
download	rdiff-backup-3b9298b8018c0df65ce601af092b6ce4a6a4d090.tar.gz