From 5e09353fe5ce80b724a30db11cb10523d147c6ec Mon Sep 17 00:00:00 2001 From: bescoto Date: Mon, 30 Jun 2003 03:00:18 +0000 Subject: Many changes - added extended attribute support and file system ability detection git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@334 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/metadata.py | 222 ++++++++++++++++++++-------------- 1 file changed, 133 insertions(+), 89 deletions(-) (limited to 'rdiff-backup/rdiff_backup/metadata.py') diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index f1f23ac..3d8ba60 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -56,7 +56,7 @@ field names and values. from __future__ import generators import re, gzip, os -import log, Globals, rpath, Time, robust, increment +import log, Globals, rpath, Time, robust, increment, static class ParsingError(Exception): """This is raised when bad or unparsable data is received""" @@ -165,16 +165,14 @@ def unquote_path(quoted_string): return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string) -def write_rorp_iter_to_file(rorp_iter, file): - """Given iterator of RORPs, write records to (pre-opened) file object""" - for rorp in rorp_iter: file.write(RORP2Record(rorp)) - -class rorp_extractor: - """Controls iterating rorps from metadata file""" +class FlatExtractor: + """Controls iterating objects from flat file""" + # The following two should be set in subclasses + record_boundary_regexp = None # Matches beginning of next record + record_to_object = None # Function that converts text record to object def __init__(self, fileobj): self.fileobj = fileobj # holds file object we are reading from self.buf = "" # holds the next part of the file - self.record_boundary_regexp = re.compile("\\nFile") self.at_end = 0 # True if we are at the end of the file self.blocksize = 32 * 1024 @@ -191,12 +189,13 @@ class rorp_extractor: else: self.buf += newbuf def iterate(self): - """Return iterator over all records""" + """Return iterator that yields all objects with records""" while 1: next_pos = self.get_next_pos() - try: yield Record2RORP(self.buf[:next_pos]) + try: yield self.record_to_object(self.buf[:next_pos]) except ParsingError, e: - log.Log("Error parsing metadata file: %s" % (e,), 2) + if self.at_end: break # Ignore whitespace/bad records at end + log.Log("Error parsing flat file: %s" % (e,), 2) if self.at_end: break self.buf = self.buf[next_pos:] assert not self.close() @@ -209,15 +208,7 @@ class rorp_extractor: """ assert not self.buf or self.buf.endswith("\n") - if not index: indexpath = "." - else: indexpath = "/".join(index) - # Must double all backslashes, because they will be - # reinterpreted. For instance, to search for index \n - # (newline), it will be \\n (backslash n) in the file, so the - # regular expression is "File \\\\n\\n" (File two backslash n - # backslash n) - double_quote = re.sub("\\\\", "\\\\\\\\", indexpath) - begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,)) + begin_re = self.get_index_re(index) while 1: m = begin_re.search(self.buf) if m: @@ -229,18 +220,28 @@ class rorp_extractor: self.at_end = 1 return + def get_index_re(self, index): + """Return regular expression used to find index. + + Override this in sub classes. The regular expression's second + group needs to start at the beginning of the record that + contains information about the object with the given index. 
+ + """ + assert 0, "Just a placeholder, must override this in subclasses" + def iterate_starting_with(self, index): - """Iterate records whose index starts with given index""" + """Iterate objects whose index starts with given index""" self.skip_to_index(index) if self.at_end: return while 1: next_pos = self.get_next_pos() - try: rorp = Record2RORP(self.buf[:next_pos]) + try: obj = self.record_to_object(self.buf[:next_pos]) except ParsingError, e: log.Log("Error parsing metadata file: %s" % (e,), 2) else: - if rorp.index[:len(index)] != index: break - yield rorp + if obj.index[:len(index)] != index: break + yield obj if self.at_end: break self.buf = self.buf[next_pos:] assert not self.close() @@ -249,73 +250,116 @@ class rorp_extractor: """Return value of closing associated file""" return self.fileobj.close() +class RorpExtractor(FlatExtractor): + """Iterate rorps from metadata file""" + record_boundary_regexp = re.compile("\\nFile") + record_to_object = staticmethod(Record2RORP) + def get_index_re(self, index): + """Find start of rorp record with given index""" + indexpath = index and '/'.join(index) or '.' + # Must double all backslashes, because they will be + # reinterpreted. For instance, to search for index \n + # (newline), it will be \\n (backslash n) in the file, so the + # regular expression is "File \\\\n\\n" (File two backslash n + # backslash n) + double_quote = re.sub("\\\\", "\\\\\\\\", indexpath) + return re.compile("(^|\\n)(File %s\\n)" % (double_quote,)) -metadata_rp = None -metadata_fileobj = None -metadata_record_buffer = [] # Use this because gzip writes are slow -def OpenMetadata(rp = None, compress = 1): - """Open the Metadata file for writing, return metadata fileobj""" - global metadata_rp, metadata_fileobj - assert not metadata_fileobj, "Metadata file already open" - if rp: metadata_rp = rp - else: - if compress: typestr = 'snapshot.gz' - else: typestr = 'snapshot' - metadata_rp = Globals.rbdir.append("mirror_metadata.%s.%s" % - (Time.curtimestr, typestr)) - metadata_fileobj = metadata_rp.open("wb", compress = compress) - -def WriteMetadata(rorp): - """Write metadata of rorp to file""" - global metadata_fileobj, metadata_record_buffer - metadata_record_buffer.append(RORP2Record(rorp)) - if len(metadata_record_buffer) >= 100: write_metadata_buffer() - -def write_metadata_buffer(): - global metadata_record_buffer - metadata_fileobj.write("".join(metadata_record_buffer)) - metadata_record_buffer = [] - -def CloseMetadata(): - """Close the metadata file""" - global metadata_rp, metadata_fileobj - assert metadata_fileobj, "Metadata file not open" - if metadata_record_buffer: write_metadata_buffer() - try: fileno = metadata_fileobj.fileno() # will not work if GzipFile - except AttributeError: fileno = metadata_fileobj.fileobj.fileno() - os.fsync(fileno) - result = metadata_fileobj.close() - metadata_fileobj = None - metadata_rp.setdata() - return result - -def GetMetadata(rp, restrict_index = None, compressed = None): - """Return iterator of metadata from given metadata file rp""" - if compressed is None: - if rp.isincfile(): - compressed = rp.inc_compressed - assert rp.inc_type == "data" or rp.inc_type == "snapshot" - else: compressed = rp.get_indexpath().endswith(".gz") - - fileobj = rp.open("rb", compress = compressed) - if restrict_index is None: return rorp_extractor(fileobj).iterate() - else: return rorp_extractor(fileobj).iterate_starting_with(restrict_index) - -def GetMetadata_at_time(rbdir, time, restrict_index = None, rblist = None): - """Scan through rbdir, 
finding metadata file at given time, iterate - - If rdlist is given, use that instead of listing rddir. Time here - is exact, we don't take the next one older or anything. Returns - None if no matching metadata found. + +class FlatFile: + """Manage a flat (probably text) file containing info on various files + + This is used for metadata information, and possibly EAs and ACLs. + The main read interface is as an iterator. The storage format is + a flat, probably compressed file, so random access is not + recommended. """ - if rblist is None: rblist = map(lambda x: rbdir.append(x), - robust.listrp(rbdir)) - for rp in rblist: - if (rp.isincfile() and - (rp.getinctype() == "data" or rp.getinctype() == "snapshot") and - rp.getincbase_str() == "mirror_metadata"): - if rp.getinctime() == time: return GetMetadata(rp, restrict_index) - return None + _prefix = None # Set this to real prefix when subclassing + _rp, _fileobj = None, None + # Buffering may be useful because gzip writes are slow + _buffering_on = 1 + _record_buffer, _max_buffer_size = None, 100 + _extractor = FlatExtractor # Set to class that iterates objects + + def open_file(cls, rp = None, compress = 1): + """Open file for writing. Use cls._rp if rp not given.""" + assert not cls._fileobj, "Flatfile already open" + cls._record_buffer = [] + if rp: cls._rp = rp + else: + if compress: typestr = 'snapshot.gz' + else: typestr = 'snapshot' + cls._rp = Globals.rbdir.append( + "%s.%s.%s" % (cls._prefix, Time.curtimestr, typestr)) + cls._fileobj = cls._rp.open("wb", compress = compress) + + def write_object(cls, object): + """Convert one object to record and write to file""" + record = cls._object_to_record(object) + if cls._buffering_on: + cls._record_buffer.append(record) + if len(cls._record_buffer) >= cls._max_buffer_size: + cls._fileobj.write("".join(cls._record_buffer)) + cls._record_buffer = [] + else: cls._fileobj.write(record) + + def close_file(cls): + """Close file, for when any writing is done""" + assert cls._fileobj, "File already closed" + if cls._buffering_on and cls._record_buffer: + cls._fileobj.write("".join(cls._record_buffer)) + cls._record_buffer = [] + try: fileno = cls._fileobj.fileno() # will not work if GzipFile + except AttributeError: fileno = cls._fileobj.fileobj.fileno() + os.fsync(fileno) + result = cls._fileobj.close() + cls._fileobj = None + cls._rp.setdata() + return result + + def get_objects(cls, restrict_index = None, compressed = None): + """Return iterator of objects records from file rp""" + assert cls._rp, "Must have rp set before get_objects can be used" + if compressed is None: + if cls._rp.isincfile(): + compressed = cls._rp.inc_compressed + assert (cls._rp.inc_type == 'data' or + cls._rp.inc_type == 'snapshot'), cls._rp.inc_type + else: compressed = cls._rp.get_indexpath().endswith('.gz') + + fileobj = cls._rp.open('rb', compress = compressed) + if restrict_index is None: return cls._extractor(fileobj).iterate() + else: + re = cls._extractor(fileobj) + return re.iterate_starting_with(restrict_index) + + def get_objects_at_time(cls, rbdir, time, restrict_index = None, + rblist = None): + """Scan through rbdir, finding data at given time, iterate + + If rblist is givenr, use that instead of listing rbdir. Time + here is exact, we don't take the next one older or anything. + Returns None if no file matching prefix is found. 
+        """
+        if rblist is None:
+            rblist = map(lambda x: rbdir.append(x), robust.listrp(rbdir))
+
+        for rp in rblist:
+            if (rp.isincfile() and
+                (rp.getinctype() == "data" or rp.getinctype() == "snapshot")
+                and rp.getincbase_str() == cls._prefix):
+                if rp.getinctime() == time:
+                    cls._rp = rp
+                    return cls.get_objects(restrict_index)
+        return None
+
+static.MakeClass(FlatFile)
+
+class MetadataFile(FlatFile):
+    """Store/retrieve metadata from mirror_metadata as rorps"""
+    _prefix = "mirror_metadata"
+    _extractor = RorpExtractor
+    _object_to_record = staticmethod(RORP2Record)
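
Note on the new FlatExtractor (sketch, not part of the patch): it generalizes the old rorp_extractor by reading the flat file in 32 KB blocks, locating the start of the next record with record_boundary_regexp, and handing each raw record to record_to_object. The standalone sketch below, with the made-up name SimpleFlatExtractor, illustrates only that buffer-and-split loop and yields raw byte records instead of parsed objects; embedded newlines are quoted in the real format (quote_path/unquote_path), which is what keeps the "\nFile" boundary unambiguous.

import re
from io import BytesIO

class SimpleFlatExtractor(object):
    """Sketch of FlatExtractor's buffering strategy: read the flat file in
    blocks and cut it into records wherever the boundary regexp marks the
    start of the next record."""
    record_boundary_regexp = re.compile(b"\\nFile")  # start of next record
    blocksize = 32 * 1024

    def __init__(self, fileobj):
        self.fileobj = fileobj   # file object we are reading from
        self.buf = b""           # unconsumed part of the file
        self.at_end = 0          # true once read() returns nothing

    def _fill(self):
        """Read one more block into the buffer, noting end of file"""
        newbuf = self.fileobj.read(self.blocksize)
        if not newbuf: self.at_end = 1
        else: self.buf += newbuf

    def _next_record_length(self):
        """Return length of the record at the front of self.buf"""
        while 1:
            m = self.record_boundary_regexp.search(self.buf)
            if m: return m.start(0) + 1  # keep trailing newline with this record
            self._fill()                 # boundary not buffered yet, read more
            if self.at_end: return len(self.buf)

    def iterate(self):
        """Yield one raw record (bytes) at a time"""
        while 1:
            length = self._next_record_length()
            record = self.buf[:length]
            if record.strip(): yield record  # skip a whitespace-only tail at EOF
            if self.at_end: break
            self.buf = self.buf[length:]
        self.fileobj.close()

# Example: two records, each introduced by a "File <path>" line
data = b"File foo\nSize 10\nFile bar\nSize 20\n"
records = list(SimpleFlatExtractor(BytesIO(data)).iterate())
assert records == [b"File foo\nSize 10\n", b"File bar\nSize 20\n"]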
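
FlatFile's write path buffers records and writes them in batches of _max_buffer_size because many small gzip writes are slow, then flushes, fsyncs and closes in close_file. The toy writer below mirrors that pattern under simplified assumptions: ToyFlatWriter and its record format are invented for illustration, it is an ordinary instance rather than an all-classmethod class, and it opens the file with gzip.open instead of going through rpath and Globals.rbdir.

import gzip, os, tempfile

class ToyFlatWriter(object):
    """Sketch of FlatFile's write side: buffer records in memory and write
    them in batches so each (slow) gzip write covers many records."""
    _max_buffer_size = 100  # flush after this many records, as in the patch

    def __init__(self, path, compress=1):
        if compress: self._fileobj = gzip.open(path, "wb")
        else: self._fileobj = open(path, "wb")
        self._record_buffer = []

    def _object_to_record(self, obj):
        """Toy record format: a 'File <name>' line plus one 'key value' line each"""
        name, attrs = obj
        lines = ["File %s\n" % name]
        lines.extend(["%s %s\n" % item for item in sorted(attrs.items())])
        return "".join(lines).encode("ascii")

    def write_object(self, obj):
        self._record_buffer.append(self._object_to_record(obj))
        if len(self._record_buffer) >= self._max_buffer_size: self._flush()

    def _flush(self):
        self._fileobj.write(b"".join(self._record_buffer))
        self._record_buffer = []

    def close(self):
        """Flush the buffer, fsync, and close, mirroring close_file()"""
        if self._record_buffer: self._flush()
        self._fileobj.flush()
        # modern GzipFile exposes fileno(); the patch falls back to
        # .fileobj.fileno() for gzip objects that lack it
        os.fsync(self._fileobj.fileno())
        return self._fileobj.close()

# Usage: write a couple of toy records to a compressed flat file
path = os.path.join(tempfile.mkdtemp(), "toy_metadata.snapshot.gz")
w = ToyFlatWriter(path)
w.write_object(("foo", {"Size": 10}))
w.write_object(("bar", {"Size": 20}))
w.close()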
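
Every FlatFile method takes cls as its first parameter, and the static.MakeClass(FlatFile) call is what lets subclasses be used without instantiation, e.g. MetadataFile.open_file() followed by MetadataFile.write_object(rorp) and MetadataFile.close_file(). static.py is not shown in this diff, so the following is only a guess at MakeClass, inferred from that calling convention: wrap each plain function in classmethod so state such as _rp, _fileobj and _record_buffer lives on the class object itself.

import types

def MakeClass(cls):
    """Hypothetical stand-in for static.MakeClass (the real static.py is not
    in this diff): wrap each plain function defined on cls in classmethod()
    so the methods can be called directly on the class or a subclass."""
    for name, value in list(vars(cls).items()):
        if isinstance(value, types.FunctionType):
            setattr(cls, name, classmethod(value))
    return cls

# Tiny demonstration of the resulting calling convention
class Counter:
    _count = 0
    def bump(cls):
        cls._count += 1
        return cls._count

MakeClass(Counter)
assert Counter.bump() == 1 and Counter.bump() == 2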