From 5e09353fe5ce80b724a30db11cb10523d147c6ec Mon Sep 17 00:00:00 2001 From: bescoto Date: Mon, 30 Jun 2003 03:00:18 +0000 Subject: Many changes - added extended attribute support and file system ability detection git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@334 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/metadata.py | 222 ++++++++++++++++++++-------------- 1 file changed, 133 insertions(+), 89 deletions(-) (limited to 'rdiff-backup/rdiff_backup/metadata.py') diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index f1f23ac..3d8ba60 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -56,7 +56,7 @@ field names and values. from __future__ import generators import re, gzip, os -import log, Globals, rpath, Time, robust, increment +import log, Globals, rpath, Time, robust, increment, static class ParsingError(Exception): """This is raised when bad or unparsable data is received""" @@ -165,16 +165,14 @@ def unquote_path(quoted_string): return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string) -def write_rorp_iter_to_file(rorp_iter, file): - """Given iterator of RORPs, write records to (pre-opened) file object""" - for rorp in rorp_iter: file.write(RORP2Record(rorp)) - -class rorp_extractor: - """Controls iterating rorps from metadata file""" +class FlatExtractor: + """Controls iterating objects from flat file""" + # The following two should be set in subclasses + record_boundary_regexp = None # Matches beginning of next record + record_to_object = None # Function that converts text record to object def __init__(self, fileobj): self.fileobj = fileobj # holds file object we are reading from self.buf = "" # holds the next part of the file - self.record_boundary_regexp = re.compile("\\nFile") self.at_end = 0 # True if we are at the end of the file self.blocksize = 32 * 1024 @@ -191,12 +189,13 @@ class rorp_extractor: else: self.buf += newbuf def iterate(self): - """Return iterator over all records""" + """Return iterator that yields all objects with records""" while 1: next_pos = self.get_next_pos() - try: yield Record2RORP(self.buf[:next_pos]) + try: yield self.record_to_object(self.buf[:next_pos]) except ParsingError, e: - log.Log("Error parsing metadata file: %s" % (e,), 2) + if self.at_end: break # Ignore whitespace/bad records at end + log.Log("Error parsing flat file: %s" % (e,), 2) if self.at_end: break self.buf = self.buf[next_pos:] assert not self.close() @@ -209,15 +208,7 @@ class rorp_extractor: """ assert not self.buf or self.buf.endswith("\n") - if not index: indexpath = "." - else: indexpath = "/".join(index) - # Must double all backslashes, because they will be - # reinterpreted. For instance, to search for index \n - # (newline), it will be \\n (backslash n) in the file, so the - # regular expression is "File \\\\n\\n" (File two backslash n - # backslash n) - double_quote = re.sub("\\\\", "\\\\\\\\", indexpath) - begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,)) + begin_re = self.get_index_re(index) while 1: m = begin_re.search(self.buf) if m: @@ -229,18 +220,28 @@ class rorp_extractor: self.at_end = 1 return + def get_index_re(self, index): + """Return regular expression used to find index. + + Override this in sub classes. The regular expression's second + group needs to start at the beginning of the record that + contains information about the object with the given index. 
+ + """ + assert 0, "Just a placeholder, must override this in subclasses" + def iterate_starting_with(self, index): - """Iterate records whose index starts with given index""" + """Iterate objects whose index starts with given index""" self.skip_to_index(index) if self.at_end: return while 1: next_pos = self.get_next_pos() - try: rorp = Record2RORP(self.buf[:next_pos]) + try: obj = self.record_to_object(self.buf[:next_pos]) except ParsingError, e: log.Log("Error parsing metadata file: %s" % (e,), 2) else: - if rorp.index[:len(index)] != index: break - yield rorp + if obj.index[:len(index)] != index: break + yield obj if self.at_end: break self.buf = self.buf[next_pos:] assert not self.close() @@ -249,73 +250,116 @@ class rorp_extractor: """Return value of closing associated file""" return self.fileobj.close() +class RorpExtractor(FlatExtractor): + """Iterate rorps from metadata file""" + record_boundary_regexp = re.compile("\\nFile") + record_to_object = staticmethod(Record2RORP) + def get_index_re(self, index): + """Find start of rorp record with given index""" + indexpath = index and '/'.join(index) or '.' + # Must double all backslashes, because they will be + # reinterpreted. For instance, to search for index \n + # (newline), it will be \\n (backslash n) in the file, so the + # regular expression is "File \\\\n\\n" (File two backslash n + # backslash n) + double_quote = re.sub("\\\\", "\\\\\\\\", indexpath) + return re.compile("(^|\\n)(File %s\\n)" % (double_quote,)) -metadata_rp = None -metadata_fileobj = None -metadata_record_buffer = [] # Use this because gzip writes are slow -def OpenMetadata(rp = None, compress = 1): - """Open the Metadata file for writing, return metadata fileobj""" - global metadata_rp, metadata_fileobj - assert not metadata_fileobj, "Metadata file already open" - if rp: metadata_rp = rp - else: - if compress: typestr = 'snapshot.gz' - else: typestr = 'snapshot' - metadata_rp = Globals.rbdir.append("mirror_metadata.%s.%s" % - (Time.curtimestr, typestr)) - metadata_fileobj = metadata_rp.open("wb", compress = compress) - -def WriteMetadata(rorp): - """Write metadata of rorp to file""" - global metadata_fileobj, metadata_record_buffer - metadata_record_buffer.append(RORP2Record(rorp)) - if len(metadata_record_buffer) >= 100: write_metadata_buffer() - -def write_metadata_buffer(): - global metadata_record_buffer - metadata_fileobj.write("".join(metadata_record_buffer)) - metadata_record_buffer = [] - -def CloseMetadata(): - """Close the metadata file""" - global metadata_rp, metadata_fileobj - assert metadata_fileobj, "Metadata file not open" - if metadata_record_buffer: write_metadata_buffer() - try: fileno = metadata_fileobj.fileno() # will not work if GzipFile - except AttributeError: fileno = metadata_fileobj.fileobj.fileno() - os.fsync(fileno) - result = metadata_fileobj.close() - metadata_fileobj = None - metadata_rp.setdata() - return result - -def GetMetadata(rp, restrict_index = None, compressed = None): - """Return iterator of metadata from given metadata file rp""" - if compressed is None: - if rp.isincfile(): - compressed = rp.inc_compressed - assert rp.inc_type == "data" or rp.inc_type == "snapshot" - else: compressed = rp.get_indexpath().endswith(".gz") - - fileobj = rp.open("rb", compress = compressed) - if restrict_index is None: return rorp_extractor(fileobj).iterate() - else: return rorp_extractor(fileobj).iterate_starting_with(restrict_index) - -def GetMetadata_at_time(rbdir, time, restrict_index = None, rblist = None): - """Scan through rbdir, 
finding metadata file at given time, iterate - - If rdlist is given, use that instead of listing rddir. Time here - is exact, we don't take the next one older or anything. Returns - None if no matching metadata found. + +class FlatFile: + """Manage a flat (probably text) file containing info on various files + + This is used for metadata information, and possibly EAs and ACLs. + The main read interface is as an iterator. The storage format is + a flat, probably compressed file, so random access is not + recommended. """ - if rblist is None: rblist = map(lambda x: rbdir.append(x), - robust.listrp(rbdir)) - for rp in rblist: - if (rp.isincfile() and - (rp.getinctype() == "data" or rp.getinctype() == "snapshot") and - rp.getincbase_str() == "mirror_metadata"): - if rp.getinctime() == time: return GetMetadata(rp, restrict_index) - return None + _prefix = None # Set this to real prefix when subclassing + _rp, _fileobj = None, None + # Buffering may be useful because gzip writes are slow + _buffering_on = 1 + _record_buffer, _max_buffer_size = None, 100 + _extractor = FlatExtractor # Set to class that iterates objects + + def open_file(cls, rp = None, compress = 1): + """Open file for writing. Use cls._rp if rp not given.""" + assert not cls._fileobj, "Flatfile already open" + cls._record_buffer = [] + if rp: cls._rp = rp + else: + if compress: typestr = 'snapshot.gz' + else: typestr = 'snapshot' + cls._rp = Globals.rbdir.append( + "%s.%s.%s" % (cls._prefix, Time.curtimestr, typestr)) + cls._fileobj = cls._rp.open("wb", compress = compress) + + def write_object(cls, object): + """Convert one object to record and write to file""" + record = cls._object_to_record(object) + if cls._buffering_on: + cls._record_buffer.append(record) + if len(cls._record_buffer) >= cls._max_buffer_size: + cls._fileobj.write("".join(cls._record_buffer)) + cls._record_buffer = [] + else: cls._fileobj.write(record) + + def close_file(cls): + """Close file, for when any writing is done""" + assert cls._fileobj, "File already closed" + if cls._buffering_on and cls._record_buffer: + cls._fileobj.write("".join(cls._record_buffer)) + cls._record_buffer = [] + try: fileno = cls._fileobj.fileno() # will not work if GzipFile + except AttributeError: fileno = cls._fileobj.fileobj.fileno() + os.fsync(fileno) + result = cls._fileobj.close() + cls._fileobj = None + cls._rp.setdata() + return result + + def get_objects(cls, restrict_index = None, compressed = None): + """Return iterator of objects records from file rp""" + assert cls._rp, "Must have rp set before get_objects can be used" + if compressed is None: + if cls._rp.isincfile(): + compressed = cls._rp.inc_compressed + assert (cls._rp.inc_type == 'data' or + cls._rp.inc_type == 'snapshot'), cls._rp.inc_type + else: compressed = cls._rp.get_indexpath().endswith('.gz') + + fileobj = cls._rp.open('rb', compress = compressed) + if restrict_index is None: return cls._extractor(fileobj).iterate() + else: + re = cls._extractor(fileobj) + return re.iterate_starting_with(restrict_index) + + def get_objects_at_time(cls, rbdir, time, restrict_index = None, + rblist = None): + """Scan through rbdir, finding data at given time, iterate + + If rblist is givenr, use that instead of listing rbdir. Time + here is exact, we don't take the next one older or anything. + Returns None if no file matching prefix is found. 
+        """
+        if rblist is None:
+            rblist = map(lambda x: rbdir.append(x), robust.listrp(rbdir))
+
+        for rp in rblist:
+            if (rp.isincfile() and
+                (rp.getinctype() == "data" or rp.getinctype() == "snapshot")
+                and rp.getincbase_str() == cls._prefix):
+                if rp.getinctime() == time:
+                    cls._rp = rp
+                    return cls.get_objects(restrict_index)
+        return None
+
+static.MakeClass(FlatFile)
+
+class MetadataFile(FlatFile):
+    """Store/retrieve metadata from mirror_metadata as rorps"""
+    _prefix = "mirror_metadata"
+    _extractor = RorpExtractor
+    _object_to_record = staticmethod(RORP2Record)
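
Note on the new FlatExtractor (sketch, not part of the patch): it generalizes the old rorp_extractor by reading the flat file in 32 KB blocks, locating the start of the next record with record_boundary_regexp, and handing each raw record to record_to_object. The standalone sketch below, with the made-up name SimpleFlatExtractor, illustrates only that buffer-and-split loop and yields raw byte records instead of parsed objects; embedded newlines are quoted in the real format (quote_path/unquote_path), which is what keeps the "\nFile" boundary unambiguous.

import re
from io import BytesIO

class SimpleFlatExtractor(object):
    """Sketch of FlatExtractor's buffering strategy: read the flat file in
    blocks and cut it into records wherever the boundary regexp marks the
    start of the next record."""
    record_boundary_regexp = re.compile(b"\\nFile")  # start of next record
    blocksize = 32 * 1024

    def __init__(self, fileobj):
        self.fileobj = fileobj   # file object we are reading from
        self.buf = b""           # unconsumed part of the file
        self.at_end = 0          # true once read() returns nothing

    def _fill(self):
        """Read one more block into the buffer, noting end of file"""
        newbuf = self.fileobj.read(self.blocksize)
        if not newbuf: self.at_end = 1
        else: self.buf += newbuf

    def _next_record_length(self):
        """Return length of the record at the front of self.buf"""
        while 1:
            m = self.record_boundary_regexp.search(self.buf)
            if m: return m.start(0) + 1  # keep trailing newline with this record
            self._fill()                 # boundary not buffered yet, read more
            if self.at_end: return len(self.buf)

    def iterate(self):
        """Yield one raw record (bytes) at a time"""
        while 1:
            length = self._next_record_length()
            record = self.buf[:length]
            if record.strip(): yield record  # skip a whitespace-only tail at EOF
            if self.at_end: break
            self.buf = self.buf[length:]
        self.fileobj.close()

# Example: two records, each introduced by a "File <path>" line
data = b"File foo\nSize 10\nFile bar\nSize 20\n"
records = list(SimpleFlatExtractor(BytesIO(data)).iterate())
assert records == [b"File foo\nSize 10\n", b"File bar\nSize 20\n"]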
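
FlatFile's write path buffers records and writes them in batches of _max_buffer_size because many small gzip writes are slow, then flushes, fsyncs and closes in close_file. The toy writer below mirrors that pattern under simplified assumptions: ToyFlatWriter and its record format are invented for illustration, it is an ordinary instance rather than an all-classmethod class, and it opens the file with gzip.open instead of going through rpath and Globals.rbdir.

import gzip, os, tempfile

class ToyFlatWriter(object):
    """Sketch of FlatFile's write side: buffer records in memory and write
    them in batches so each (slow) gzip write covers many records."""
    _max_buffer_size = 100  # flush after this many records, as in the patch

    def __init__(self, path, compress=1):
        if compress: self._fileobj = gzip.open(path, "wb")
        else: self._fileobj = open(path, "wb")
        self._record_buffer = []

    def _object_to_record(self, obj):
        """Toy record format: a 'File <name>' line plus one 'key value' line each"""
        name, attrs = obj
        lines = ["File %s\n" % name]
        lines.extend(["%s %s\n" % item for item in sorted(attrs.items())])
        return "".join(lines).encode("ascii")

    def write_object(self, obj):
        self._record_buffer.append(self._object_to_record(obj))
        if len(self._record_buffer) >= self._max_buffer_size: self._flush()

    def _flush(self):
        self._fileobj.write(b"".join(self._record_buffer))
        self._record_buffer = []

    def close(self):
        """Flush the buffer, fsync, and close, mirroring close_file()"""
        if self._record_buffer: self._flush()
        self._fileobj.flush()
        # modern GzipFile exposes fileno(); the patch falls back to
        # .fileobj.fileno() for gzip objects that lack it
        os.fsync(self._fileobj.fileno())
        return self._fileobj.close()

# Usage: write a couple of toy records to a compressed flat file
path = os.path.join(tempfile.mkdtemp(), "toy_metadata.snapshot.gz")
w = ToyFlatWriter(path)
w.write_object(("foo", {"Size": 10}))
w.write_object(("bar", {"Size": 20}))
w.close()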
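
Every FlatFile method takes cls as its first parameter, and the static.MakeClass(FlatFile) call is what lets subclasses be used without instantiation, e.g. MetadataFile.open_file() followed by MetadataFile.write_object(rorp) and MetadataFile.close_file(). static.py is not shown in this diff, so the following is only a guess at MakeClass, inferred from that calling convention: wrap each plain function in classmethod so state such as _rp, _fileobj and _record_buffer lives on the class object itself.

import types

def MakeClass(cls):
    """Hypothetical stand-in for static.MakeClass (the real static.py is not
    in this diff): wrap each plain function defined on cls in classmethod()
    so the methods can be called directly on the class or a subclass."""
    for name, value in list(vars(cls).items()):
        if isinstance(value, types.FunctionType):
            setattr(cls, name, classmethod(value))
    return cls

# Tiny demonstration of the resulting calling convention
class Counter:
    _count = 0
    def bump(cls):
        cls._count += 1
        return cls._count

MakeClass(Counter)
assert Counter.bump() == 1 and Counter.bump() == 2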