author:    bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>  2003-06-30 03:00:18 +0000
committer: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>  2003-06-30 03:00:18 +0000
commit:    5e09353fe5ce80b724a30db11cb10523d147c6ec (patch)
tree:      083998f6893121da5d651ee7a475fde3372c78f9 /rdiff-backup/rdiff_backup/metadata.py
parent:    46afa3e6b3aacfe05d1586a583a2606cc139f658 (diff)
download:  rdiff-backup-5e09353fe5ce80b724a30db11cb10523d147c6ec.tar.gz
Many changes - added extended attribute support and file system ability detection.

git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@334 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup/metadata.py')
-rw-r--r--  rdiff-backup/rdiff_backup/metadata.py | 222
1 file changed, 133 insertions(+), 89 deletions(-)
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index f1f23ac..3d8ba60 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -56,7 +56,7 @@ field names and values.
from __future__ import generators
import re, gzip, os
-import log, Globals, rpath, Time, robust, increment
+import log, Globals, rpath, Time, robust, increment, static
class ParsingError(Exception):
"""This is raised when bad or unparsable data is received"""
@@ -165,16 +165,14 @@ def unquote_path(quoted_string):
return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string)
-def write_rorp_iter_to_file(rorp_iter, file):
- """Given iterator of RORPs, write records to (pre-opened) file object"""
- for rorp in rorp_iter: file.write(RORP2Record(rorp))
-
-class rorp_extractor:
- """Controls iterating rorps from metadata file"""
+class FlatExtractor:
+ """Controls iterating objects from flat file"""
+ # The following two should be set in subclasses
+ record_boundary_regexp = None # Matches beginning of next record
+ record_to_object = None # Function that converts text record to object
def __init__(self, fileobj):
self.fileobj = fileobj # holds file object we are reading from
self.buf = "" # holds the next part of the file
- self.record_boundary_regexp = re.compile("\\nFile")
self.at_end = 0 # True if we are at the end of the file
self.blocksize = 32 * 1024
@@ -191,12 +189,13 @@ class rorp_extractor:
else: self.buf += newbuf
def iterate(self):
- """Return iterator over all records"""
+ """Return iterator that yields all objects with records"""
while 1:
next_pos = self.get_next_pos()
- try: yield Record2RORP(self.buf[:next_pos])
+ try: yield self.record_to_object(self.buf[:next_pos])
except ParsingError, e:
- log.Log("Error parsing metadata file: %s" % (e,), 2)
+ if self.at_end: break # Ignore whitespace/bad records at end
+ log.Log("Error parsing flat file: %s" % (e,), 2)
if self.at_end: break
self.buf = self.buf[next_pos:]
assert not self.close()
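
Note that get_next_pos itself is unchanged by this patch and does not appear above; only its buffer-refill tail ("else: self.buf += newbuf") is visible as context. A sketch of what it presumably looks like, reconstructed around the record_boundary_regexp and blocksize attributes set in __init__ (the body below is an assumption, not part of the patch):

    def get_next_pos(self):
        """Return position in self.buf of the next record boundary"""
        while 1:
            # Search from offset 1 so the boundary that opens the
            # current record is not matched again.
            m = self.record_boundary_regexp.search(self.buf, 1)
            if m: return m.start(0) + 1
            # No boundary in the buffer yet: read another block,
            # or mark end-of-file and hand back whatever remains.
            newbuf = self.fileobj.read(self.blocksize)
            if not newbuf:
                self.at_end = 1
                return len(self.buf)
            else: self.buf += newbuf
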
@@ -209,15 +208,7 @@ class rorp_extractor:
"""
assert not self.buf or self.buf.endswith("\n")
- if not index: indexpath = "."
- else: indexpath = "/".join(index)
- # Must double all backslashes, because they will be
- # reinterpreted. For instance, to search for index \n
- # (newline), it will be \\n (backslash n) in the file, so the
- # regular expression is "File \\\\n\\n" (File two backslash n
- # backslash n)
- double_quote = re.sub("\\\\", "\\\\\\\\", indexpath)
- begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,))
+ begin_re = self.get_index_re(index)
while 1:
m = begin_re.search(self.buf)
if m:
@@ -229,18 +220,28 @@ class rorp_extractor:
self.at_end = 1
return
+ def get_index_re(self, index):
+ """Return regular expression used to find index.
+
+ Override this in subclasses. The regular expression's second
+ group needs to start at the beginning of the record that
+ contains information about the object with the given index.
+
+ """
+ assert 0, "Just a placeholder, must override this in subclasses"
+
def iterate_starting_with(self, index):
- """Iterate records whose index starts with given index"""
+ """Iterate objects whose index starts with given index"""
self.skip_to_index(index)
if self.at_end: return
while 1:
next_pos = self.get_next_pos()
- try: rorp = Record2RORP(self.buf[:next_pos])
+ try: obj = self.record_to_object(self.buf[:next_pos])
except ParsingError, e:
log.Log("Error parsing metadata file: %s" % (e,), 2)
else:
- if rorp.index[:len(index)] != index: break
- yield rorp
+ if obj.index[:len(index)] != index: break
+ yield obj
if self.at_end: break
self.buf = self.buf[next_pos:]
assert not self.close()
@@ -249,73 +250,116 @@ class rorp_extractor:
"""Return value of closing associated file"""
return self.fileobj.close()
+class RorpExtractor(FlatExtractor):
+ """Iterate rorps from metadata file"""
+ record_boundary_regexp = re.compile("\\nFile")
+ record_to_object = staticmethod(Record2RORP)
+ def get_index_re(self, index):
+ """Find start of rorp record with given index"""
+ indexpath = index and '/'.join(index) or '.'
+ # Must double all backslashes, because they will be
+ # reinterpreted. For instance, to search for index \n
+ # (newline), it will be \\n (backslash n) in the file, so the
+ # regular expression is "File \\\\n\\n" (File two backslash n
+ # backslash n)
+ double_quote = re.sub("\\\\", "\\\\\\\\", indexpath)
+ return re.compile("(^|\\n)(File %s\\n)" % (double_quote,))
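
The backslash-doubling comment above is easy to misread, so here is a small standalone illustration (the path "some\nfile" is invented for this example; it is the quoted, on-disk form, i.e. a literal backslash followed by n):

    import re

    quoted = "some\\nfile"   # backslash-n: a quoted newline, as stored in the file
    doubled = re.sub("\\\\", "\\\\\\\\", quoted)   # each backslash becomes two
    begin_re = re.compile("(^|\\n)(File %s\\n)" % (doubled,))
    # Without the doubling, the pattern would contain a bare \n and the
    # regexp engine would look for a real newline instead of the literal
    # backslash-n in the record header.
    assert begin_re.search("\nFile some\\nfile\n")
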
-metadata_rp = None
-metadata_fileobj = None
-metadata_record_buffer = [] # Use this because gzip writes are slow
-def OpenMetadata(rp = None, compress = 1):
- """Open the Metadata file for writing, return metadata fileobj"""
- global metadata_rp, metadata_fileobj
- assert not metadata_fileobj, "Metadata file already open"
- if rp: metadata_rp = rp
- else:
- if compress: typestr = 'snapshot.gz'
- else: typestr = 'snapshot'
- metadata_rp = Globals.rbdir.append("mirror_metadata.%s.%s" %
- (Time.curtimestr, typestr))
- metadata_fileobj = metadata_rp.open("wb", compress = compress)
-
-def WriteMetadata(rorp):
- """Write metadata of rorp to file"""
- global metadata_fileobj, metadata_record_buffer
- metadata_record_buffer.append(RORP2Record(rorp))
- if len(metadata_record_buffer) >= 100: write_metadata_buffer()
-
-def write_metadata_buffer():
- global metadata_record_buffer
- metadata_fileobj.write("".join(metadata_record_buffer))
- metadata_record_buffer = []
-
-def CloseMetadata():
- """Close the metadata file"""
- global metadata_rp, metadata_fileobj
- assert metadata_fileobj, "Metadata file not open"
- if metadata_record_buffer: write_metadata_buffer()
- try: fileno = metadata_fileobj.fileno() # will not work if GzipFile
- except AttributeError: fileno = metadata_fileobj.fileobj.fileno()
- os.fsync(fileno)
- result = metadata_fileobj.close()
- metadata_fileobj = None
- metadata_rp.setdata()
- return result
-
-def GetMetadata(rp, restrict_index = None, compressed = None):
- """Return iterator of metadata from given metadata file rp"""
- if compressed is None:
- if rp.isincfile():
- compressed = rp.inc_compressed
- assert rp.inc_type == "data" or rp.inc_type == "snapshot"
- else: compressed = rp.get_indexpath().endswith(".gz")
-
- fileobj = rp.open("rb", compress = compressed)
- if restrict_index is None: return rorp_extractor(fileobj).iterate()
- else: return rorp_extractor(fileobj).iterate_starting_with(restrict_index)
-
-def GetMetadata_at_time(rbdir, time, restrict_index = None, rblist = None):
- """Scan through rbdir, finding metadata file at given time, iterate
-
- If rdlist is given, use that instead of listing rddir. Time here
- is exact, we don't take the next one older or anything. Returns
- None if no matching metadata found.
+
+class FlatFile:
+ """Manage a flat (probably text) file containing info on various files
+
+ This is used for metadata information, and possibly EAs and ACLs.
+ The main read interface is as an iterator. The storage format is
+ a flat, probably compressed file, so random access is not
+ recommended.
"""
- if rblist is None: rblist = map(lambda x: rbdir.append(x),
- robust.listrp(rbdir))
- for rp in rblist:
- if (rp.isincfile() and
- (rp.getinctype() == "data" or rp.getinctype() == "snapshot") and
- rp.getincbase_str() == "mirror_metadata"):
- if rp.getinctime() == time: return GetMetadata(rp, restrict_index)
- return None
+ _prefix = None # Set this to real prefix when subclassing
+ _rp, _fileobj = None, None
+ # Buffering may be useful because gzip writes are slow
+ _buffering_on = 1
+ _record_buffer, _max_buffer_size = None, 100
+ _extractor = FlatExtractor # Set to class that iterates objects
+
+ def open_file(cls, rp = None, compress = 1):
+ """Open file for writing. Use cls._rp if rp not given."""
+ assert not cls._fileobj, "Flatfile already open"
+ cls._record_buffer = []
+ if rp: cls._rp = rp
+ else:
+ if compress: typestr = 'snapshot.gz'
+ else: typestr = 'snapshot'
+ cls._rp = Globals.rbdir.append(
+ "%s.%s.%s" % (cls._prefix, Time.curtimestr, typestr))
+ cls._fileobj = cls._rp.open("wb", compress = compress)
+
+ def write_object(cls, obj):
+ """Convert one object to record and write to file"""
+ record = cls._object_to_record(obj)
+ if cls._buffering_on:
+ cls._record_buffer.append(record)
+ if len(cls._record_buffer) >= cls._max_buffer_size:
+ cls._fileobj.write("".join(cls._record_buffer))
+ cls._record_buffer = []
+ else: cls._fileobj.write(record)
+
+ def close_file(cls):
+ """Close file, for when any writing is done"""
+ assert cls._fileobj, "File already closed"
+ if cls._buffering_on and cls._record_buffer:
+ cls._fileobj.write("".join(cls._record_buffer))
+ cls._record_buffer = []
+ try: fileno = cls._fileobj.fileno() # will not work if GzipFile
+ except AttributeError: fileno = cls._fileobj.fileobj.fileno()
+ os.fsync(fileno)
+ result = cls._fileobj.close()
+ cls._fileobj = None
+ cls._rp.setdata()
+ return result
+
+ def get_objects(cls, restrict_index = None, compressed = None):
+ """Return iterator of objects records from file rp"""
+ assert cls._rp, "Must have rp set before get_objects can be used"
+ if compressed is None:
+ if cls._rp.isincfile():
+ compressed = cls._rp.inc_compressed
+ assert (cls._rp.inc_type == 'data' or
+ cls._rp.inc_type == 'snapshot'), cls._rp.inc_type
+ else: compressed = cls._rp.get_indexpath().endswith('.gz')
+
+ fileobj = cls._rp.open('rb', compress = compressed)
+ if restrict_index is None: return cls._extractor(fileobj).iterate()
+ else:
+ extractor = cls._extractor(fileobj)
+ return extractor.iterate_starting_with(restrict_index)
+
+ def get_objects_at_time(cls, rbdir, time, restrict_index = None,
+ rblist = None):
+ """Scan through rbdir, finding data at given time, iterate
+
+ If rblist is given, use that instead of listing rbdir. Time
+ here is exact; we don't take the next one older or anything.
+ Returns None if no file matching the prefix is found.
+ """
+ if rblist is None:
+ rblist = map(lambda x: rbdir.append(x), robust.listrp(rbdir))
+
+ for rp in rblist:
+ if (rp.isincfile() and
+ (rp.getinctype() == "data" or rp.getinctype() == "snapshot")
+ and rp.getincbase_str() == cls._prefix):
+ if rp.getinctime() == time:
+ cls._rp = rp
+ return cls.get_objects(restrict_index)
+ return None
+
+static.MakeClass(FlatFile)
+
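
static.MakeClass is not shown in this diff; judging by its use here, it presumably rebinds FlatFile's plain methods as classmethods, so that subclasses can be driven without instantiation (MetadataFile.open_file() and so on). A rough, purely illustrative equivalent under that assumption:

    import types

    def MakeClass(cls):
        # Hypothetical sketch: wrap each plain function on the class as
        # a classmethod; staticmethod attributes are left untouched.
        for name, attr in list(vars(cls).items()):
            if isinstance(attr, types.FunctionType):
                setattr(cls, name, classmethod(attr))
        return cls
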
+class MetadataFile(FlatFile):
+ """Store/retrieve metadata from mirror_metadata as rorps"""
+ _prefix = "mirror_metadata"
+ _extractor = RorpExtractor
+ _object_to_record = staticmethod(RORP2Record)
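
Net effect: the module-level OpenMetadata/WriteMetadata/CloseMetadata and GetMetadata* functions are replaced by classmethods on MetadataFile. A hedged usage sketch (rorp_iter, backup_time, and restore_from are placeholders, not names from this patch):

    MetadataFile.open_file()              # creates mirror_metadata.<time>.snapshot.gz
    for rorp in rorp_iter:
        MetadataFile.write_object(rorp)   # buffered; flushed every 100 records
    MetadataFile.close_file()             # flush the remainder, fsync, close

    rorp_iter = MetadataFile.get_objects_at_time(Globals.rbdir, backup_time)
    if rorp_iter is not None:             # None when no matching file exists
        for rorp in rorp_iter: restore_from(rorp)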