diff options
-rw-r--r-- | rdiff-backup/rdiff_backup/metadata.py | 140 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/rpath.py | 26 | ||||
-rw-r--r-- | rdiff-backup/testing/metadatatest.py | 88 |
3 files changed, 247 insertions, 7 deletions
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index b596f76..50a7704 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -54,7 +54,9 @@ field names and values. """ -import re, log, Globals, rpath +from __future__ import generators +import re, gzip +from rdiff_backup import log, Globals, rpath, Time class ParsingError(Exception): """This is raised when bad or unparsable data is received""" @@ -169,3 +171,139 @@ def unquote_path(quoted_string): log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2) return two_chars return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string) + + +def write_rorp_iter_to_file(rorp_iter, file): + """Given iterator of RORPs, write records to (pre-opened) file object""" + for rorp in rorp_iter: file.write(RORP2Record(rorp)) + +class rorp_extractor: + """Controls iterating rorps from metadata file""" + def __init__(self, fileobj): + self.fileobj = fileobj # holds file object we are reading from + self.buf = "" # holds the next part of the file + self.record_boundary_regexp = re.compile("\\nFile") + self.at_end = 0 # True if we are at the end of the file + self.blocksize = 32 * 1024 + + def get_next_pos(self): + """Return position of next record in buffer""" + while 1: + m = self.record_boundary_regexp.search(self.buf) + if m: return m.start(0)+1 # the +1 skips the newline + else: # add next block to the buffer, loop again + newbuf = self.fileobj.read(self.blocksize) + if not newbuf: + self.at_end = 1 + return len(self.buf) + else: self.buf += newbuf + + def iterate(self): + """Return iterator over all records""" + while 1: + next_pos = self.get_next_pos() + try: yield Record2RORP(self.buf[:next_pos]) + except ParsingError, e: + log.Log("Error parsing metadata file: %s" % (e,), 2) + if self.at_end: break + self.buf = self.buf[next_pos:] + + def skip_to_index(self, index): + """Scan through the file, set buffer to beginning of index record + + Here we make sure that the buffer always ends in a newline, so + we will not be splitting lines in half. + + """ + assert not self.buf or self.buf.endswith("\n") + if not index: indexpath = "." + else: indexpath = "/".join(index) + # Must double all backslashes, because they will be + # reinterpreted. For instance, to search for index \n + # (newline), it will be \\n (backslash n) in the file, so the + # regular expression is "File \\\\n\\n" (File two backslash n + # backslash n) + double_quote = re.sub("\\\\", "\\\\\\\\", indexpath) + begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,)) + while 1: + m = begin_re.search(self.buf) + if m: + self.buf = self.buf[m.start(2):] + return + self.buf = self.fileobj.read(self.blocksize) + self.buf += self.fileobj.readline() + if not self.buf: + self.at_end = 1 + return + + def iterate_starting_with(self, index): + """Iterate records whose index starts with given index""" + self.skip_to_index(index) + if self.at_end: return + while 1: + next_pos = self.get_next_pos() + try: rorp = Record2RORP(self.buf[:next_pos]) + except ParsingError, e: + log.Log("Error parsing metadata file: %s" % (e,), 2) + else: + if rorp.index[:len(index)] != index: break + yield rorp + if self.at_end: break + self.buf = self.buf[next_pos:] + + def close(self): + """Return value of closing associated file""" + return self.fileobj.close() + + +metadata_rp = None +metadata_fileobj = None +def OpenMetadata(rp = None, compress = 1): + """Open the Metadata file for writing""" + global metadata_filename, metadata_fileobj + assert not metadata_fileobj, "Metadata file already open" + if rp: metadata_rp = rp + else: metadata_rp = Globals.rbdir.append("mirror_metadata.%s.data.gz" % + (Time.curtimestr,)) + metadata_fileobj = metadata_rp.open("wb", compress = compress) + +def WriteMetadata(rorp): + """Write metadata of rorp to file""" + global metadata_fileobj + metadata_fileobj.write(RORP2Record(rorp)) + +def CloseMetadata(): + """Close the metadata file""" + global metadata_fileobj + result = metadata_fileobj.close() + metadata_fileobj = None + metadata_rp.setdata() + return result + +def GetMetadata(rp = None, restrict_index = None, compressed = None): + """Return iterator of metadata from given metadata file rp""" + if compressed is None: + if rp.isincfile(): + compressed = rp.inc_compressed + assert rp.inc_type == "data", rp.inc_type + else: compressed = rp.get_indexpath().endswith(".gz") + + fileobj = rp.open("rb", compress = compressed) + if restrict_index is None: return rorp_extractor(fileobj).iterate() + else: return rorp_extractor(fileobj).iterate_starting_with(restrict_index) + +def GetMetadata_at_time(rpdir, time, restrict_index = None, rplist = None): + """Scan through rpdir, finding metadata file at given time, iterate + + If rplist is given, use that instead of listing rpdir. Time here + is exact, we don't take the next one older or anything. Returns + None if no matching metadata found. + + """ + if rplist is None: rplist = map(lambda x: rpdir.append(x), rpdir.listdir()) + for rp in rplist: + if (rp.isincfile() and rp.getinctype() == "data" and + rp.getincbase_str() == "mirror_metadata"): + if Time.stringtotime(rp.getinctime()) == time: + return GetMetadata(rp, restrict_index) + return None diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index cfc0688..9d54872 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -271,11 +271,35 @@ class RORPath(RPathStatic): pass elif key == 'atime' and not Globals.preserve_atime: pass elif key == 'devloc' or key == 'inode' or key == 'nlink': pass - elif key == 'size' and self.isdir(): pass + elif key == 'size' and not self.isreg(): + pass # size only matters for regular files elif (not other.data.has_key(key) or self.data[key] != other.data[key]): return None return 1 + def equal_verbose(self, other): + """Like __eq__, but log more information. Useful when testing""" + if self.index != other.index: + Log("Index %s != index %s" % (self.index, other.index), 2) + return None + + for key in self.data.keys(): # compare dicts key by key + if ((key == 'uid' or key == 'gid') and + (not Globals.change_ownership or self.issym())): + # Don't compare gid/uid for symlinks or if not change_ownership + pass + elif key == 'atime' and not Globals.preserve_atime: pass + elif key == 'devloc' or key == 'inode' or key == 'nlink': pass + elif key == 'size' and not self.isreg(): pass + elif (not other.data.has_key(key) or + self.data[key] != other.data[key]): + if not other.data.has_key(key): + Log("Second is missing key %s" % (key,), 2) + else: Log("Value of %s differs: %s vs %s" % + (key, self.data[key], other.data[key]), 2) + return None + return 1 + def __ne__(self, other): return not self.__eq__(other) def __str__(self): diff --git a/rdiff-backup/testing/metadatatest.py b/rdiff-backup/testing/metadatatest.py index bad6d27..7211c67 100644 --- a/rdiff-backup/testing/metadatatest.py +++ b/rdiff-backup/testing/metadatatest.py @@ -1,8 +1,16 @@ -import unittest, os +import unittest, os, cStringIO, time from rdiff_backup.metadata import * -from rdiff_backup import rpath, Globals +from rdiff_backup import rpath, Globals, selection, destructive_stepping + +tempdir = rpath.RPath(Globals.local_connection, "testfiles/output") class MetadataTest(unittest.TestCase): + def make_temp(self): + """Make temp directory testfiles/output""" + global tempdir + tempdir.delete() + tempdir.mkdir() + def testQuote(self): """Test quoting and unquoting""" filenames = ["foo", ".", "hello\nthere", "\\", "\\\\\\", @@ -13,19 +21,89 @@ class MetadataTest(unittest.TestCase): result = unquote_path(quoted) assert result == filename, (quoted, result, filename) - def testRORP2Record(self): - """Test turning RORPs into records and back again""" + def get_rpaths(self): + """Return list of rorps""" vft = rpath.RPath(Globals.local_connection, "testfiles/various_file_types") rpaths = map(lambda x: vft.append(x), vft.listdir()) extra_rpaths = map(lambda x: rpath.RPath(Globals.local_connection, x), ['/bin/ls', '/dev/ttyS0', '/dev/hda', 'aoeuaou']) + return [vft] + rpaths + extra_rpaths - for rp in [vft] + rpaths + extra_rpaths: + def testRORP2Record(self): + """Test turning RORPs into records and back again""" + for rp in self.get_rpaths(): record = RORP2Record(rp) #print record new_rorp = Record2RORP(record) assert new_rorp == rp, (new_rorp, rp, record) + def testIterator(self): + """Test writing RORPs to file and iterating them back""" + l = self.get_rpaths() + fp = cStringIO.StringIO() + write_rorp_iter_to_file(iter(l), fp) + fp.seek(0) + cstring = fp.read() + fp.seek(0) + outlist = list(rorp_extractor(fp).iterate()) + assert len(l) == len(outlist), (len(l), len(outlist)) + for i in range(len(l)): + if not l[i].equal_verbose(outlist[i]): + #print cstring + assert 0, (i, str(l[i]), str(outlist[i])) + fp.close() + + def write_metadata_to_temp(self): + """If necessary, write metadata of bigdir to file metadata.gz""" + global tempdir + temprp = tempdir.append("metadata.gz") + if temprp.lstat(): return temprp + + self.make_temp() + root = rpath.RPath(Globals.local_connection, "testfiles/bigdir") + dsrp_root = destructive_stepping.DSRPath(1, root) + rpath_iter = selection.Select(dsrp_root).set_iter() + + start_time = time.time() + OpenMetadata(temprp) + for rp in rpath_iter: WriteMetadata(rp) + CloseMetadata() + print "Writing metadata took %s seconds" % (time.time() - start_time) + return temprp + + def testSpeed(self): + """Test testIterator on 10000 files""" + temprp = self.write_metadata_to_temp() + + start_time = time.time(); i = 0 + for rorp in GetMetadata(temprp): i += 1 + print "Reading %s metadata entries took %s seconds." % \ + (i, time.time() - start_time) + + start_time = time.time() + blocksize = 32 * 1024 + tempfp = temprp.open("rb", compress = 1) + while 1: + buf = tempfp.read(blocksize) + if not buf: break + assert not tempfp.close() + print "Simply decompressing metadata file took %s seconds" % \ + (time.time() - start_time) + + def testIterate_restricted(self): + """Test getting rorps restricted to certain index + + In this case, get assume subdir (subdir3, subdir10) has 50 + files in it. + + """ + temprp = self.write_metadata_to_temp() + start_time = time.time(); i = 0 + for rorp in GetMetadata(temprp, ("subdir3", "subdir10")): i += 1 + print "Reading %s metadata entries took %s seconds." % \ + (i, time.time() - start_time) + assert i == 51 + if __name__ == "__main__": unittest.main() |