summary | refs | log | tree | commit | diff
path: root/rdiff-backup
diff options
context:
space:
mode:
author: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-12-13 20:49:17 +0000
committer: bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2002-12-13 20:49:17 +0000
commit: 3b9298b8018c0df65ce601af092b6ce4a6a4d090 (patch)
tree: c976707f5b16c8a897830d1f7020c3f0b1d61273 /rdiff-backup
parent: c1a39e88df7ec66297635f519cb1f3fbd1b584f0 (diff)
download: rdiff-backup-3b9298b8018c0df65ce601af092b6ce4a6a4d090.tar.gz
Added iterator operations on metadata file
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@244 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup')
-rw-r--r-- rdiff-backup/rdiff_backup/metadata.py 140
-rw-r--r-- rdiff-backup/rdiff_backup/rpath.py 26
-rw-r--r-- rdiff-backup/testing/metadatatest.py 88
3 files changed, 247 insertions, 7 deletions
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index b596f76..50a7704 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -54,7 +54,9 @@ field names and values.
"""
-import re, log, Globals, rpath
+from __future__ import generators
+import re, gzip
+from rdiff_backup import log, Globals, rpath, Time
class ParsingError(Exception):
"""This is raised when bad or unparsable data is received"""
@@ -169,3 +171,139 @@ def unquote_path(quoted_string):
log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
return two_chars
return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string)
+
+
+def write_rorp_iter_to_file(rorp_iter, file):
+ """Given iterator of RORPs, write records to (pre-opened) file object
+
+ Each rorp is converted with RORP2Record and the result is passed
+ to file.write().  The file object is not closed here.
+
+ """
+ # NOTE(review): the parameter name shadows the builtin "file"; it
+ # is left alone because callers may pass it by keyword.
+ for rorp in rorp_iter: file.write(RORP2Record(rorp))
+
+class rorp_extractor:
+ """Controls iterating rorps from metadata file
+
+ The file object is read in chunks of self.blocksize into
+ self.buf.  A record boundary is a newline immediately followed
+ by the text File; each record found this way is converted to a
+ rorp with Record2RORP.
+
+ """
+ def __init__(self, fileobj):
+ self.fileobj = fileobj # holds file object we are reading from
+ self.buf = "" # holds the next part of the file
+ self.record_boundary_regexp = re.compile("\\nFile")
+ self.at_end = 0 # True if we are at the end of the file
+ self.blocksize = 32 * 1024
+
+ def get_next_pos(self):
+ """Return position of next record in buffer
+
+ Keeps appending blocks to self.buf until a record boundary is
+ found.  If the file runs out first, self.at_end is set and the
+ length of the buffer is returned, so that everything left in
+ the buffer is treated as the final record.
+
+ """
+ while 1:
+ m = self.record_boundary_regexp.search(self.buf)
+ if m: return m.start(0)+1 # the +1 skips the newline
+ else: # add next block to the buffer, loop again
+ newbuf = self.fileobj.read(self.blocksize)
+ if not newbuf:
+ self.at_end = 1
+ return len(self.buf)
+ else: self.buf += newbuf
+
+ def iterate(self):
+ """Return iterator over all records
+
+ A record that fails to parse is logged at level 2 and skipped;
+ iteration then continues with the following record.
+
+ """
+ while 1:
+ next_pos = self.get_next_pos()
+ try: yield Record2RORP(self.buf[:next_pos])
+ except ParsingError, e:
+ log.Log("Error parsing metadata file: %s" % (e,), 2)
+ # at_end is set by get_next_pos when the slice above was the
+ # last record in the file
+ if self.at_end: break
+ # drop the record just handled from the front of the buffer
+ self.buf = self.buf[next_pos:]
+
+ def skip_to_index(self, index):
+ """Scan through the file, set buffer to beginning of index record
+
+ Here we make sure that the buffer always ends in a newline, so
+ we will not be splitting lines in half.
+
+ If no matching record is found before the file is exhausted,
+ self.at_end is set and the buffer is left empty.
+
+ """
+ assert not self.buf or self.buf.endswith("\n")
+ # an empty index is looked up under the path "."
+ if not index: indexpath = "."
+ else: indexpath = "/".join(index)
+ # Must double all backslashes, because they will be
+ # reinterpreted. For instance, to search for index \n
+ # (newline), it will be \\n (backslash n) in the file, so the
+ # regular expression is "File \\\\n\\n" (File two backslash n
+ # backslash n)
+ double_quote = re.sub("\\\\", "\\\\\\\\", indexpath)
+ # NOTE(review): only backslashes are escaped before the path is
+ # interpolated into the pattern, so other regex metacharacters
+ # in a path (for instance ".") keep their special meaning --
+ # confirm whether that can produce false matches.
+ begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,))
+ while 1:
+ m = begin_re.search(self.buf)
+ if m:
+ # group 2 starts at "File", i.e. after any leading newline
+ self.buf = self.buf[m.start(2):]
+ return
+ # No match in the current buffer: replace it (not append) with
+ # the next block, then read up to the end of the current line
+ # so the buffer again ends on a line boundary.
+ self.buf = self.fileobj.read(self.blocksize)
+ self.buf += self.fileobj.readline()
+ if not self.buf:
+ self.at_end = 1
+ return
+
+ def iterate_starting_with(self, index):
+ """Iterate records whose index starts with given index
+
+ Skips ahead to the record for index, then yields rorps until
+ the first one whose index does not begin with index.  Records
+ that fail to parse are logged at level 2 and skipped.
+
+ """
+ self.skip_to_index(index)
+ if self.at_end: return
+ while 1:
+ next_pos = self.get_next_pos()
+ try: rorp = Record2RORP(self.buf[:next_pos])
+ except ParsingError, e:
+ log.Log("Error parsing metadata file: %s" % (e,), 2)
+ else:
+ # stop at the first record outside the requested prefix
+ if rorp.index[:len(index)] != index: break
+ yield rorp
+ if self.at_end: break
+ self.buf = self.buf[next_pos:]
+
+ def close(self):
+ """Return value of closing associated file"""
+ return self.fileobj.close()
+
+
+# Module-level state shared by OpenMetadata, WriteMetadata and
+# CloseMetadata: the rpath of the metadata file being written and its
+# open file object (None while no metadata file is open).
+metadata_rp = None
+metadata_fileobj = None
+def OpenMetadata(rp = None, compress = 1):
+    """Open the Metadata file for writing
+
+    If rp is given it is used as the metadata file.  Otherwise the
+    file is Globals.rbdir with "mirror_metadata.<Time.curtimestr>.data.gz"
+    appended.  compress is passed through to the rpath's open().  The
+    rpath and the opened file object are stored in the module-level
+    metadata_rp and metadata_fileobj.
+
+    Asserts that no metadata file is already open.
+
+    """
+    # Both names must be declared global.  Without metadata_rp in this
+    # declaration the assignments below would only bind a local, and
+    # the module-level metadata_rp read by CloseMetadata would remain
+    # None.
+    global metadata_rp, metadata_fileobj
+    assert not metadata_fileobj, "Metadata file already open"
+    if rp: metadata_rp = rp
+    else: metadata_rp = Globals.rbdir.append("mirror_metadata.%s.data.gz" %
+                                             (Time.curtimestr,))
+    metadata_fileobj = metadata_rp.open("wb", compress = compress)
+
+def WriteMetadata(rorp):
+ """Write metadata of rorp to file
+
+ The record produced by RORP2Record(rorp) is written to the
+ module-level metadata_fileobj, which must already be open.
+
+ """
+ global metadata_fileobj
+ metadata_fileobj.write(RORP2Record(rorp))
+
+def CloseMetadata():
+ """Close the metadata file
+
+ Closes the module-level metadata_fileobj, resets it to None,
+ calls setdata() on metadata_rp and returns the value that
+ close() returned.
+
+ """
+ global metadata_fileobj
+ result = metadata_fileobj.close()
+ # reset so that the "already open" assertion in OpenMetadata passes
+ metadata_fileobj = None
+ # metadata_rp is only read here, so it needs no global declaration.
+ # NOTE(review): this relies on the module-level metadata_rp having
+ # been set when the file was opened -- confirm it cannot still be
+ # None at this point.
+ metadata_rp.setdata()
+ return result
+
+def GetMetadata(rp = None, restrict_index = None, compressed = None):
+ """Return iterator of metadata from given metadata file rp
+
+ If compressed is None it is deduced from rp: for an increment
+ file rp.inc_compressed is used (and the increment type is
+ asserted to be "data"); otherwise it is whether the index path
+ ends in ".gz".  If restrict_index is given, only records whose
+ index starts with it are iterated.
+
+ """
+ # NOTE(review): rp defaults to None but is always dereferenced
+ # (at the latest by rp.open below), so callers presumably must
+ # pass it -- confirm.
+ if compressed is None:
+ if rp.isincfile():
+ compressed = rp.inc_compressed
+ assert rp.inc_type == "data", rp.inc_type
+ else: compressed = rp.get_indexpath().endswith(".gz")
+
+ fileobj = rp.open("rb", compress = compressed)
+ if restrict_index is None: return rorp_extractor(fileobj).iterate()
+ else: return rorp_extractor(fileobj).iterate_starting_with(restrict_index)
+
+def GetMetadata_at_time(rpdir, time, restrict_index = None, rplist = None):
+ """Scan through rpdir, finding metadata file at given time, iterate
+
+ If rplist is given, use that instead of listing rpdir. Time here
+ is exact, we don't take the next one older or anything. Returns
+ None if no matching metadata found.
+
+ restrict_index is passed on to GetMetadata unchanged.
+
+ """
+ if rplist is None: rplist = map(lambda x: rpdir.append(x), rpdir.listdir())
+ for rp in rplist:
+ # only "data" increments whose base name is mirror_metadata count
+ if (rp.isincfile() and rp.getinctype() == "data" and
+ rp.getincbase_str() == "mirror_metadata"):
+ # the first file whose increment time equals time wins
+ if Time.stringtotime(rp.getinctime()) == time:
+ return GetMetadata(rp, restrict_index)
+ return None
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index cfc0688..9d54872 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -271,11 +271,35 @@ class RORPath(RPathStatic):
pass
elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
- elif key == 'size' and self.isdir(): pass
+ elif key == 'size' and not self.isreg():
+ pass # size only matters for regular files
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return None
return 1
+ def equal_verbose(self, other):
+ """Like __eq__, but log more information. Useful when testing
+
+ Returns 1 if the two are considered equal and None otherwise.
+ The first difference found (index, missing key, or differing
+ value) is logged at level 2 before returning None.
+
+ """
+ # NOTE(review): this calls Log directly, whereas metadata.py in
+ # this same commit uses log.Log -- confirm that the name Log is
+ # actually in scope in rpath.py.
+ if self.index != other.index:
+ Log("Index %s != index %s" % (self.index, other.index), 2)
+ return None
+
+ for key in self.data.keys(): # compare dicts key by key
+ if ((key == 'uid' or key == 'gid') and
+ (not Globals.change_ownership or self.issym())):
+ # Don't compare gid/uid for symlinks or if not change_ownership
+ pass
+ elif key == 'atime' and not Globals.preserve_atime: pass
+ elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
+ elif key == 'size' and not self.isreg(): pass
+ elif (not other.data.has_key(key) or
+ self.data[key] != other.data[key]):
+ # report which of the two conditions above triggered
+ if not other.data.has_key(key):
+ Log("Second is missing key %s" % (key,), 2)
+ else: Log("Value of %s differs: %s vs %s" %
+ (key, self.data[key], other.data[key]), 2)
+ return None
+ return 1
+
def __ne__(self, other): return not self.__eq__(other)
def __str__(self):
diff --git a/rdiff-backup/testing/metadatatest.py b/rdiff-backup/testing/metadatatest.py
index bad6d27..7211c67 100644
--- a/rdiff-backup/testing/metadatatest.py
+++ b/rdiff-backup/testing/metadatatest.py
@@ -1,8 +1,16 @@
-import unittest, os
+import unittest, os, cStringIO, time
from rdiff_backup.metadata import *
-from rdiff_backup import rpath, Globals
+from rdiff_backup import rpath, Globals, selection, destructive_stepping
+
+tempdir = rpath.RPath(Globals.local_connection, "testfiles/output")
class MetadataTest(unittest.TestCase):
+ def make_temp(self):
+ """Make temp directory testfiles/output
+
+ The module-level tempdir is deleted first and then created
+ again.
+
+ """
+ global tempdir
+ tempdir.delete()
+ tempdir.mkdir()
+
def testQuote(self):
"""Test quoting and unquoting"""
filenames = ["foo", ".", "hello\nthere", "\\", "\\\\\\",
@@ -13,19 +21,89 @@ class MetadataTest(unittest.TestCase):
result = unquote_path(quoted)
assert result == filename, (quoted, result, filename)
- def testRORP2Record(self):
- """Test turning RORPs into records and back again"""
+ def get_rpaths(self):
+ """Return list of rpaths used as test input
+
+ The list holds testfiles/various_file_types itself, every
+ entry listed directly inside it, and four extra paths given
+ by name.
+
+ """
vft = rpath.RPath(Globals.local_connection,
"testfiles/various_file_types")
rpaths = map(lambda x: vft.append(x), vft.listdir())
extra_rpaths = map(lambda x: rpath.RPath(Globals.local_connection, x),
['/bin/ls', '/dev/ttyS0', '/dev/hda', 'aoeuaou'])
+ return [vft] + rpaths + extra_rpaths
- for rp in [vft] + rpaths + extra_rpaths:
+ def testRORP2Record(self):
+ """Test turning RORPs into records and back again"""
+ for rp in self.get_rpaths():
record = RORP2Record(rp)
#print record
new_rorp = Record2RORP(record)
assert new_rorp == rp, (new_rorp, rp, record)
+ def testIterator(self):
+ """Test writing RORPs to file and iterating them back
+
+ The records are written to an in-memory cStringIO buffer, read
+ back through rorp_extractor, and compared one by one with
+ equal_verbose.
+
+ """
+ l = self.get_rpaths()
+ fp = cStringIO.StringIO()
+ write_rorp_iter_to_file(iter(l), fp)
+ fp.seek(0)
+ # cstring is only used by the commented-out debugging print below
+ cstring = fp.read()
+ # rewind again so the extractor reads from the start
+ fp.seek(0)
+ outlist = list(rorp_extractor(fp).iterate())
+ assert len(l) == len(outlist), (len(l), len(outlist))
+ for i in range(len(l)):
+ if not l[i].equal_verbose(outlist[i]):
+ #print cstring
+ assert 0, (i, str(l[i]), str(outlist[i]))
+ fp.close()
+
+ def write_metadata_to_temp(self):
+ """If necessary, write metadata of bigdir to file metadata.gz
+
+ Returns the rpath of the metadata file inside tempdir.  If that
+ file already exists it is returned as is; otherwise tempdir is
+ recreated and the metadata of testfiles/bigdir is written to it.
+
+ """
+ global tempdir
+ temprp = tempdir.append("metadata.gz")
+ # reuse the file left by an earlier test if there is one
+ if temprp.lstat(): return temprp
+
+ self.make_temp()
+ root = rpath.RPath(Globals.local_connection, "testfiles/bigdir")
+ dsrp_root = destructive_stepping.DSRPath(1, root)
+ rpath_iter = selection.Select(dsrp_root).set_iter()
+
+ start_time = time.time()
+ OpenMetadata(temprp)
+ for rp in rpath_iter: WriteMetadata(rp)
+ CloseMetadata()
+ print "Writing metadata took %s seconds" % (time.time() - start_time)
+ return temprp
+
+ def testSpeed(self):
+ """Test testIterator on 10000 files
+
+ Prints how long it takes to iterate every entry of the metadata
+ file, and how long it takes merely to read and decompress the
+ same file, so the two can be compared.
+
+ """
+ # NOTE(review): the figure of 10000 files is presumably the size
+ # of testfiles/bigdir; nothing in this test checks it -- confirm.
+ temprp = self.write_metadata_to_temp()
+
+ start_time = time.time(); i = 0
+ for rorp in GetMetadata(temprp): i += 1
+ print "Reading %s metadata entries took %s seconds." % \
+ (i, time.time() - start_time)
+
+ start_time = time.time()
+ blocksize = 32 * 1024
+ tempfp = temprp.open("rb", compress = 1)
+ # baseline: read the file block by block and discard the data
+ while 1:
+ buf = tempfp.read(blocksize)
+ if not buf: break
+ assert not tempfp.close()
+ print "Simply decompressing metadata file took %s seconds" % \
+ (time.time() - start_time)
+
+ def testIterate_restricted(self):
+ """Test getting rorps restricted to certain index
+
+ In this case, assume subdir (subdir3, subdir10) has 50
+ files in it.
+
+ """
+ temprp = self.write_metadata_to_temp()
+ start_time = time.time(); i = 0
+ for rorp in GetMetadata(temprp, ("subdir3", "subdir10")): i += 1
+ print "Reading %s metadata entries took %s seconds." % \
+ (i, time.time() - start_time)
+ # presumably the 50 files plus the record for the directory
+ # itself -- confirm against the layout of testfiles/bigdir
+ assert i == 51
+
if __name__ == "__main__": unittest.main()