summary | refs | log | tree | commit | diff
path: root/rdiff-backup/rdiff_backup
diff options
context:
space:
mode:
author bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2005-11-04 22:41:13 +0000
committer bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2005-11-04 22:41:13 +0000
commit 828d9e44d4417ca9ee3831919d6023492805b7a9 (patch)
tree 773c464f98092b7fefc1b6b974f6e966f692f3ad /rdiff-backup/rdiff_backup
parent 070e5c4080dac3de8e26a7d5d7314ceb36d32440 (diff)
download rdiff-backup-828d9e44d4417ca9ee3831919d6023492805b7a9.tar.gz
Added metadata diffing, and an iterfile hash bugfix
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@669 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup')
-rw-r--r-- rdiff-backup/rdiff_backup/Rdiff.py 2
-rw-r--r-- rdiff-backup/rdiff_backup/Security.py 2
-rw-r--r-- rdiff-backup/rdiff_backup/backup.py 1
-rw-r--r-- rdiff-backup/rdiff_backup/iterfile.py 47
-rw-r--r-- rdiff-backup/rdiff_backup/metadata.py 156
-rw-r--r-- rdiff-backup/rdiff_backup/restore.py 2
-rw-r--r-- rdiff-backup/rdiff_backup/rpath.py 4
7 files changed, 148 insertions, 66 deletions
diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py
index 3ed589d..aade8af 100644
--- a/rdiff-backup/rdiff_backup/Rdiff.py
+++ b/rdiff-backup/rdiff_backup/Rdiff.py
@@ -55,7 +55,7 @@ def get_delta_sigrp(rp_signature, rp_new):
def get_delta_sigrp_hash(rp_signature, rp_new):
"""Like above but also calculate hash of new as close() value"""
- log.Log("Getting delta with hash of %s with signature %s" %
+ log.Log("Getting delta (with hash) of %s with signature %s" %
(rp_new.path, rp_signature.get_indexpath()), 7)
return librsync.DeltaFile(rp_signature.open("rb"),
hash.FileWrapper(rp_new.open("rb")))
diff --git a/rdiff-backup/rdiff_backup/Security.py b/rdiff-backup/rdiff_backup/Security.py
index ba61c60..55a8f37 100644
--- a/rdiff-backup/rdiff_backup/Security.py
+++ b/rdiff-backup/rdiff_backup/Security.py
@@ -206,12 +206,12 @@ def vet_request(request, arglist):
"""Examine request for security violations"""
#if Globals.server: sys.stderr.write(str(request) + "\n")
security_level = Globals.security_level
+ if security_level == "override": return
if Globals.restrict_path:
for arg in arglist:
if isinstance(arg, rpath.RPath): vet_rpath(arg)
if request.function_string in file_requests:
vet_filename(request, arglist)
- if security_level == "override": return
if request.function_string in allowed_requests: return
if request.function_string in ("Globals.set", "Globals.set_local"):
if arglist[0] not in disallowed_server_globals: return
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index 78d3c22..f92d163 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -443,6 +443,7 @@ class CacheCollatedPostProcess:
dir_rp, perms = self.dir_perms_list.pop()
dir_rp.chmod(perms)
self.metawriter.close()
+ metadata.ManagerObj.ConvertMetaToDiff()
if Globals.print_statistics: statistics.print_active_stats()
if Globals.file_statistics: statistics.FileStats.close()
diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py
index 608f251..bed285a 100644
--- a/rdiff-backup/rdiff_backup/iterfile.py
+++ b/rdiff-backup/rdiff_backup/iterfile.py
@@ -44,6 +44,7 @@ class UnwrapFile:
"o" for an object,
"f" for file,
"c" for a continution of a file,
+ "h" for the close value of a file
"e" for an exception, or
None if no more data can be read.
@@ -57,7 +58,7 @@ class UnwrapFile:
assert None, "Header %s is only %d bytes" % (header, len(header))
type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
- if type in ("o", "e"): return type, cPickle.loads(buf)
+ if type in ("o", "e", "h"): return type, cPickle.loads(buf)
else:
assert type in ("f", "c")
return type, buf
@@ -82,11 +83,7 @@ class IterWrappingFile(UnwrapFile):
type, data = self._get()
if not type: raise StopIteration
if type == "o" or type == "e": return data
- elif type == "f":
- file = IterVirtualFile(self, data)
- if data: self.currently_in_file = file
- else: self.currently_in_file = None
- return file
+ elif type == "f": return IterVirtualFile(self, data)
else: raise IterFileException("Bad file type %s" % type)
@@ -107,8 +104,10 @@ class IterVirtualFile(UnwrapFile):
"""
UnwrapFile.__init__(self, iwf.file)
self.iwf = iwf
+ iwf.currently_in_file = self
self.buffer = initial_data
self.closed = None
+ if not initial_data: self.set_close_val()
def read(self, length = -1):
"""Read length bytes from the file, updating buffers as necessary"""
@@ -140,15 +139,24 @@ class IterVirtualFile(UnwrapFile):
self.buffer += data
return 1
else:
- self.iwf.currently_in_file = None
+ self.set_close_val()
return None
+ def set_close_val(self):
+ """Read the close value and clear currently_in_file"""
+ assert self.iwf.currently_in_file
+ self.iwf.currently_in_file = None
+ type, object = self.iwf._get()
+ assert type == 'h', type
+ self.close_value = object
+
def close(self):
"""Currently just reads whats left and discards it"""
while self.iwf.currently_in_file:
self.addtobuffer()
self.buffer = ""
self.closed = 1
+ return self.close_value
class FileWrappingIter:
@@ -214,13 +222,16 @@ class FileWrappingIter:
buf = robust.check_common_error(self.read_error_handler,
self.currently_in_file.read,
[Globals.blocksize])
- if buf == "" or buf is None:
- self.currently_in_file.close()
- self.currently_in_file = None
- if buf is None: # error occurred above, encode exception
- prefix_letter = "e"
- buf = cPickle.dumps(self.last_exception, 1)
- total = "".join((prefix_letter, C.long2str(long(len(buf))), buf))
+ if buf is None: # error occurred above, encode exception
+ self.currently_in_file = None
+ excstr = cPickle.dumps(self.last_exception, 1)
+ total = "".join(('e', C.long2str(long(len(excstr))), excstr))
+ else:
+ total = "".join((prefix_letter, C.long2str(long(len(buf))), buf))
+ if buf == "": # end of file
+ cstr = cPickle.dumps(self.currently_in_file.close(), 1)
+ self.currently_in_file = None
+ total += "".join(('h', C.long2str(long(len(cstr))), cstr))
self.array_buf.fromstring(total)
def read_error_handler(self, exc, blocksize):
@@ -386,11 +397,7 @@ class FileToMiscIter(IterWrappingFile):
def get_file(self):
"""Read file object from file"""
type, data = self._get()
- if type == "f":
- file = IterVirtualFile(self, data)
- if data: self.currently_in_file = file
- else: self.currently_in_file = None
- return file
+ if type == "f": return IterVirtualFile(self, data)
assert type == "e", "Expected type e, got %s" % (type,)
assert isinstance(data, Exception)
return ErrorFile(data)
@@ -411,7 +418,7 @@ class FileToMiscIter(IterWrappingFile):
type, length = self.buf[0], C.str2long(self.buf[1:8])
data = self.buf[8:8+length]
self.buf = self.buf[8+length:]
- if type in "oer": return type, cPickle.loads(data)
+ if type in "oerh": return type, cPickle.loads(data)
else: return type, data
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index 2ce5218..0d7ba4f 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -267,8 +267,10 @@ class FlatExtractor:
"""Yield all text records in order"""
while 1:
next_pos = self.get_next_pos()
+ if self.at_end:
+ if next_pos: yield self.buf[:next_pos]
+ break
yield self.buf[:next_pos]
- if self.at_end: break
self.buf = self.buf[next_pos:]
assert not self.fileobj.close()
@@ -428,16 +430,23 @@ class Manager:
def __init__(self):
"""Set listing of rdiff-backup-data dir"""
self.rplist = []
- self.timerpmap = {}
+ self.timerpmap, self.prefixmap = {}, {}
for filename in Globals.rbdir.listdir():
rp = Globals.rbdir.append(filename)
- if rp.isincfile():
- self.rplist.append(rp)
- time = rp.getinctime()
- if self.timerpmap.has_key(time):
- self.timerpmap[time].append(rp)
- else: self.timerpmap[time] = [rp]
+ if rp.isincfile(): self.add_incrp(rp)
+ def add_incrp(self, rp):
+ """Add rp to list of inc rps in the rbdir"""
+ self.rplist.append(rp)
+ time = rp.getinctime()
+ if self.timerpmap.has_key(time):
+ self.timerpmap[time].append(rp)
+ else: self.timerpmap[time] = [rp]
+
+ incbase = rp.getincbase_str()
+ if self.prefixmap.has_key(incbase): self.prefixmap[incbase].append(rp)
+ else: self.prefixmap[incbase] = [rp]
+
def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
"""Used below to find the right kind of file by time"""
if not self.timerpmap.has_key(time): return None
@@ -490,6 +499,8 @@ class Manager:
filename = '%s.%s.%s.gz' % (prefix, timestr, typestr)
rp = Globals.rbdir.append(filename)
assert not rp.lstat(), "File %s already exists!" % (rp.path,)
+ assert rp.isincfile()
+ self.add_incrp(rp)
return flatfileclass(rp, 'w')
def get_meta_writer(self, typestr, time):
@@ -514,49 +525,112 @@ class Manager:
return metawriter # no need for a CombinedWriter
if Globals.eas_active: ea_writer = self.get_ea_writer(typestr, time)
+ else: ea_writer = None
if Globals.acls_active: acl_writer = self.get_acl_writer(typestr, time)
+ else: acl_writer = None
return CombinedWriter(metawriter, ea_writer, acl_writer)
-ManagerObj = None # Set this later to Manager instance
-def SetManager():
- global ManagerObj
- ManagerObj = Manager()
-
+class PatchDiffMan(Manager):
+ """Contains functions for patching and diffing metadata
-def patch(*meta_iters):
- """Return an iterator of metadata files by combining all the given iters
+ To save space, we can record a full list of only the most recent
+ metadata, using the normal rdiff-backup reverse increment
+ strategy. Instead of using librsync to compute diffs, though, we
+ use our own technique so that the diff files are still
+ hand-editable.
- The iters should be given as a list/tuple in reverse chronological
- order. The earliest rorp in each iter will supercede all the
- later ones.
+ A mirror_metadata diff has the same format as a mirror_metadata
+ snapshot. If the record for an index is missing from the diff, it
+ indicates no change from the original. If it is present it
+ replaces the mirror_metadata entry, unless it has Type None, which
+ indicates the record should be deleted from the original.
"""
- for meta_tuple in rorpiter.CollateIterators(*meta_iters):
- for i in range(len(meta_tuple)-1, -1, -1):
- if meta_tuple[i]:
- if meta_tuple[i].lstat(): yield meta_tuple[i]
- break # move to next index
- else: assert 0, "No valid rorps"
+ max_diff_chain = 9 # After this many diffs, make a new snapshot
+
+ def get_diffiter(self, new_iter, old_iter):
+ """Iterate meta diffs of new_iter -> old_iter"""
+ for new_rorp, old_rorp in rorpiter.Collate2Iters(new_iter, old_iter):
+ if not old_rorp: yield rpath.RORPath(new_rorp.index)
+ elif not new_rorp or new_rorp.data != old_rorp.data:
+ # exact compare here, can't use == on rorps
+ yield old_rorp
+
+ def sorted_meta_inclist(self, min_time = 0):
+ """Return list of mirror_metadata incs, reverse sorted by time"""
+ if not self.prefixmap.has_key('mirror_metadata'): return []
+ sortlist = [(rp.getinctime(), rp)
+ for rp in self.prefixmap['mirror_metadata']]
+ sortlist.sort()
+ sortlist.reverse()
+ return [rp for (time, rp) in sortlist if time >= min_time]
+
+ def check_needs_diff(self):
+ """Check if we should diff, returns (new, old) rps, or (None, None)"""
+ inclist = self.sorted_meta_inclist()
+ assert len(inclist) >= 1
+ if len(inclist) == 1: return (None, None)
+ newrp, oldrp = inclist[:2]
+ assert newrp.getinctype() == oldrp.getinctype() == 'snapshot'
+
+ chainlen = 1
+ for rp in inclist[2:]:
+ if rp.getinctype() != 'diff': break
+ chainlen += 1
+ if chainlen >= self.max_diff_chain: return (None, None)
+ return (newrp, oldrp)
+
+ def ConvertMetaToDiff(self):
+ """Replace a mirror snapshot with a diff if it's appropriate"""
+ newrp, oldrp = self.check_needs_diff()
+ if not newrp: return
+ log.Log("Writing mirror_metadata diff", 6)
+
+ diff_writer = self.get_meta_writer('diff', oldrp.getinctime())
+ new_iter = MetadataFile(newrp, 'r').get_objects()
+ old_iter = MetadataFile(oldrp, 'r').get_objects()
+ for diff_rorp in self.get_diffiter(new_iter, old_iter):
+ diff_writer.write_object(diff_rorp)
+ diff_writer.close() # includes sync
+ oldrp.delete()
-def Convert_diff(cur_time, old_time):
- """Convert the metadata snapshot at old_time to diff format
+ def get_meta_at_time(self, time, restrict_index):
+ """Get metadata rorp iter, possibly by patching with diffs"""
+ meta_iters = [MetadataFile(rp, 'r').get_objects(restrict_index)
+ for rp in self.relevant_meta_incs(time)]
+ if not meta_iters: return None
+ if len(meta_iters) == 1: return meta_iters[0]
+ return self.iterate_patched_meta(meta_iters)
+
+ def relevant_meta_incs(self, time):
+ """Return list [snapshotrp, diffrps ...] time sorted"""
+ inclist = self.sorted_meta_inclist(min_time = time)
+ if not inclist: return inclist
+ assert inclist[-1].getinctime() == time, inclist[-1]
+ for i in range(len(inclist)-1, -1, -1):
+ if inclist[i].getinctype() == 'snapshot':
+ return inclist[i:]
+ assert 0, "Inclist %s contains no snapshots" % (inclist,)
+
+ def iterate_patched_meta(self, meta_iter_list):
+ """Return an iter of metadata rorps by combining the given iters
+
+ The iters should be given as a list/tuple in reverse
+ chronological order. The earliest rorp in each iter will
+ supercede all the later ones.
- The point is just to save space. The diff format is simple, just
- include in the diff all of the older rorps that are different in
- the two metadata rorps.
+ """
+ for meta_tuple in rorpiter.CollateIterators(*meta_iter_list):
+ for i in range(len(meta_tuple)-1, -1, -1):
+ if meta_tuple[i]:
+ if meta_tuple[i].lstat(): yield meta_tuple[i]
+ break # move to next index
+ else: assert 0, "No valid rorps"
- """
- rblist = [Globals.rbdir.append(filename)
- for filename in robust.listrp(Globals.rbdir)]
- cur_iter = MetadataFile.get_objects_at_time(
- Globals.rbdir, cur_time, None, rblist)
- old_iter = MetadataFile.get_objects_at_time(
- Globals.rbdir, old_time, None, rblist)
- assert cur_iter.type == old_iter.type == 'snapshot'
- diff_file = MetadataFile.open_file(None, 1, 'diff', old_time)
-
- for cur_rorp, old_rorp in rorpiter.Collate2Iters(cur_iter, old_iter):
- XXX
+ManagerObj = None # Set this later to Manager instance
+def SetManager():
+ global ManagerObj
+ ManagerObj = PatchDiffMan()
import eas_acls # put at bottom to avoid python circularity bug
diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py
index 8ab1d77..8079511 100644
--- a/rdiff-backup/rdiff_backup/restore.py
+++ b/rdiff-backup/rdiff_backup/restore.py
@@ -177,7 +177,7 @@ class MirrorStruct:
"""
if rest_time is None: rest_time = cls._rest_time
- if not metadata.ManagerObj: metadata.SetManager()
+ metadata.SetManager()
rorp_iter = metadata.ManagerObj.GetAtTime(rest_time,
cls.mirror_base.index)
if not rorp_iter:
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 753712f..7647c62 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -116,9 +116,9 @@ def copy_reg_file(rpin, rpout, compress = 0):
try:
if (rpout.conn is rpin.conn and
rpout.conn is not Globals.local_connection):
- rpout.conn.rpath.copy_reg_file(rpin.path, rpout.path, compress)
+ v = rpout.conn.rpath.copy_reg_file(rpin.path, rpout.path, compress)
rpout.setdata()
- return
+ return v
except AttributeError: pass
return rpout.write_from_fileobj(rpin.open("rb"), compress = compress)