diff options
author | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2005-11-04 22:41:13 +0000 |
---|---|---|
committer | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2005-11-04 22:41:13 +0000 |
commit | 828d9e44d4417ca9ee3831919d6023492805b7a9 (patch) | |
tree | 773c464f98092b7fefc1b6b974f6e966f692f3ad /rdiff-backup/rdiff_backup | |
parent | 070e5c4080dac3de8e26a7d5d7314ceb36d32440 (diff) | |
download | rdiff-backup-828d9e44d4417ca9ee3831919d6023492805b7a9.tar.gz |
Added metadata diffing, and an iterfile hash bugfix
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@669 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup')
-rw-r--r-- | rdiff-backup/rdiff_backup/Rdiff.py | 2 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/Security.py | 2 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/backup.py | 1 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/iterfile.py | 47 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/metadata.py | 156 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/restore.py | 2 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/rpath.py | 4 |
7 files changed, 148 insertions, 66 deletions
diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py index 3ed589d..aade8af 100644 --- a/rdiff-backup/rdiff_backup/Rdiff.py +++ b/rdiff-backup/rdiff_backup/Rdiff.py @@ -55,7 +55,7 @@ def get_delta_sigrp(rp_signature, rp_new): def get_delta_sigrp_hash(rp_signature, rp_new): """Like above but also calculate hash of new as close() value""" - log.Log("Getting delta with hash of %s with signature %s" % + log.Log("Getting delta (with hash) of %s with signature %s" % (rp_new.path, rp_signature.get_indexpath()), 7) return librsync.DeltaFile(rp_signature.open("rb"), hash.FileWrapper(rp_new.open("rb"))) diff --git a/rdiff-backup/rdiff_backup/Security.py b/rdiff-backup/rdiff_backup/Security.py index ba61c60..55a8f37 100644 --- a/rdiff-backup/rdiff_backup/Security.py +++ b/rdiff-backup/rdiff_backup/Security.py @@ -206,12 +206,12 @@ def vet_request(request, arglist): """Examine request for security violations""" #if Globals.server: sys.stderr.write(str(request) + "\n") security_level = Globals.security_level + if security_level == "override": return if Globals.restrict_path: for arg in arglist: if isinstance(arg, rpath.RPath): vet_rpath(arg) if request.function_string in file_requests: vet_filename(request, arglist) - if security_level == "override": return if request.function_string in allowed_requests: return if request.function_string in ("Globals.set", "Globals.set_local"): if arglist[0] not in disallowed_server_globals: return diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py index 78d3c22..f92d163 100644 --- a/rdiff-backup/rdiff_backup/backup.py +++ b/rdiff-backup/rdiff_backup/backup.py @@ -443,6 +443,7 @@ class CacheCollatedPostProcess: dir_rp, perms = self.dir_perms_list.pop() dir_rp.chmod(perms) self.metawriter.close() + metadata.ManagerObj.ConvertMetaToDiff() if Globals.print_statistics: statistics.print_active_stats() if Globals.file_statistics: statistics.FileStats.close() diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py index 608f251..bed285a 100644 --- a/rdiff-backup/rdiff_backup/iterfile.py +++ b/rdiff-backup/rdiff_backup/iterfile.py @@ -44,6 +44,7 @@ class UnwrapFile: "o" for an object, "f" for file, "c" for a continution of a file, + "h" for the close value of a file "e" for an exception, or None if no more data can be read. @@ -57,7 +58,7 @@ class UnwrapFile: assert None, "Header %s is only %d bytes" % (header, len(header)) type, length = header[0], C.str2long(header[1:]) buf = self.file.read(length) - if type in ("o", "e"): return type, cPickle.loads(buf) + if type in ("o", "e", "h"): return type, cPickle.loads(buf) else: assert type in ("f", "c") return type, buf @@ -82,11 +83,7 @@ class IterWrappingFile(UnwrapFile): type, data = self._get() if not type: raise StopIteration if type == "o" or type == "e": return data - elif type == "f": - file = IterVirtualFile(self, data) - if data: self.currently_in_file = file - else: self.currently_in_file = None - return file + elif type == "f": return IterVirtualFile(self, data) else: raise IterFileException("Bad file type %s" % type) @@ -107,8 +104,10 @@ class IterVirtualFile(UnwrapFile): """ UnwrapFile.__init__(self, iwf.file) self.iwf = iwf + iwf.currently_in_file = self self.buffer = initial_data self.closed = None + if not initial_data: self.set_close_val() def read(self, length = -1): """Read length bytes from the file, updating buffers as necessary""" @@ -140,15 +139,24 @@ class IterVirtualFile(UnwrapFile): self.buffer += data return 1 else: - self.iwf.currently_in_file = None + self.set_close_val() return None + def set_close_val(self): + """Read the close value and clear currently_in_file""" + assert self.iwf.currently_in_file + self.iwf.currently_in_file = None + type, object = self.iwf._get() + assert type == 'h', type + self.close_value = object + def close(self): """Currently just reads whats left and discards it""" while self.iwf.currently_in_file: self.addtobuffer() self.buffer = "" self.closed = 1 + return self.close_value class FileWrappingIter: @@ -214,13 +222,16 @@ class FileWrappingIter: buf = robust.check_common_error(self.read_error_handler, self.currently_in_file.read, [Globals.blocksize]) - if buf == "" or buf is None: - self.currently_in_file.close() - self.currently_in_file = None - if buf is None: # error occurred above, encode exception - prefix_letter = "e" - buf = cPickle.dumps(self.last_exception, 1) - total = "".join((prefix_letter, C.long2str(long(len(buf))), buf)) + if buf is None: # error occurred above, encode exception + self.currently_in_file = None + excstr = cPickle.dumps(self.last_exception, 1) + total = "".join(('e', C.long2str(long(len(excstr))), excstr)) + else: + total = "".join((prefix_letter, C.long2str(long(len(buf))), buf)) + if buf == "": # end of file + cstr = cPickle.dumps(self.currently_in_file.close(), 1) + self.currently_in_file = None + total += "".join(('h', C.long2str(long(len(cstr))), cstr)) self.array_buf.fromstring(total) def read_error_handler(self, exc, blocksize): @@ -386,11 +397,7 @@ class FileToMiscIter(IterWrappingFile): def get_file(self): """Read file object from file""" type, data = self._get() - if type == "f": - file = IterVirtualFile(self, data) - if data: self.currently_in_file = file - else: self.currently_in_file = None - return file + if type == "f": return IterVirtualFile(self, data) assert type == "e", "Expected type e, got %s" % (type,) assert isinstance(data, Exception) return ErrorFile(data) @@ -411,7 +418,7 @@ class FileToMiscIter(IterWrappingFile): type, length = self.buf[0], C.str2long(self.buf[1:8]) data = self.buf[8:8+length] self.buf = self.buf[8+length:] - if type in "oer": return type, cPickle.loads(data) + if type in "oerh": return type, cPickle.loads(data) else: return type, data diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index 2ce5218..0d7ba4f 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -267,8 +267,10 @@ class FlatExtractor: """Yield all text records in order""" while 1: next_pos = self.get_next_pos() + if self.at_end: + if next_pos: yield self.buf[:next_pos] + break yield self.buf[:next_pos] - if self.at_end: break self.buf = self.buf[next_pos:] assert not self.fileobj.close() @@ -428,16 +430,23 @@ class Manager: def __init__(self): """Set listing of rdiff-backup-data dir""" self.rplist = [] - self.timerpmap = {} + self.timerpmap, self.prefixmap = {}, {} for filename in Globals.rbdir.listdir(): rp = Globals.rbdir.append(filename) - if rp.isincfile(): - self.rplist.append(rp) - time = rp.getinctime() - if self.timerpmap.has_key(time): - self.timerpmap[time].append(rp) - else: self.timerpmap[time] = [rp] + if rp.isincfile(): self.add_incrp(rp) + def add_incrp(self, rp): + """Add rp to list of inc rps in the rbdir""" + self.rplist.append(rp) + time = rp.getinctime() + if self.timerpmap.has_key(time): + self.timerpmap[time].append(rp) + else: self.timerpmap[time] = [rp] + + incbase = rp.getincbase_str() + if self.prefixmap.has_key(incbase): self.prefixmap[incbase].append(rp) + else: self.prefixmap[incbase] = [rp] + def _iter_helper(self, prefix, flatfileclass, time, restrict_index): """Used below to find the right kind of file by time""" if not self.timerpmap.has_key(time): return None @@ -490,6 +499,8 @@ class Manager: filename = '%s.%s.%s.gz' % (prefix, timestr, typestr) rp = Globals.rbdir.append(filename) assert not rp.lstat(), "File %s already exists!" % (rp.path,) + assert rp.isincfile() + self.add_incrp(rp) return flatfileclass(rp, 'w') def get_meta_writer(self, typestr, time): @@ -514,49 +525,112 @@ class Manager: return metawriter # no need for a CombinedWriter if Globals.eas_active: ea_writer = self.get_ea_writer(typestr, time) + else: ea_writer = None if Globals.acls_active: acl_writer = self.get_acl_writer(typestr, time) + else: acl_writer = None return CombinedWriter(metawriter, ea_writer, acl_writer) -ManagerObj = None # Set this later to Manager instance -def SetManager(): - global ManagerObj - ManagerObj = Manager() - +class PatchDiffMan(Manager): + """Contains functions for patching and diffing metadata -def patch(*meta_iters): - """Return an iterator of metadata files by combining all the given iters + To save space, we can record a full list of only the most recent + metadata, using the normal rdiff-backup reverse increment + strategy. Instead of using librsync to compute diffs, though, we + use our own technique so that the diff files are still + hand-editable. - The iters should be given as a list/tuple in reverse chronological - order. The earliest rorp in each iter will supercede all the - later ones. + A mirror_metadata diff has the same format as a mirror_metadata + snapshot. If the record for an index is missing from the diff, it + indicates no change from the original. If it is present it + replaces the mirror_metadata entry, unless it has Type None, which + indicates the record should be deleted from the original. """ - for meta_tuple in rorpiter.CollateIterators(*meta_iters): - for i in range(len(meta_tuple)-1, -1, -1): - if meta_tuple[i]: - if meta_tuple[i].lstat(): yield meta_tuple[i] - break # move to next index - else: assert 0, "No valid rorps" + max_diff_chain = 9 # After this many diffs, make a new snapshot + + def get_diffiter(self, new_iter, old_iter): + """Iterate meta diffs of new_iter -> old_iter""" + for new_rorp, old_rorp in rorpiter.Collate2Iters(new_iter, old_iter): + if not old_rorp: yield rpath.RORPath(new_rorp.index) + elif not new_rorp or new_rorp.data != old_rorp.data: + # exact compare here, can't use == on rorps + yield old_rorp + + def sorted_meta_inclist(self, min_time = 0): + """Return list of mirror_metadata incs, reverse sorted by time""" + if not self.prefixmap.has_key('mirror_metadata'): return [] + sortlist = [(rp.getinctime(), rp) + for rp in self.prefixmap['mirror_metadata']] + sortlist.sort() + sortlist.reverse() + return [rp for (time, rp) in sortlist if time >= min_time] + + def check_needs_diff(self): + """Check if we should diff, returns (new, old) rps, or (None, None)""" + inclist = self.sorted_meta_inclist() + assert len(inclist) >= 1 + if len(inclist) == 1: return (None, None) + newrp, oldrp = inclist[:2] + assert newrp.getinctype() == oldrp.getinctype() == 'snapshot' + + chainlen = 1 + for rp in inclist[2:]: + if rp.getinctype() != 'diff': break + chainlen += 1 + if chainlen >= self.max_diff_chain: return (None, None) + return (newrp, oldrp) + + def ConvertMetaToDiff(self): + """Replace a mirror snapshot with a diff if it's appropriate""" + newrp, oldrp = self.check_needs_diff() + if not newrp: return + log.Log("Writing mirror_metadata diff", 6) + + diff_writer = self.get_meta_writer('diff', oldrp.getinctime()) + new_iter = MetadataFile(newrp, 'r').get_objects() + old_iter = MetadataFile(oldrp, 'r').get_objects() + for diff_rorp in self.get_diffiter(new_iter, old_iter): + diff_writer.write_object(diff_rorp) + diff_writer.close() # includes sync + oldrp.delete() -def Convert_diff(cur_time, old_time): - """Convert the metadata snapshot at old_time to diff format + def get_meta_at_time(self, time, restrict_index): + """Get metadata rorp iter, possibly by patching with diffs""" + meta_iters = [MetadataFile(rp, 'r').get_objects(restrict_index) + for rp in self.relevant_meta_incs(time)] + if not meta_iters: return None + if len(meta_iters) == 1: return meta_iters[0] + return self.iterate_patched_meta(meta_iters) + + def relevant_meta_incs(self, time): + """Return list [snapshotrp, diffrps ...] time sorted""" + inclist = self.sorted_meta_inclist(min_time = time) + if not inclist: return inclist + assert inclist[-1].getinctime() == time, inclist[-1] + for i in range(len(inclist)-1, -1, -1): + if inclist[i].getinctype() == 'snapshot': + return inclist[i:] + assert 0, "Inclist %s contains no snapshots" % (inclist,) + + def iterate_patched_meta(self, meta_iter_list): + """Return an iter of metadata rorps by combining the given iters + + The iters should be given as a list/tuple in reverse + chronological order. The earliest rorp in each iter will + supercede all the later ones. - The point is just to save space. The diff format is simple, just - include in the diff all of the older rorps that are different in - the two metadata rorps. + """ + for meta_tuple in rorpiter.CollateIterators(*meta_iter_list): + for i in range(len(meta_tuple)-1, -1, -1): + if meta_tuple[i]: + if meta_tuple[i].lstat(): yield meta_tuple[i] + break # move to next index + else: assert 0, "No valid rorps" - """ - rblist = [Globals.rbdir.append(filename) - for filename in robust.listrp(Globals.rbdir)] - cur_iter = MetadataFile.get_objects_at_time( - Globals.rbdir, cur_time, None, rblist) - old_iter = MetadataFile.get_objects_at_time( - Globals.rbdir, old_time, None, rblist) - assert cur_iter.type == old_iter.type == 'snapshot' - diff_file = MetadataFile.open_file(None, 1, 'diff', old_time) - - for cur_rorp, old_rorp in rorpiter.Collate2Iters(cur_iter, old_iter): - XXX +ManagerObj = None # Set this later to Manager instance +def SetManager(): + global ManagerObj + ManagerObj = PatchDiffMan() import eas_acls # put at bottom to avoid python circularity bug diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py index 8ab1d77..8079511 100644 --- a/rdiff-backup/rdiff_backup/restore.py +++ b/rdiff-backup/rdiff_backup/restore.py @@ -177,7 +177,7 @@ class MirrorStruct: """ if rest_time is None: rest_time = cls._rest_time - if not metadata.ManagerObj: metadata.SetManager() + metadata.SetManager() rorp_iter = metadata.ManagerObj.GetAtTime(rest_time, cls.mirror_base.index) if not rorp_iter: diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 753712f..7647c62 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -116,9 +116,9 @@ def copy_reg_file(rpin, rpout, compress = 0): try: if (rpout.conn is rpin.conn and rpout.conn is not Globals.local_connection): - rpout.conn.rpath.copy_reg_file(rpin.path, rpout.path, compress) + v = rpout.conn.rpath.copy_reg_file(rpin.path, rpout.path, compress) rpout.setdata() - return + return v except AttributeError: pass return rpout.write_from_fileobj(rpin.open("rb"), compress = compress) |