diff options
author | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2005-11-04 22:41:13 +0000 |
---|---|---|
committer | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2005-11-04 22:41:13 +0000 |
commit | 828d9e44d4417ca9ee3831919d6023492805b7a9 (patch) | |
tree | 773c464f98092b7fefc1b6b974f6e966f692f3ad /rdiff-backup/rdiff_backup/metadata.py | |
parent | 070e5c4080dac3de8e26a7d5d7314ceb36d32440 (diff) | |
download | rdiff-backup-828d9e44d4417ca9ee3831919d6023492805b7a9.tar.gz |
Added metadata diffing, and an iterfile hash bugfix
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@669 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup/metadata.py')
-rw-r--r-- | rdiff-backup/rdiff_backup/metadata.py | 156 |
1 files changed, 115 insertions, 41 deletions
```diff
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index 2ce5218..0d7ba4f 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -267,8 +267,10 @@ class FlatExtractor:
         """Yield all text records in order"""
         while 1:
             next_pos = self.get_next_pos()
+            if self.at_end:
+                if next_pos: yield self.buf[:next_pos]
+                break
             yield self.buf[:next_pos]
-            if self.at_end: break
             self.buf = self.buf[next_pos:]
         assert not self.fileobj.close()
 
@@ -428,16 +430,23 @@ class Manager:
     def __init__(self):
         """Set listing of rdiff-backup-data dir"""
         self.rplist = []
-        self.timerpmap = {}
+        self.timerpmap, self.prefixmap = {}, {}
         for filename in Globals.rbdir.listdir():
             rp = Globals.rbdir.append(filename)
-            if rp.isincfile():
-                self.rplist.append(rp)
-                time = rp.getinctime()
-                if self.timerpmap.has_key(time):
-                    self.timerpmap[time].append(rp)
-                else: self.timerpmap[time] = [rp]
+            if rp.isincfile(): self.add_incrp(rp)
 
+    def add_incrp(self, rp):
+        """Add rp to list of inc rps in the rbdir"""
+        self.rplist.append(rp)
+        time = rp.getinctime()
+        if self.timerpmap.has_key(time):
+            self.timerpmap[time].append(rp)
+        else: self.timerpmap[time] = [rp]
+
+        incbase = rp.getincbase_str()
+        if self.prefixmap.has_key(incbase): self.prefixmap[incbase].append(rp)
+        else: self.prefixmap[incbase] = [rp]
+
     def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
         """Used below to find the right kind of file by time"""
         if not self.timerpmap.has_key(time): return None
@@ -490,6 +499,8 @@ class Manager:
         filename = '%s.%s.%s.gz' % (prefix, timestr, typestr)
         rp = Globals.rbdir.append(filename)
         assert not rp.lstat(), "File %s already exists!" % (rp.path,)
+        assert rp.isincfile()
+        self.add_incrp(rp)
         return flatfileclass(rp, 'w')
 
     def get_meta_writer(self, typestr, time):
@@ -514,49 +525,112 @@ class Manager:
             return metawriter # no need for a CombinedWriter
 
         if Globals.eas_active: ea_writer = self.get_ea_writer(typestr, time)
+        else: ea_writer = None
         if Globals.acls_active: acl_writer = self.get_acl_writer(typestr, time)
+        else: acl_writer = None
         return CombinedWriter(metawriter, ea_writer, acl_writer)
 
-ManagerObj = None # Set this later to Manager instance
-def SetManager():
-    global ManagerObj
-    ManagerObj = Manager()
-
+class PatchDiffMan(Manager):
+    """Contains functions for patching and diffing metadata
 
-def patch(*meta_iters):
-    """Return an iterator of metadata files by combining all the given iters
+    To save space, we can record a full list of only the most recent
+    metadata, using the normal rdiff-backup reverse increment
+    strategy. Instead of using librsync to compute diffs, though, we
+    use our own technique so that the diff files are still
+    hand-editable.
 
-    The iters should be given as a list/tuple in reverse chronological
-    order. The earliest rorp in each iter will supercede all the
-    later ones.
+    A mirror_metadata diff has the same format as a mirror_metadata
+    snapshot. If the record for an index is missing from the diff, it
+    indicates no change from the original. If it is present it
+    replaces the mirror_metadata entry, unless it has Type None, which
+    indicates the record should be deleted from the original.
 
     """
-    for meta_tuple in rorpiter.CollateIterators(*meta_iters):
-        for i in range(len(meta_tuple)-1, -1, -1):
-            if meta_tuple[i]:
-                if meta_tuple[i].lstat(): yield meta_tuple[i]
-                break # move to next index
-        else: assert 0, "No valid rorps"
+    max_diff_chain = 9 # After this many diffs, make a new snapshot
+
+    def get_diffiter(self, new_iter, old_iter):
+        """Iterate meta diffs of new_iter -> old_iter"""
+        for new_rorp, old_rorp in rorpiter.Collate2Iters(new_iter, old_iter):
+            if not old_rorp: yield rpath.RORPath(new_rorp.index)
+            elif not new_rorp or new_rorp.data != old_rorp.data:
+                # exact compare here, can't use == on rorps
+                yield old_rorp
+
+    def sorted_meta_inclist(self, min_time = 0):
+        """Return list of mirror_metadata incs, reverse sorted by time"""
+        if not self.prefixmap.has_key('mirror_metadata'): return []
+        sortlist = [(rp.getinctime(), rp)
+                    for rp in self.prefixmap['mirror_metadata']]
+        sortlist.sort()
+        sortlist.reverse()
+        return [rp for (time, rp) in sortlist if time >= min_time]
+
+    def check_needs_diff(self):
+        """Check if we should diff, returns (new, old) rps, or (None, None)"""
+        inclist = self.sorted_meta_inclist()
+        assert len(inclist) >= 1
+        if len(inclist) == 1: return (None, None)
+        newrp, oldrp = inclist[:2]
+        assert newrp.getinctype() == oldrp.getinctype() == 'snapshot'
+
+        chainlen = 1
+        for rp in inclist[2:]:
+            if rp.getinctype() != 'diff': break
+            chainlen += 1
+        if chainlen >= self.max_diff_chain: return (None, None)
+        return (newrp, oldrp)
+
+    def ConvertMetaToDiff(self):
+        """Replace a mirror snapshot with a diff if it's appropriate"""
+        newrp, oldrp = self.check_needs_diff()
+        if not newrp: return
+        log.Log("Writing mirror_metadata diff", 6)
+
+        diff_writer = self.get_meta_writer('diff', oldrp.getinctime())
+        new_iter = MetadataFile(newrp, 'r').get_objects()
+        old_iter = MetadataFile(oldrp, 'r').get_objects()
+        for diff_rorp in self.get_diffiter(new_iter, old_iter):
+            diff_writer.write_object(diff_rorp)
+        diff_writer.close() # includes sync
+        oldrp.delete()
 
-def Convert_diff(cur_time, old_time):
-    """Convert the metadata snapshot at old_time to diff format
+    def get_meta_at_time(self, time, restrict_index):
+        """Get metadata rorp iter, possibly by patching with diffs"""
+        meta_iters = [MetadataFile(rp, 'r').get_objects(restrict_index)
+                      for rp in self.relevant_meta_incs(time)]
+        if not meta_iters: return None
+        if len(meta_iters) == 1: return meta_iters[0]
+        return self.iterate_patched_meta(meta_iters)
+
+    def relevant_meta_incs(self, time):
+        """Return list [snapshotrp, diffrps ...] time sorted"""
+        inclist = self.sorted_meta_inclist(min_time = time)
+        if not inclist: return inclist
+        assert inclist[-1].getinctime() == time, inclist[-1]
+        for i in range(len(inclist)-1, -1, -1):
+            if inclist[i].getinctype() == 'snapshot':
+                return inclist[i:]
+        assert 0, "Inclist %s contains no snapshots" % (inclist,)
+
+    def iterate_patched_meta(self, meta_iter_list):
+        """Return an iter of metadata rorps by combining the given iters
+
+        The iters should be given as a list/tuple in reverse
+        chronological order. The earliest rorp in each iter will
+        supercede all the later ones.
 
-    The point is just to save space. The diff format is simple, just
-    include in the diff all of the older rorps that are different in
-    the two metadata rorps.
+        """
+        for meta_tuple in rorpiter.CollateIterators(*meta_iter_list):
+            for i in range(len(meta_tuple)-1, -1, -1):
+                if meta_tuple[i]:
+                    if meta_tuple[i].lstat(): yield meta_tuple[i]
+                    break # move to next index
+            else: assert 0, "No valid rorps"
 
-    """
-    rblist = [Globals.rbdir.append(filename)
-              for filename in robust.listrp(Globals.rbdir)]
-    cur_iter = MetadataFile.get_objects_at_time(
-        Globals.rbdir, cur_time, None, rblist)
-    old_iter = MetadataFile.get_objects_at_time(
-        Globals.rbdir, old_time, None, rblist)
-    assert cur_iter.type == old_iter.type == 'snapshot'
-    diff_file = MetadataFile.open_file(None, 1, 'diff', old_time)
-
-    for cur_rorp, old_rorp in rorpiter.Collate2Iters(cur_iter, old_iter):
-        XXX
+ManagerObj = None # Set this later to Manager instance
+def SetManager():
+    global ManagerObj
+    ManagerObj = PatchDiffMan()
 
 
 import eas_acls # put at bottom to avoid python circularity bug
```