diff options
author | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2005-10-27 06:16:39 +0000 |
---|---|---|
committer | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2005-10-27 06:16:39 +0000 |
commit | d9b68d73175d004caed8c781c97308f7c2e3dccc (patch) | |
tree | 7be1e3b4c7a23324d6d4ef0ed5483b890ee58210 /rdiff-backup/rdiff_backup | |
parent | 80470345fa1998a033078314f77930a60ea14107 (diff) | |
download | rdiff-backup-d9b68d73175d004caed8c781c97308f7c2e3dccc.tar.gz |
Write SHA1 digests for all regular files
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@662 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup')
-rw-r--r-- | rdiff-backup/rdiff_backup/Hardlink.py | 13 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/Rdiff.py | 19 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/backup.py | 83 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/hash.py | 53 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/librsync.py | 12 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/metadata.py | 5 | ||||
-rw-r--r-- | rdiff-backup/rdiff_backup/rpath.py | 30 |
7 files changed, 178 insertions, 37 deletions
diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py index 4bfd2ee..855c512 100644 --- a/rdiff-backup/rdiff_backup/Hardlink.py +++ b/rdiff-backup/rdiff_backup/Hardlink.py @@ -1,4 +1,4 @@ -# Copyright 2002 Ben Escoto +# Copyright 2002 2005 Ben Escoto # # This file is part of rdiff-backup. # @@ -58,13 +58,14 @@ def get_inode_key(rorp): def add_rorp(rorp, dest_rorp = None): """Process new rorp and update hard link dictionaries""" - if not rorp.isreg() or rorp.getnumlinks() < 2: return + if not rorp.isreg() or rorp.getnumlinks() < 2: return None rp_inode_key = get_inode_key(rorp) if not _inode_index.has_key(rp_inode_key): if not dest_rorp: dest_key = None elif dest_rorp.getnumlinks() == 1: dest_key = "NA" else: dest_key = get_inode_key(dest_rorp) _inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key) + return rp_inode_key def del_rorp(rorp): """Remove rorp information from dictionary if seen all links""" @@ -73,8 +74,12 @@ def del_rorp(rorp): val = _inode_index.get(rp_inode_key) if not val: return index, remaining, dest_key = val - if remaining == 1: del _inode_index[rp_inode_key] - else: _inode_index[rp_inode_key] = (index, remaining-1, dest_key) + if remaining == 1: + del _inode_index[rp_inode_key] + return 1 + else: + _inode_index[rp_inode_key] = (index, remaining-1, dest_key) + return 0 def rorp_eq(src_rorp, dest_rorp): """Compare hardlinked for equality diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py index 5428e19..f183141 100644 --- a/rdiff-backup/rdiff_backup/Rdiff.py +++ b/rdiff-backup/rdiff_backup/Rdiff.py @@ -1,4 +1,4 @@ -# Copyright 2002 Ben Escoto +# Copyright 2002 2005 Ben Escoto # # This file is part of rdiff-backup. # @@ -20,7 +20,7 @@ """Invoke rdiff utility to make signatures, deltas, or patch""" import os, librsync -import Globals, log, static, TempFile, rpath +import Globals, log, static, TempFile, rpath, hash def get_signature(rp, blocksize = None): @@ -53,6 +53,14 @@ def get_delta_sigrp(rp_signature, rp_new): (rp_new.path, rp_signature.get_indexpath()), 7) return librsync.DeltaFile(rp_signature.open("rb"), rp_new.open("rb")) +def get_delta_sigrp_hash(rp_signature, rp_new): + """Like above but also calculate hash of new as close() value""" + log.Log("Getting delta with hash of %s with signature %s" % + (rp_new.path, rp_signature.get_indexpath()), 7) + return librsync.DeltaFile(rp_signature.open("rb"), + hash.FileWrapper(rp_new.open("rb"))) + + def write_delta(basis, new, delta, compress = None): """Write rdiff delta which brings basis to new""" log.Log("Writing delta %s from %s -> %s" % @@ -68,8 +76,9 @@ def write_patched_fp(basis_fp, delta_fp, out_fp): def write_via_tempfile(fp, rp): """Write fileobj fp to rp by writing to tempfile and renaming""" tf = TempFile.new(rp) - tf.write_from_fileobj(fp) + retval = tf.write_from_fileobj(fp) rpath.rename(tf, rp) + return retval def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None): """Patch routine that must be run locally, writes to outrp @@ -83,8 +92,8 @@ def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None): if delta_compressed: deltafile = rp_delta.open("rb", 1) else: deltafile = rp_delta.open("rb") patchfile = librsync.PatchedFile(rp_basis.open("rb"), deltafile) - if outrp: outrp.write_from_fileobj(patchfile) - else: write_via_tempfile(patchfile, rp_basis) + if outrp: return outrp.write_from_fileobj(patchfile) + else: return write_via_tempfile(patchfile, rp_basis) def copy_local(rpin, rpout, rpnew = None): """Write rpnew == rpin using rpout as basis. rpout and rpnew local""" diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py index 3ee760e..0854371 100644 --- a/rdiff-backup/rdiff_backup/backup.py +++ b/rdiff-backup/rdiff_backup/backup.py @@ -23,7 +23,7 @@ from __future__ import generators import errno import Globals, metadata, rorpiter, TempFile, Hardlink, robust, increment, \ rpath, static, log, selection, Time, Rdiff, statistics, iterfile, \ - eas_acls + eas_acls, hash def Mirror(src_rpath, dest_rpath): """Turn dest_rpath into a copy of src_rpath""" @@ -85,14 +85,14 @@ class SourceStruct: """Attach file of snapshot to diff_rorp, w/ error checking""" fileobj = robust.check_common_error( error_handler, rpath.RPath.open, (src_rp, "rb")) - if fileobj: diff_rorp.setfile(fileobj) + if fileobj: diff_rorp.setfile(hash.FileWrapper(fileobj)) else: diff_rorp.zero() diff_rorp.set_attached_filetype('snapshot') def attach_diff(diff_rorp, src_rp, dest_sig): """Attach file of diff to diff_rorp, w/ error checking""" fileobj = robust.check_common_error( - error_handler, Rdiff.get_delta_sigrp, (dest_sig, src_rp)) + error_handler, Rdiff.get_delta_sigrp_hash, (dest_sig, src_rp)) if fileobj: diff_rorp.setfile(fileobj) diff_rorp.set_attached_filetype('diff') @@ -255,6 +255,9 @@ class CacheCollatedPostProcess: we enter them to computer signatures, and then reset after we are done patching everything inside them. + 4. We need some place to put hashes (like SHA1) after computing + them and before writing them to the metadata. + The class caches older source_rorps and dest_rps so the patch function can retrieve them if necessary. The patch function can also update the processed correctly flag. When an item falls out @@ -294,6 +297,11 @@ class CacheCollatedPostProcess: # after we're finished with them self.dir_perms_list = [] + # A dictionary of {index: source_rorp}. We use this to + # hold the digest of a hard linked file so it only needs to be + # computed once. + self.inode_digest_dict = {} + def __iter__(self): return self def next(self): @@ -316,7 +324,8 @@ class CacheCollatedPostProcess: """ if Globals.preserve_hardlinks and source_rorp: - Hardlink.add_rorp(source_rorp, dest_rorp) + if Hardlink.add_rorp(source_rorp, dest_rorp): + self.inode_digest_dict[source_rorp.index] = source_rorp if (dest_rorp and dest_rorp.isdir() and Globals.process_uid != 0 and dest_rorp.getperms() % 01000 < 0700): self.unreadable_dir_init(source_rorp, dest_rorp) @@ -359,7 +368,8 @@ class CacheCollatedPostProcess: """ if Globals.preserve_hardlinks and source_rorp: - Hardlink.del_rorp(source_rorp) + if Hardlink.del_rorp(source_rorp): + del self.inode_digest_dict[source_rorp.index] if not changed or success: if source_rorp: self.statfileobj.add_source_file(source_rorp) @@ -424,6 +434,17 @@ class CacheCollatedPostProcess: """Retrieve mirror_rorp with given index from cache""" return self.cache_dict[index][1] + def update_hash(self, index, sha1sum): + """Update the source rorp's SHA1 hash""" + self.get_source_rorp(index).set_sha1(sha1sum) + + def update_hardlink_hash(self, diff_rorp): + """Tag associated source_rorp with same hash diff_rorp points to""" + orig_rorp = self.inode_digest_dict[diff_rorp.get_link_flag()] + if orig_rorp.has_sha1(): + new_source_rorp = self.get_source_rorp(diff_rorp.index) + new_source_rorp.set_sha1(orig_rorp.get_sha1()) + def close(self): """Process the remaining elements in the cache""" while self.cache_indicies: self.shorten_cache() @@ -486,24 +507,52 @@ class PatchITRB(rorpiter.ITRBranch): if tf.lstat(): tf.delete() def patch_to_temp(self, basis_rp, diff_rorp, new): - """Patch basis_rp, writing output in new, which doesn't exist yet""" + """Patch basis_rp, writing output in new, which doesn't exist yet + + Returns true if able to write new as desired, false if + UpdateError or similar gets in the way. + + """ if diff_rorp.isflaglinked(): - Hardlink.link_rp(diff_rorp, new, self.basis_root_rp) + self.patch_hardlink_to_temp(diff_rorp, new) elif diff_rorp.get_attached_filetype() == 'snapshot': - if diff_rorp.isspecial(): - self.write_special(diff_rorp, new) - rpath.copy_attribs(diff_rorp, new) - return 1 - elif robust.check_common_error(self.error_handler, rpath.copy, - (diff_rorp, new)) == 0: return 0 - else: - assert diff_rorp.get_attached_filetype() == 'diff' - if robust.check_common_error(self.error_handler, - Rdiff.patch_local, (basis_rp, diff_rorp, new)) == 0: return 0 + if not self.patch_snapshot_to_temp(diff_rorp, new): + return 0 + elif not self.patch_diff_to_temp(basis_rp, diff_rorp, new): + return 0 if new.lstat() and not diff_rorp.isflaglinked(): rpath.copy_attribs(diff_rorp, new) return self.matches_cached_rorp(diff_rorp, new) + def patch_hardlink_to_temp(self, diff_rorp, new): + """Hardlink diff_rorp to temp, update hash if necessary""" + Hardlink.link_rp(diff_rorp, new, self.basis_root_rp) + self.CCPP.update_hardlink_hash(diff_rorp) + + def patch_snapshot_to_temp(self, diff_rorp, new): + """Write diff_rorp to new, return true if successful""" + if diff_rorp.isspecial(): + self.write_special(diff_rorp, new) + rpath.copy_attribs(diff_rorp, new) + return 1 + + report = robust.check_common_error(self.error_handler, rpath.copy, + (diff_rorp, new)) + if isinstance(report, hash.Report): + self.CCPP.update_hash(diff_rorp.index, report.sha1_digest) + return 1 + return report != 0 # if == 0, error_handler caught something + + def patch_diff_to_temp(self, basis_rp, diff_rorp, new): + """Apply diff_rorp to basis_rp, write output in new""" + assert diff_rorp.get_attached_filetype() == 'diff' + report = robust.check_common_error(self.error_handler, + Rdiff.patch_local, (basis_rp, diff_rorp, new)) + if isinstance(report, hash.Report): + self.CCPP.update_hash(diff_rorp.index, report.sha1_digest) + return 1 + return report != 0 # if report == 0, error + def matches_cached_rorp(self, diff_rorp, new_rp): """Return true if new_rp matches cached src rorp diff --git a/rdiff-backup/rdiff_backup/hash.py b/rdiff-backup/rdiff_backup/hash.py new file mode 100644 index 0000000..4fcbdab --- /dev/null +++ b/rdiff-backup/rdiff_backup/hash.py @@ -0,0 +1,53 @@ +# Copyright 2005 Ben Escoto +# +# This file is part of rdiff-backup. +# +# rdiff-backup is free software; you can redistribute it and/or modify +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# rdiff-backup is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with rdiff-backup; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA + +"""Contains a file wrapper that returns a hash on close""" + +import sha + +class FileWrapper: + """Wrapper around a file-like object + + Only use this with files that will be read through in a single + pass and then closed. (There is no seek().) When you close it, + return value will be a Report. + + Currently this just calculates a sha1sum of the datastream. + + """ + def __init__(self, fileobj): + self.fileobj = fileobj + self.sha1 = sha.new() + self.closed = 0 + + def read(self, length = -1): + assert not self.closed + buf = self.fileobj.read(length) + self.sha1.update(buf) + return buf + + def close(self): + return Report(self.fileobj.close(), self.sha1.hexdigest()) + + +class Report: + """Hold final information about a byte stream""" + def __init__(self, close_val, sha1_digest): + assert not close_val # For now just assume inner file closes correctly + self.sha1_digest = sha1_digest diff --git a/rdiff-backup/rdiff_backup/librsync.py b/rdiff-backup/rdiff_backup/librsync.py index a8e37ca..d71e475 100644 --- a/rdiff-backup/rdiff_backup/librsync.py +++ b/rdiff-backup/rdiff_backup/librsync.py @@ -1,4 +1,4 @@ -# Copyright 2002 Ben Escoto +# Copyright 2002 2005 Ben Escoto # # This file is part of rdiff-backup. # @@ -24,7 +24,8 @@ which is written in C. The goal was to use C as little as possible... """ -import _librsync, types, array +import types, array +import _librsync blocksize = _librsync.RS_JOB_BLOCKSIZE @@ -95,15 +96,16 @@ class LikeFile: new_in = self.infile.read(blocksize) if not new_in: self.infile_eof = 1 - assert not self.infile.close() + self.infile_closeval = self.infile.close() self.infile_closed = 1 break self.inbuf += new_in def close(self): - """Close infile""" - if not self.infile_closed: assert not self.infile.close() + """Close infile and pass on infile close value""" self.closed = 1 + if self.infile_closed: return self.infile_closeval + else: return self.infile.close() class SigFile(LikeFile): diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index 8b261a7..c8e6579 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -119,6 +119,10 @@ def RORP2Record(rorpath): str_list.append(" Inode %s\n" % rorpath.getinode()) str_list.append(" DeviceLoc %s\n" % rorpath.getdevloc()) + # Save any hashes, if available + if rorpath.has_sha1(): + str_list.append(' SHA1Digest %s\n' % rorpath.get_sha1()) + elif type == "None": return "".join(str_list) elif type == "dir" or type == "sock" or type == "fifo": pass elif type == "sym": @@ -166,6 +170,7 @@ def Record2RORP(record_string): elif field == "CarbonFile": if data == "None": data_dict['carbonfile'] = None else: data_dict['carbonfile'] = string2carbonfile(data) + elif field == "SHA1Digest": data_dict['sha1'] = data elif field == "NumHardLinks": data_dict['nlink'] = int(data) elif field == "Inode": data_dict['inode'] = long(data) elif field == "DeviceLoc": data_dict['devloc'] = long(data) diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 3914d58..dac4899 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -82,7 +82,12 @@ def move(rpin, rpout): rpin.delete() def copy(rpin, rpout, compress = 0): - """Copy RPath rpin to rpout. Works for symlinks, dirs, etc.""" + """Copy RPath rpin to rpout. Works for symlinks, dirs, etc. + + Returns close value of input for regular file, which can be used + to pass hashes on. + + """ log.Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6) if not rpin.lstat(): if rpout.lstat(): rpout.delete() @@ -93,7 +98,7 @@ def copy(rpin, rpout, compress = 0): rpout.delete() # easier to write than compare else: return - if rpin.isreg(): copy_reg_file(rpin, rpout, compress) + if rpin.isreg(): return copy_reg_file(rpin, rpout, compress) elif rpin.isdir(): rpout.mkdir() elif rpin.issym(): rpout.symlink(rpin.readlink()) elif rpin.ischardev(): @@ -115,7 +120,7 @@ def copy_reg_file(rpin, rpout, compress = 0): rpout.setdata() return except AttributeError: pass - rpout.write_from_fileobj(rpin.open("rb"), compress = compress) + return rpout.write_from_fileobj(rpin.open("rb"), compress = compress) def cmp(rpin, rpout): """True if rpin has the same data as rpout @@ -349,6 +354,7 @@ class RORPath: elif key == 'carbonfile' and not Globals.carbonfile_write: pass elif key == 'resourcefork' and not Globals.resource_forks_write: pass + elif key == 'sha1': pass # one or other may not have set elif (not other.data.has_key(key) or self.data[key] != other.data[key]): return 0 @@ -646,6 +652,18 @@ class RORPath: """Record resource fork in dictionary. Does not write""" self.data['resourcefork'] = rfork + def has_sha1(self): + """True iff self has its sha1 digest set""" + return self.data.has_key('sha1') + + def get_sha1(self): + """Return sha1 digest. Causes exception unless set_sha1 first""" + return self.data['sha1'] + + def set_sha1(self, digest): + """Set sha1 hash (should be in hexdecimal)""" + self.data['sha1'] = digest + class RPath(RORPath): """Remote Path class - wrapper around a possibly non-local pathname @@ -978,16 +996,16 @@ class RPath(RORPath): """Reads fp and writes to self.path. Closes both when done If compress is true, fp will be gzip compressed before being - written to self. + written to self. Returns closing value of fp. """ log.Log("Writing file object to " + self.path, 7) assert not self.lstat(), "File %s already exists" % self.path outfp = self.open("wb", compress = compress) copyfileobj(fp, outfp) - if fp.close() or outfp.close(): - raise RPathException("Error closing file") + if outfp.close(): raise RPathException("Error closing file") self.setdata() + return fp.close() def write_string(self, s, compress = None): """Write string s into rpath""" |