summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2005-10-27 06:16:39 +0000
committer bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2005-10-27 06:16:39 +0000
commit d9b68d73175d004caed8c781c97308f7c2e3dccc (patch)
tree 7be1e3b4c7a23324d6d4ef0ed5483b890ee58210
parent 80470345fa1998a033078314f77930a60ea14107 (diff)
download rdiff-backup-d9b68d73175d004caed8c781c97308f7c2e3dccc.tar.gz
Write SHA1 digests for all regular files
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@662 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r-- rdiff-backup/CHANGELOG 3
-rw-r--r-- rdiff-backup/rdiff_backup/Hardlink.py 13
-rw-r--r-- rdiff-backup/rdiff_backup/Rdiff.py 19
-rw-r--r-- rdiff-backup/rdiff_backup/backup.py 83
-rw-r--r-- rdiff-backup/rdiff_backup/hash.py 53
-rw-r--r-- rdiff-backup/rdiff_backup/librsync.py 12
-rw-r--r-- rdiff-backup/rdiff_backup/metadata.py 5
-rw-r--r-- rdiff-backup/rdiff_backup/rpath.py 30
-rw-r--r-- rdiff-backup/testing/hashtest.py 2
9 files changed, 182 insertions, 38 deletions
diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG
index f32f156..a0e1fa5 100644
--- a/rdiff-backup/CHANGELOG
+++ b/rdiff-backup/CHANGELOG
@@ -1,6 +1,9 @@
New in v1.1.1 (????/??/??)
--------------------------
+rdiff-backup now writes SHA1 sums into its mirror_metadata file for
+all regular files.
+
Applied Alec Berryman's patch to update the no-compression regexp.
Alec Berryman's fs_abilities patch is supposed to help with AFS.
diff --git a/rdiff-backup/rdiff_backup/Hardlink.py b/rdiff-backup/rdiff_backup/Hardlink.py
index 4bfd2ee..855c512 100644
--- a/rdiff-backup/rdiff_backup/Hardlink.py
+++ b/rdiff-backup/rdiff_backup/Hardlink.py
@@ -1,4 +1,4 @@
-# Copyright 2002 Ben Escoto
+# Copyright 2002 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
@@ -58,13 +58,14 @@ def get_inode_key(rorp):
def add_rorp(rorp, dest_rorp = None):
"""Process new rorp and update hard link dictionaries"""
- if not rorp.isreg() or rorp.getnumlinks() < 2: return
+ if not rorp.isreg() or rorp.getnumlinks() < 2: return None
rp_inode_key = get_inode_key(rorp)
if not _inode_index.has_key(rp_inode_key):
if not dest_rorp: dest_key = None
elif dest_rorp.getnumlinks() == 1: dest_key = "NA"
else: dest_key = get_inode_key(dest_rorp)
_inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key)
+ return rp_inode_key
def del_rorp(rorp):
"""Remove rorp information from dictionary if seen all links"""
@@ -73,8 +74,12 @@ def del_rorp(rorp):
val = _inode_index.get(rp_inode_key)
if not val: return
index, remaining, dest_key = val
- if remaining == 1: del _inode_index[rp_inode_key]
- else: _inode_index[rp_inode_key] = (index, remaining-1, dest_key)
+ if remaining == 1:
+ del _inode_index[rp_inode_key]
+ return 1
+ else:
+ _inode_index[rp_inode_key] = (index, remaining-1, dest_key)
+ return 0
def rorp_eq(src_rorp, dest_rorp):
"""Compare hardlinked for equality
diff --git a/rdiff-backup/rdiff_backup/Rdiff.py b/rdiff-backup/rdiff_backup/Rdiff.py
index 5428e19..f183141 100644
--- a/rdiff-backup/rdiff_backup/Rdiff.py
+++ b/rdiff-backup/rdiff_backup/Rdiff.py
@@ -1,4 +1,4 @@
-# Copyright 2002 Ben Escoto
+# Copyright 2002 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
@@ -20,7 +20,7 @@
"""Invoke rdiff utility to make signatures, deltas, or patch"""
import os, librsync
-import Globals, log, static, TempFile, rpath
+import Globals, log, static, TempFile, rpath, hash
def get_signature(rp, blocksize = None):
@@ -53,6 +53,14 @@ def get_delta_sigrp(rp_signature, rp_new):
(rp_new.path, rp_signature.get_indexpath()), 7)
return librsync.DeltaFile(rp_signature.open("rb"), rp_new.open("rb"))
+def get_delta_sigrp_hash(rp_signature, rp_new):
+ """Like above but also calculate hash of new as close() value"""
+ log.Log("Getting delta with hash of %s with signature %s" %
+ (rp_new.path, rp_signature.get_indexpath()), 7)
+ return librsync.DeltaFile(rp_signature.open("rb"),
+ hash.FileWrapper(rp_new.open("rb")))
+
+
def write_delta(basis, new, delta, compress = None):
"""Write rdiff delta which brings basis to new"""
log.Log("Writing delta %s from %s -> %s" %
@@ -68,8 +76,9 @@ def write_patched_fp(basis_fp, delta_fp, out_fp):
def write_via_tempfile(fp, rp):
"""Write fileobj fp to rp by writing to tempfile and renaming"""
tf = TempFile.new(rp)
- tf.write_from_fileobj(fp)
+ retval = tf.write_from_fileobj(fp)
rpath.rename(tf, rp)
+ return retval
def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None):
"""Patch routine that must be run locally, writes to outrp
@@ -83,8 +92,8 @@ def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None):
if delta_compressed: deltafile = rp_delta.open("rb", 1)
else: deltafile = rp_delta.open("rb")
patchfile = librsync.PatchedFile(rp_basis.open("rb"), deltafile)
- if outrp: outrp.write_from_fileobj(patchfile)
- else: write_via_tempfile(patchfile, rp_basis)
+ if outrp: return outrp.write_from_fileobj(patchfile)
+ else: return write_via_tempfile(patchfile, rp_basis)
def copy_local(rpin, rpout, rpnew = None):
"""Write rpnew == rpin using rpout as basis. rpout and rpnew local"""
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index 3ee760e..0854371 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -23,7 +23,7 @@ from __future__ import generators
import errno
import Globals, metadata, rorpiter, TempFile, Hardlink, robust, increment, \
rpath, static, log, selection, Time, Rdiff, statistics, iterfile, \
- eas_acls
+ eas_acls, hash
def Mirror(src_rpath, dest_rpath):
"""Turn dest_rpath into a copy of src_rpath"""
@@ -85,14 +85,14 @@ class SourceStruct:
"""Attach file of snapshot to diff_rorp, w/ error checking"""
fileobj = robust.check_common_error(
error_handler, rpath.RPath.open, (src_rp, "rb"))
- if fileobj: diff_rorp.setfile(fileobj)
+ if fileobj: diff_rorp.setfile(hash.FileWrapper(fileobj))
else: diff_rorp.zero()
diff_rorp.set_attached_filetype('snapshot')
def attach_diff(diff_rorp, src_rp, dest_sig):
"""Attach file of diff to diff_rorp, w/ error checking"""
fileobj = robust.check_common_error(
- error_handler, Rdiff.get_delta_sigrp, (dest_sig, src_rp))
+ error_handler, Rdiff.get_delta_sigrp_hash, (dest_sig, src_rp))
if fileobj:
diff_rorp.setfile(fileobj)
diff_rorp.set_attached_filetype('diff')
@@ -255,6 +255,9 @@ class CacheCollatedPostProcess:
we enter them to computer signatures, and then reset after we
are done patching everything inside them.
+ 4. We need some place to put hashes (like SHA1) after computing
+ them and before writing them to the metadata.
+
The class caches older source_rorps and dest_rps so the patch
function can retrieve them if necessary. The patch function can
also update the processed correctly flag. When an item falls out
@@ -294,6 +297,11 @@ class CacheCollatedPostProcess:
# after we're finished with them
self.dir_perms_list = []
+ # A dictionary of {index: source_rorp}. We use this to
+ # hold the digest of a hard linked file so it only needs to be
+ # computed once.
+ self.inode_digest_dict = {}
+
def __iter__(self): return self
def next(self):
@@ -316,7 +324,8 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
- Hardlink.add_rorp(source_rorp, dest_rorp)
+ if Hardlink.add_rorp(source_rorp, dest_rorp):
+ self.inode_digest_dict[source_rorp.index] = source_rorp
if (dest_rorp and dest_rorp.isdir() and Globals.process_uid != 0
and dest_rorp.getperms() % 01000 < 0700):
self.unreadable_dir_init(source_rorp, dest_rorp)
@@ -359,7 +368,8 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
- Hardlink.del_rorp(source_rorp)
+ if Hardlink.del_rorp(source_rorp):
+ del self.inode_digest_dict[source_rorp.index]
if not changed or success:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
@@ -424,6 +434,17 @@ class CacheCollatedPostProcess:
"""Retrieve mirror_rorp with given index from cache"""
return self.cache_dict[index][1]
+ def update_hash(self, index, sha1sum):
+ """Update the source rorp's SHA1 hash"""
+ self.get_source_rorp(index).set_sha1(sha1sum)
+
+ def update_hardlink_hash(self, diff_rorp):
+ """Tag associated source_rorp with same hash diff_rorp points to"""
+ orig_rorp = self.inode_digest_dict[diff_rorp.get_link_flag()]
+ if orig_rorp.has_sha1():
+ new_source_rorp = self.get_source_rorp(diff_rorp.index)
+ new_source_rorp.set_sha1(orig_rorp.get_sha1())
+
def close(self):
"""Process the remaining elements in the cache"""
while self.cache_indicies: self.shorten_cache()
@@ -486,24 +507,52 @@ class PatchITRB(rorpiter.ITRBranch):
if tf.lstat(): tf.delete()
def patch_to_temp(self, basis_rp, diff_rorp, new):
- """Patch basis_rp, writing output in new, which doesn't exist yet"""
+ """Patch basis_rp, writing output in new, which doesn't exist yet
+
+ Returns true if able to write new as desired, false if
+ UpdateError or similar gets in the way.
+
+ """
if diff_rorp.isflaglinked():
- Hardlink.link_rp(diff_rorp, new, self.basis_root_rp)
+ self.patch_hardlink_to_temp(diff_rorp, new)
elif diff_rorp.get_attached_filetype() == 'snapshot':
- if diff_rorp.isspecial():
- self.write_special(diff_rorp, new)
- rpath.copy_attribs(diff_rorp, new)
- return 1
- elif robust.check_common_error(self.error_handler, rpath.copy,
- (diff_rorp, new)) == 0: return 0
- else:
- assert diff_rorp.get_attached_filetype() == 'diff'
- if robust.check_common_error(self.error_handler,
- Rdiff.patch_local, (basis_rp, diff_rorp, new)) == 0: return 0
+ if not self.patch_snapshot_to_temp(diff_rorp, new):
+ return 0
+ elif not self.patch_diff_to_temp(basis_rp, diff_rorp, new):
+ return 0
if new.lstat() and not diff_rorp.isflaglinked():
rpath.copy_attribs(diff_rorp, new)
return self.matches_cached_rorp(diff_rorp, new)
+ def patch_hardlink_to_temp(self, diff_rorp, new):
+ """Hardlink diff_rorp to temp, update hash if necessary"""
+ Hardlink.link_rp(diff_rorp, new, self.basis_root_rp)
+ self.CCPP.update_hardlink_hash(diff_rorp)
+
+ def patch_snapshot_to_temp(self, diff_rorp, new):
+ """Write diff_rorp to new, return true if successful"""
+ if diff_rorp.isspecial():
+ self.write_special(diff_rorp, new)
+ rpath.copy_attribs(diff_rorp, new)
+ return 1
+
+ report = robust.check_common_error(self.error_handler, rpath.copy,
+ (diff_rorp, new))
+ if isinstance(report, hash.Report):
+ self.CCPP.update_hash(diff_rorp.index, report.sha1_digest)
+ return 1
+ return report != 0 # if == 0, error_handler caught something
+
+ def patch_diff_to_temp(self, basis_rp, diff_rorp, new):
+ """Apply diff_rorp to basis_rp, write output in new"""
+ assert diff_rorp.get_attached_filetype() == 'diff'
+ report = robust.check_common_error(self.error_handler,
+ Rdiff.patch_local, (basis_rp, diff_rorp, new))
+ if isinstance(report, hash.Report):
+ self.CCPP.update_hash(diff_rorp.index, report.sha1_digest)
+ return 1
+ return report != 0 # if report == 0, error
+
def matches_cached_rorp(self, diff_rorp, new_rp):
"""Return true if new_rp matches cached src rorp
diff --git a/rdiff-backup/rdiff_backup/hash.py b/rdiff-backup/rdiff_backup/hash.py
new file mode 100644
index 0000000..4fcbdab
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/hash.py
@@ -0,0 +1,53 @@
+# Copyright 2005 Ben Escoto
+#
+# This file is part of rdiff-backup.
+#
+# rdiff-backup is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# rdiff-backup is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with rdiff-backup; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+"""Contains a file wrapper that returns a hash on close"""
+
+import sha
+
+class FileWrapper:
+ """Wrapper around a file-like object
+
+ Only use this with files that will be read through in a single
+ pass and then closed. (There is no seek().) When you close it,
+ return value will be a Report.
+
+ Currently this just calculates a sha1sum of the datastream.
+
+ """
+ def __init__(self, fileobj):
+ self.fileobj = fileobj
+ self.sha1 = sha.new()
+ self.closed = 0
+
+ def read(self, length = -1):
+ assert not self.closed
+ buf = self.fileobj.read(length)
+ self.sha1.update(buf)
+ return buf
+
+ def close(self):
+ return Report(self.fileobj.close(), self.sha1.hexdigest())
+
+
+class Report:
+ """Hold final information about a byte stream"""
+ def __init__(self, close_val, sha1_digest):
+ assert not close_val # For now just assume inner file closes correctly
+ self.sha1_digest = sha1_digest
diff --git a/rdiff-backup/rdiff_backup/librsync.py b/rdiff-backup/rdiff_backup/librsync.py
index a8e37ca..d71e475 100644
--- a/rdiff-backup/rdiff_backup/librsync.py
+++ b/rdiff-backup/rdiff_backup/librsync.py
@@ -1,4 +1,4 @@
-# Copyright 2002 Ben Escoto
+# Copyright 2002 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
@@ -24,7 +24,8 @@ which is written in C. The goal was to use C as little as possible...
"""
-import _librsync, types, array
+import types, array
+import _librsync
blocksize = _librsync.RS_JOB_BLOCKSIZE
@@ -95,15 +96,16 @@ class LikeFile:
new_in = self.infile.read(blocksize)
if not new_in:
self.infile_eof = 1
- assert not self.infile.close()
+ self.infile_closeval = self.infile.close()
self.infile_closed = 1
break
self.inbuf += new_in
def close(self):
- """Close infile"""
- if not self.infile_closed: assert not self.infile.close()
+ """Close infile and pass on infile close value"""
self.closed = 1
+ if self.infile_closed: return self.infile_closeval
+ else: return self.infile.close()
class SigFile(LikeFile):
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index 8b261a7..c8e6579 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -119,6 +119,10 @@ def RORP2Record(rorpath):
str_list.append(" Inode %s\n" % rorpath.getinode())
str_list.append(" DeviceLoc %s\n" % rorpath.getdevloc())
+ # Save any hashes, if available
+ if rorpath.has_sha1():
+ str_list.append(' SHA1Digest %s\n' % rorpath.get_sha1())
+
elif type == "None": return "".join(str_list)
elif type == "dir" or type == "sock" or type == "fifo": pass
elif type == "sym":
@@ -166,6 +170,7 @@ def Record2RORP(record_string):
elif field == "CarbonFile":
if data == "None": data_dict['carbonfile'] = None
else: data_dict['carbonfile'] = string2carbonfile(data)
+ elif field == "SHA1Digest": data_dict['sha1'] = data
elif field == "NumHardLinks": data_dict['nlink'] = int(data)
elif field == "Inode": data_dict['inode'] = long(data)
elif field == "DeviceLoc": data_dict['devloc'] = long(data)
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 3914d58..dac4899 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -82,7 +82,12 @@ def move(rpin, rpout):
rpin.delete()
def copy(rpin, rpout, compress = 0):
- """Copy RPath rpin to rpout. Works for symlinks, dirs, etc."""
+ """Copy RPath rpin to rpout. Works for symlinks, dirs, etc.
+
+ Returns close value of input for regular file, which can be used
+ to pass hashes on.
+
+ """
log.Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6)
if not rpin.lstat():
if rpout.lstat(): rpout.delete()
@@ -93,7 +98,7 @@ def copy(rpin, rpout, compress = 0):
rpout.delete() # easier to write than compare
else: return
- if rpin.isreg(): copy_reg_file(rpin, rpout, compress)
+ if rpin.isreg(): return copy_reg_file(rpin, rpout, compress)
elif rpin.isdir(): rpout.mkdir()
elif rpin.issym(): rpout.symlink(rpin.readlink())
elif rpin.ischardev():
@@ -115,7 +120,7 @@ def copy_reg_file(rpin, rpout, compress = 0):
rpout.setdata()
return
except AttributeError: pass
- rpout.write_from_fileobj(rpin.open("rb"), compress = compress)
+ return rpout.write_from_fileobj(rpin.open("rb"), compress = compress)
def cmp(rpin, rpout):
"""True if rpin has the same data as rpout
@@ -349,6 +354,7 @@ class RORPath:
elif key == 'carbonfile' and not Globals.carbonfile_write: pass
elif key == 'resourcefork' and not Globals.resource_forks_write:
pass
+ elif key == 'sha1': pass # one side or the other may not have it set
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return 0
@@ -646,6 +652,18 @@ class RORPath:
"""Record resource fork in dictionary. Does not write"""
self.data['resourcefork'] = rfork
+ def has_sha1(self):
+ """True iff self has its sha1 digest set"""
+ return self.data.has_key('sha1')
+
+ def get_sha1(self):
+ """Return sha1 digest. Raises an exception unless set_sha1 was called first"""
+ return self.data['sha1']
+
+ def set_sha1(self, digest):
+ """Set sha1 hash (should be in hexadecimal)"""
+ self.data['sha1'] = digest
+
class RPath(RORPath):
"""Remote Path class - wrapper around a possibly non-local pathname
@@ -978,16 +996,16 @@ class RPath(RORPath):
"""Reads fp and writes to self.path. Closes both when done
If compress is true, fp will be gzip compressed before being
- written to self.
+ written to self. Returns closing value of fp.
"""
log.Log("Writing file object to " + self.path, 7)
assert not self.lstat(), "File %s already exists" % self.path
outfp = self.open("wb", compress = compress)
copyfileobj(fp, outfp)
- if fp.close() or outfp.close():
- raise RPathException("Error closing file")
+ if outfp.close(): raise RPathException("Error closing file")
self.setdata()
+ return fp.close()
def write_string(self, s, compress = None):
"""Write string s into rpath"""
diff --git a/rdiff-backup/testing/hashtest.py b/rdiff-backup/testing/hashtest.py
index 653dda0..ad993f1 100644
--- a/rdiff-backup/testing/hashtest.py
+++ b/rdiff-backup/testing/hashtest.py
@@ -90,7 +90,7 @@ class HashTest(unittest.TestCase):
hashlist = self.extract_hashs(metadata_rp)
assert hashlist == hashlist1, (hashlist1, hashlist)
- rdiff_backup(1, 1, in_rp2.path, "testfiles/output", 20000, "-v5")
+ rdiff_backup(1, 1, in_rp2.path, "testfiles/output", 20000, "-v7")
incs = restore.get_inclist(meta_prefix)
assert len(incs) == 2
metadata_rp.delete() # easy way to find the other one