diff options
author | James Murty <james@murty.co> | 2014-05-24 21:34:01 +0100 |
---|---|---|
committer | James Murty <james@murty.co> | 2014-05-24 21:34:01 +0100 |
commit | 6ca25da37333bbffe10cbb837c0002fd250931b1 (patch) | |
tree | 3dd6604749b99eac3a778724db114362230cfcd5 /git-fat | |
parent | 456d22cb3c7ba60e15d5d0f7de771c2acf573481 (diff) | |
download | git-fat-6ca25da37333bbffe10cbb837c0002fd250931b1.tar.gz |
Add `verify` command to check git-fat object data matches hash filename.
While experimenting with using the --partial option with rsync I
managed to corrupt one of my git-fat objects by truncating it. This
caused some behaviour in git-fat which seemed odd until I worked out
what had happened: it would check out the truncated data but git would
see the file as modified and show the changed hash in a diff, while a
re-checkout did not reset the file to its original data/hash.
This commit adds a `verify` command that cross-checks git-fat object
file names (the original SHA1) against the SHA1 of the object's
actual data and prints any mismatches. So you can quickly find any
dubious objects and decide what to do about them.
A better solution might be to calculate and verify objects' data hash
during filter-smudge/checkout, though this would likely hurt performance.
Diffstat (limited to 'git-fat')
-rwxr-xr-x | git-fat | 22 |
1 files changed, 21 insertions, 1 deletions
@@ -414,6 +414,24 @@ class GitFat(object): fname = os.path.join(self.objdir, obj) print('%10d %s' % (os.stat(fname).st_size, obj)) os.remove(fname) + + def cmd_verify(self): + """Print details of git-fat objects with incorrect data hash""" + corrupted_objects = [] + for obj in self.catalog_objects(): + fname = os.path.join(self.objdir, obj) + h = hashlib.new('sha1') + for block in readblocks(open(fname)): + h.update(block) + data_hash = h.hexdigest() + if obj != data_hash: + corrupted_objects.append((obj, data_hash)) + if corrupted_objects: + print('Corrupted objects: %d' % len(corrupted_objects)) + for obj, data_hash in corrupted_objects: + print('%s data hash is %s' % (obj, data_hash)) + sys.exit(1) + def cmd_init(self): self.setup() if gitconfig_get('filter.fat.clean') or gitconfig_get('filter.fat.smudge'): @@ -539,6 +557,8 @@ if __name__ == '__main__': fat.cmd_pull(sys.argv[2:]) elif cmd == 'gc': fat.cmd_gc() + elif cmd == 'verify': + fat.cmd_verify() elif cmd == 'checkout': fat.cmd_checkout(sys.argv[2:]) elif cmd == 'find': @@ -546,4 +566,4 @@ if __name__ == '__main__': elif cmd == 'index-filter': fat.cmd_index_filter(sys.argv[2:]) else: - print('Usage: git fat [init|status|push|pull|gc|checkout|find|index-filter]', file=sys.stderr) + print('Usage: git fat [init|status|push|pull|gc|verify|checkout|find|index-filter]', file=sys.stderr) |