From 6ca25da37333bbffe10cbb837c0002fd250931b1 Mon Sep 17 00:00:00 2001 From: James Murty Date: Sat, 24 May 2014 21:34:01 +0100 Subject: Add `verify` command to check git-fat object data matches hash filename. While experimenting with using the --partial option with rsync I managed to corrupt one of my git-fat objects by truncating it. This caused some behaviour in git-fat which seemed odd until I worked out what had happened: it would check out the truncated data but git would see the file as modified and show the changed hash in a diff, while a re-checkout did not reset the file to its original data/hash. This commit adds a `verify` command that cross-checks git-fat object file names (the original SHA1) against the SHA1 of the object's actual data and prints any mismatches. So you can quickly find any dubious objects and decide what to do about them. A better solution might be to calculate and verify objects' data hash during filter-smudge/checkout though this would likely hurt performance. 
---
 git-fat | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)
 (limited to 'git-fat')

diff --git a/git-fat b/git-fat
index 97315ea..e5f75cc 100755
--- a/git-fat
+++ b/git-fat
@@ -414,6 +414,31 @@ class GitFat(object):
             fname = os.path.join(self.objdir, obj)
             print('%10d %s' % (os.stat(fname).st_size, obj))
             os.remove(fname)
+
+    def cmd_verify(self):
+        """Print details of git-fat objects with incorrect data hash.
+
+        Each cataloged object is named after the SHA1 of its content, so an
+        object whose recomputed SHA1 differs from its file name is reported
+        as corrupted.  Exits with status 1 if any mismatch is found.
+        """
+        corrupted_objects = []
+        for obj in self.catalog_objects():
+            fname = os.path.join(self.objdir, obj)
+            h = hashlib.new('sha1')
+            # Hash the raw bytes and release the handle before the next file.
+            with open(fname, 'rb') as f:
+                for block in readblocks(f):
+                    h.update(block)
+            data_hash = h.hexdigest()
+            if obj != data_hash:
+                corrupted_objects.append((obj, data_hash))
+        if corrupted_objects:
+            print('Corrupted objects: %d' % len(corrupted_objects))
+            for obj, data_hash in corrupted_objects:
+                print('%s data hash is %s' % (obj, data_hash))
+            sys.exit(1)
+
     def cmd_init(self):
         self.setup()
         if gitconfig_get('filter.fat.clean') or gitconfig_get('filter.fat.smudge'):
@@ -539,6 +564,8 @@ if __name__ == '__main__':
         fat.cmd_pull(sys.argv[2:])
     elif cmd == 'gc':
         fat.cmd_gc()
+    elif cmd == 'verify':
+        fat.cmd_verify()
     elif cmd == 'checkout':
         fat.cmd_checkout(sys.argv[2:])
     elif cmd == 'find':
@@ -546,4 +573,4 @@ if __name__ == '__main__':
     elif cmd == 'index-filter':
         fat.cmd_index_filter(sys.argv[2:])
     else:
-        print('Usage: git fat [init|status|push|pull|gc|checkout|find|index-filter]', file=sys.stderr)
+        print('Usage: git fat [init|status|push|pull|gc|verify|checkout|find|index-filter]', file=sys.stderr)
-- 
cgit v1.2.1