summaryrefslogtreecommitdiff
path: root/git-fat
diff options
context:
space:
mode:
authorJames Murty <james@murty.co>2014-05-24 21:34:01 +0100
committerJames Murty <james@murty.co>2014-05-24 21:34:01 +0100
commit6ca25da37333bbffe10cbb837c0002fd250931b1 (patch)
tree3dd6604749b99eac3a778724db114362230cfcd5 /git-fat
parent456d22cb3c7ba60e15d5d0f7de771c2acf573481 (diff)
downloadgit-fat-6ca25da37333bbffe10cbb837c0002fd250931b1.tar.gz
Add `verify` command to check git-fat object data matches hash filename.
While experimenting with rsync's --partial option I managed to corrupt one of my git-fat objects by truncating it. This caused some behaviour in git-fat which seemed odd until I worked out what had happened: it would check out the truncated data, but git would see the file as modified and show the changed hash in a diff, while a re-checkout did not reset the file to its original data/hash. This commit adds a `verify` command that cross-checks each git-fat object's file name (the original SHA1) against the SHA1 of the object's actual data and prints any mismatches, so you can quickly find any dubious objects and decide what to do about them. A better solution might be to calculate and verify each object's data hash during filter-smudge/checkout, though this would likely hurt performance.
Diffstat (limited to 'git-fat')
-rwxr-xr-xgit-fat22
1 files changed, 21 insertions, 1 deletions
diff --git a/git-fat b/git-fat
index 97315ea..e5f75cc 100755
--- a/git-fat
+++ b/git-fat
@@ -414,6 +414,24 @@ class GitFat(object):
fname = os.path.join(self.objdir, obj)
print('%10d %s' % (os.stat(fname).st_size, obj))
os.remove(fname)
+
+ def cmd_verify(self):
+ """Print details of git-fat objects with incorrect data hash"""
+ corrupted_objects = []
+ for obj in self.catalog_objects():
+ fname = os.path.join(self.objdir, obj)
+ h = hashlib.new('sha1')
+ for block in readblocks(open(fname)):
+ h.update(block)
+ data_hash = h.hexdigest()
+ if obj != data_hash:
+ corrupted_objects.append((obj, data_hash))
+ if corrupted_objects:
+ print('Corrupted objects: %d' % len(corrupted_objects))
+ for obj, data_hash in corrupted_objects:
+ print('%s data hash is %s' % (obj, data_hash))
+ sys.exit(1)
+
def cmd_init(self):
self.setup()
if gitconfig_get('filter.fat.clean') or gitconfig_get('filter.fat.smudge'):
@@ -539,6 +557,8 @@ if __name__ == '__main__':
fat.cmd_pull(sys.argv[2:])
elif cmd == 'gc':
fat.cmd_gc()
+ elif cmd == 'verify':
+ fat.cmd_verify()
elif cmd == 'checkout':
fat.cmd_checkout(sys.argv[2:])
elif cmd == 'find':
@@ -546,4 +566,4 @@ if __name__ == '__main__':
elif cmd == 'index-filter':
fat.cmd_index_filter(sys.argv[2:])
else:
- print('Usage: git fat [init|status|push|pull|gc|checkout|find|index-filter]', file=sys.stderr)
+ print('Usage: git fat [init|status|push|pull|gc|verify|checkout|find|index-filter]', file=sys.stderr)