summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Murty <james@murty.co>2014-05-24 21:34:01 +0100
committerJames Murty <james@murty.co>2014-05-24 21:34:01 +0100
commit6ca25da37333bbffe10cbb837c0002fd250931b1 (patch)
tree3dd6604749b99eac3a778724db114362230cfcd5
parent456d22cb3c7ba60e15d5d0f7de771c2acf573481 (diff)
downloadgit-fat-6ca25da37333bbffe10cbb837c0002fd250931b1.tar.gz
Add `verify` command to check git-fat object data matches hash filename.
While experimenting with using the --partial option with rsync I managed to corrupt one of my git-fat objects by truncating it. This caused some behaviour in git-fat which seemed odd until I worked out what had happened: it would check out the truncated data but git would see the file as modified and show the changed hash in a diff, while a re-checkout did not reset the file to its original data/hash. This commit adds a `verify` command that cross-checks git-fat object file names (the original SHA1) against the SHA1 of the object's actual data and prints any mismatches. So you can quickly find any dubious objects and decide what to do about them. A better solution might be to calculate and verify objects' data hash during filter-smudge/checkout, though this would likely hurt performance.
-rwxr-xr-xgit-fat22
-rwxr-xr-xtest.sh8
2 files changed, 29 insertions, 1 deletions
diff --git a/git-fat b/git-fat
index 97315ea..e5f75cc 100755
--- a/git-fat
+++ b/git-fat
@@ -414,6 +414,24 @@ class GitFat(object):
fname = os.path.join(self.objdir, obj)
print('%10d %s' % (os.stat(fname).st_size, obj))
os.remove(fname)
+
+    def cmd_verify(self):
+        """Print details of git-fat objects with incorrect data hash.
+
+        Every object returned by ``self.catalog_objects()`` is read from
+        ``self.objdir`` and re-hashed with SHA1. An object whose name differs
+        from the hex digest of its data is reported as corrupted. If any
+        corrupted objects are found, a summary is printed and the process
+        exits with status 1; otherwise nothing is printed.
+        """
+        corrupted_objects = []
+        for obj in self.catalog_objects():
+            fname = os.path.join(self.objdir, obj)
+            h = hashlib.new('sha1')
+            # Hash the raw bytes: binary mode avoids newline translation and
+            # text decoding, and the with-block closes each file promptly
+            # instead of leaking one handle per object.
+            with open(fname, 'rb') as objfile:
+                for block in readblocks(objfile):
+                    h.update(block)
+            data_hash = h.hexdigest()
+            if obj != data_hash:
+                corrupted_objects.append((obj, data_hash))
+        if corrupted_objects:
+            print('Corrupted objects: %d' % len(corrupted_objects))
+            for obj, data_hash in corrupted_objects:
+                print('%s data hash is %s' % (obj, data_hash))
+            # Non-zero exit lets scripts detect corruption.
+            sys.exit(1)
+
def cmd_init(self):
self.setup()
if gitconfig_get('filter.fat.clean') or gitconfig_get('filter.fat.smudge'):
@@ -539,6 +557,8 @@ if __name__ == '__main__':
fat.cmd_pull(sys.argv[2:])
elif cmd == 'gc':
fat.cmd_gc()
+ elif cmd == 'verify':
+ fat.cmd_verify()
elif cmd == 'checkout':
fat.cmd_checkout(sys.argv[2:])
elif cmd == 'find':
@@ -546,4 +566,4 @@ if __name__ == '__main__':
elif cmd == 'index-filter':
fat.cmd_index_filter(sys.argv[2:])
else:
- print('Usage: git fat [init|status|push|pull|gc|checkout|find|index-filter]', file=sys.stderr)
+ print('Usage: git fat [init|status|push|pull|gc|verify|checkout|find|index-filter]', file=sys.stderr)
diff --git a/test.sh b/test.sh
index e9c9163..92f2db4 100755
--- a/test.sh
+++ b/test.sh
@@ -38,3 +38,11 @@ git commit -m'add d with normal content'
rm d
git fat pull
+# Check verify command finds corrupt object
+# Swap the real object for bogus data (keeping a .bak copy), expect
+# `git fat verify` to exit non-zero, then restore the original object.
+mv .git/fat/objects/6ecec2e21d3033e7ba53e2db63f69dbd3a011fa8 \
+ .git/fat/objects/6ecec2e21d3033e7ba53e2db63f69dbd3a011fa8.bak
+echo "Not the right data" > .git/fat/objects/6ecec2e21d3033e7ba53e2db63f69dbd3a011fa8
+# NOTE(review): `&& true` presumably keeps a failing verify from aborting the
+# script under `set -e` (not visible in this hunk -- confirm); when verify
+# fails, `$?` still holds verify's non-zero exit status.
+git fat verify && true
+if [ $? -eq 0 ]; then echo "Verify did not detect invalid object"; exit 1; fi
+mv .git/fat/objects/6ecec2e21d3033e7ba53e2db63f69dbd3a011fa8.bak \
+ .git/fat/objects/6ecec2e21d3033e7ba53e2db63f69dbd3a011fa8