summaryrefslogtreecommitdiff
path: root/git-fat
diff options
context:
space:
mode:
author: Jed Brown <jed@59A2.org> 2012-11-25 23:21:45 +0100
committer: Jed Brown <jed@59A2.org> 2012-11-25 23:21:45 +0100
commit: 40996c304ba56aad88cac0b9de02a384adb35d3f (patch)
tree: 56845fffc0da164f99f45b05fe8cfac7231bbc52 /git-fat
parent: d5f924d9f040a49c0d0114b8ef03c6ec3a96bd9c (diff)
download: git-fat-40996c304ba56aad88cac0b9de02a384adb35d3f.tar.gz
Extend worked example and make several refinements
* Verbosity control * Automatically update working tree * Identify orphan files in filter-clean and pass through so they don't show up as spurious diffs.
Diffstat (limited to 'git-fat')
-rwxr-xr-x git-fat 116
1 files changed, 82 insertions, 34 deletions
diff --git a/git-fat b/git-fat
index ab36470..84be082 100755
--- a/git-fat
+++ b/git-fat
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-from __future__ import print_function
+from __future__ import print_function, with_statement
import sys
import hashlib
@@ -8,6 +8,7 @@ import tempfile
import os
import subprocess
import shlex
+import shutil
BLOCK_SIZE = 4096
@@ -15,10 +16,7 @@ def verbose_stderr(*args, **kwargs):
return print(*args, file=sys.stderr, **kwargs)
def verbose_ignore(*args, **kwargs):
pass
-verbose = verbose_ignore
-def gitroot():
- return subprocess.check_output('git rev-parse --show-toplevel'.split()).strip()
def mkdir_p(path):
import errno
try:
@@ -45,7 +43,8 @@ def cat(instream, outstream):
class GitFat(object):
DecodeError = RuntimeError
def __init__(self):
- self.gitroot = gitroot()
+ self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore
+ self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip()
self.objdir = os.path.join(self.gitroot, '.git', 'fat', 'objects')
self.magiclen = len(self.encode(hashlib.sha1('dummy').hexdigest()))
def setup(self):
@@ -62,12 +61,16 @@ class GitFat(object):
return remote
except ConfigParser.NoSectionError:
raise RuntimeError('No rsync.remote in %s' % cfgpath)
+ def revparse(self, revname):
+ return subprocess.check_output(['git', 'rev-parse', revname]).strip()
def encode(self, digest):
return '#$# git-fat %s\n' % digest
- def decode(self, string):
+ def decode(self, string, noraise=False):
cookie = '#$# git-fat '
if string.startswith(cookie):
return string[len(cookie):].split()[0]
+ elif noraise:
+ return None
else:
raise GitFat.DecodeError('Could not decode %s' % (string))
def decode_stream(self, stream):
@@ -94,38 +97,54 @@ class GitFat(object):
h = hashlib.new('sha1')
fd, tmpname = tempfile.mkstemp(dir=self.objdir)
try:
+ ishanging = False
+ cached = False # changes to True when file is cached, means we
with os.fdopen(fd, 'w') as cache:
- for block in readblocks(sys.stdin):
+ outstream = cache
+ blockiter = readblocks(sys.stdin)
+ # Check whether this file is hanging
+ block = next(blockiter)
+ if self.decode(block[0:self.magiclen], noraise=True):
+ ishanging = True
+ outstream = sys.stdout
+ h.update(block)
+ outstream.write(block)
+ for block in blockiter:
h.update(block)
- cache.write(block)
+ outstream.write(block)
+ outstream.flush()
digest = h.hexdigest()
objfile = os.path.join(self.objdir, digest)
- os.rename(tmpname, objfile)
- verbose('git-fat filter-clean: caching to %s' % objfile)
- sys.stdout.write(self.encode(digest))
- sys.stdout.flush()
- except:
- raise
- #os.remove(tmpname)
+ if not ishanging:
+ os.rename(tmpname, objfile)
+ cached = True
+ self.verbose('git-fat filter-clean: caching to %s' % objfile)
+ sys.stdout.write(self.encode(digest))
+ finally:
+ if not cached:
+ os.remove(tmpname)
+
def cmd_smudge(self):
self.setup()
result = self.decode_stream(sys.stdin)
if isinstance(result, str): # We got a digest
objfile = os.path.join(self.objdir, result)
- verbose('git-fat filter-smudge: restoring from %s' % objfile)
+ self.verbose('git-fat filter-smudge: restoring from %s' % objfile)
try:
cat(open(objfile), sys.stdout)
except:
sys.stdout.write(self.encode(result)) # could leave a better notice about how to recover this file
else: # We have an iterable over the original input.
- verbose('git-fat filter-smudge: not a managed file')
+ self.verbose('git-fat filter-smudge: not a managed file')
cat(result, sys.stdout)
def catalog_objects(self):
return set(os.listdir(self.objdir))
- def referenced_objects(self, rev=None):
+ def referenced_objects(self, rev=None, all=False):
referenced = set()
- if rev is None:
+ if all:
rev = '--all'
+ elif rev is None:
+ rev = self.revparse('HEAD')
p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE)
p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
for line in p1.communicate()[0].splitlines():
@@ -136,17 +155,26 @@ class GitFat(object):
fathash = self.decode(subprocess.check_output(['git', 'cat-file', '-p', objhash]))
referenced.add(fathash)
return referenced
+ def orphan_files(self):
+ 'generator for all orphan placeholders in the working tree'
+ for fname in subprocess.check_output(['git', 'ls-files']).splitlines():
+ digest = self.decode_file(fname)
+ if digest:
+ yield (digest, fname)
def cmd_status(self, args):
self.setup()
catalog = self.catalog_objects()
- referenced = self.referenced_objects()
+ refargs = dict()
+ if '--all' in args:
+ refargs['all'] = True
+ referenced = self.referenced_objects(**refargs)
garbage = catalog - referenced
orphans = referenced - catalog
if '--all' in args:
for obj in referenced:
print(obj)
if orphans:
- print('Orphan files:')
+ print('Orphan objects:')
for orph in orphans:
print(' ' + orph)
if garbage:
@@ -156,23 +184,44 @@ class GitFat(object):
def cmd_push(self):
'Push anything that I have stored and referenced'
self.setup()
- files = self.referenced_objects() & self.catalog_objects()
+ # Pushing *all* objects because it's safer. Could implement partial push.
+ files = self.referenced_objects(all=True) & self.catalog_objects()
remote = self.get_rsync()
- verbose('Pushing to %s' % (remote))
+ self.verbose('Pushing to %s' % (remote))
cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-', self.objdir + '/', remote]
p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
p.communicate(input='\x00'.join(files))
- def checkout(self):
- files = subprocess.check_output(['git', 'ls-files', ''])
- def cmd_pull(self):
+ def checkout(self, show_orphans=False):
+ 'Update any stale files in the present working tree'
+ orphans = []
+ for digest, fname in self.orphan_files():
+ objpath = os.path.join(self.objdir, digest)
+ if os.access(objpath, os.R_OK):
+ print('Restoring %s -> %s' % (digest, fname))
+ shutil.copy(objpath, fname)
+ elif show_orphans:
+ print('Data unavailable: %s %s' % (digest,fname))
+ subprocess.call(['git', 'checkout', '.'])
+ def cmd_pull(self, args):
'Pull anything that I have referenced, but not stored'
self.setup()
- files = self.referenced_objects() - self.catalog_objects()
+ refargs = dict()
+ if '--all' in args:
+ refargs['all'] = True
+ for arg in args:
+ if arg.startswith('-') or len(arg) != 40:
+ continue
+ rev = self.revparse(arg)
+ if rev:
+ refargs['rev'] = rev
+ files = self.referenced_objects(**refargs) - self.catalog_objects()
remote = self.get_rsync()
cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-', remote + '/', self.objdir]
p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
p.communicate(input='\x00'.join(files))
self.checkout()
+ def cmd_checkout(self, args):
+ self.checkout(show_orphans=True)
def cmd_gc(self):
garbage = self.catalog_objects() - self.referenced_objects()
print('Unreferenced objects to remove: %d' % len(garbage))
@@ -189,24 +238,23 @@ class GitFat(object):
])
if __name__ == '__main__':
- if os.environ.get('GIT_FAT_VERBOSE'):
- global verbose
- verbose = verbose_stderr
fat = GitFat()
cmd = sys.argv[1] if len(sys.argv) > 1 else ''
if cmd == 'filter-clean':
fat.cmd_clean()
elif cmd == 'filter-smudge':
fat.cmd_smudge()
+ elif cmd == 'init':
+ fat.cmd_init()
elif cmd == 'status':
fat.cmd_status(sys.argv[2:])
elif cmd == 'push':
fat.cmd_push()
elif cmd == 'pull':
- fat.cmd_pull()
+ fat.cmd_pull(sys.argv[2:])
elif cmd == 'gc':
fat.cmd_gc()
- elif cmd == 'init':
- fat.cmd_init()
+ elif cmd == 'checkout':
+ fat.cmd_checkout(sys.argv[2:])
else:
- print('Usage: git fat [status|push|pull|gc|init]', file=sys.stderr)
+ print('Usage: git fat [init|status|push|pull|gc|checkout]', file=sys.stderr)