summary refs log tree commit diff
path: root/git-fat
diff options
context:
space:
mode:
author Jed Brown <jed@59A2.org> 2012-11-25 20:43:19 +0100
committer Jed Brown <jed@59A2.org> 2012-11-25 20:43:19 +0100
commit d5f924d9f040a49c0d0114b8ef03c6ec3a96bd9c (patch)
tree 985232f72aa1df42a1b5196c1e37c42c2888d80c /git-fat
download git-fat-d5f924d9f040a49c0d0114b8ef03c6ec3a96bd9c.tar.gz
initial import
Diffstat (limited to 'git-fat')
-rwxr-xr-x git-fat 212
1 files changed, 212 insertions, 0 deletions
diff --git a/git-fat b/git-fat
new file mode 100755
index 0000000..ab36470
--- /dev/null
+++ b/git-fat
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import sys
+import hashlib
+import tempfile
+import os
+import subprocess
+import shlex
+
+BLOCK_SIZE = 4096
+
def verbose_stderr(*args, **kwargs):
    """Print a diagnostic message on standard error.

    Accepts the same positional and keyword arguments as ``print`` (other
    than ``file``, which is fixed to ``sys.stderr``).
    """
    outcome = print(*args, file=sys.stderr, **kwargs)
    return outcome


def verbose_ignore(*args, **kwargs):
    """Accept any arguments and silently discard them."""
    return None


# Diagnostics are off by default; the script entry point rebinds ``verbose``
# to ``verbose_stderr`` when the GIT_FAT_VERBOSE environment variable is set.
verbose = verbose_ignore
+
def gitroot():
    """Return the top-level directory of the current git work tree.

    Runs ``git rev-parse --show-toplevel`` and returns its output with the
    surrounding whitespace stripped.  ``subprocess.CalledProcessError`` is
    raised if git exits with a non-zero status.
    """
    command = ['git', 'rev-parse', '--show-toplevel']
    toplevel = subprocess.check_output(command)
    return toplevel.strip()
def mkdir_p(path):
    """Create ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.  Any other failure
    (including ``path`` existing but not being a directory) re-raises the
    original ``OSError``.
    """
    import errno
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
+
def readblocks(stream, blocksize=None):
    """Yield successive chunks read from ``stream`` until it is exhausted.

    ``stream`` is any object with a ``read(size)`` method.  ``blocksize`` is
    the number of bytes/characters requested per read; when omitted the
    module-level ``BLOCK_SIZE`` is used.  Iteration stops at the first empty
    read, so an empty stream yields nothing.
    """
    if blocksize is None:
        blocksize = BLOCK_SIZE
    while True:
        data = stream.read(blocksize)
        if not data:
            break
        yield data
def cat_iter(initer, outstream):
    """Write every chunk produced by ``initer`` to ``outstream``, in order.

    ``outstream`` only needs a ``write`` method.  Returns ``None``.
    """
    for chunk in initer:
        outstream.write(chunk)
def cat(instream, outstream):
    """Copy ``instream`` to ``outstream`` one block at a time.

    ``instream`` must provide ``read(size)``; ``outstream`` must provide
    ``write``.  Returns whatever ``cat_iter`` returns.
    """
    blocks = readblocks(instream)
    return cat_iter(blocks, outstream)
+
class GitFat(object):
    """Keep large file contents outside git, tracked through small placeholders.

    The clean filter stores the real content under ``.git/fat/objects/<sha1>``
    and hands git a one-line placeholder (see ``encode``); the smudge filter
    turns a placeholder back into the stored content.  ``push``/``pull`` copy
    stored objects to and from an rsync remote configured in ``.gitfat``.
    """

    # Raised by ``decode`` when its input is not a git-fat placeholder.
    DecodeError = RuntimeError

    def __init__(self):
        self.gitroot = gitroot()
        self.objdir = os.path.join(self.gitroot, '.git', 'fat', 'objects')
        # Every placeholder has the same length, so it is computed once from
        # an arbitrary digest and used as a cheap size filter elsewhere.  The
        # bytes literal keeps hashlib satisfied on Python 3 as well.
        self.magiclen = len(self.encode(hashlib.sha1(b'dummy').hexdigest()))

    def setup(self):
        """Make sure the local object directory exists."""
        mkdir_p(self.objdir)

    def get_rsync(self):
        """Return the rsync remote configured as ``[rsync] remote`` in .gitfat.

        One pair of surrounding quotes is stripped from the value.  Raises
        ``RuntimeError`` when the file, the section or the option is missing.
        """
        try:
            import ConfigParser as configparser  # Python 2
        except ImportError:
            import configparser  # Python 3
        cfgpath = os.path.join(self.gitroot, '.gitfat')
        config = configparser.RawConfigParser()
        config.read(cfgpath)  # a missing file is silently skipped
        try:
            remote = config.get('rsync', 'remote')
        except (configparser.NoSectionError, configparser.NoOptionError):
            raise RuntimeError('No rsync.remote in %s' % cfgpath)
        quotes = ('"', "'")
        # The length guard keeps an empty value from raising IndexError.
        if len(remote) >= 2 and remote[0] in quotes and remote[-1] in quotes:
            remote = remote[1:-1]
        return remote

    def encode(self, digest):
        """Return the placeholder text that stands in for object ``digest``."""
        return '#$# git-fat %s\n' % digest

    def decode(self, string):
        """Extract the digest from placeholder text.

        Raises ``GitFat.DecodeError`` when ``string`` does not start with the
        git-fat cookie or carries no digest after it.
        """
        cookie = '#$# git-fat '
        if string.startswith(cookie):
            fields = string[len(cookie):].split()
            if fields:
                return fields[0]
        raise GitFat.DecodeError('Could not decode %s' % (string))

    def decode_stream(self, stream):
        """Return the digest if ``stream`` starts with a placeholder.

        Otherwise return an iterator over the entire stream contents,
        including the bytes that were already consumed while checking.
        """
        import itertools
        preamble = stream.read(self.magiclen)
        try:
            return self.decode(preamble)
        except GitFat.DecodeError:
            # Not sure if this is the right behavior
            return itertools.chain([preamble], readblocks(stream))

    def decode_file(self, fname):
        """Return the digest stored in placeholder file ``fname``.

        Returns ``False`` when the file size rules out a placeholder and
        ``None`` when the size matches but the content does not decode.
        """
        # Fast check: a placeholder always has exactly ``magiclen`` bytes.
        if os.stat(fname).st_size != self.magiclen:
            return False
        with open(fname) as stream:
            digest = self.decode_stream(stream)
        if isinstance(digest, str):
            return digest
        return None

    def cmd_clean(self):
        """Clean filter: store stdin as a fat object, print its placeholder."""
        self.setup()
        h = hashlib.new('sha1')
        fd, tmpname = tempfile.mkstemp(dir=self.objdir)
        try:
            # Binary mode: the content must be stored unmodified.
            with os.fdopen(fd, 'wb') as cache:
                for block in readblocks(sys.stdin):
                    h.update(block)
                    cache.write(block)
            digest = h.hexdigest()
            objfile = os.path.join(self.objdir, digest)
            os.rename(tmpname, objfile)
        except BaseException:
            # Do not leave a half-written temporary file in the object
            # directory, where it would later show up as garbage.
            try:
                os.remove(tmpname)
            except OSError:
                pass
            raise
        verbose('git-fat filter-clean: caching to %s' % objfile)
        sys.stdout.write(self.encode(digest))
        sys.stdout.flush()

    def cmd_smudge(self):
        """Smudge filter: replace a placeholder on stdin with the real content.

        When the object is not available locally the placeholder is written
        back unchanged.  Input that is not a placeholder is passed through.
        """
        self.setup()
        result = self.decode_stream(sys.stdin)
        if isinstance(result, str):  # We got a digest
            objfile = os.path.join(self.objdir, result)
            verbose('git-fat filter-smudge: restoring from %s' % objfile)
            try:
                stored = open(objfile, 'rb')
            except EnvironmentError:
                # Object not fetched yet: keep the placeholder.
                # could leave a better notice about how to recover this file
                sys.stdout.write(self.encode(result))
            else:
                with stored:
                    cat(stored, sys.stdout)
        else:  # We have an iterable over the original input.
            verbose('git-fat filter-smudge: not a managed file')
            # ``result`` yields blocks and has no read(), so cat() cannot be
            # used here.
            cat_iter(result, sys.stdout)

    def catalog_objects(self):
        """Return the set of file names present in the local object directory."""
        return set(os.listdir(self.objdir))

    def referenced_objects(self, rev=None):
        """Return the set of fat digests referenced from ``rev``.

        ``rev`` is passed to ``git rev-list --objects``; ``None`` means
        ``--all``.  Only blobs whose size equals the placeholder length are
        inspected, and blobs of that size that are not placeholders are
        skipped.
        """
        referenced = set()
        if rev is None:
            rev = '--all'
        p1 = subprocess.Popen(['git', 'rev-list', '--objects', rev],
                              stdout=subprocess.PIPE)
        hashes = []
        for line in p1.communicate()[0].splitlines():
            fields = line.split()
            if fields:
                hashes.append(fields[0])
        p2 = subprocess.Popen(['git', 'cat-file', '--batch-check'],
                              stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # communicate() feeds stdin while draining stdout; writing the whole
        # list first and reading afterwards can block once the pipe fills up.
        listing = p2.communicate(input=''.join(h + '\n' for h in hashes))[0]
        for line in listing.splitlines():
            fields = line.split()
            if len(fields) != 3:
                continue
            objhash, objtype, size = fields
            if objtype == 'blob' and int(size) == self.magiclen:
                content = subprocess.check_output(['git', 'cat-file', '-p', objhash])
                try:
                    referenced.add(self.decode(content))
                except GitFat.DecodeError:
                    # Same size as a placeholder, but an ordinary blob.
                    continue
        return referenced

    def cmd_status(self, args):
        """Print orphan (referenced, not stored) and garbage (stored, unreferenced) objects.

        With ``--all`` in ``args`` every referenced digest is listed first.
        """
        self.setup()
        catalog = self.catalog_objects()
        referenced = self.referenced_objects()
        garbage = catalog - referenced
        orphans = referenced - catalog
        if '--all' in args:
            for obj in sorted(referenced):
                print(obj)
        if orphans:
            print('Orphan files:')
            for orph in sorted(orphans):
                print(' ' + orph)
        if garbage:
            print('Garbage objects:')
            for g in sorted(garbage):
                print(' ' + g)

    def _rsync(self, files, source, destination):
        """Copy the named ``files`` from ``source`` to ``destination`` with rsync.

        Returns rsync's exit status and reports a non-zero status on stderr.
        """
        cmd = ['rsync', '--progress', '--ignore-existing', '--from0',
               '--files-from=-', source, destination]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        p.communicate(input='\x00'.join(files))
        if p.returncode != 0:
            sys.stderr.write('git-fat: rsync exited with status %d\n' % p.returncode)
        return p.returncode

    def cmd_push(self):
        """Push anything that I have stored and referenced."""
        self.setup()
        files = self.referenced_objects() & self.catalog_objects()
        remote = self.get_rsync()
        verbose('Pushing to %s' % (remote))
        self._rsync(files, self.objdir + '/', remote)

    def checkout(self):
        """Placeholder for refreshing the work tree after a pull.

        Currently this only lists the tracked files and returns git's raw
        output; it does not rewrite any file.
        """
        # No pathspec argument: newer git versions reject an empty-string
        # pathspec, which would make every pull fail here.
        return subprocess.check_output(['git', 'ls-files'])

    def cmd_pull(self):
        """Pull anything that I have referenced, but not stored."""
        self.setup()
        files = self.referenced_objects() - self.catalog_objects()
        remote = self.get_rsync()
        verbose('Pulling from %s' % (remote))
        self._rsync(files, remote + '/', self.objdir)
        self.checkout()

    def cmd_gc(self):
        """Delete stored objects that no revision references any more."""
        self.setup()  # the object directory may not exist yet
        garbage = self.catalog_objects() - self.referenced_objects()
        print('Unreferenced objects to remove: %d' % len(garbage))
        for obj in garbage:
            fname = os.path.join(self.objdir, obj)
            print('%10d %s' % (os.stat(fname).st_size, obj))
            os.remove(fname)

    def cmd_init(self):
        """Register the ``fat`` clean/smudge filter in the repository config.

        ``git config`` overwrites the two keys, so running init repeatedly
        does not pile up duplicate ``[filter "fat"]`` sections.
        """
        self.setup()
        subprocess.check_call(['git', 'config', 'filter.fat.clean',
                               'git-fat filter-clean'])
        subprocess.check_call(['git', 'config', 'filter.fat.smudge',
                               'git-fat filter-smudge'])
+
# Command-line entry point: ``git fat <command>`` plus the two filter commands
# that git itself invokes through the [filter "fat"] configuration.
if __name__ == '__main__':
    if os.environ.get('GIT_FAT_VERBOSE'):
        # This is module scope, so a plain assignment rebinds the module-level
        # ``verbose``; a ``global`` statement here is unnecessary and newer
        # Python versions refuse it after the earlier module-level assignment.
        verbose = verbose_stderr
    fat = GitFat()
    cmd = sys.argv[1] if len(sys.argv) > 1 else ''
    if cmd == 'filter-clean':
        fat.cmd_clean()
    elif cmd == 'filter-smudge':
        fat.cmd_smudge()
    elif cmd == 'status':
        fat.cmd_status(sys.argv[2:])
    elif cmd == 'push':
        fat.cmd_push()
    elif cmd == 'pull':
        fat.cmd_pull()
    elif cmd == 'gc':
        fat.cmd_gc()
    elif cmd == 'init':
        fat.cmd_init()
    else:
        # Missing or unknown command.
        print('Usage: git fat [status|push|pull|gc|init]', file=sys.stderr)