diff options
Diffstat (limited to 'git-fat')
-rwxr-xr-x | git-fat | 212 |
1 files changed, 212 insertions, 0 deletions
#!/usr/bin/env python
"""git-fat: git clean/smudge filter that stores file contents outside git.

The clean filter copies the content read from stdin into
``.git/fat/objects/<sha1>`` and emits a one-line stub naming that digest.
The smudge filter turns such a stub back into the stored content.  The
``push``/``pull`` commands copy stored objects to/from the rsync remote
named in the ``.gitfat`` file at the repository root.
"""

from __future__ import print_function

import errno
import hashlib
import itertools
import os
import shlex
import subprocess
import sys
import tempfile

BLOCK_SIZE = 4096


def verbose_stderr(*args, **kwargs):
    """Print a diagnostic message to stderr."""
    return print(*args, file=sys.stderr, **kwargs)


def verbose_ignore(*args, **kwargs):
    """Discard a diagnostic message."""
    pass


# Diagnostics are silent unless the entry point swaps in verbose_stderr.
verbose = verbose_ignore


def gitroot():
    """Return the top-level directory of the current git work tree."""
    return subprocess.check_output(
        ['git', 'rev-parse', '--show-toplevel']).strip()


def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing directory is fine."""
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # Only swallow "already exists" when the existing entry is a directory.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


def readblocks(stream):
    """Yield successive chunks of at most BLOCK_SIZE from ``stream`` until EOF."""
    while True:
        data = stream.read(BLOCK_SIZE)
        if not data:
            break
        yield data


def cat_iter(initer, outstream):
    """Write every block produced by ``initer`` to ``outstream``."""
    for block in initer:
        outstream.write(block)


def cat(instream, outstream):
    """Copy ``instream`` to ``outstream`` in BLOCK_SIZE chunks."""
    return cat_iter(readblocks(instream), outstream)


class GitFat(object):
    """Object store and filter commands for one git repository."""

    DecodeError = RuntimeError

    def __init__(self):
        self.gitroot = gitroot()
        self.objdir = os.path.join(self.gitroot, '.git', 'fat', 'objects')
        # Every stub has the same length because SHA-1 hex digests do.
        self.magiclen = len(self.encode(hashlib.sha1('dummy').hexdigest()))

    def setup(self):
        """Make sure the object directory exists."""
        mkdir_p(self.objdir)

    def get_rsync(self):
        """Return the ``remote`` value of the ``[rsync]`` section of .gitfat.

        Raises RuntimeError when the section or option is missing or empty.
        """
        import ConfigParser
        cfgpath = os.path.join(self.gitroot, '.gitfat')
        config = ConfigParser.RawConfigParser()
        config.read(cfgpath)
        try:
            remote = config.get('rsync', 'remote')
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
            raise RuntimeError('No rsync.remote in %s' % cfgpath)
        # Strip one pair of surrounding quotes; the length guard keeps an
        # empty value from raising IndexError.
        if len(remote) >= 2 and remote[0] in ['"', "'"] and remote[-1] in ['"', "'"]:
            remote = remote[1:-1]
        if not remote:
            raise RuntimeError('No rsync.remote in %s' % cfgpath)
        return remote

    def encode(self, digest):
        """Return the stub line that stands in for the object ``digest``."""
        return '#$# git-fat %s\n' % digest

    def decode(self, string):
        """Return the digest named by a stub, or raise GitFat.DecodeError."""
        cookie = '#$# git-fat '
        if string.startswith(cookie):
            fields = string[len(cookie):].split()
            # A bare cookie with no digest is not a valid stub either.
            if fields:
                return fields[0]
        raise GitFat.DecodeError('Could not decode %s' % (string))

    def decode_stream(self, stream):
        """Return digest if git-fat cache, otherwise return iterator over entire file contents"""
        preamble = stream.read(self.magiclen)
        try:
            return self.decode(preamble)
        except GitFat.DecodeError:
            # Not sure if this is the right behavior: hand back the bytes
            # already consumed followed by the rest of the stream.
            return itertools.chain([preamble], readblocks(stream))

    def decode_file(self, fname):
        """Return the digest if ``fname`` is a stub.

        Returns False when the file size rules a stub out, and None when the
        size matches but the content does not decode.
        """
        # Fast check
        if os.stat(fname).st_size != self.magiclen:
            return False
        # read file
        with open(fname) as stream:
            digest = self.decode_stream(stream)
        if isinstance(digest, str):
            return digest
        return None

    def cmd_clean(self):
        """Clean filter: cache stdin in the object store, write the stub to stdout."""
        self.setup()
        h = hashlib.new('sha1')
        fd, tmpname = tempfile.mkstemp(dir=self.objdir)
        stored = False
        try:
            # Binary mode: the cached object must match the input byte for byte.
            with os.fdopen(fd, 'wb') as cache:
                for block in readblocks(sys.stdin):
                    h.update(block)
                    cache.write(block)
            digest = h.hexdigest()
            objfile = os.path.join(self.objdir, digest)
            os.rename(tmpname, objfile)
            stored = True
        finally:
            # Do not leave a partial temporary file behind on failure.
            if not stored and os.path.exists(tmpname):
                os.remove(tmpname)
        verbose('git-fat filter-clean: caching to %s' % objfile)
        sys.stdout.write(self.encode(digest))
        sys.stdout.flush()

    def cmd_smudge(self):
        """Smudge filter: expand a stub read from stdin into the cached content.

        Input that is not a stub is passed through unchanged.  When the
        object is not in the local store the stub itself is written back.
        """
        self.setup()
        result = self.decode_stream(sys.stdin)
        if not isinstance(result, str):
            # We have an iterable over the original input.
            verbose('git-fat filter-smudge: not a managed file')
            cat(result, sys.stdout)
            return
        # We got a digest
        objfile = os.path.join(self.objdir, result)
        verbose('git-fat filter-smudge: restoring from %s' % objfile)
        try:
            cached = open(objfile, 'rb')
        except IOError:
            # Object not available locally: keep the stub.
            # could leave a better notice about how to recover this file
            sys.stdout.write(self.encode(result))
            return
        # Errors while streaming propagate instead of appending a stub to
        # partially written content.
        with cached:
            cat(cached, sys.stdout)

    def catalog_objects(self):
        """Return the set of file names present in the object directory."""
        return set(os.listdir(self.objdir))

    def referenced_objects(self, rev=None):
        """Return the set of digests named by stub blobs reachable from ``rev``.

        ``rev`` defaults to ``--all``.
        """
        if rev is None:
            rev = '--all'
        revlist = subprocess.Popen(['git', 'rev-list', '--objects', rev],
                                   stdout=subprocess.PIPE)
        listing = revlist.communicate()[0]
        hashes = [line.split()[0] for line in listing.splitlines() if line.strip()]
        batch = subprocess.Popen(['git', 'cat-file', '--batch-check'],
                                 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # communicate() feeds stdin and drains stdout together; writing all
        # the input first can deadlock once both pipe buffers are full.
        report = batch.communicate(input=''.join(h + '\n' for h in hashes))[0]
        referenced = set()
        for line in report.splitlines():
            fields = line.split()
            if len(fields) != 3:
                # Not a "<hash> <type> <size>" line.
                continue
            objhash, objtype, size = fields
            if objtype != 'blob' or int(size) != self.magiclen:
                continue
            content = subprocess.check_output(['git', 'cat-file', '-p', objhash])
            try:
                referenced.add(self.decode(content))
            except GitFat.DecodeError:
                # An ordinary blob that merely has the same size as a stub.
                continue
        return referenced

    def cmd_status(self, args):
        """Print orphan (referenced, not stored) and garbage (stored, unreferenced) objects.

        With ``--all`` in ``args`` every referenced digest is listed first.
        """
        self.setup()
        catalog = self.catalog_objects()
        referenced = self.referenced_objects()
        garbage = catalog - referenced
        orphans = referenced - catalog
        if '--all' in args:
            for obj in referenced:
                print(obj)
        if orphans:
            print('Orphan files:')
            for orph in orphans:
                print('    ' + orph)
        if garbage:
            print('Garbage objects:')
            for g in garbage:
                print('    ' + g)

    def cmd_push(self):
        """Push anything that I have stored and referenced"""
        self.setup()
        files = self.referenced_objects() & self.catalog_objects()
        remote = self.get_rsync()
        verbose('Pushing to %s' % (remote))
        cmd = ['rsync', '--progress', '--ignore-existing', '--from0',
               '--files-from=-', self.objdir + '/', remote]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        p.communicate(input='\x00'.join(files))

    def checkout(self):
        """Placeholder run after a pull; the file listing is not used yet."""
        # No pathspec argument: newer Git releases reject an empty-string
        # pathspec, which would make every pull end in CalledProcessError.
        subprocess.check_output(['git', 'ls-files'])

    def cmd_pull(self):
        """Pull anything that I have referenced, but not stored"""
        self.setup()
        files = self.referenced_objects() - self.catalog_objects()
        remote = self.get_rsync()
        cmd = ['rsync', '--progress', '--ignore-existing', '--from0',
               '--files-from=-', remote + '/', self.objdir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        p.communicate(input='\x00'.join(files))
        self.checkout()

    def cmd_gc(self):
        """Delete stored objects that no reachable stub refers to."""
        # Same precondition as the other commands: the store must exist
        # before it can be listed.
        self.setup()
        garbage = self.catalog_objects() - self.referenced_objects()
        print('Unreferenced objects to remove: %d' % len(garbage))
        for obj in garbage:
            fname = os.path.join(self.objdir, obj)
            print('%10d %s' % (os.stat(fname).st_size, obj))
            os.remove(fname)

    def cmd_init(self):
        """Create the object store and register the ``fat`` filter in git config."""
        self.setup()
        # `git config` replaces existing values, so running init repeatedly
        # no longer appends duplicate [filter "fat"] sections.
        subprocess.check_call(
            ['git', 'config', 'filter.fat.clean', 'git-fat filter-clean'])
        subprocess.check_call(
            ['git', 'config', 'filter.fat.smudge', 'git-fat filter-smudge'])


if __name__ == '__main__':
    # Plain module-level rebinding; a `global` statement here is a no-op on
    # Python 2 and a SyntaxError on Python 3.
    if os.environ.get('GIT_FAT_VERBOSE'):
        verbose = verbose_stderr
    fat = GitFat()
    cmd = sys.argv[1] if len(sys.argv) > 1 else ''
    if cmd == 'filter-clean':
        fat.cmd_clean()
    elif cmd == 'filter-smudge':
        fat.cmd_smudge()
    elif cmd == 'status':
        fat.cmd_status(sys.argv[2:])
    elif cmd == 'push':
        fat.cmd_push()
    elif cmd == 'pull':
        fat.cmd_pull()
    elif cmd == 'gc':
        fat.cmd_gc()
    elif cmd == 'init':
        fat.cmd_init()
    else:
        print('Usage: git fat [status|push|pull|gc|init]', file=sys.stderr)