diff options
author | Jed Brown <jed@59A2.org> | 2012-12-24 16:11:25 -0600 |
---|---|---|
committer | Jed Brown <jed@59A2.org> | 2012-12-24 16:11:25 -0600 |
commit | e8aa02c259b9cd22768d17f2307d0212118d59d8 (patch) | |
tree | c084a85fa74428751ce72ac209126b666c9cf080 /git-fat | |
parent | 01c66130e6f007a4eb49abf0c59d0da1145810a9 (diff) | |
download | git-fat-e8aa02c259b9cd22768d17f2307d0212118d59d8.tar.gz |
Fix deadlock when filtering large rev-list, reported by Jozsef Bakoski
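Popen.communicate() cannot be used with large outputs because it is a
fully synchronous interface: it returns only after the child exits, with all of
the child's output buffered in memory. The old code also wrote every object
hash to `git cat-file --batch-check` before reading any of its output, so on a
large rev-list the pipes could fill and each side blocked waiting on the other.
Since there is no stream/generator/coroutine support in Popen.communicate(),
we have to roll the line iterator ourselves, putting the filter in its own thread.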
Diffstat (limited to 'git-fat')
-rwxr-xr-x | git-fat | 14 |
1 file changed, 11 insertions, 3 deletions
@@ -10,6 +10,7 @@ import subprocess import shlex import shutil import itertools +import threading BLOCK_SIZE = 4096 @@ -194,13 +195,20 @@ class GitFat(object): rev = self.revparse('HEAD') p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE) p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - for line in p1.communicate()[0].splitlines(): - p2.stdin.write(line.split()[0] + '\n') - for line in p2.communicate()[0].splitlines(): + def cut_sha1hash(input, output): + for line in input: + output.write(line.split()[0] + '\n') + output.close() + cut_thread = threading.Thread(target=cut_sha1hash, args=(p1.stdout, p2.stdin)) + cut_thread.start() + for line in p2.stdout: objhash, objtype, size = line.split() if objtype == 'blob' and int(size) in self.magiclens: fathash = self.decode(subprocess.check_output(['git', 'cat-file', '-p', objhash]))[0] referenced.add(fathash) + cut_thread.join() + p1.wait() + p2.wait() return referenced def orphan_files(self): 'generator for all orphan placeholders in the working tree' |