summary | refs | log | tree | commit | diff
path: root/git-fat
diff options
context:
space:
mode:
author: Jed Brown <jed@59A2.org> 2012-12-24 16:11:25 -0600
committer: Jed Brown <jed@59A2.org> 2012-12-24 16:11:25 -0600
commit: e8aa02c259b9cd22768d17f2307d0212118d59d8 (patch)
tree: c084a85fa74428751ce72ac209126b666c9cf080 /git-fat
parent: 01c66130e6f007a4eb49abf0c59d0da1145810a9 (diff)
download: git-fat-e8aa02c259b9cd22768d17f2307d0212118d59d8.tar.gz
Fix deadlock when filtering large rev-list, reported by Jozsef Bakoski
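subprocess.Popen.communicate() cannot be used with large outputs because it is a fully synchronous interface: it reads everything until end-of-file and returns only after the process has exited, so nothing can be streamed between the two processes while it runs. Since there is no stream/generator/coroutine support in Popen.communicate(), we have to roll the line iterator ourselves, putting the filter in its own thread.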
Diffstat (limited to 'git-fat')
-rwxr-xr-x git-fat 14
1 file changed, 11 insertions, 3 deletions
diff --git a/git-fat b/git-fat
index 14983f4..824f394 100755
--- a/git-fat
+++ b/git-fat
@@ -10,6 +10,7 @@ import subprocess
import shlex
import shutil
import itertools
+import threading
BLOCK_SIZE = 4096
@@ -194,13 +195,20 @@ class GitFat(object):
rev = self.revparse('HEAD')
p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE)
p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
- for line in p1.communicate()[0].splitlines():
- p2.stdin.write(line.split()[0] + '\n')
- for line in p2.communicate()[0].splitlines():
+ def cut_sha1hash(input, output):
+ for line in input:
+ output.write(line.split()[0] + '\n')
+ output.close()
+ cut_thread = threading.Thread(target=cut_sha1hash, args=(p1.stdout, p2.stdin))
+ cut_thread.start()
+ for line in p2.stdout:
objhash, objtype, size = line.split()
if objtype == 'blob' and int(size) in self.magiclens:
fathash = self.decode(subprocess.check_output(['git', 'cat-file', '-p', objhash]))[0]
referenced.add(fathash)
+ cut_thread.join()
+ p1.wait()
+ p2.wait()
return referenced
def orphan_files(self):
'generator for all orphan placeholders in the working tree'