diff options
author | Phillip Smyth <phillipsmyth@Nexus-x240.dyn.ducie.codethink.co.uk> | 2018-05-29 13:48:19 +0100 |
---|---|---|
committer | Phillip Smyth <phillipsmyth@Nexus-x240.dyn.ducie.codethink.co.uk> | 2018-06-15 07:59:15 +0100 |
commit | 0fd230671d0c865b976ff1d9a4f70ae8ff631cfb (patch) | |
tree | 4c22da874a4a3479f0ce7702e8b82d1b7f8aa8fe | |
parent | 6da05577407545bf6511054d8a822cebc38824b9 (diff) | |
download | buildstream-0fd230671d0c865b976ff1d9a4f70ae8ff631cfb.tar.gz |
Limiting git history to reduce cache size
-rw-r--r-- | buildstream/plugins/sources/git.py | 80 | ||||
-rw-r--r-- | tests/sources/git.py | 1 |
2 files changed, 72 insertions, 9 deletions
```diff
diff --git a/buildstream/plugins/sources/git.py b/buildstream/plugins/sources/git.py
index 44065ad8f..4b53c7f51 100644
--- a/buildstream/plugins/sources/git.py
+++ b/buildstream/plugins/sources/git.py
@@ -70,6 +70,7 @@ git - stage files from a git repository
 import os
 import re
 import shutil
+import shlex
 from collections import Mapping
 from io import StringIO
 
@@ -150,15 +151,76 @@ class GitMirror():
     def stage(self, directory):
         fullpath = os.path.join(directory, self.path)
 
-        # We need to pass '--no-hardlinks' because there's nothing to
-        # stop the build from overwriting the files in the .git directory
-        # inside the sandbox.
-        self.source.call([self.source.host_git, 'clone', '--no-checkout', '--no-hardlinks', self.mirror, fullpath],
-                         fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath))
-
-        self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
-                         fail="Failed to checkout git ref {}".format(self.ref),
-                         cwd=fullpath)
+        # Need to get every commit since the last tagged object until the tracking commit
+        if self.has_ref():
+            all_tags = self.source.check_output([self.source.host_git, 'tag'], cwd=self.mirror)[1]
+            all_tags = [x.strip() for x in all_tags.split('\n')]
+            if all_tags:
+                tags_since_sha = self.source.check_output([self.source.host_git,
+                                                           'tag',
+                                                           '--sort',
+                                                           '--contains',
+                                                           self.ref],
+                                                          cwd=self.mirror)[1]
+
+                tags_since_sha = [x.strip() for x in tags_since_sha.split('\n')]
+                preceeding_tags = [x for x in all_tags if x not in tags_since_sha]
+                if preceeding_tags:
+                    last_tag_before_ref = preceeding_tags[-1]
+                else:
+                    last_tag_before_ref = 'HEAD'
+
+                # find number of commits since last_tag_before_ref
+                target_depth = self.source.check_output([self.source.host_git,
+                                                         'rev-list',
+                                                         '--count',
+                                                         'HEAD...{}'.format(last_tag_before_ref)])[1]
+
+            else:
+                target_depth = self.source.check_output([self.source.host_git,
+                                                         'rev-list',
+                                                         '--count',
+                                                         'HEAD...{}'.format(self.ref)], cwd=self.mirror)[1]
+
+            if int(target_depth) == 0:
+                target_depth = 1
+
+        branch = self.source.check_output([self.source.host_git,
+                                           'rev-parse',
+                                           '--abbrev-ref',
+                                           'HEAD'], cwd=self.mirror)[1]
+
+        self.source.call([self.source.host_git,
+                          'init',
+                          fullpath])
+
+        self.source.call([self.source.host_git,
+                          'fetch',
+                          '--depth={}'.format(int(target_depth)),
+                          'ext::git -c uploadpack.allowReachableSHA1InWant=true %s {}'
+                          .format(shlex.quote(self.mirror)),
+                          self.ref],
+                         env=dict(os.environ, GIT_ALLOW_PROTOCOL="ext"), cwd=fullpath)
+
+        self.source.call([self.source.host_git,
+                          'checkout',
+                          'FETCH_HEAD'], cwd=fullpath)
+
+        if "master" not in branch:
+            self.source.call([self.source.host_git,
+                              'branch',
+                              '-D',
+                              'master'], cwd=fullpath)
+
+        self.source.call([self.source.host_git,
+                          'reflog',
+                          'expire',
+                          '--expire-unreachable=all'
+                          '--all'], cwd=fullpath)
+
+        self.source.call([self.source.host_git,
+                          'repack',
+                          '-ad'], cwd=fullpath)
 
     def init_workspace(self, directory):
         fullpath = os.path.join(directory, self.path)
diff --git a/tests/sources/git.py b/tests/sources/git.py
index 06888c311..495c147dc 100644
--- a/tests/sources/git.py
+++ b/tests/sources/git.py
@@ -288,6 +288,7 @@ def test_submodule_fetch_submodule_individual_checkout_explicit(cli, tmpdir, dat
 @pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
 @pytest.mark.datafiles(os.path.join(DATA_DIR, 'project-override'))
 def test_submodule_fetch_project_override(cli, tmpdir, datafiles):
+    print("\n\n\nTemp Directory: {}\n\n\n".format(tmpdir))
     project = os.path.join(datafiles.dirname, datafiles.basename)
     checkoutdir = os.path.join(str(tmpdir), "checkout")
 
```