summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Schubert <contact@benschubert.me>2019-12-16 11:33:11 +0000
committerBenjamin Schubert <contact@benschubert.me>2019-12-16 11:33:11 +0000
commit0118773753c571aec6c153d30f85c87e0d501d2c (patch)
tree49928a955ada4a73ecba4bc279db9805b509f607
parent2fc5a5cfbcf5c16407914849ea1a99bfdb71cb0e (diff)
parent7fe2bb4bd2f28f49567100eaf388685e53ad4bf3 (diff)
downloadbuildstream-0118773753c571aec6c153d30f85c87e0d501d2c.tar.gz
Merge branch 'tmewett/recursive-git' into 'master'
Make Git source plugin clone submodules recursively Closes #1162 See merge request BuildStream/buildstream!1765
-rw-r--r--src/buildstream/_gitsourcebase.py153
-rw-r--r--src/buildstream/plugins/sources/git.py8
-rw-r--r--tests/sources/git.py40
3 files changed, 133 insertions, 68 deletions
diff --git a/src/buildstream/_gitsourcebase.py b/src/buildstream/_gitsourcebase.py
index 4e9e59161..5b7f051b9 100644
--- a/src/buildstream/_gitsourcebase.py
+++ b/src/buildstream/_gitsourcebase.py
@@ -48,9 +48,20 @@ class _RefFormat(FastEnum):
GIT_DESCRIBE = "git-describe"
-# Because of handling of submodules, we maintain a _GitMirror
-# for the primary git source and also for each submodule it
-# might have at a given time
+# This class represents a single Git repository. The Git source needs to account for
+# submodules, but we don't want to cache them all under the umbrella of the
+# superproject - so we use this class which caches them independently, according
+# to their URL. Instances keep reference to their "parent" GitSourceBase,
+# and if applicable, where in the superproject they are found.
+#
+# Args:
+# source (_GitSourceBase or subclass): The parent source
+# path (str): The relative location of the submodule in the superproject;
+# the empty string for the superproject itself
+# url (str): Where to clone the repo from
+# ref (str): Specified 'ref' from the source configuration
+# primary (bool): Whether this is the primary URL for the source
+# tags (list): Tag configuration; see _GitSourceBase._load_tags
#
class _GitMirror(SourceFetcher):
def __init__(self, source, path, url, ref, *, primary=False, tags=[]):
@@ -63,7 +74,6 @@ class _GitMirror(SourceFetcher):
self.tags = tags
self.primary = primary
self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url))
- self.mark_download_url(url)
# Ensures that the mirror exists
def ensure(self, alias_override=None):
@@ -244,6 +254,19 @@ class _GitMirror(SourceFetcher):
cwd=fullpath,
)
+ # get_submodule_mirrors():
+ #
+ # Returns:
+ # An iterator through new instances of this class, one of each submodule
+ # in the repo
+ #
+ def get_submodule_mirrors(self):
+ for path, url in self.submodule_list():
+ ref = self.submodule_ref(path)
+ if ref is not None:
+ mirror = self.__class__(self.source, os.path.join(self.path, path), url, ref)
+ yield mirror
+
# List the submodules (path/url tuples) present at the given ref of this repo
def submodule_list(self):
modules = "{}:{}".format(self.ref, GIT_MODULES)
@@ -445,7 +468,6 @@ class _GitSourceBase(Source):
)
self.checkout_submodules = node.get_bool("checkout-submodules", default=True)
- self.submodules = []
# Parse a dict of submodule overrides, stored in the submodule_overrides
# and submodule_checkout_overrides dictionaries.
@@ -554,34 +576,27 @@ class _GitSourceBase(Source):
return ret
def init_workspace(self, directory):
- # XXX: may wish to refactor this as some code dupe with stage()
- self._refresh_submodules()
-
with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True):
self.mirror.init_workspace(directory)
- for mirror in self.submodules:
+ for mirror in self._recurse_submodules(configure=True):
mirror.init_workspace(directory)
def stage(self, directory):
-
- # Need to refresh submodule list here again, because
- # it's possible that we did not load in the main process
- # with submodules present (source needed fetching) and
- # we may not know about the submodule yet come time to build.
- #
- self._refresh_submodules()
-
# Stage the main repo in the specified directory
#
with self.timed_activity("Staging {}".format(self.mirror.url), silent_nested=True):
self.mirror.stage(directory)
- for mirror in self.submodules:
+ for mirror in self._recurse_submodules(configure=True):
mirror.stage(directory)
def get_source_fetchers(self):
+ self.mirror.mark_download_url(self.mirror.url)
yield self.mirror
- self._refresh_submodules()
- for submodule in self.submodules:
+ # _recurse_submodules only iterates those which are known at the current
+ # cached state - but fetch is called on each result as we go, so this will
+ # yield all configured submodules
+ for submodule in self._recurse_submodules(configure=True):
+ submodule.mark_download_url(submodule.url)
yield submodule
def validate_cache(self):
@@ -589,14 +604,13 @@ class _GitSourceBase(Source):
unlisted_submodules = []
invalid_submodules = []
- for path, url in self.mirror.submodule_list():
- discovered_submodules[path] = url
- if self._ignore_submodule(path):
+ for submodule in self._recurse_submodules(configure=False):
+ discovered_submodules[submodule.path] = submodule.url
+ if self._ignoring_submodule(submodule.path):
continue
- override_url = self.submodule_overrides.get(path)
- if not override_url:
- unlisted_submodules.append((path, url))
+ if submodule.path not in self.submodule_overrides:
+ unlisted_submodules.append((submodule.path, submodule.url))
# Warn about submodules which are explicitly configured but do not exist
for path, url in self.submodule_overrides.items():
@@ -668,44 +682,50 @@ class _GitSourceBase(Source):
###########################################################
def _have_all_refs(self):
- if not self.mirror.has_ref():
- return False
-
- self._refresh_submodules()
- for mirror in self.submodules:
- if not os.path.exists(mirror.mirror):
- return False
- if not mirror.has_ref():
- return False
-
- return True
+ return self.mirror.has_ref() and all(
+ submodule.has_ref() for submodule in self._recurse_submodules(configure=True)
+ )
- # Refreshes the BST_MIRROR_CLASS objects for submodules
+ # _configure_submodules():
#
- # Assumes that we have our mirror and we have the ref which we point to
+ # Args:
+ # submodules: An iterator of _GitMirror (or similar) objects for submodules
#
- def _refresh_submodules(self):
- self.mirror.ensure()
- submodules = []
-
- for path, url in self.mirror.submodule_list():
-
- # Completely ignore submodules which are disabled for checkout
- if self._ignore_submodule(path):
+ # Returns:
+ # An iterator through `submodules` but filtered of any ignored submodules
+ # and modified to use any custom URLs configured in the source
+ #
+ def _configure_submodules(self, submodules):
+ for submodule in submodules:
+ if self._ignoring_submodule(submodule.path):
continue
+ # Allow configuration to override the upstream location of the submodules.
+ submodule.url = self.submodule_overrides.get(submodule.path, submodule.url)
+ yield submodule
- # Allow configuration to override the upstream
- # location of the submodules.
- override_url = self.submodule_overrides.get(path)
- if override_url:
- url = override_url
+ # _recurse_submodules():
+ #
+ # Recursively iterates through GitMirrors for submodules of the main repo. Only
+ # submodules that are cached are recursed into - but this is decided at
+ # iteration time, so you can fetch in a for loop over this function to fetch
+ # all submodules.
+ #
+ # Args:
+ # configure (bool): Whether to apply the 'submodule' config while recursing
+ # (URL changing and 'checkout' overrides)
+ #
+ def _recurse_submodules(self, configure):
+ def recurse(mirror):
+ submodules = mirror.get_submodule_mirrors()
+ if configure:
+ submodules = self._configure_submodules(submodules)
- ref = self.mirror.submodule_ref(path)
- if ref is not None:
- mirror = self.BST_MIRROR_CLASS(self, path, url, ref)
- submodules.append(mirror)
+ for submodule in submodules:
+ yield submodule
+ if submodule.has_ref():
+ yield from recurse(submodule)
- self.submodules = submodules
+ yield from recurse(self.mirror)
def _load_tags(self, node):
tags = []
@@ -717,12 +737,13 @@ class _GitSourceBase(Source):
tags.append((tag, commit_ref, annotated))
return tags
- # Checks whether the plugin configuration has explicitly
- # configured this submodule to be ignored
- def _ignore_submodule(self, path):
- try:
- checkout = self.submodule_checkout_overrides[path]
- except KeyError:
- checkout = self.checkout_submodules
-
- return not checkout
+ # _ignoring_submodule():
+ #
+ # Args:
+ # path (str): The path of a submodule in the superproject
+ #
+ # Returns:
+ # (bool): Whether to not clone/checkout this submodule
+ #
+ def _ignoring_submodule(self, path):
+ return not self.submodule_checkout_overrides.get(path, self.checkout_submodules)
diff --git a/src/buildstream/plugins/sources/git.py b/src/buildstream/plugins/sources/git.py
index 5e6834979..d04ccfe1b 100644
--- a/src/buildstream/plugins/sources/git.py
+++ b/src/buildstream/plugins/sources/git.py
@@ -57,6 +57,7 @@ git - stage files from a git repository
ref: d63cbb6fdc0bbdadc4a1b92284826a6d63a7ebcd
# Optionally specify whether submodules should be checked-out.
+ # This is done recursively, as with `git clone --recurse-submodules`.
# If not set, this will default to 'True'
checkout-submodules: True
@@ -66,12 +67,15 @@ git - stage files from a git repository
# especially handy when used with project defined aliases which
# can be redefined at a later time.
# You may also explicitly specify whether to check out this
- # submodule. If 'checkout' is set, it will override
- # 'checkout-submodules' with the value set below.
+ # submodule. If 'checkout' is set, it will control whether to
+ # checkout that submodule and recurse into it. It defaults to the
+ # value of 'checkout-submodules'.
submodules:
plugins/bar:
url: upstream:bar.git
checkout: True
+ plugins/bar/quux:
+ checkout: False
plugins/baz:
url: upstream:baz.git
checkout: False
diff --git a/tests/sources/git.py b/tests/sources/git.py
index e4252fe51..096338bbe 100644
--- a/tests/sources/git.py
+++ b/tests/sources/git.py
@@ -94,6 +94,46 @@ def test_submodule_fetch_checkout(cli, tmpdir, datafiles):
@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
@pytest.mark.datafiles(os.path.join(DATA_DIR, "template"))
+def test_recursive_submodule_fetch_checkout(cli, tmpdir, datafiles):
+ project = str(datafiles)
+ checkoutdir = os.path.join(str(tmpdir), "checkout")
+
+ # Create a submodule from the 'othersubrepofiles' subdir
+ subsubrepo = create_repo("git", str(tmpdir), "subsubrepo")
+ subsubrepo.create(os.path.join(project, "othersubrepofiles"))
+
+ # Create another submodule from the 'subrepofiles' subdir
+ subrepo = create_repo("git", str(tmpdir), "subrepo")
+ subrepo.create(os.path.join(project, "subrepofiles"))
+
+ # Create the repo from 'repofiles' subdir
+ repo = create_repo("git", str(tmpdir))
+ repo.create(os.path.join(project, "repofiles"))
+
+ # Configure submodules
+ subrepo.add_submodule("subdir", "file://" + subsubrepo.repo)
+ ref = repo.add_submodule("subdir", "file://" + subrepo.repo)
+
+ # Write out our test target
+ element = {"kind": "import", "sources": [repo.source_config(ref=ref)]}
+ _yaml.roundtrip_dump(element, os.path.join(project, "target.bst"))
+
+ # Fetch, build, checkout
+ result = cli.run(project=project, args=["source", "fetch", "target.bst"])
+ result.assert_success()
+ result = cli.run(project=project, args=["build", "target.bst"])
+ result.assert_success()
+ result = cli.run(project=project, args=["artifact", "checkout", "target.bst", "--directory", checkoutdir])
+ result.assert_success()
+
+ # Assert we checked out all files at their expected location
+ assert os.path.exists(os.path.join(checkoutdir, "file.txt"))
+ assert os.path.exists(os.path.join(checkoutdir, "subdir", "ponyfile.txt"))
+ assert os.path.exists(os.path.join(checkoutdir, "subdir", "subdir", "unicornfile.txt"))
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "template"))
def test_submodule_fetch_source_enable_explicit(cli, tmpdir, datafiles):
project = str(datafiles)
checkoutdir = os.path.join(str(tmpdir), "checkout")