path: root/morphlib/
diff options
Diffstat (limited to 'morphlib/')
1 files changed, 575 insertions, 0 deletions
diff --git a/morphlib/ b/morphlib/
new file mode 100644
index 00000000..edd2f0c5
--- /dev/null
+++ b/morphlib/
@@ -0,0 +1,575 @@
+# Copyright (C) 2011-2014 Codethink Limited
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import itertools
+import os
+import shutil
+import logging
+import tempfile
+import morphlib
+import distbuild
+class MultipleRootArtifactsError(morphlib.Error):
+ '''Error for a system build that has more than one root artifact.
+
+ The offending artifacts are kept on the ``artifacts`` attribute and
+ are also formatted into ``msg``.
+ '''
+ def __init__(self, artifacts):
+ # NOTE(review): the expression of the list comprehension below is
+ # missing in this copy of the file (presumably a per-artifact name)
+ # -- restore it from the upstream source.
+ self.msg = ('System build has multiple root artifacts: %r'
+ % [ for a in artifacts])
+ self.artifacts = artifacts
+class BuildCommand(object):
+ '''High level logic for building.
+ This controls how the whole build process goes. This is a separate
+ class to enable easy experimentation of different approaches to
+ the various parts of the process.
+ '''
+ def __init__(self, app, build_env = None):
+ # NOTE(review): build_env is accepted but never read in this method.
+ self.supports_local_build = True
+ # NOTE(review): the assignment target on the next line is missing in
+ # this copy of the file (presumably the attribute that stores the
+ # application object) -- restore it from the upstream source.
+ = app
+ # lac/rac: the artifact caches used for local lookups and remote
+ # fetches; lrc/rrc: the matching pair of repository caches.
+ self.lac, self.rac = self.new_artifact_caches()
+ self.lrc, self.rrc = self.new_repo_caches()
+ def build(self, args):
+ '''Build triplets specified on command line.'''
+ # NOTE(review): the status-reporting calls and the iterable of the
+ # for loop below have lost their leading tokens in this copy of the
+ # file; restore them from the upstream source before running.
+'Build starts', chatty=True)
+ for repo_name, ref, filename in
+'Building %(repo_name)s %(ref)s %(filename)s',
+ repo_name=repo_name, ref=ref, filename=filename)
+'Deciding on task order')
+ # Per triplet: collect the sources, validate them, resolve the root
+ # artifact, then build everything the root artifact needs.
+ srcpool = self.create_source_pool(repo_name, ref, filename)
+ self.validate_sources(srcpool)
+ root_artifact = self.resolve_artifacts(srcpool)
+ self.build_in_order(root_artifact)
+'Build ends successfully')
+ def new_artifact_caches(self):
+ '''Create interfaces for the build artifact caches.
+ This includes creating the directories on disk if they are missing.
+ The constructor unpacks the result into two values (self.lac, self.rac).
+ '''
+ # NOTE(review): the argument list of this call is cut off in this
+ # copy of the file -- restore it from the upstream source.
+ return morphlib.util.new_artifact_caches(
+ def new_repo_caches(self):
+ '''Create interfaces for the repository caches.
+ The constructor unpacks the result into two values (self.lrc, self.rrc).
+ '''
+ # NOTE(review): the argument list of this call is cut off in this
+ # copy of the file -- restore it from the upstream source.
+ return morphlib.util.new_repo_caches(
+ def new_build_env(self, arch):
+ '''Create a new BuildEnvironment instance.'''
+ # NOTE(review): the first constructor argument is missing in this
+ # copy of the file (only its trailing comma survives); `arch` is
+ # passed as the second argument.
+ return morphlib.buildenvironment.BuildEnvironment(,
+ arch)
+ def create_source_pool(self, repo_name, ref, filename):
+ '''Find the source objects required for building the given artifact.
+ The SourcePool will contain every stratum and chunk dependency of the
+ given artifact (which must be a system) but will not take into account
+ any Git submodules which are required in the build.
+ '''
+ # NOTE(review): the status call and the callee that builds the pool
+ # have lost their leading tokens in this copy of the file; only the
+ # arguments (both repo caches plus the triplet) survive.
+'Creating source pool', chatty=True)
+ srcpool =
+ self.lrc, self.rrc, repo_name, ref, filename)
+ return srcpool
+ def validate_sources(self, srcpool):
+ '''Validate the sources in srcpool.
+ Cross-morphology references are checked first, then the presence of
+ at least one non-bootstrap chunk.
+ '''
+ # NOTE(review): both status calls below have lost their leading
+ # tokens in this copy of the file.
+ msg='Validating cross-morphology references', chatty=True)
+ self._validate_cross_morphology_references(srcpool)
+'Validating for there being non-bootstrap chunks',
+ chatty=True)
+ self._validate_has_non_bootstrap_chunks(srcpool)
+ def _validate_root_artifact(self, root_artifact):
+     '''Run every root-artifact check: morphology kind, then architecture.'''
+     checks = (self._validate_root_kind, self._validate_architecture)
+     for check in checks:
+         check(root_artifact)
+ @staticmethod
+ def _validate_root_kind(root_artifact):
+     '''Reject any root artifact whose morphology kind is not 'system'.'''
+     kind = root_artifact.source.morphology['kind']
+     if kind == 'system':
+         return
+     raise morphlib.Error(
+         'Building a %s directly is not supported' % kind)
+ def _validate_architecture(self, root_artifact):
+     '''Refuse to build when the root's architecture is not the host's.'''
+     target = root_artifact.source.morphology['arch']
+     host = morphlib.util.get_host_architecture()
+     if target == host:
+         return
+     message = ('Are you trying to cross-build? '
+                'Host architecture is %s but target is %s')
+     raise morphlib.Error(message % (host, target))
+ @staticmethod
+ def _validate_has_non_bootstrap_chunks(srcpool):
+     '''Raise morphlib.Error if every chunk in every stratum is bootstrap.
+
+     A chunk spec without a 'build-mode' key counts as 'staging'.
+     '''
+     strata = [source for source in srcpool
+               if source.morphology['kind'] == 'stratum']
+     # With no strata at all the search below would find nothing and
+     # report a misleading error.  Having no strata is an error by
+     # itself, but it can be better diagnosed later, so stop here.
+     if not strata:
+         return
+     for stratum in strata:
+         for chunk_spec in stratum.morphology['chunks']:
+             if chunk_spec.get('build-mode', 'staging') != 'bootstrap':
+                 # One non-bootstrap chunk is enough.
+                 return
+     raise morphlib.Error('No non-bootstrap chunks found.')
+ def resolve_artifacts(self, srcpool):
+ '''Resolve the artifacts that will be built for a set of sources.
+ Returns the single root artifact, with a ``build_env`` attribute set
+ and with ``cache_key`` and ``cache_id`` computed on every source.
+ Raises MultipleRootArtifactsError if more than one root is found.
+ '''
+ # NOTE(review): every status call in this method has lost its
+ # leading tokens in this copy of the file.
+'Creating artifact resolver', chatty=True)
+ ar = morphlib.artifactresolver.ArtifactResolver()
+'Resolving artifacts', chatty=True)
+ artifacts = ar.resolve_artifacts(srcpool)
+'Computing build order', chatty=True)
+ root_artifacts = self._find_root_artifacts(artifacts)
+ if len(root_artifacts) > 1:
+ # Validate root artifacts, since validation covers errors
+ # such as trying to build a chunk or stratum directly,
+ # and this is one cause for having multiple root artifacts
+ for root_artifact in root_artifacts:
+ self._validate_root_artifact(root_artifact)
+ raise MultipleRootArtifactsError(root_artifacts)
+ # NOTE(review): an empty root_artifacts list would raise IndexError
+ # here -- confirm the resolver always yields at least one root.
+ root_artifact = root_artifacts[0]
+ # Validate the root artifact here, since it's a costly function
+ # to finalise it, so any pre finalisation validation is better
+ # done before that happens, but we also don't want to expose
+ # the root artifact until it's finalised.
+'Validating root artifact', chatty=True)
+ self._validate_root_artifact(root_artifact)
+ arch = root_artifact.source.morphology['arch']
+'Creating build environment for %(arch)s',
+ arch=arch, chatty=True)
+ build_env = self.new_build_env(arch)
+'Computing cache keys', chatty=True)
+ ckc = morphlib.cachekeycomputer.CacheKeyComputer(build_env)
+ # Several artifacts can share one source, so de-duplicate first.
+ for source in set(a.source for a in artifacts):
+ source.cache_key = ckc.compute_key(source)
+ source.cache_id = ckc.get_cache_id(source)
+ root_artifact.build_env = build_env
+ return root_artifact
+ def _validate_cross_morphology_references(self, srcpool):
+ '''Perform validation across all morphologies involved in the build.
+ Raises morphlib.Error when two strata in the pool share a name.
+ '''
+ stratum_names = []
+ for src in srcpool:
+ kind = src.morphology['kind']
+ # Verify that chunks pointed to by strata really are chunks, etc.
+ # The checker is looked up by name from the morphology kind; kinds
+ # without a checker only produce a log warning.
+ method_name = '_validate_cross_refs_for_%s' % kind
+ if hasattr(self, method_name):
+ logging.debug('Calling %s' % method_name)
+ getattr(self, method_name)(src, srcpool)
+ else:
+ logging.warning('No %s' % method_name)
+ # Verify stratum build-depends agree with the system's contents.
+ # It is permissible for a stratum to build-depend on a stratum that
+ # isn't specified in the target system morphology.
+ # Multiple references to the same stratum are permitted. This is
+ # handled by the SourcePool deduplicating added Sources.
+ # It is forbidden to have two different strata with the same name.
+ # Hence if a Stratum is defined in the System, and in a Stratum as
+ # a build-dependency, then they must both have the same Repository
+ # and Ref specified.
+ if src.morphology['kind'] == 'stratum':
+ # NOTE(review): the right-hand side of the `name` assignment and
+ # the leading tokens of the status call are missing in this copy
+ # of the file -- restore them from the upstream source.
+ name =
+ ref = src.sha1[:7]
+'Stratum [%(name)s] version is %(ref)s',
+ name=name, ref=ref)
+ if name in stratum_names:
+ raise morphlib.Error(
+ "Conflicting versions of stratum '%s' appear in the "
+ "build. Check the contents of the system against the "
+ "build-depends of the strata." % name)
+ stratum_names.append(name)
+ def _validate_cross_refs_for_system(self, src, srcpool):
+     '''Check that every entry in a system's 'strata' list is a stratum.'''
+     stratum_specs = src.morphology['strata']
+     self._validate_cross_refs_for_xxx(
+         src, srcpool, stratum_specs, 'stratum')
+ def _validate_cross_refs_for_stratum(self, src, srcpool):
+     '''Check that every entry in a stratum's 'chunks' list is a chunk.'''
+     chunk_specs = src.morphology['chunks']
+     self._validate_cross_refs_for_xxx(
+         src, srcpool, chunk_specs, 'chunk')
+ def _validate_cross_refs_for_xxx(self, src, srcpool, specs, wanted):
+ '''Check that each spec in `specs` refers to a morphology of kind `wanted`.
+ Each spec's repo and ref fall back to those of `src` when absent or
+ empty; the file name comes from the spec's 'morph' key, or 'name' if
+ 'morph' is absent. Raises morphlib.Error on the first mismatch.
+ '''
+ for spec in specs:
+ repo_name = spec.get('repo') or src.repo_name
+ ref = spec.get('ref') or src.original_ref
+ filename = morphlib.util.sanitise_morphology_path(
+ spec.get('morph', spec.get('name')))
+ logging.debug(
+ 'Validating cross ref to %s:%s:%s' %
+ (repo_name, ref, filename))
+ for other in srcpool.lookup(repo_name, ref, filename):
+ if other.morphology['kind'] != wanted:
+ # NOTE(review): the format string has seven %s placeholders but
+ # only six values follow; a tuple element (presumably the name
+ # of `src`) appears to have been lost in this copy of the file.
+ raise morphlib.Error(
+ '%s %s references %s:%s:%s which is a %s, '
+ 'instead of a %s' %
+ (src.morphology['kind'],
+ repo_name,
+ ref,
+ filename,
+ other.morphology['kind'],
+ wanted))
+ def _find_root_artifacts(self, artifacts):
+     '''Return the artifacts that nothing else depends on.
+
+     An artifact is a root when its ``dependents`` collection is empty.
+     The ArtifactResolver could save us this scan by returning results in
+     a more useful order: the root is already known, since it is the one
+     the user asked us to build.
+     '''
+     roots = []
+     for artifact in artifacts:
+         if artifact.dependents:
+             continue
+         roots.append(artifact)
+     return roots
+ @staticmethod
+ def get_ordered_sources(artifacts):
+     '''Yield each distinct source of `artifacts`, in first-seen order.
+
+     This is a generator: a source shared by several artifacts is
+     yielded only once, at the position of the first artifact that
+     refers to it.  Sources must be hashable.
+     '''
+     known_sources = set()
+     for artifact in artifacts:
+         if artifact.source not in known_sources:
+             known_sources.add(artifact.source)
+             yield artifact.source
+ def build_in_order(self, root_artifact):
+ '''Build everything specified in a build order.
+ The order is that of root_artifact.walk(), reduced to distinct sources.
+ '''
+ # NOTE(review): the status call, the expression saved in old_prefix,
+ # both assignment targets that use it and the 'name' value have lost
+ # their tokens in this copy of the file -- restore from upstream.
+'Building a set of sources', chatty=True)
+ build_env = root_artifact.build_env
+ ordered_sources = list(self.get_ordered_sources(root_artifact.walk()))
+ old_prefix =
+ for i, s in enumerate(ordered_sources):
+ # Extend the saved prefix with a 1-based progress counter.
+ = (
+ old_prefix + '[Build %(index)d/%(total)d] [%(name)s] ' % {
+ 'index': (i+1),
+ 'total': len(ordered_sources),
+ 'name':,
+ })
+ self.cache_or_build_source(s, build_env)
+ # Put the saved prefix back.
+ = old_prefix
+ def cache_or_build_source(self, source, build_env):
+ '''Make artifacts of the built source available in the local cache.
+ This can be done by retrieving from a remote artifact cache, or if
+ that doesn't work for some reason, by building the source locally.
+ '''
+ artifacts = source.artifacts.values()
+ # The remote cache is only consulted when one is configured.
+ if self.rac is not None:
+ try:
+ self.cache_artifacts_locally(artifacts)
+ except morphlib.remoteartifactcache.GetError:
+ # Error is logged by the RemoteArtifactCache object.
+ pass
+ # Build locally if any artifact is still missing from the local cache.
+ if any(not self.lac.has(artifact) for artifact in artifacts):
+ self.build_source(source, build_env)
+ for a in artifacts:
+ # NOTE(review): the leading tokens of this status call and its
+ # `name` keyword argument are missing in this copy of the file.
+'%(kind)s %(name)s is cached at %(cachepath)s',
+ kind=source.morphology['kind'],,
+ cachepath=self.lac.artifact_filename(a),
+ chatty=(source.morphology['kind'] != "system"))
+ def build_source(self, source, build_env):
+ '''Build all artifacts for one source.
+ All the dependencies are assumed to be built and available
+ in either the local or remote cache already.
+ '''
+ # NOTE(review): the leading tokens of this status call and its `name`
+ # keyword argument are missing in this copy of the file.
+'Building %(kind)s %(name)s',
+ kind=source.morphology['kind'])
+ self.fetch_sources(source)
+ # TODO: Make an artifact.walk() that takes multiple root artifacts.
+ # as this does a walk for every artifact. This was the status
+ # quo before build logic was made to work per-source, but we can
+ # now do better.
+ deps = self.get_recursive_deps(source.artifacts.values())
+ self.cache_artifacts_locally(deps)
+ use_chroot = False
+ setup_mounts = False
+ # Only chunks get dependencies installed into their staging area.
+ if source.morphology['kind'] == 'chunk':
+ build_mode = source.build_mode
+ extra_env = {'PREFIX': source.prefix}
+ # Add <prefix>/bin of every chunk dependency to the extra path.
+ dep_prefix_set = set(a.source.prefix for a in deps
+ if a.source.morphology['kind'] == 'chunk')
+ extra_path = [os.path.join(d, 'bin') for d in dep_prefix_set]
+ if build_mode not in ['bootstrap', 'staging', 'test']:
+ # NOTE(review): the second element of the format tuple below is
+ # missing in this copy of the file.
+ logging.warning('Unknown build mode %s for chunk %s. '
+ 'Defaulting to staging mode.' %
+ (build_mode,
+ build_mode = 'staging'
+ # Only staging mode uses a chroot and sets up mounts.
+ if build_mode == 'staging':
+ use_chroot = True
+ setup_mounts = True
+ staging_area = self.create_staging_area(build_env,
+ use_chroot,
+ extra_env=extra_env,
+ extra_path=extra_path)
+ try:
+ self.install_dependencies(staging_area, deps, source)
+ except BaseException:
+ # Abort the staging area on any failure, then re-raise.
+ staging_area.abort()
+ raise
+ else:
+ staging_area = self.create_staging_area(build_env, False)
+ self.build_and_cache(staging_area, source, setup_mounts)
+ # NOTE(review): not reached if build_and_cache raises -- confirm
+ # whether the staging area is meant to be left behind in that case.
+ self.remove_staging_area(staging_area)
+ def get_recursive_deps(self, artifacts):
+     '''Return the dependencies reachable from `artifacts`, without repeats.
+
+     Every artifact is walked in turn; an item is kept the first time it
+     is seen, unless it is itself one of `artifacts`.  The result is a
+     list in walk order.
+     '''
+     seen = set()
+     collected = []
+     for root in artifacts:
+         for candidate in root.walk():
+             if candidate in seen or candidate in artifacts:
+                 continue
+             seen.add(candidate)
+             collected.append(candidate)
+     return collected
+ def fetch_sources(self, source):
+ '''Update the local git repository cache with the sources.
+ Sets ``source.repo`` to the cached repository in every branch.
+ '''
+ # NOTE(review): the condition expression, every status call and the
+ # final submodule call have lost their leading tokens in this copy of
+ # the file -- restore them from the upstream source.
+ repo_name = source.repo_name
+ if['no-git-update']:
+'Not updating existing git repository '
+ '%(repo_name)s '
+ 'because of no-git-update being set',
+ chatty=True,
+ repo_name=repo_name)
+ source.repo = self.lrc.get_repo(repo_name)
+ return
+ if self.lrc.has_repo(repo_name):
+ source.repo = self.lrc.get_repo(repo_name)
+ try:
+ # Skip the update when the wanted sha1 already resolves in the
+ # cached repository.
+ sha1 = source.sha1
+ source.repo.resolve_ref(sha1)
+'Not updating git repository '
+ '%(repo_name)s because it '
+ 'already contains sha1 %(sha1)s',
+ chatty=True, repo_name=repo_name,
+ sha1=sha1)
+ except morphlib.cachedrepo.InvalidReferenceError:
+'Updating %(repo_name)s',
+ repo_name=repo_name)
+ source.repo.update()
+ else:
+'Cloning %(repo_name)s',
+ repo_name=repo_name)
+ source.repo = self.lrc.cache_repo(repo_name)
+ # Update submodules.
+ done = set()
+ self.lrc, source.repo.url,
+ source.sha1, done)
+ def cache_artifacts_locally(self, artifacts):
+ '''Get artifacts missing from local cache from remote cache.'''
+ # NOTE(review): self.rac is used here without a None check; of the
+ # callers visible in this file only cache_or_build_source checks it.
+ def fetch_files(to_fetch):
+ '''Fetch a set of files atomically.
+ If an error occurs during the transfer of any files, all downloaded
+ data is deleted, to ensure integrity of the local cache.
+ '''
+ try:
+ for remote, local in to_fetch:
+ shutil.copyfileobj(remote, local)
+ except BaseException:
+ # Abort every local handle, including ones already copied.
+ for remote, local in to_fetch:
+ local.abort()
+ raise
+ else:
+ for remote, local in to_fetch:
+ remote.close()
+ local.close()
+ for artifact in artifacts:
+ # This block should fetch all artifact files in one go, using the
+ # 1.0/artifacts method of morph-cache-server. The code to do that
+ # needs bringing in from the distbuild.worker_build_connection
+ # module into morphlib.remoteartifactcache first.
+ to_fetch = []
+ if not self.lac.has(artifact):
+ to_fetch.append((self.rac.get(artifact),
+ self.lac.put(artifact)))
+ # Some morphologies also need their 'meta' metadata cached.
+ if artifact.source.morphology.needs_artifact_metadata_cached:
+ if not self.lac.has_artifact_metadata(artifact, 'meta'):
+ to_fetch.append((
+ self.rac.get_artifact_metadata(artifact, 'meta'),
+ self.lac.put_artifact_metadata(artifact, 'meta')))
+ if len(to_fetch) > 0:
+ # NOTE(review): the status call below has lost its leading tokens
+ # and its closing arguments in this copy of the file.
+ msg='Fetching to local cache: artifact %(name)s',
+ fetch_files(to_fetch)
+ def create_staging_area(self, build_env, use_chroot=True, extra_env={},
+ extra_path=[]):
+ '''Create the staging area for building a single artifact.
+ A fresh temporary directory is created to back the returned
+ StagingArea object.
+ '''
+ # NOTE(review): extra_env and extra_path use mutable default values,
+ # which are shared across calls; this method only passes them on, so
+ # that is safe only if StagingArea never mutates them -- confirm.
+ # NOTE(review): the status call, the settings lookup inside
+ # os.path.join and the first StagingArea argument have lost their
+ # tokens in this copy of the file.
+'Creating staging area')
+ staging_dir = tempfile.mkdtemp(
+ dir=os.path.join(['tempdir'], 'staging'))
+ staging_area = morphlib.stagingarea.StagingArea(
+, staging_dir, build_env, use_chroot, extra_env,
+ extra_path)
+ return staging_area
+ def remove_staging_area(self, staging_area):
+ '''Remove the staging area.'''
+ # NOTE(review): the status call below has lost its leading tokens in
+ # this copy of the file.
+'Removing staging area')
+ staging_area.remove()
+ # Nasty hack to avoid installing chunks built in 'bootstrap' mode in a
+ # different stratum when constructing staging areas.
+ # TODO: make nicer by having chunk morphs keep a reference to the
+ # stratum they were in
+ def in_same_stratum(self, s1, s2):
+     '''Tell whether two chunk sources come from the same stratum.
+
+     Morphologies do not record where they came from, so this compares
+     the stratum morphologies found among the dependents of each
+     source's artifacts: the sources match when those sets are equal.
+     '''
+     def stratum_morphologies(source):
+         # Gather every dependent of every artifact of the source.
+         dependents = set()
+         for artifact in source.artifacts.itervalues():
+             dependents.update(artifact.dependents)
+         # Keep only the morphologies of the stratum dependents.
+         return set(dependent.morphology for dependent in dependents
+                    if dependent.morphology['kind'] == 'stratum')
+     return stratum_morphologies(s1) == stratum_morphologies(s2)
+ def install_dependencies(self, staging_area, artifacts, target_source):
+ '''Install chunk artifacts into staging area.
+ We only ever care about chunk artifacts as build dependencies,
+ so this is not a generic artifact installer into staging area.
+ Any non-chunk artifacts are silently ignored.
+ All artifacts MUST be in the local artifact cache already.
+ '''
+ for artifact in artifacts:
+ if artifact.source.morphology['kind'] != 'chunk':
+ continue
+ # Bootstrap-mode chunks are installed only when they share a
+ # stratum with the source being built.
+ if artifact.source.build_mode == 'bootstrap':
+ if not self.in_same_stratum(artifact.source, target_source):
+ continue
+ # NOTE(review): the leading tokens of this status call and its
+ # `chunk_name` keyword argument are missing in this copy of the file.
+ msg='Installing chunk %(chunk_name)s from cache %(cache)s',
+ cache=artifact.source.cache_key[:7],
+ chatty=True)
+ handle = self.lac.get(artifact)
+ staging_area.install_artifact(handle)
+ if target_source.build_mode == 'staging':
+ # NOTE(review): the first ldconfig argument is missing in this copy
+ # of the file.
+ morphlib.builder2.ldconfig(, staging_area.dirname)
+ def build_and_cache(self, staging_area, source, setup_mounts):
+ '''Build a source and put its artifacts into the local cache.
+ Returns whatever the Builder's own build_and_cache returns.
+ '''
+ # NOTE(review): the status call, its `name` keyword argument, the
+ # first Builder argument and the settings lookup for 'max-jobs' have
+ # lost their tokens in this copy of the file.
+'Starting actual build: %(name)s '
+ '%(sha1)s',
+, sha1=source.sha1[:7])
+ builder = morphlib.builder2.Builder(
+, staging_area, self.lac, self.rac, self.lrc,
+['max-jobs'], setup_mounts)
+ return builder.build_and_cache(source)
+class InitiatorBuildCommand(BuildCommand):
+ RECONNECT_INTERVAL = 30 # seconds
+ def __init__(self, app, addr, port):
+ # NOTE(review): the assignment target for `app` and the mapping that
+ # receives 'push-build-branches' have lost their tokens in this copy
+ # of the file -- restore them from the upstream source.
+ = app
+ # addr and port are stored as given; build() rejects an empty addr.
+ self.addr = addr
+ self.port = port
+['push-build-branches'] = True
+ super(InitiatorBuildCommand, self).__init__(app)
+ def build(self, args):
+ '''Initiate a distributed build on a controller'''
+ distbuild.add_crash_conditions(['crash-condition'])
+ if len(args) != 3:
+ raise morphlib.Error(
+ 'Need repo, ref, morphology triplet to build')
+ if self.addr == '':
+ raise morphlib.Error(
+ 'Need address of controller to run a distbuild')
+'Starting distributed build')
+ loop = distbuild.MainLoop()
+ cm = distbuild.InitiatorConnectionMachine(,
+ self.addr,
+ self.port,
+ distbuild.Initiator,
+ [] + args,
+ loop.add_state_machine(cm)