diff options
30 files changed, 857 insertions, 439 deletions
diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 387b410f..aa11ae8f 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -1,6 +1,6 @@ # distbuild/build_controller.py -- control the steps for one build # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -37,11 +37,6 @@ class _Start(object): pass class _Annotated(object): pass class _Built(object): pass -class _AnnotationFailed(object): - - def __init__(self, http_status_code, error_msg): - self.http_status_code = http_status_code - self.error_msg = error_msg class _GotGraph(object): @@ -49,11 +44,6 @@ class _GotGraph(object): self.artifact = artifact -class _GraphFailed(object): - - pass - - class BuildCancel(object): def __init__(self, id): @@ -192,14 +182,13 @@ class BuildController(distbuild.StateMachine): 'graphing', self._maybe_finish_graph), ('graphing', self, _GotGraph, 'annotating', self._start_annotating), - ('graphing', self, _GraphFailed, None, None), + ('graphing', self, BuildFailed, None, None), ('graphing', self._initiator_connection, distbuild.InitiatorDisconnect, None, None), ('annotating', distbuild.HelperRouter, distbuild.HelperResult, 'annotating', self._maybe_handle_cache_response), - ('annotating', self, _AnnotationFailed, None, - self._notify_annotation_failed), + ('annotating', self, BuildFailed, None, None), ('annotating', self, _Annotated, 'building', self._queue_worker_builds), ('annotating', self._initiator_connection, @@ -244,6 +233,29 @@ class BuildController(distbuild.StateMachine): self.mainloop.queue_event(self, _Start()) + def fail(self, reason): + logging.error(reason) + message = BuildFailed(self._request['id'], reason) + + # The message is sent twice so that it can be matched both by state + # transitions listening for this specific controller instance, and by + # state transitions listening for messages from the BuildController + # class that then filter the message based on the request ID field. + self.mainloop.queue_event(self, message) + self.mainloop.queue_event(BuildController, message) + + def _request_command_execution(self, argv, request_id): + '''Tell the controller's distbuild-helper to run a command.''' + if self.mainloop.n_state_machines_of_type(distbuild.HelperRouter) == 0: + self.fail('No distbuild-helper process running on controller!') + + msg = distbuild.message('exec-request', + id=request_id, + argv=argv, + stdin_contents='') + req = distbuild.HelperRequest(msg) + self.mainloop.queue_event(distbuild.HelperRouter, req) + def _start_graphing(self, event_source, event): distbuild.crash_point() @@ -260,14 +272,10 @@ class BuildController(distbuild.StateMachine): ] if 'original_ref' in self._request: argv.append(self._request['original_ref']) - msg = distbuild.message('exec-request', - id=self._idgen.next(), - argv=argv, - stdin_contents='') - self._helper_id = msg['id'] - req = distbuild.HelperRequest(msg) - self.mainloop.queue_event(distbuild.HelperRouter, req) - + + self._helper_id = self._idgen.next() + self._request_command_execution(argv, self._helper_id) + progress = BuildProgress(self._request['id'], 'Computing build graph') self.mainloop.queue_event(BuildController, progress) @@ -281,16 +289,6 @@ class BuildController(distbuild.StateMachine): def _maybe_finish_graph(self, event_source, event): distbuild.crash_point() - def notify_failure(msg_text): - logging.error('Graph creation failed: %s' % msg_text) - - failed = BuildFailed( - self._request['id'], - 'Failed to compute build graph: %s' % msg_text) - self.mainloop.queue_event(BuildController, failed) - - self.mainloop.queue_event(self, _GraphFailed()) - def notify_success(artifact): logging.debug('Graph is finished') @@ -308,8 +306,7 @@ class BuildController(distbuild.StateMachine): error_text = self._artifact_error.peek() if event.msg['exit'] != 0 or error_text: - notify_failure('Problem with serialise-artifact: %s' - % error_text) + self.fail(error_text) if event.msg['exit'] != 0: return @@ -319,7 +316,7 @@ class BuildController(distbuild.StateMachine): artifact = distbuild.deserialise_artifact(text) except ValueError, e: logging.error(traceback.format_exc()) - notify_failure(str(e)) + self.fail('Failed to compute build graph: %s' % e) return notify_success(artifact) @@ -362,13 +359,11 @@ class BuildController(distbuild.StateMachine): logging.debug('Got cache response: %s' % repr(event.msg)) http_status_code = event.msg['status'] - error_msg = event.msg['body'] if http_status_code != httplib.OK: - logging.debug('Cache request failed with status: %s' - % event.msg['status']) - self.mainloop.queue_event(self, - _AnnotationFailed(http_status_code, error_msg)) + self.fail('Failed to annotate build graph: HTTP request to %s got ' + '%d: %s' % (self._artifact_cache_server, + http_status_code, event.msg['body'])) return cache_state = json.loads(event.msg['body']) @@ -581,14 +576,6 @@ class BuildController(distbuild.StateMachine): self._queue_worker_builds(None, event) - def _notify_annotation_failed(self, event_source, event): - errmsg = ('Failed to annotate build graph: http request got %d: %s' - % (event.http_status_code, event.error_msg)) - - logging.error(errmsg) - failed = BuildFailed(self._request['id'], errmsg) - self.mainloop.queue_event(BuildController, failed) - def _maybe_notify_build_failed(self, event_source, event): distbuild.crash_point() @@ -613,10 +600,7 @@ class BuildController(distbuild.StateMachine): self._request['id'], build_step_name(artifact)) self.mainloop.queue_event(BuildController, step_failed) - build_failed = BuildFailed( - self._request['id'], - 'Building failed for %s' % artifact.name) - self.mainloop.queue_event(BuildController, build_failed) + self.fail('Building failed for %s' % artifact.name) # Cancel any jobs waiting to be executed, since there is no point # running them if this build has failed, it would just waste diff --git a/distbuild/initiator.py b/distbuild/initiator.py index aaae7d62..7f82827c 100644 --- a/distbuild/initiator.py +++ b/distbuild/initiator.py @@ -1,6 +1,6 @@ # distbuild/initiator.py -- state machine for the initiator # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ import cliapp import logging import os import random -import sys +import time import distbuild @@ -83,11 +83,12 @@ class Initiator(distbuild.StateMachine): repo=self._repo_name, ref=self._ref, morphology=self._morphology, - original_ref=self._original_ref + original_ref=self._original_ref, + protocol_version=distbuild.protocol.VERSION ) self._jm.send(msg) logging.debug('Initiator: sent to controller: %s', repr(msg)) - + def _handle_json_message(self, event_source, event): distbuild.crash_point() @@ -137,23 +138,35 @@ class Initiator(distbuild.StateMachine): self._step_outputs[msg['step_name']].close() del self._step_outputs[msg['step_name']] + def _get_output(self, msg): + return self._step_outputs[msg['step_name']] + def _handle_step_already_started_message(self, msg): - self._app.status( - msg='%s is already building on %s' % (msg['step_name'], - msg['worker_name'])) + status = '%s is already building on %s' % ( + msg['step_name'], msg['worker_name']) + self._app.status(msg=status) + self._open_output(msg) + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + f.flush() + def _handle_step_started_message(self, msg): - self._app.status( - msg='Started building %(step_name)s on %(worker_name)s', - step_name=msg['step_name'], - worker_name=msg['worker_name']) + status = 'Started building %s on %s' % ( + msg['step_name'], msg['worker_name']) + self._app.status(msg=status) + self._open_output(msg) + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + f.flush() + def _handle_step_output_message(self, msg): step_name = msg['step_name'] if step_name in self._step_outputs: - f = self._step_outputs[step_name] + f = self._get_output(msg) f.write(msg['stdout']) f.write(msg['stderr']) f.flush() @@ -164,9 +177,12 @@ class Initiator(distbuild.StateMachine): def _handle_step_finished_message(self, msg): step_name = msg['step_name'] if step_name in self._step_outputs: - self._app.status( - msg='Finished building %(step_name)s', - step_name=step_name) + status = 'Finished building %s' % step_name + self._app.status(msg=status) + + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + self._close_output(msg) else: logging.warning( @@ -175,9 +191,12 @@ class Initiator(distbuild.StateMachine): def _handle_step_failed_message(self, msg): step_name = msg['step_name'] if step_name in self._step_outputs: - self._app.status( - msg='Build failed: %(step_name)s', - step_name=step_name) + status = 'Build of %s failed.' % step_name + self._app.status(msg=status) + + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + self._close_output(msg) else: logging.warning( diff --git a/distbuild/initiator_connection.py b/distbuild/initiator_connection.py index db982230..86df28f1 100644 --- a/distbuild/initiator_connection.py +++ b/distbuild/initiator_connection.py @@ -1,6 +1,6 @@ # distbuild/initiator_connection.py -- communicate with initiator # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014 - 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -100,15 +100,31 @@ class InitiatorConnection(distbuild.StateMachine): logging.debug('InitiatorConnection: from %s: %r', self.initiator_name, event.msg) - if event.msg['type'] == 'build-request': - new_id = self._idgen.next() - self.our_ids.add(new_id) - self._route_map.add(event.msg['id'], new_id) - event.msg['id'] = new_id - build_controller = distbuild.BuildController( - self, event.msg, self.artifact_cache_server, - self.morph_instance) - self.mainloop.add_state_machine(build_controller) + try: + if event.msg['type'] == 'build-request': + if (event.msg.get('protocol_version') != + distbuild.protocol.VERSION): + msg = distbuild.message('build-failed', + id=event.msg['id'], + reason=('Protocol version mismatch between server & ' + 'initiator: distbuild network uses distbuild ' + 'protocol version %i, but client uses version' + ' %i.', distbuild.protocol.VERSION, + event.msg.get('protocol_version'))) + self.jm.send(msg) + self._log_send(msg) + return + new_id = self._idgen.next() + self.our_ids.add(new_id) + self._route_map.add(event.msg['id'], new_id) + event.msg['id'] = new_id + build_controller = distbuild.BuildController( + self, event.msg, self.artifact_cache_server, + self.morph_instance) + self.mainloop.add_state_machine(build_controller) + except (KeyError, ValueError) as ex: + logging.error('Invalid message from initiator: %s: exception %s', + event.msg, ex) def _disconnect(self, event_source, event): for id in self.our_ids: diff --git a/distbuild/jm.py b/distbuild/jm.py index 615100e4..85510924 100644 --- a/distbuild/jm.py +++ b/distbuild/jm.py @@ -1,6 +1,6 @@ # mainloop/jm.py -- state machine for JSON communication between nodes # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014 - 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -109,8 +109,13 @@ class JsonMachine(StateMachine): line = line.rstrip() if self.debug_json: logging.debug('JsonMachine: line: %s' % repr(line)) - msg = yaml.load(json.loads(line)) - self.mainloop.queue_event(self, JsonNewMessage(msg)) + msg = None + try: + msg = yaml.safe_load(json.loads(line)) + except Exception: + logging.error('Invalid input: %s' % line) + if msg: + self.mainloop.queue_event(self, JsonNewMessage(msg)) def _send_eof(self, event_source, event): self.mainloop.queue_event(self, JsonEof()) diff --git a/distbuild/mainloop.py b/distbuild/mainloop.py index f0e5eebc..97e439f3 100644 --- a/distbuild/mainloop.py +++ b/distbuild/mainloop.py @@ -1,6 +1,6 @@ # mainloop/mainloop.py -- select-based main loop # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -56,7 +56,10 @@ class MainLoop(object): def remove_state_machine(self, machine): logging.debug('MainLoop.remove_state_machine: %s' % machine) self._machines.remove(machine) - + + def n_state_machines_of_type(self, machine_type): + return len([m for m in self._machines if isinstance(m, machine_type)]) + def add_event_source(self, event_source): logging.debug('MainLoop.add_event_source: %s' % event_source) self._sources.append(event_source) diff --git a/distbuild/protocol.py b/distbuild/protocol.py index ffce1fe7..f2c74819 100644 --- a/distbuild/protocol.py +++ b/distbuild/protocol.py @@ -1,6 +1,6 @@ # distbuild/protocol.py -- abstractions for the JSON messages # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014 - 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,12 +19,20 @@ '''Construct protocol message objects (dicts).''' +# Version refers to an integer that should be incremented by one each time a +# time a change is introduced that would break server/initiator compatibility + + +VERSION = 1 + + _required_fields = { 'build-request': [ 'id', 'repo', 'ref', 'morphology', + 'protocol_version', ], 'build-progress': [ 'id', diff --git a/morphlib/__init__.py b/morphlib/__init__.py index a10ebe7b..d54340df 100644 --- a/morphlib/__init__.py +++ b/morphlib/__init__.py @@ -68,7 +68,6 @@ import gitindex import localartifactcache import localrepocache import mountableimage -import morphologyfactory import morphologyfinder import morphology import morphloader diff --git a/morphlib/app.py b/morphlib/app.py index 0c87f814..b8bae850 100644 --- a/morphlib/app.py +++ b/morphlib/app.py @@ -297,26 +297,6 @@ class Morph(cliapp.Application): morphlib.util.sanitise_morphology_path(args[2])) args = args[3:] - def cache_repo_and_submodules(self, cache, url, ref, done): - subs_to_process = set() - subs_to_process.add((url, ref)) - while subs_to_process: - url, ref = subs_to_process.pop() - done.add((url, ref)) - cached_repo = cache.cache_repo(url) - cached_repo.update() - - try: - submodules = morphlib.git.Submodules(self, cached_repo.path, - ref) - submodules.load() - except morphlib.git.NoModulesFileError: - pass - else: - for submod in submodules: - if (submod.url, submod.commit) not in done: - subs_to_process.add((submod.url, submod.commit)) - def _write_status(self, text): timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) self.output.write('%s %s\n' % (timestamp, text)) diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index a22e689b..8572450d 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -96,6 +96,7 @@ class BuildCommand(object): self.app.status(msg='Creating source pool', chatty=True) srcpool = morphlib.sourceresolver.create_source_pool( self.lrc, self.rrc, repo_name, ref, filename, + cachedir=self.app.settings['cachedir'], original_ref=original_ref, update_repos=not self.app.settings['no-git-update'], status_cb=self.app.status) @@ -271,7 +272,7 @@ class BuildCommand(object): def build_in_order(self, root_artifact): '''Build everything specified in a build order.''' - self.app.status(msg='Building a set of sources', chatty=True) + self.app.status(msg='Building a set of sources') build_env = root_artifact.build_env ordered_sources = list(self.get_ordered_sources(root_artifact.walk())) old_prefix = self.app.status_prefix @@ -386,39 +387,8 @@ class BuildCommand(object): '''Update the local git repository cache with the sources.''' repo_name = source.repo_name - if self.app.settings['no-git-update']: - self.app.status(msg='Not updating existing git repository ' - '%(repo_name)s ' - 'because of no-git-update being set', - chatty=True, - repo_name=repo_name) - source.repo = self.lrc.get_repo(repo_name) - return - - if self.lrc.has_repo(repo_name): - source.repo = self.lrc.get_repo(repo_name) - try: - sha1 = source.sha1 - source.repo.resolve_ref_to_commit(sha1) - self.app.status(msg='Not updating git repository ' - '%(repo_name)s because it ' - 'already contains sha1 %(sha1)s', - chatty=True, repo_name=repo_name, - sha1=sha1) - except morphlib.gitdir.InvalidRefError: - self.app.status(msg='Updating %(repo_name)s', - repo_name=repo_name) - source.repo.update() - else: - self.app.status(msg='Cloning %(repo_name)s', - repo_name=repo_name) - source.repo = self.lrc.cache_repo(repo_name) - - # Update submodules. - done = set() - self.app.cache_repo_and_submodules( - self.lrc, source.repo.url, - source.sha1, done) + source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1) + self.lrc.ensure_submodules(source.repo, source.sha1) def cache_artifacts_locally(self, artifacts): '''Get artifacts missing from local cache from remote cache.''' diff --git a/morphlib/builder.py b/morphlib/builder.py index 1bf4d454..0bb21434 100644 --- a/morphlib/builder.py +++ b/morphlib/builder.py @@ -14,9 +14,6 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -from collections import defaultdict -import datetime -import errno import json import logging import os @@ -28,7 +25,6 @@ import time import traceback import subprocess import tempfile -import gzip import cliapp @@ -146,23 +142,6 @@ def download_depends(constituents, lac, rac, metadatas=None): src.close() -def get_chunk_files(f): # pragma: no cover - tar = tarfile.open(fileobj=f) - for member in tar.getmembers(): - if member.type is not tarfile.DIRTYPE: - yield member.name - tar.close() - - -def get_stratum_files(f, lac): # pragma: no cover - for ca in (ArtifactCacheReference(a) - for a in json.load(f, encoding='unicode-escape')): - cf = lac.get(ca) - for filename in get_chunk_files(cf): - yield filename - cf.close() - - class BuilderBase(object): '''Base class for building artifacts.''' diff --git a/morphlib/cachedrepo.py b/morphlib/cachedrepo.py index aa2b5af1..8b38c5c9 100644 --- a/morphlib/cachedrepo.py +++ b/morphlib/cachedrepo.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,7 +15,9 @@ import cliapp + import os +import tempfile import morphlib @@ -169,6 +171,28 @@ class CachedRepo(object): self._checkout_ref_in_clone(ref, target_dir) + def extract_commit(self, ref, target_dir): + '''Extract files from a given commit into target_dir. + + This is different to a 'checkout': a checkout assumes a working tree + associated with a repository. Here, the repository is immutable (it's + in the cache) and we just want to look at the files in a quick way + (quicker than going 'git cat-file everything'). + + This seems marginally quicker than doing a shallow clone. Running + `morph list-artifacts` 10 times gave an average time of 1.334s + using `git clone --depth 1` and an average time of 1.261s using + this code. + + ''' + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + with tempfile.NamedTemporaryFile() as index_file: + index = self._gitdir.get_index(index_file=index_file.name) + index.set_to_tree(ref) + index.checkout(working_tree=target_dir) + def requires_update_for_ref(self, ref): '''Returns False if there's no need to update this cached repo. diff --git a/morphlib/cachedrepo_tests.py b/morphlib/cachedrepo_tests.py index 6f87bfdd..6fe69ef5 100644 --- a/morphlib/cachedrepo_tests.py +++ b/morphlib/cachedrepo_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -33,6 +33,21 @@ class FakeApplication(object): } +class FakeIndex(object): + + def __init__(self, index_file): + self.index_file = index_file + self.ref = None + + def set_to_tree(self, ref): + self.ref = ref + + def checkout(self, working_tree=None): + if working_tree: + with open(os.path.join(working_tree, 'foo.morph'), 'w') as f: + f.write('contents of foo.morph') + + class CachedRepoTests(unittest.TestCase): known_commit = 'a4da32f5a81c8bc6d660404724cedc3bc0914a75' @@ -77,6 +92,9 @@ class CachedRepoTests(unittest.TestCase): def update_with_failure(self, **kwargs): raise cliapp.AppException('git remote update origin') + def get_index(self, index_file=None): + return FakeIndex(index_file) + def setUp(self): self.repo_name = 'foo' self.repo_url = 'git://foo.bar/foo.git' @@ -141,6 +159,16 @@ class CachedRepoTests(unittest.TestCase): morph_filename = os.path.join(unpack_dir, 'foo.morph') self.assertTrue(os.path.exists(morph_filename)) + def test_extract_commit_into_new_directory(self): + self.repo._gitdir.get_index = self.get_index + unpack_dir = self.tempfs.getsyspath('unpack-dir') + self.repo.extract_commit('e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', + unpack_dir) + self.assertTrue(os.path.exists(unpack_dir)) + + morph_filename = os.path.join(unpack_dir, 'foo.morph') + self.assertTrue(os.path.exists(morph_filename)) + def test_successful_update(self): self.repo._gitdir.update_remotes = self.update_successfully self.repo.update() diff --git a/morphlib/gitindex.py b/morphlib/gitindex.py index e22f6225..c5c07bd6 100644 --- a/morphlib/gitindex.py +++ b/morphlib/gitindex.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2014 Codethink Limited +# Copyright (C) 2013-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,8 +48,16 @@ class GitIndex(object): def _run_git(self, *args, **kwargs): if self._index_file is not None: - kwargs['env'] = kwargs.get('env', dict(os.environ)) - kwargs['env']['GIT_INDEX_FILE'] = self._index_file + extra_env = kwargs.get('extra_env', {}) + extra_env['GIT_INDEX_FILE'] = self._index_file + kwargs['extra_env'] = extra_env + + if 'extra_env' in kwargs: + env = kwargs.get('env', dict(os.environ)) + env.update(kwargs['extra_env']) + kwargs['env'] = env + del kwargs['extra_env'] + return morphlib.git.gitcmd(self._gd._runcmd, *args, **kwargs) def _get_status(self): @@ -159,3 +167,11 @@ class GitIndex(object): def write_tree(self): '''Transform the index into a tree in the object store.''' return self._run_git('write-tree').strip() + + def checkout(self, working_tree=None): + '''Copy files from the index to the working tree.''' + if working_tree: + extra_env = {'GIT_WORK_TREE': working_tree} + else: + extra_env = {} + self._run_git('checkout-index', '--all', extra_env=extra_env) diff --git a/morphlib/gitindex_tests.py b/morphlib/gitindex_tests.py index 32d40a8c..3f9ff303 100644 --- a/morphlib/gitindex_tests.py +++ b/morphlib/gitindex_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2014 Codethink Limited +# Copyright (C) 2013-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -38,6 +38,8 @@ class GitIndexTests(unittest.TestCase): self.mirror = os.path.join(self.tempdir, 'mirror') morphlib.git.gitcmd(gd._runcmd, 'clone', '--mirror', self.dirname, self.mirror) + self.working_dir = os.path.join(self.tempdir, 'bar') + os.makedirs(self.working_dir) def tearDown(self): shutil.rmtree(self.tempdir) @@ -91,3 +93,15 @@ class GitIndexTests(unittest.TestCase): gd = morphlib.gitdir.GitDirectory(self.dirname) idx = gd.get_index() self.assertEqual(idx.write_tree(), gd.resolve_ref_to_tree(gd.HEAD)) + + def test_checkout(self): + gd = morphlib.gitdir.GitDirectory(self.dirname) + idx = gd.get_index() + idx.checkout(working_tree=self.working_dir) + self.assertTrue(os.path.exists(os.path.join(self.working_dir, 'foo'))) + + def test_checkout_without_working_dir(self): + gd = morphlib.gitdir.GitDirectory(self.dirname) + idx = gd.get_index() + idx.checkout() + self.assertTrue(os.path.exists(os.path.join(self.dirname, 'foo'))) diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py index 9bccb20b..1565b913 100644 --- a/morphlib/localrepocache.py +++ b/morphlib/localrepocache.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,10 +14,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import logging import os -import re -import urllib2 import urlparse import string import sys @@ -204,7 +201,9 @@ class LocalRepoCache(object): if self._tarball_base_url: ok, error = self._clone_with_tarball(repourl, path) if ok: - return self.get_repo(reponame) + repo = self.get_repo(reponame) + repo.update() + return repo else: errors.append(error) self._app.status( @@ -244,15 +243,68 @@ class LocalRepoCache(object): return repo raise NotCached(reponame) - def get_updated_repo(self, reponame): # pragma: no cover - '''Return object representing cached repository, which is updated.''' + def get_updated_repo(self, repo_name, ref=None): # pragma: no cover + '''Return object representing cached repository. - if not self._app.settings['no-git-update']: - cached_repo = self.cache_repo(reponame) - self._app.status( - msg='Updating git repository %s in cache' % reponame) - cached_repo.update() - else: - cached_repo = self.get_repo(reponame) - return cached_repo + If 'ref' is None, the repo will be updated unless + app.settings['no-git-update'] is set. + + If 'ref' is set to a SHA1, the repo will only be updated if 'ref' isn't + already available locally. + ''' + + if self._app.settings['no-git-update']: + self._app.status(msg='Not updating existing git repository ' + '%(repo_name)s ' + 'because of no-git-update being set', + chatty=True, + repo_name=repo_name) + return self.get_repo(repo_name) + + if self.has_repo(repo_name): + repo = self.get_repo(repo_name) + if ref and morphlib.git.is_valid_sha1(ref): + try: + repo.resolve_ref_to_commit(ref) + self._app.status(msg='Not updating git repository ' + '%(repo_name)s because it ' + 'already contains sha1 %(sha1)s', + chatty=True, repo_name=repo_name, + sha1=ref) + return repo + except morphlib.gitdir.InvalidRefError: + pass + + self._app.status(msg='Updating %(repo_name)s', + repo_name=repo_name) + repo.update() + return repo + else: + self._app.status(msg='Cloning %(repo_name)s', + repo_name=repo_name) + return self.cache_repo(repo_name) + + def ensure_submodules(self, toplevel_repo, + toplevel_ref): # pragma: no cover + '''Ensure any submodules of a given repo are cached and up to date.''' + + def submodules_for_repo(repo_path, ref): + try: + submodules = morphlib.git.Submodules(self._app, repo_path, ref) + submodules.load() + return [(submod.url, submod.commit) for submod in submodules] + except morphlib.git.NoModulesFileError: + return [] + + done = set() + subs_to_process = submodules_for_repo(toplevel_repo.path, toplevel_ref) + while subs_to_process: + url, ref = subs_to_process.pop() + done.add((url, ref)) + + cached_repo = self.get_updated_repo(url, ref=ref) + + for submod in submodules_for_repo(cached_repo.path, ref): + if (submod.url, submod.commit) not in done: + subs_to_process.add((submod.url, submod.commit)) diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py index ab6e71fd..aeb32961 100644 --- a/morphlib/localrepocache_tests.py +++ b/morphlib/localrepocache_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -139,7 +139,11 @@ class LocalRepoCacheTests(unittest.TestCase): self.lrc._fetch = lambda url, path: self.fetched.append(url) self.unpacked_tar = "" self.mkdir_path = "" - self.lrc.cache_repo(self.repourl) + + with morphlib.gitdir_tests.monkeypatch( + morphlib.cachedrepo.CachedRepo, 'update', lambda self: None): + self.lrc.cache_repo(self.repourl) + self.assertEqual(self.fetched, [self.tarball_url]) self.assertFalse(self.lrc.fs.exists(self.cache_path + '.tar')) self.assertEqual(self.remotes['origin']['url'], self.repourl) diff --git a/morphlib/morphologyfactory.py b/morphlib/morphologyfactory.py deleted file mode 100644 index a3ac2749..00000000 --- a/morphlib/morphologyfactory.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2012-2014 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - -import os - -import morphlib -import cliapp - - -class MorphologyFactoryError(cliapp.AppException): - pass - - -class MorphologyNotFoundError(MorphologyFactoryError): - def __init__(self, filename): - MorphologyFactoryError.__init__( - self, "Couldn't find morphology: %s" % filename) - - -class NotcachedError(MorphologyFactoryError): - def __init__(self, repo_name): - MorphologyFactoryError.__init__( - self, "Repository %s is not cached locally and there is no " - "remote cache specified" % repo_name) - - -class MorphologyFactory(object): - - '''A way of creating morphologies which will provide a default''' - - def __init__(self, local_repo_cache, remote_repo_cache=None, - status_cb=None): - self._lrc = local_repo_cache - self._rrc = remote_repo_cache - - null_status_function = lambda **kwargs: None - self.status = status_cb or null_status_function - - def get_morphology(self, reponame, sha1, filename): - morph_name = os.path.splitext(os.path.basename(filename))[0] - loader = morphlib.morphloader.MorphologyLoader() - if self._lrc.has_repo(reponame): - self.status(msg="Looking for %s in local repo cache" % filename, - chatty=True) - try: - repo = self._lrc.get_repo(reponame) - text = repo.read_file(filename, sha1) - morph = loader.load_from_string(text) - except IOError: - morph = None - file_list = repo.list_files(ref=sha1, recurse=False) - elif self._rrc is not None: - self.status(msg="Retrieving %(reponame)s %(sha1)s %(filename)s" - " from the remote git cache.", - reponame=reponame, sha1=sha1, filename=filename, - chatty=True) - try: - text = self._rrc.cat_file(reponame, sha1, filename) - morph = loader.load_from_string(text) - except morphlib.remoterepocache.CatFileError: - morph = None - file_list = self._rrc.ls_tree(reponame, sha1) - else: - raise NotcachedError(reponame) - - if morph is None: - self.status(msg="File %s doesn't exist: attempting to infer " - "chunk morph from repo's build system" - % filename, chatty=True) - bs = morphlib.buildsystem.detect_build_system(file_list) - if bs is None: - raise MorphologyNotFoundError(filename) - morph = bs.get_morphology(morph_name) - loader.validate(morph) - loader.set_commands(morph) - loader.set_defaults(morph) - return morph diff --git a/morphlib/plugins/distbuild_plugin.py b/morphlib/plugins/distbuild_plugin.py index 66d86dcf..a7d69472 100644 --- a/morphlib/plugins/distbuild_plugin.py +++ b/morphlib/plugins/distbuild_plugin.py @@ -1,6 +1,6 @@ # distbuild_plugin.py -- Morph distributed build plugin # -# Copyright (C) 2014 Codethink Limited +# Copyright (C) 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -238,6 +238,11 @@ class ControllerDaemon(cliapp.Plugin): distbuild.add_crash_conditions(self.app.settings['crash-condition']) + if not self.app.settings['worker']: + raise cliapp.AppException( + 'Distbuild controller has no workers configured. Refusing to ' + 'start.') + artifact_cache_server = ( self.app.settings['artifact-cache-server'] or self.app.settings['cache-server']) diff --git a/morphlib/plugins/list_artifacts_plugin.py b/morphlib/plugins/list_artifacts_plugin.py index 6944cff4..53056bad 100644 --- a/morphlib/plugins/list_artifacts_plugin.py +++ b/morphlib/plugins/list_artifacts_plugin.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014 Codethink Limited +# Copyright (C) 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -86,6 +86,7 @@ class ListArtifactsPlugin(cliapp.Plugin): msg='Creating source pool for %s' % system_filename, chatty=True) source_pool = morphlib.sourceresolver.create_source_pool( self.lrc, self.rrc, repo, ref, system_filename, + cachedir=self.app.settings['cachedir'], update_repos = not self.app.settings['no-git-update'], status_cb=self.app.status) diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py index 3a328eb7..22e643d2 100644 --- a/morphlib/sourceresolver.py +++ b/morphlib/sourceresolver.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014 Codethink Limited +# Copyright (C) 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,20 +14,89 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import cliapp - import collections +import cPickle import logging +import os +import pylru +import shutil +import tempfile + +import cliapp import morphlib +tree_cache_size = 10000 +tree_cache_filename = 'trees.cache.pickle' +buildsystem_cache_size = 10000 +buildsystem_cache_filename = 'detected-chunk-buildsystems.cache.pickle' + + +class PickleCacheManager(object): # pragma: no cover + '''Cache manager for PyLRU that reads and writes to Pickle files. + + The 'pickle' format is less than ideal in many ways and is actually + slower than JSON in Python. However, the data we need to cache is keyed + by tuples and in JSON a dict can only be keyed with strings. For now, + using 'pickle' seems to be the least worst option. + + ''' + + def __init__(self, filename, size): + self.filename = filename + self.size = size + + def _populate_cache_from_file(self, filename, cache): + try: + with open(filename, 'r') as f: + data = cPickle.load(f) + for key, value in data.iteritems(): + cache[key] = value + except (EOFError, IOError, cPickle.PickleError) as e: + logging.warning('Failed to load cache %s: %s', self.filename, e) + + def load_cache(self): + '''Create a pylru.lrucache object prepopulated with saved data.''' + cache = pylru.lrucache(self.size) + # There should be a more efficient way to do this, by hooking into + # the json module directly. + self._populate_cache_from_file(self.filename, cache) + return cache + + def save_cache(self, cache): + '''Save the data from a pylru.lrucache object to disk. + + Any changes that have been made by other instances or processes since + load_cache() was called will be overwritten. + + ''' + data = {} + for key, value in cache.items(): + data[key] = value + try: + with morphlib.savefile.SaveFile(self.filename, 'w') as f: + cPickle.dump(data, f) + except (IOError, cPickle.PickleError) as e: + logging.warning('Failed to save cache to %s: %s', self.filename, e) + + +class SourceResolverError(cliapp.AppException): + pass + + +class MorphologyNotFoundError(SourceResolverError): # pragma: no cover + def __init__(self, filename): + SourceResolverError.__init__( + self, "Couldn't find morphology: %s" % filename) + class SourceResolver(object): '''Provides a way of resolving the set of sources for a given system. - There are two levels of caching involved in resolving the sources to build. + There are three levels of caching involved in resolving the sources to + build. - The canonical source for each source is specified in the build-command + The canonical repo for each source is specified in the build-command (for strata and systems) or in the stratum morphology (for chunks). It will be either a normal URL, or a keyed URL using a repo-alias like 'baserock:baserock/definitions'. @@ -44,25 +113,72 @@ class SourceResolver(object): entire repositories in $cachedir/gits. If a repo is not in the remote repo cache then it must be present in the local repo cache. + The third layer of caching is a simple commit SHA1 -> tree SHA mapping. It + turns out that even if all repos are available locally, running + 'git rev-parse' on hundreds of repos requires a lot of IO and can take + several minutes. Likewise, on a slow network connection it is time + consuming to keep querying the remote repo cache. This third layer of + caching works around both of those issues. + + The need for 3 levels of caching highlights design inconsistencies in + Baserock, but for now it is worth the effort to maintain this code to save + users from waiting 7 minutes each time that they want to build. The level 3 + cache is fairly simple because commits are immutable, so there is no danger + of this cache being stale as long as it is indexed by commit SHA1. Due to + the policy in Baserock of always using a commit SHA1 (rather than a named + ref) in the system definitions, it makes repeated builds of a system very + fast as no resolution needs to be done at all. + ''' - def __init__(self, local_repo_cache, remote_repo_cache, update_repos, + def __init__(self, local_repo_cache, remote_repo_cache, + tree_cache_manager, buildsystem_cache_manager, update_repos, status_cb=None): self.lrc = local_repo_cache self.rrc = remote_repo_cache + self.tree_cache_manager = tree_cache_manager + self.buildsystem_cache_manager = buildsystem_cache_manager self.update = update_repos - self.status = status_cb - def resolve_ref(self, reponame, ref): + self._resolved_trees = {} + self._resolved_morphologies = {} + self._resolved_buildsystems = {} + + self._definitions_checkout_dir = None + + def cache_repo_locally(self, reponame): + if self.update: + self.status(msg='Caching git repository %(reponame)s', + reponame=reponame) + repo = self.lrc.cache_repo(reponame) + else: # pragma: no cover + # This is likely to raise a morphlib.localrepocache.NotCached + # exception, because the caller should have checked if the + # localrepocache already had the repo. But we may as well try. + repo = self.lrc.get_repo(reponame) + return repo + + def _resolve_ref(self, reponame, ref): # pragma: no cover '''Resolves commit and tree sha1s of the ref in a repo and returns it. - If update is True then this has the side-effect of updating - or cloning the repository into the local repo cache. + If update is True then this has the side-effect of updating or cloning + the repository into the local repo cache. + + This function is complex due to the 3 layers of caching described in + the SourceResolver docstring. + ''' - absref = None + # The Baserock reference definitions use absolute refs so, and, if the + # absref is cached, we can short-circuit all this code. + if (reponame, ref) in self._resolved_trees: + logging.debug('Returning tree (%s, %s) from tree cache', + reponame, ref) + return ref, self._resolved_trees[(reponame, ref)] + + absref = None if self.lrc.has_repo(reponame): repo = self.lrc.get_repo(reponame) if self.update and repo.requires_update_for_ref(ref): @@ -84,49 +200,160 @@ class SourceResolver(object): chatty=True) except BaseException, e: logging.warning('Caught (and ignored) exception: %s' % str(e)) + if absref is None: - if self.update: - self.status(msg='Caching git repository %(reponame)s', - reponame=reponame) - repo = self.lrc.cache_repo(reponame) - repo.update() - else: - repo = self.lrc.get_repo(reponame) + repo = self.cache_repo_locally(reponame) absref = repo.resolve_ref_to_commit(ref) tree = repo.resolve_ref_to_tree(absref) + + logging.debug('Writing tree to cache with ref (%s, %s)', + reponame, absref) + self._resolved_trees[(reponame, absref)] = tree + return absref, tree - def traverse_morphs(self, definitions_repo, definitions_ref, - system_filenames, - visit=lambda rn, rf, fn, arf, m: None, - definitions_original_ref=None): - morph_factory = morphlib.morphologyfactory.MorphologyFactory( - self.lrc, self.rrc, self.status) - definitions_queue = collections.deque(system_filenames) - chunk_in_definitions_repo_queue = [] - chunk_in_source_repo_queue = [] + def _get_morphology_from_definitions(self, loader, + filename): # pragma: no cover + if os.path.exists(filename): + return loader.load_from_file(filename) + else: + return None - resolved_commits = {} - resolved_trees = {} - resolved_morphologies = {} + def _get_morphology_from_repo(self, loader, reponame, sha1, filename): + if self.lrc.has_repo(reponame): + self.status(msg="Looking for %(reponame)s:%(filename)s in the " + "local repo cache.", + reponame=reponame, filename=filename, chatty=True) + try: + repo = self.lrc.get_repo(reponame) + text = repo.read_file(filename, sha1) + morph = loader.load_from_string(text) + except IOError: + morph = None + elif self.rrc is not None: + self.status(msg="Looking for %(reponame)s:%(filename)s in the " + "remote repo cache.", + reponame=reponame, filename=filename, chatty=True) + try: + text = self.rrc.cat_file(reponame, sha1, filename) + morph = loader.load_from_string(text) + except morphlib.remoterepocache.CatFileError: + morph = None + else: # pragma: no cover + repo = self.cache_repo_locally(reponame) + text = repo.read_file(filename, sha1) + morph = loader.load_from_string(text) - # Resolve the (repo, ref) pair for the definitions repo, cache result. - definitions_absref, definitions_tree = self.resolve_ref( - definitions_repo, definitions_ref) + return morph - if definitions_original_ref: - definitions_ref = definitions_original_ref + def _get_morphology(self, reponame, sha1, filename): + '''Read the morphology at the specified location. + + Returns None if the file does not exist in the specified commit. + + ''' + key = (reponame, sha1, filename) + if key in self._resolved_morphologies: + return self._resolved_morphologies[key] + + loader = morphlib.morphloader.MorphologyLoader() + morph = None + + if reponame == self._definitions_repo and \ + sha1 == self._definitions_absref: # pragma: no cover + # There is a temporary local checkout of the definitions repo which + # we can quickly read definitions files from. + defs_filename = os.path.join(self._definitions_checkout_dir, + filename) + morph = self._get_morphology_from_definitions(loader, + defs_filename) + else: + morph = self._get_morphology_from_repo(loader, reponame, sha1, + filename) + + if morph is None: + return None + else: + loader.validate(morph) + loader.set_commands(morph) + loader.set_defaults(morph) + self._resolved_morphologies[key] = morph + return morph + + def _detect_build_system(self, reponame, sha1, expected_filename): + '''Attempt to detect buildsystem of the given commit. + + Returns None if no known build system was detected. + + ''' + self.status(msg="File %s doesn't exist: attempting to infer " + "chunk morph from repo's build system" % + expected_filename, chatty=True) + + file_list = None + + if self.lrc.has_repo(reponame): + repo = self.lrc.get_repo(reponame) + try: + file_list = repo.list_files(ref=sha1, recurse=False) + except morphlib.gitdir.InvalidRefError: # pragma: no cover + pass + elif self.rrc is not None: + try: + # This may or may not succeed; if the is repo not + # hosted on the same Git server as the cache server then + # it'll definitely fail. + file_list = self.rrc.ls_tree(reponame, sha1) + except morphlib.remoterepocache.LsTreeError: + pass + + if not file_list: + repo = self.cache_repo_locally(reponame) + file_list = repo.list_files(ref=sha1, recurse=False) + + buildsystem = morphlib.buildsystem.detect_build_system(file_list) + + if buildsystem is None: + # It might surprise you to discover that if we can't autodetect a + # build system, we raise MorphologyNotFoundError. Users are + # required to provide a morphology for any chunk where Morph can't + # infer the build instructions automatically, so this is the right + # error. + raise MorphologyNotFoundError(expected_filename) + + return buildsystem.name + + def _create_morphology_for_build_system(self, buildsystem_name, + morph_name): # pragma: no cover + bs = morphlib.buildsystem.lookup_build_system(buildsystem_name) + loader = morphlib.morphloader.MorphologyLoader() + morph = bs.get_morphology(morph_name) + loader.validate(morph) + loader.set_commands(morph) + loader.set_defaults(morph) + return morph + + def _process_definitions_with_children(self, system_filenames, + definitions_repo, + definitions_ref, + definitions_absref, + definitions_tree, + visit): # pragma: no cover + definitions_queue = collections.deque(system_filenames) + chunk_queue = set() while definitions_queue: filename = definitions_queue.popleft() - key = (definitions_repo, definitions_absref, filename) - if not key in resolved_morphologies: - resolved_morphologies[key] = morph_factory.get_morphology(*key) - morphology = resolved_morphologies[key] + morphology = self._get_morphology( + definitions_repo, definitions_absref, filename) + + if morphology is None: + raise MorphologyNotFoundError(filename) visit(definitions_repo, definitions_ref, filename, definitions_absref, definitions_tree, morphology) + if morphology['kind'] == 'cluster': raise cliapp.AppException( "Cannot build a morphology of type 'cluster'.") @@ -141,44 +368,117 @@ class SourceResolver(object): for s in morphology['build-depends']) for c in morphology['chunks']: if 'morph' not in c: + # Autodetect a path if one is not given. This is to + # support the deprecated approach of putting the chunk + # .morph file in the toplevel directory of the chunk + # repo, instead of putting it in the definitions.git + # repo. + # + # All users should be specifying a full path to the + # chunk morph file, using the 'morph' field, and this + # code path should be removed. path = morphlib.util.sanitise_morphology_path( c.get('morph', c['name'])) - chunk_in_source_repo_queue.append( - (c['repo'], c['ref'], path)) - continue - chunk_in_definitions_repo_queue.append( - (c['repo'], c['ref'], c['morph'])) - - for repo, ref, filename in chunk_in_definitions_repo_queue: - if (repo, ref) not in resolved_trees: - commit_sha1, tree_sha1 = self.resolve_ref(repo, ref) - resolved_commits[repo, ref] = commit_sha1 - resolved_trees[repo, commit_sha1] = tree_sha1 - absref = resolved_commits[repo, ref] - tree = resolved_trees[repo, absref] - key = (definitions_repo, definitions_absref, filename) - if not key in resolved_morphologies: - resolved_morphologies[key] = morph_factory.get_morphology(*key) - morphology = resolved_morphologies[key] - visit(repo, ref, filename, absref, tree, morphology) - - for repo, ref, filename in chunk_in_source_repo_queue: - if (repo, ref) not in resolved_trees: - commit_sha1, tree_sha1 = self.resolve_ref(repo, ref) - resolved_commits[repo, ref] = commit_sha1 - resolved_trees[repo, commit_sha1] = tree_sha1 - absref = resolved_commits[repo, ref] - tree = resolved_trees[repo, absref] - key = (repo, absref, filename) - if key not in resolved_morphologies: - resolved_morphologies[key] = morph_factory.get_morphology(*key) - morphology = resolved_morphologies[key] - visit(repo, ref, filename, absref, tree, morphology) - - -def create_source_pool(lrc, rrc, repo, ref, filename, + chunk_queue.add((c['repo'], c['ref'], path)) + else: + chunk_queue.add((c['repo'], c['ref'], c['morph'])) + + return chunk_queue + + def process_chunk(self, definition_repo, definition_ref, chunk_repo, + chunk_ref, filename, visit): # pragma: no cover + absref = None + tree = None + + definition_key = (definition_repo, definition_ref, filename) + chunk_key = None + + morph_name = os.path.splitext(os.path.basename(filename))[0] + + morphology = self._get_morphology(*definition_key) + buildsystem = None + + if chunk_key in self._resolved_buildsystems: + buildsystem = self._resolved_buildsystems[chunk_key] + + if morphology is None and buildsystem is None: + # This is a slow operation (looking for a file in Git repo may + # potentially require cloning the whole thing). + absref, tree = self._resolve_ref(chunk_repo, chunk_ref) + chunk_key = (chunk_repo, absref, filename) + morphology = self._get_morphology(*chunk_key) + + if morphology is None: + if buildsystem is None: + buildsystem = self._detect_build_system(*chunk_key) + if buildsystem is None: + raise MorphologyNotFoundError(filename) + else: + self._resolved_buildsystems[chunk_key] = buildsystem + morphology = self._create_morphology_for_build_system( + buildsystem, morph_name) + self._resolved_morphologies[definition_key] = morphology + + if not absref or not tree: + absref, tree = self._resolve_ref(chunk_repo, chunk_ref) + + visit(chunk_repo, chunk_ref, filename, absref, tree, morphology) + + def traverse_morphs(self, definitions_repo, definitions_ref, + system_filenames, + visit=lambda rn, rf, fn, arf, m: None, + definitions_original_ref=None): # pragma: no cover + self._resolved_trees = self.tree_cache_manager.load_cache() + self._resolved_buildsystems = \ + self.buildsystem_cache_manager.load_cache() + + # Resolve the (repo, ref) pair for the definitions repo, cache result. + definitions_absref, definitions_tree = self._resolve_ref( + definitions_repo, definitions_ref) + + if definitions_original_ref: + definitions_ref = definitions_original_ref + + self._definitions_checkout_dir = tempfile.mkdtemp() + + try: + # FIXME: not an ideal way of passing this info across + self._definitions_repo = definitions_repo + self._definitions_absref = definitions_absref + try: + definitions_cached_repo = self.lrc.get_repo(definitions_repo) + except morphlib.localrepocache.NotCached: + definitions_cached_repo = self.cache_repo_locally( + definitions_repo) + definitions_cached_repo.extract_commit( + definitions_absref, self._definitions_checkout_dir) + + # First, process the system and its stratum morphologies. These + # will all live in the same Git repository, and will point to + # various chunk morphologies. + chunk_queue = self._process_definitions_with_children( + system_filenames, definitions_repo, definitions_ref, + definitions_absref, definitions_tree, visit) + + # Now process all the chunks involved in the build. + for repo, ref, filename in chunk_queue: + self.process_chunk(definitions_repo, definitions_absref, repo, + ref, filename, visit) + finally: + shutil.rmtree(self._definitions_checkout_dir) + self._definitions_checkout_dir = None + + logging.debug('Saving contents of resolved tree cache') + self.tree_cache_manager.save_cache(self._resolved_trees) + + logging.debug('Saving contents of build systems cache') + self.buildsystem_cache_manager.save_cache( + self._resolved_buildsystems) + + +def create_source_pool(lrc, rrc, repo, ref, filename, cachedir, original_ref=None, update_repos=True, - status_cb=None): + status_cb=None): # pragma: no cover '''Find all the sources involved in building a given system. Given a system morphology, this function will traverse the tree of stratum @@ -202,7 +502,16 @@ def create_source_pool(lrc, rrc, repo, ref, filename, for source in sources: pool.add(source) - resolver = SourceResolver(lrc, rrc, update_repos, status_cb) + tree_cache_manager = PickleCacheManager( + os.path.join(cachedir, tree_cache_filename), tree_cache_size) + + buildsystem_cache_manager = PickleCacheManager( + os.path.join(cachedir, buildsystem_cache_filename), + buildsystem_cache_size) + + resolver = SourceResolver(lrc, rrc, tree_cache_manager, + buildsystem_cache_manager, update_repos, + status_cb) resolver.traverse_morphs(repo, ref, [filename], visit=add_to_pool, definitions_original_ref=original_ref) diff --git a/morphlib/morphologyfactory_tests.py b/morphlib/sourceresolver_tests.py index 5222ca6d..638f593f 100644 --- a/morphlib/morphologyfactory_tests.py +++ b/morphlib/sourceresolver_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,13 +14,16 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +import os +import shutil +import tempfile import unittest import morphlib -from morphlib.morphologyfactory import (MorphologyFactory, - MorphologyNotFoundError, - NotcachedError) -from morphlib.remoterepocache import CatFileError +from morphlib.sourceresolver import (SourceResolver, + PickleCacheManager, + MorphologyNotFoundError) +from morphlib.remoterepocache import CatFileError, LsTreeError class FakeRemoteRepoCache(object): @@ -123,16 +126,17 @@ class FakeLocalRepo(object): } return self.morphologies[filename] % values elif filename.endswith('.morph'): - return '''{ - "name": "%s", - "kind": "chunk", - "build-system": "dummy" - }''' % filename[:-len('.morph')] + return '''name: %s + kind: chunk + build-system: dummy''' % filename[:-len('.morph')] return 'text' def list_files(self, ref, recurse): return self.morphologies.keys() + def update(self): + pass + class FakeLocalRepoCache(object): @@ -145,15 +149,43 @@ class FakeLocalRepoCache(object): def get_repo(self, reponame): return self.lr + def cache_repo(self, reponame): + return self.lr -class MorphologyFactoryTests(unittest.TestCase): + +class SourceResolverTests(unittest.TestCase): def setUp(self): + # create temp "definitions" repo + # set self.sr._definitions_repo to that + # trick it into presenting temp repo using FakeLocalRepoCache + # magic self.lr = FakeLocalRepo() self.lrc = FakeLocalRepoCache(self.lr) self.rrc = FakeRemoteRepoCache() - self.mf = MorphologyFactory(self.lrc, self.rrc) - self.lmf = MorphologyFactory(self.lrc, None) + + self.cachedir = tempfile.mkdtemp() + buildsystem_cache_file = os.path.join(self.cachedir, + 'detected-chunk-buildsystems.cache.pickle') + buildsystem_cache_manager = PickleCacheManager( + buildsystem_cache_file, 1000) + + tree_cache_file = os.path.join(self.cachedir, 'trees.cache.pickle') + tree_cache_manager = PickleCacheManager(tree_cache_file, 1000) + + def status(msg='', **kwargs): + pass + + self.sr = SourceResolver(self.lrc, self.rrc, tree_cache_manager, + buildsystem_cache_manager, True, status) + self.lsr = SourceResolver(self.lrc, None, tree_cache_manager, + buildsystem_cache_manager, True, status) + + self.sr._definitions_repo = None + self.lsr._definitions_repo = None + + def tearDown(self): + shutil.rmtree(self.cachedir) def nolocalfile(self, *args): raise IOError('File not found') @@ -161,6 +193,9 @@ class MorphologyFactoryTests(unittest.TestCase): def noremotefile(self, *args): raise CatFileError('reponame', 'ref', 'filename') + def noremoterepo(self, *args): + raise LsTreeError('reponame', 'ref') + def localmorph(self, *args): return ['chunk.morph'] @@ -172,6 +207,9 @@ class MorphologyFactoryTests(unittest.TestCase): def autotoolsbuildsystem(self, *args, **kwargs): return ['configure.in'] + def emptytree(self, *args, **kwargs): + return [] + def remotemorph(self, *args, **kwargs): return ['remote-chunk.morph'] @@ -185,97 +223,129 @@ class MorphologyFactoryTests(unittest.TestCase): def test_gets_morph_from_local_repo(self): self.lr.list_files = self.localmorph - morph = self.mf.get_morphology('reponame', 'sha1', + morph = self.sr._get_morphology('reponame', 'sha1', 'chunk.morph') self.assertEqual('chunk', morph['name']) + def test_gets_morph_from_cache(self): + self.lr.list_files = self.localmorph + morph_from_repo = self.sr._get_morphology('reponame', 'sha1', + 'chunk.morph') + morph_from_cache = self.sr._get_morphology('reponame', 'sha1', + 'chunk.morph') + self.assertEqual(morph_from_repo, morph_from_cache) + def test_gets_morph_from_remote_repo(self): self.rrc.ls_tree = self.remotemorph self.lrc.has_repo = self.doesnothaverepo - morph = self.mf.get_morphology('reponame', 'sha1', + morph = self.sr._get_morphology('reponame', 'sha1', 'remote-chunk.morph') self.assertEqual('remote-chunk', morph['name']) def test_autodetects_local_morphology(self): self.lr.read_file = self.nolocalmorph self.lr.list_files = self.autotoolsbuildsystem - morph = self.mf.get_morphology('reponame', 'sha1', - 'assumed-local.morph') - self.assertEqual('assumed-local', morph['name']) + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-local.morph') + self.assertEqual('autotools', name) + + def test_cache_repo_if_not_in_either_cache(self): + self.lrc.has_repo = self.doesnothaverepo + self.lr.read_file = self.nolocalmorph + self.lr.list_files = self.autotoolsbuildsystem + self.rrc.ls_tree = self.noremoterepo + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-local.morph') + self.assertEqual('autotools', name) def test_autodetects_remote_morphology(self): self.lrc.has_repo = self.doesnothaverepo self.rrc.cat_file = self.noremotemorph self.rrc.ls_tree = self.autotoolsbuildsystem - morph = self.mf.get_morphology('reponame', 'sha1', - 'assumed-remote.morph') - self.assertEqual('assumed-remote', morph['name']) + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-remote.morph') + self.assertEqual('autotools', name) - def test_raises_error_when_no_local_morph(self): + def test_returns_none_when_no_local_morph(self): self.lr.read_file = self.nolocalfile - self.assertRaises(MorphologyNotFoundError, self.mf.get_morphology, - 'reponame', 'sha1', 'unreached.morph') + morph = self.sr._get_morphology('reponame', 'sha1', 'unreached.morph') + self.assertEqual(morph, None) - def test_raises_error_when_fails_no_remote_morph(self): + def test_returns_none_when_fails_no_remote_morph(self): self.lrc.has_repo = self.doesnothaverepo self.rrc.cat_file = self.noremotefile - self.assertRaises(MorphologyNotFoundError, self.mf.get_morphology, - 'reponame', 'sha1', 'unreached.morph') + morph = self.sr._get_morphology('reponame', 'sha1', 'unreached.morph') + self.assertEqual(morph, None) + + def test_raises_error_when_repo_does_not_exist(self): + self.lrc.has_repo = self.doesnothaverepo + self.assertRaises(MorphologyNotFoundError, + self.lsr._detect_build_system, + 'reponame', 'sha1', 'non-existent.morph') + + def test_raises_error_when_failed_to_detect_build_system(self): + self.lr.read_file = self.nolocalfile + self.lr.list_files = self.emptytree + self.assertRaises(MorphologyNotFoundError, + self.sr._detect_build_system, + 'reponame', 'sha1', 'undetected.morph') def test_raises_error_when_name_mismatches(self): - self.assertRaises(morphlib.Error, self.mf.get_morphology, + self.assertRaises(morphlib.Error, self.sr._get_morphology, 'reponame', 'sha1', 'name-mismatch.morph') def test_looks_locally_with_no_remote(self): self.lr.list_files = self.localmorph - morph = self.lmf.get_morphology('reponame', 'sha1', - 'chunk.morph') + morph = self.lsr._get_morphology('reponame', 'sha1', + 'chunk.morph') self.assertEqual('chunk', morph['name']) def test_autodetects_locally_with_no_remote(self): self.lr.read_file = self.nolocalmorph self.lr.list_files = self.autotoolsbuildsystem - morph = self.mf.get_morphology('reponame', 'sha1', - 'assumed-local.morph') - self.assertEqual('assumed-local', morph['name']) + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-local.morph') + self.assertEqual('autotools', name) - def test_fails_when_local_not_cached_and_no_remote(self): + def test_succeeds_when_local_not_cached_and_no_remote(self): self.lrc.has_repo = self.doesnothaverepo - self.assertRaises(NotcachedError, self.lmf.get_morphology, - 'reponame', 'sha1', 'unreached.morph') + self.lr.list_files = self.localmorph + morph = self.sr._get_morphology('reponame', 'sha1', + 'chunk.morph') + self.assertEqual('chunk', morph['name']) def test_arch_is_validated(self): self.lr.arch = 'unknown' - self.assertRaises(morphlib.Error, self.mf.get_morphology, + self.assertRaises(morphlib.Error, self.sr._get_morphology, 'reponame', 'sha1', 'system.morph') def test_arch_arm_defaults_to_le(self): self.lr.arch = 'armv7' - morph = self.mf.get_morphology('reponame', 'sha1', 'system.morph') + morph = self.sr._get_morphology('reponame', 'sha1', 'system.morph') self.assertEqual(morph['arch'], 'armv7l') def test_fails_on_parse_error(self): - self.assertRaises(morphlib.Error, self.mf.get_morphology, + self.assertRaises(morphlib.Error, self.sr._get_morphology, 'reponame', 'sha1', 'parse-error.morph') def test_fails_on_no_chunk_bdeps(self): self.assertRaises(morphlib.morphloader.NoBuildDependenciesError, - self.mf.get_morphology, 'reponame', 'sha1', + self.sr._get_morphology, 'reponame', 'sha1', 'stratum-no-chunk-bdeps.morph') def test_fails_on_no_bdeps_or_bootstrap(self): self.assertRaises( morphlib.morphloader.NoStratumBuildDependenciesError, - self.mf.get_morphology, 'reponame', 'sha1', + self.sr._get_morphology, 'reponame', 'sha1', 'stratum-no-bdeps-no-bootstrap.morph') def test_succeeds_on_bdeps_no_bootstrap(self): - self.mf.get_morphology( + self.sr._get_morphology( 'reponame', 'sha1', 'stratum-bdeps-no-bootstrap.morph') def test_fails_on_empty_stratum(self): self.assertRaises( morphlib.morphloader.EmptyStratumError, - self.mf.get_morphology, 'reponame', 'sha1', 'stratum-empty.morph') + self.sr._get_morphology, 'reponame', 'sha1', 'stratum-empty.morph') diff --git a/morphlib/writeexts.py b/morphlib/writeexts.py index 6ab2dd55..ab451d14 100644 --- a/morphlib/writeexts.py +++ b/morphlib/writeexts.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -237,8 +237,32 @@ class WriteExtension(cliapp.Application): def mkfs_btrfs(self, location): '''Create a btrfs filesystem on the disk.''' + self.status(msg='Creating btrfs filesystem') - cliapp.runcmd(['mkfs.btrfs', '-f', '-L', 'baserock', location]) + try: + # The following command disables some new filesystem features. We + # need to do this because at the time of writing, SYSLINUX has not + # been updated to understand these new features and will fail to + # boot if the kernel is on a filesystem where they are enabled. + cliapp.runcmd( + ['mkfs.btrfs','-f', '-L', 'baserock', + '--features', '^extref', + '--features', '^skinny-metadata', + '--features', '^mixed-bg', + '--nodesize', '4096', + location]) + except cliapp.AppException as e: + if 'unrecognized option \'--features\'' in e.msg: + # Old versions of mkfs.btrfs (including v0.20, present in many + # Baserock releases) don't support the --features option, but + # also don't enable the new features by default. So we can + # still create a bootable system in this situation. + logging.debug( + 'Assuming mkfs.btrfs failure was because the tool is too ' + 'old to have --features flag.') + cliapp.runcmd(['mkfs.btrfs','-f', '-L', 'baserock', location]) + else: + raise def get_uuid(self, location): '''Get the UUID of a block device's file system.''' diff --git a/tests.build/empty-stratum.exit b/tests.build/empty-stratum.exit deleted file mode 100644 index d00491fd..00000000 --- a/tests.build/empty-stratum.exit +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests.build/empty-stratum.script b/tests.build/empty-stratum.script deleted file mode 100755 index 19c36558..00000000 --- a/tests.build/empty-stratum.script +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# -# Copyright (C) 2013-2014 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -set -eu - -morphsrepo="$DATADIR/morphs-repo" -cd "$morphsrepo" - -git checkout --quiet -b empty-stratum - -# Create empty stratum to test S4585 -cat <<EOF > hello-stratum.morph -name: hello-stratum -kind: stratum -EOF -sed -i 's/master/empty-stratum/' hello-system.morph -git add hello-stratum.morph hello-system.morph - -git commit --quiet -m "add empty stratum" - -"$SRCDIR/scripts/test-morph" build-morphology \ - test:morphs-repo empty-stratum hello-system diff --git a/tests.build/empty-stratum.stderr b/tests.build/empty-stratum.stderr deleted file mode 100644 index 6a4ecb05..00000000 --- a/tests.build/empty-stratum.stderr +++ /dev/null @@ -1 +0,0 @@ -ERROR: Stratum hello-stratum has no chunks in string diff --git a/without-test-modules b/without-test-modules index 530deb4f..55e5291d 100644 --- a/without-test-modules +++ b/without-test-modules @@ -52,7 +52,3 @@ distbuild/timer_event_source.py distbuild/worker_build_scheduler.py # Not unit tested, since it needs a full system branch morphlib/buildbranch.py - -# Requires rather a lot of fake data in order to be unit tested; better to -# leave it to the functional tests. -morphlib/sourceresolver.py diff --git a/yarns/branches-workspaces.yarn b/yarns/branches-workspaces.yarn index 34aa97e0..a757822e 100644 --- a/yarns/branches-workspaces.yarn +++ b/yarns/branches-workspaces.yarn @@ -233,6 +233,7 @@ build branch is made to include that change. WHEN the user makes changes to test-chunk in branch master AND the user builds systems/test-system.morph of the master branch THEN the changes to test-chunk in branch master are included in the temporary build branch + FINALLY the git server is shut down ### When branches are created ### diff --git a/yarns/building.yarn b/yarns/building.yarn index 52f2b561..b5e46b73 100644 --- a/yarns/building.yarn +++ b/yarns/building.yarn @@ -63,6 +63,7 @@ so when we deploy the system, we can check whether it exists. WHEN the user attempts to deploy the cluster test-cluster.morph in branch master with options test-system.location="$DATADIR/test.tar" THEN morph succeeded AND tarball test.tar contains etc/passwd + FINALLY the git server is shut down Distbuilding ------------ @@ -100,3 +101,14 @@ repos cached locally. AND the distbuild worker is terminated AND the communal cache server is terminated AND the git server is shut down + +Empty strata don't build +------------------------ + + SCENARIO empty-strata + GIVEN a workspace + AND a git server + WHEN the user checks out the system branch called empty-stratum + AND the user attempts to build the system systems/empty-stratum-system.morph in branch empty-stratum + THEN morph failed + FINALLY the git server is shut down diff --git a/yarns/deployment.yarn b/yarns/deployment.yarn index 47aeff5d..6ec8c0af 100644 --- a/yarns/deployment.yarn +++ b/yarns/deployment.yarn @@ -345,3 +345,4 @@ Once it is rebuilt, it can be deployed. WHEN the user attempts to deploy the cluster test-cluster.morph in branch mybranch THEN morph succeeded AND file workspace/mybranch/test/morphs/test-system.tar exists + FINALLY the git server is shut down diff --git a/yarns/implementations.yarn b/yarns/implementations.yarn index 8b43286f..2557e2e5 100644 --- a/yarns/implementations.yarn +++ b/yarns/implementations.yarn @@ -336,6 +336,32 @@ another to hold a chunk. git commit -m Initial. git tag -a "test-tag" -m "Tagging test-tag" + # A new branch is created here as the presence of an empty stratum will + # break any morph commands which load all definitions in the repository. + git checkout -b empty-stratum + + install -m644 -D /dev/stdin << EOF "systems/empty-stratum-system.morph" + name: empty-stratum-system + kind: system + arch: $arch + strata: + - name: build-essential + morph: strata/build-essential.morph + - name: core + morph: strata/core.morph + - name: empty + morph: strata/empty.morph + EOF + + install -m644 -D /dev/stdin << EOF "strata/empty.morph" + name: empty + kind: stratum + EOF + + git add . + git commit -m 'Add an empty stratum' + git checkout master + # Start a git daemon to serve our git repositories port_file="$DATADIR/git-daemon-port" pid_file="$DATADIR/git-daemon-pid" |