diff options
author | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2015-04-21 20:54:07 +0100 |
---|---|---|
committer | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2015-04-21 20:54:07 +0100 |
commit | 7477ddf869711df158e5828c300bfe93a427b0f6 (patch) | |
tree | bb7b43d2601d9c8166724f5832f8c15757a0fba8 | |
parent | 9fd7ef6fe83282139f7f636a955bcf069e8fb79c (diff) | |
parent | 208d1a46887b76daada82c4b9d79086b1ad871a4 (diff) | |
download | morph-sam/distbuild-megamix.tar.gz |
Merge branch 'review/lauren_perry/lauren/distbuild-status' into sam/distbuild-megamix (branch: sam/distbuild-megamix)
Change-Id: I321bfd8422acbddfee2ab5a506294c2f531d4adb
-rw-r--r-- | distbuild/__init__.py | 3 | ||||
-rw-r--r-- | distbuild/build_controller.py | 33 | ||||
-rw-r--r-- | distbuild/initiator.py | 148 | ||||
-rw-r--r-- | distbuild/initiator_connection.py | 77 | ||||
-rw-r--r-- | distbuild/mainloop.py | 9 | ||||
-rw-r--r-- | distbuild/protocol.py | 12 | ||||
-rw-r--r-- | morphlib/buildcommand.py | 10 | ||||
-rw-r--r-- | morphlib/plugins/build_plugin.py | 31 | ||||
-rw-r--r-- | morphlib/plugins/distbuild_plugin.py | 89 |
9 files changed, 380 insertions, 32 deletions
diff --git a/distbuild/__init__.py b/distbuild/__init__.py index e6ceda1f..9cb640ef 100644 --- a/distbuild/__init__.py +++ b/distbuild/__init__.py @@ -54,7 +54,8 @@ from build_controller import (BuildController, BuildFailed, BuildProgress, BuildOutput, BuildStepFinished, BuildStepFailed, BuildFinished, BuildCancel, build_step_name, map_build_graph) -from initiator import (Initiator, InitiatorListJobs) +from initiator import (Initiator, InitiatorStart, InitiatorCancel, + InitiatorListJobs, InitiatorStatus) from protocol import message from crashpoint import (crash_point, add_crash_condition, add_crash_conditions, diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 35b231f9..a0dba147 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -174,6 +174,13 @@ class BuildController(distbuild.StateMachine): self._helper_id = None self.debug_transitions = False self.debug_graph_state = False + self.allow_detach = build_request_message['allow_detach'] + self.build_status = 'Building' + self.build_info = { + 'id': build_request_message['id'], + 'morphology': build_request_message['morphology'], + 'build-status': self.build_status, + } def __repr__(self): return '<BuildController at 0x%x, request-id %s>' % (id(self), @@ -185,6 +192,14 @@ class BuildController(distbuild.StateMachine): def get_request(self): return self._request + def get_status(self): + return self.build_status + + def get_status_message(self): + return ('Build request ID: %s\n System build: %s\n Build status: %s' % + (self.build_info['id'], self.build_info['morphology'], + self.build_status)) + def setup(self): distbuild.crash_point() @@ -496,10 +511,18 @@ class BuildController(distbuild.StateMachine): logging.debug("BuildController %r: initiator id %s disconnected", self, event.id) - cancel_pending = distbuild.WorkerCancelPending(event.id) - self.mainloop.queue_event(distbuild.WorkerBuildQueuer, cancel_pending) + if self.allow_detach: + 
logging.debug('Detaching from client; build continuing remotely.') + else: + self.mainloop.queue_event(self, build_cancel(event.id)) + self.build_status = 'Cancelled' + + def build_cancel(self, event_id): + cancel_pending = distbuild.WorkerCancelPending(event_id) + self.mainloop.queue_event(distbuild.WorkerBuildQueuer, + cancel_pending) - cancel = BuildCancel(event.id) + cancel = BuildCancel(event_id) self.mainloop.queue_event(BuildController, cancel) self.mainloop.queue_event(self, _Abort()) @@ -533,6 +556,7 @@ class BuildController(distbuild.StateMachine): return logging.debug('BC: got build step started: %s' % artifact.name) + self.build_status = ('Building %s' % artifact.name) started = BuildStepStarted( self._request['id'], build_step_name(artifact), event.worker_name) self.mainloop.queue_event(BuildController, started) @@ -652,6 +676,8 @@ class BuildController(distbuild.StateMachine): self.fail('Building failed for %s' % artifact.name) + self.build_status = ('Failed building %s' % artifact.name) + # Cancel any jobs waiting to be executed, since there is no point # running them if this build has failed, it would just waste # resources @@ -671,6 +697,7 @@ class BuildController(distbuild.StateMachine): distbuild.crash_point() logging.debug('Notifying initiator of successful build') + self.build_status = 'Finished' baseurl = urlparse.urljoin( self._artifact_cache_server, '/1.0/artifacts') urls = [] diff --git a/distbuild/initiator.py b/distbuild/initiator.py index 0119ecd6..b5af1696 100644 --- a/distbuild/initiator.py +++ b/distbuild/initiator.py @@ -69,6 +69,7 @@ class Initiator(distbuild.StateMachine): self._partial = True self._step_outputs = {} self.debug_transitions = False + self.allow_detach = False # The build-log output dir is set up in _open_output() when we # receive the first log message. 
Thus if we never get that far, we @@ -107,7 +108,8 @@ class Initiator(distbuild.StateMachine): original_ref=self._original_ref, component_names=self._component_names, partial=self._partial, - protocol_version=distbuild.protocol.VERSION + protocol_version=distbuild.protocol.VERSION, + allow_detach=self.allow_detach, ) self._jm.send(msg) logging.debug('Initiator: sent to controller: %s', repr(msg)) @@ -127,7 +129,7 @@ class Initiator(distbuild.StateMachine): 'step-finished': self._handle_step_finished_message, 'step-failed': self._handle_step_failed_message, } - + handler = handlers[event.msg['type']] handler(event.msg) @@ -178,6 +180,12 @@ class Initiator(distbuild.StateMachine): self._write_status_to_build_log(self._get_output(msg), status) def _handle_step_started_message(self, msg): + if self.allow_detach: + self._app.status(msg='Detaching distbuild from controller (build' + ' will continue on the distbuild network)') + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + self.allow_detach = False status = 'Started building %s on %s' % ( msg['step_name'], msg['worker_name']) self._app.status(msg=status) @@ -224,7 +232,7 @@ class Initiator(distbuild.StateMachine): self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) self._jm.close() logging.info('Build finished OK') - + urls = event.msg['urls'] if urls: for url in urls: @@ -257,6 +265,72 @@ class Initiator(distbuild.StateMachine): self._step_outputs = {} +class InitiatorStart(Initiator): + + def __init__(self, cm, conn, app, repo_name, ref, morphology, + original_ref, component_names): + super(InitiatorStart, self).__init__(cm, conn, app, repo_name, ref, + morphology, original_ref, + component_names) + self._step_outputs = {} + self.debug_transitions = False + self.allow_detach = True + + +class InitiatorCancel(distbuild.StateMachine): + + def __init__(self, cm, conn, app, job_id): + distbuild.StateMachine.__init__(self, 'waiting') + self._cm = cm + self._conn = conn + 
self._app = app + self._job_id = job_id + + def setup(self): + distbuild.crash_point() + + self._jm = distbuild.JsonMachine(self._conn) + self.mainloop.add_state_machine(self._jm) + logging.debug('initiator: _jm=%s' % repr(self._jm)) + + spec = [ + # state, source, event_class, new_state, callback + ('waiting', self._jm, distbuild.JsonEof, None, self._terminate), + ('waiting', self._jm, distbuild.JsonNewMessage, None, + self._handle_json_message), + ] + self.add_transitions(spec) + + self._app.status(msg='Sending cancel request for distbuild job.') + msg = distbuild.message('build-cancel', + id=self._job_id, + protocol_version=distbuild.protocol.VERSION, + ) + self._jm.send(msg) + logging.debug('Initiator: sent to controller: %s', repr(msg)) + + def _handle_json_message(self, event_source, event): + distbuild.crash_point() + + logging.debug('Initiator: from controller: %s', str(event.msg)) + + handlers = { + 'request-output': self._handle_request_output, + } + + handler = handlers[event.msg['type']] + handler(event.msg) + + def _handle_request_output(self, msg): + self._app.status(msg=str(msg['message'])) + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + def _terminate(self, event_source, event): + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + class InitiatorListJobs(distbuild.StateMachine): def __init__(self, cm, conn, app): @@ -285,6 +359,70 @@ class InitiatorListJobs(distbuild.StateMachine): self._app.status(msg='Requesting currently running distbuilds.') msg = distbuild.message('list-requests', id=msg_uuid, + protocol_version=distbuild.protocol.VERSION, + ) + self._jm.send(msg) + logging.debug('Initiator: sent to controller: %s', repr(msg)) + + def _handle_json_message(self, event_source, event): + distbuild.crash_point() + + logging.debug('Initiator: from controller: %s', str(event.msg)) + + handlers = { + # set build-failed rather than request-failed so old versions of + # morph 
recognise the message and don't ignore it + 'build-failed': self._handle_request_failed, + 'request-output': self._handle_request_output, + } + + handler = handlers[event.msg['type']] + handler(event.msg) + + def _handle_request_failed(self, msg): + self._app.status(msg=str(msg['reason'])) + self.mainloop.queue_event(self, _Failed(msg)) + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + def _handle_request_output(self, msg): + self._app.status(msg=str(msg['message'])) + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + def _terminate(self, event_source, event): + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + +class InitiatorStatus(distbuild.StateMachine): + + def __init__(self, cm, conn, app, job_id): + distbuild.StateMachine.__init__(self, 'waiting') + self._cm = cm + self._conn = conn + self._app = app + self._job_id = job_id + + def setup(self): + distbuild.crash_point() + + self._jm = distbuild.JsonMachine(self._conn) + self.mainloop.add_state_machine(self._jm) + logging.debug('initiator: _jm=%s' % repr(self._jm)) + + spec = [ + # state, source, event_class, new_state, callback + ('waiting', self._jm, distbuild.JsonEof, None, self._terminate), + ('waiting', self._jm, distbuild.JsonNewMessage, None, + self._handle_json_message), + ] + self.add_transitions(spec) + + self._app.status(msg='Requesting status of recently-ran distbuilds.') + msg = distbuild.message('build-status', + id=self._job_id, + protocol_version=distbuild.protocol.VERSION, ) self._jm.send(msg) logging.debug('Initiator: sent to controller: %s', repr(msg)) @@ -295,13 +433,13 @@ class InitiatorListJobs(distbuild.StateMachine): logging.debug('Initiator: from controller: %s', str(event.msg)) handlers = { - 'list-request-output': self._handle_list_request_output, + 'request-output': self._handle_request_output, } handler = handlers[event.msg['type']] handler(event.msg) - def 
_handle_list_request_output(self, msg): + def _handle_request_output(self, msg): self._app.status(msg=str(msg['message'])) self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) self._jm.close() diff --git a/distbuild/initiator_connection.py b/distbuild/initiator_connection.py index 54322f6a..c566e548 100644 --- a/distbuild/initiator_connection.py +++ b/distbuild/initiator_connection.py @@ -97,29 +97,34 @@ class InitiatorConnection(distbuild.StateMachine): logging.debug('InitiatorConnection: from %s: %r', self.initiator_name, event.msg) + msg_handler = { + 'build-request': self._handle_build_request, + 'list-requests': self._handle_list_requests, + 'build-cancel': self._handle_build_cancel, + 'build-status': self._handle_build_status, + } try: - if event.msg['type'] == 'build-request': - self._handle_build_request(event) - elif event.msg['type'] == 'list-requests': - self._handle_list_requests(event) - else: - logging.error('Invalid message type: %s', event.msg) + if event.msg.get('protocol_version') != distbuild.protocol.VERSION: + msg = distbuild.message('build-failed', + # use build-failed as it is understood by older versions of + # morph; if morph is old enough (protocol_version < 1) it + # won't understand a new message and ignore it or cause the + # request to hang + id=event.msg['id'], + reason=('Protocol version mismatch between server & ' + 'initiator: distbuild network uses distbuild ' + 'protocol version %i, but client uses version %i.' 
+ % (distbuild.protocol.VERSION, + event.msg.get('protocol_version')))) + self.jm.send(msg) + self._log_send(msg) + return + msg_handler[event.msg['type']](event) except (KeyError, ValueError) as ex: logging.error('Invalid message from initiator: %s: exception %s', event.msg, ex) def _handle_build_request(self, event): - if event.msg.get('protocol_version') != distbuild.protocol.VERSION: - msg = distbuild.message('build-failed', - id=event.msg['id'], - reason=('Protocol version mismatch between server & initiator:' - ' distbuild network uses distbuild protocol version %i' - ', but client uses version %i.' % ( - distbuild.protocol.VERSION, - event.msg.get('protocol_version')))) - self.jm.send(msg) - self._log_send(msg) - return new_id = self._idgen.next() self.our_ids.add(new_id) self._route_map.add(event.msg['id'], new_id) @@ -128,6 +133,7 @@ class InitiatorConnection(distbuild.StateMachine): self, event.msg, self.artifact_cache_server, self.morph_instance) self.mainloop.add_state_machine(build_controller) + self.mainloop.store_state_machine(build_controller) def _handle_list_requests(self, event): requests = self.mainloop.state_machines_of_type( @@ -143,10 +149,45 @@ class InitiatorConnection(distbuild.StateMachine): build.get_request()['repo'], build.get_request()['ref'], build.get_request()['morphology'])) - msg = distbuild.message('list-request-output', + msg = distbuild.message('request-output', message=('\n\n'.join(output_msg))) self.jm.send(msg) + def _handle_build_cancel(self, event): + requests = self.mainloop.state_machines_of_type( + distbuild.BuildController) + for build in requests: + if build.get_request()['id'] == event.msg['id']: + self.mainloop.queue_event(InitiatorConnection, + InitiatorDisconnect(event.msg['id'])) + self.mainloop.queue_event(distbuild.BuildController, + build.build_cancel(event.msg['id'])) + msg = distbuild.message('request-output', message=( + 'Cancelling build request with ID %s' % + event.msg['id'])) + self.jm.send(msg) + 
break + else: + msg = distbuild.message('request-output', message=('Given ' + 'build-request ID does not match any ' + 'running build IDs.')) + self.jm.send(msg) + + def _handle_build_status(self, event): + requests = self.mainloop.state_stored_requests( + distbuild.BuildController) + for build in requests: + if build.get_request()['id'] == event.msg['id']: + msg = distbuild.message('request-output', message=( + build.get_status_message())) + self.jm.send(msg) + break + else: + msg = distbuild.message('request-output', message=('Given ' + 'build-request ID does not match any ' + 'recent build IDs.')) + self.jm.send(msg) + def _disconnect(self, event_source, event): for id in self.our_ids: logging.debug('InitiatorConnection: %s: InitiatorDisconnect(%s)', diff --git a/distbuild/mainloop.py b/distbuild/mainloop.py index e7c0cc3b..3a5963dd 100644 --- a/distbuild/mainloop.py +++ b/distbuild/mainloop.py @@ -40,6 +40,7 @@ class MainLoop(object): self._machines = [] self._sources = [] self._events = [] + self.store_request = [] self.dump_filename = None def add_state_machine(self, machine): @@ -51,7 +52,10 @@ class MainLoop(object): filename = '%s%s.dot' % (self.dump_filename, machine.__class__.__name__) machine.dump_dot(filename) - + + def store_state_machine(self, machine): + self.store_request.append(machine) + def remove_state_machine(self, machine): logging.debug('MainLoop.remove_state_machine: %s' % machine) self._machines.remove(machine) @@ -59,6 +63,9 @@ class MainLoop(object): def state_machines_of_type(self, machine_type): return [m for m in self._machines if isinstance(m, machine_type)] + def state_stored_requests(self, machine_type): + return [m for m in self.store_request if isinstance(m, machine_type)] + def n_state_machines_of_type(self, machine_type): return len(self.state_machines_of_type(machine_type)) diff --git a/distbuild/protocol.py b/distbuild/protocol.py index 0f936946..d6bb513a 100644 --- a/distbuild/protocol.py +++ b/distbuild/protocol.py @@ -33,6 
+33,7 @@ _required_fields = { 'morphology', 'partial', 'protocol_version', + 'allow_detach', ], 'build-progress': [ 'id', @@ -87,10 +88,19 @@ _required_fields = { ], 'list-requests': [ 'id', + 'protocol_version', ], - 'list-request-output': [ + 'request-output': [ 'message', ], + 'build-cancel': [ + 'id', + 'protocol_version', + ], + 'build-status': [ + 'id', + 'protocol_version', + ], } diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index fd5acdf5..f34b898c 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -519,10 +519,11 @@ class InitiatorBuildCommand(BuildCommand): RECONNECT_INTERVAL = 30 # seconds MAX_RETRIES = 1 - def __init__(self, app, addr, port): + def __init__(self, app, addr, port, allow_detach): self.app = app self.addr = addr self.port = port + self.allow_detach = allow_detach self.app.settings['push-build-branches'] = True super(InitiatorBuildCommand, self).__init__(app) @@ -540,10 +541,15 @@ class InitiatorBuildCommand(BuildCommand): loop = distbuild.MainLoop() args = [repo_name, ref, filename, original_ref or ref, component_names] + if self.allow_detach: + initiator_type = distbuild.InitiatorStart + else: + initiator_type = distbuild.Initiator + cm = distbuild.InitiatorConnectionMachine(self.app, self.addr, self.port, - distbuild.Initiator, + initiator_type, [self.app] + args, self.RECONNECT_INTERVAL, self.MAX_RETRIES) diff --git a/morphlib/plugins/build_plugin.py b/morphlib/plugins/build_plugin.py index e5b35853..8c94b928 100644 --- a/morphlib/plugins/build_plugin.py +++ b/morphlib/plugins/build_plugin.py @@ -46,10 +46,14 @@ class BuildPlugin(cliapp.Plugin): '[COMPONENT...]') self.app.add_subcommand('distbuild', self.distbuild, arg_synopsis='SYSTEM [COMPONENT...]') + self.app.add_subcommand('distbuild-start', self.distbuild_start, + arg_synopsis='SYSTEM [COMPONENT...]') self.use_distbuild = False + self.allow_detach = False def disable(self): self.use_distbuild = False + self.allow_detach = False def 
distbuild_morphology(self, args): '''Distbuild a system, outside of a system branch. @@ -97,6 +101,12 @@ class BuildPlugin(cliapp.Plugin): your system, the system artifact will be copied from your trove and cached locally. + Log information can be found in the current working directory, in + directories called build-xx. + + If you do not have a persistent connection to the server on which + the distbuild runs, consider using `morph distbuild-start` instead. + Example: morph distbuild devel-system-x86_64-generic.morph @@ -106,6 +116,25 @@ class BuildPlugin(cliapp.Plugin): self.use_distbuild = True self.build(args) + def distbuild_start(self, args): + '''Distbuild a system image without a lasting client-server connection. + + This command launches a distributed build, and disconnects from the + distbuild cluster once the build starts, leaving the build running + remotely. + + The command will return a build-ID which can be used to cancel the + distbuild via `morph distbuild-cancel`. Builds started in this manner + can be found via `morph distbuild-list-jobs` + + See `morph help distbuild` for more information and example usage. + + ''' + + self.use_distbuild = True + self.allow_detach = True + self.build(args) + def build_morphology(self, args): '''Build a system, outside of a system branch. 
@@ -210,7 +239,7 @@ class BuildPlugin(cliapp.Plugin): port = self.app.settings['controller-initiator-port'] build_command = morphlib.buildcommand.InitiatorBuildCommand( - self.app, addr, port) + self.app, addr, port, self.allow_detach) else: build_command = morphlib.buildcommand.BuildCommand(self.app) diff --git a/morphlib/plugins/distbuild_plugin.py b/morphlib/plugins/distbuild_plugin.py index 09669988..69e815e9 100644 --- a/morphlib/plugins/distbuild_plugin.py +++ b/morphlib/plugins/distbuild_plugin.py @@ -40,6 +40,95 @@ class DistbuildOptionsPlugin(cliapp.Plugin): pass +class DistbuildCancel(cliapp.Plugin): + + RECONNECT_INTERVAL = 30 # seconds + MAX_RETRIES = 1 + + def enable(self): + self.app.add_subcommand('distbuild-cancel', self.distbuild_cancel, + arg_synopsis='ID') + + def disable(self): + pass + + def distbuild_cancel(self, args): + '''Cancels a currently-running distbuild + + Command line arguments: + + `ID` of the running process that you wish to cancel + (this can be found via distbuild-list-jobs) + + Example: + + * morph distbuild-cancel InitiatorConnection-1 + + ''' + + if len(args) == 0: + raise cliapp.AppException( + 'This command requires a build-request ID to run.') + + addr = self.app.settings['controller-initiator-address'] + port = self.app.settings['controller-initiator-port'] + icm = distbuild.InitiatorConnectionMachine(self.app, addr, port, + distbuild.InitiatorCancel, + [self.app] + args, + self.RECONNECT_INTERVAL, + self.MAX_RETRIES) + loop = distbuild.MainLoop() + loop.add_state_machine(icm) + loop.run() + + +class DistbuildStatusPlugin(cliapp.Plugin): + + RECONNECT_INTERVAL = 30 # seconds + MAX_RETRIES = 1 + + def enable(self): + self.app.add_subcommand('distbuild-get-status', self.distbuild_status, + arg_synopsis='ID') + + def disable(self): + pass + + def distbuild_status(self, args): + '''Displays build status of recent distbuild requests. + + Lists last known build status for all distbuilds (e.g. 
Building, + Failed, Finished, Cancelled) on a given distbuild server as set in + /etc/morph.conf + + Example: + + morph distbuild-get-status InitiatorConnection-1 + + Example output: + + Build request ID: InitiatorConnection-1 + System build: systems/devel-system-x86_64-generic.morph + Build status: Building stage1-binutils-misc + + ''' + + if len(args) == 0: + raise cliapp.AppException( + 'This command requires a build-request ID to run.') + + addr = self.app.settings['controller-initiator-address'] + port = self.app.settings['controller-initiator-port'] + icm = distbuild.InitiatorConnectionMachine(self.app, addr, port, + distbuild.InitiatorStatus, + [self.app] + args, + self.RECONNECT_INTERVAL, + self.MAX_RETRIES) + loop = distbuild.MainLoop() + loop.add_state_machine(icm) + loop.run() + + class DistbuildListJobsPlugin(cliapp.Plugin): RECONNECT_INTERVAL = 30 # seconds |