From 8ca37a744338a4f32b1ff4c20ec6bd36e393aa25 Mon Sep 17 00:00:00 2001 From: Mason Test Runner Date: Mon, 10 Nov 2014 16:08:14 +0000 Subject: Add modified copies of the release-build and release-test-os scripts These need to be put in /usr/lib/mason/ to allow this prototype to function properly. --- mason-build | 172 +++++++++++++++++++ mason-test-os | 530 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 702 insertions(+) create mode 100755 mason-build create mode 100755 mason-test-os diff --git a/mason-build b/mason-build new file mode 100755 index 0000000..0fc59b9 --- /dev/null +++ b/mason-build @@ -0,0 +1,172 @@ +#!/usr/bin/env python +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +import cliapp +import morphlib +import os +import subprocess +import sys +import time + + +class Build(object): + '''A single distbuild instance.''' + + def __init__(self, name, arch, app): + self.system_name = name + self.controller = app.controllers[arch] + self.command = [ + 'morph', 'build', self.system_name] + + def start(self): + self.process = subprocess.Popen(self.command) + + def completed(self): + return (self.process.poll() is not None) + + +class ReleaseApp(cliapp.Application): + + '''Cliapp app that handles distbuilding and deploying a cluster.''' + + def add_settings(self): + self.settings.string_list(['controllers'], + 'a list of distbuild controllers and their ' + 'architecture') + + self.settings.string(['trove-host'], + 'hostname of Trove instance') + + self.settings.string(['artifact-cache-server'], + 'server to fetch artifacts from', default=None) + + self.settings.string(['release-number'], + 'Baserock version of the systems being built', + default='yy.ww') + + def process_args(self, args): + '''Process the command line''' + self.controllers = {} + controllers_list = self.settings['controllers'] + for item in controllers_list: + arch, controller = item.split(':') + self.controllers[arch] = controller + + self.ref = cliapp.runcmd(['git', 'rev-parse', 'HEAD']).strip() + + sb = morphlib.sysbranchdir.open_from_within('.') + definitions = sb.get_git_directory_name(sb.root_repository_url) + defs_repo = morphlib.gitdir.GitDirectory(definitions) + self.loader = morphlib.morphloader.MorphologyLoader() + self.finder = morphlib.morphologyfinder.MorphologyFinder(defs_repo) + + cluster_name = args[0] + cluster, cluster_path = self.load_morphology(cluster_name) + + builds = self.prepare_builds(cluster) + if not os.path.exists('builds'): + os.mkdir('builds') + os.chdir('builds') + for build in builds: + build.start() + + while not all(build.completed() for build in builds): + time.sleep(1) + + fail = False + for build in builds: + if build.process.returncode != 0: + fail = True + sys.stderr.write( + 'Building failed for %s\n' % build.system_name) + if fail: + raise cliapp.AppException('Building of systems failed') + + os.chdir('..') + if not os.path.exists('release'): + os.mkdir('release') + + def load_morphology(self, name, kind=None): + path = morphlib.util.sanitise_morphology_path(name) + morph = self.loader.load_from_string( + self.finder.read_morphology(path)) + if kind: + assert morph['kind'] == kind + return morph, path + + def iterate_systems(self, system_list): + for system in system_list: + yield system['morph'] + if 'subsystems' in system: + for subsystem in self.iterate_systems(system['subsystems']): + yield subsystem + + def prepare_builds(self, cluster): + '''Prepare a list of builds''' + systems = set(self.iterate_systems(cluster['systems'])) + builds = [] + for system_name in systems: + system, _ = self.load_morphology(system_name) + if system['arch'] in self.controllers: + builds.append(Build(system_name, system['arch'], self)) + return builds + + def deploy_images(self, cluster, cluster_path): + version_label = 'baserock-%s' % self.settings['release-number'] + outputs = {} + + for system in cluster['systems']: + morphology_name = system['morph'] + morphology = self.load_morphology(morphology_name)[0] + if morphology['arch'] not in self.controllers: + continue + + for deployment_name, deployment_info in system['deploy'].iteritems(): + # The release.morph cluster must specify a basename for the file, + # of name and extension. This script knows about name, but it + # can't find out the appropriate file extension without second + # guessing the behaviour of write extensions. + basename = deployment_info['location'] + + if '/' in basename or basename.startswith(version_label): + raise cliapp.AppException( + 'In %s: system %s.location should be just the base name, ' + 'e.g. "%s.img"' % (cluster_path, deployment_name, deployment_name)) + + filename = os.path.join('release', '%s-%s' % (version_label, basename)) + if os.path.exists(filename): + self.output.write('Reusing existing deployment of %s\n' % filename) + else: + self.output.write('Creating %s from release.morph\n' % filename) + self.deploy_single_image(cluster_path, deployment_name, filename, version_label) + + def deploy_single_image(self, cluster_path, name, location, version_label): + deploy_command = [ + 'morph', 'deploy', cluster_path, name, + '--trove-host=%s' % self.settings['trove-host']] + artifact_server = self.settings['artifact-cache-server'] + if artifact_server is not None: + deploy_command.append('--artifact-cache-server=%s' % artifact_server) + deploy_command.extend(( + '%s.location=%s' % (name, location), + '%s.VERSION_LABEL=%s' % (name, version_label) + )) + + cliapp.runcmd(deploy_command, stdout=sys.stdout) + + +ReleaseApp().run() diff --git a/mason-test-os b/mason-test-os new file mode 100755 index 0000000..a41386b --- /dev/null +++ b/mason-test-os @@ -0,0 +1,530 @@ +#!/usr/bin/env python +# +# Copyright 2014 Codethink Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +'''release-test + +This script deploys the set of systems in the cluster morphology it is +instructed to read, to test that they work correctly. + +''' + +import cliapp +import os +import pipes +import shlex +import shutil +import socket +import tempfile +import time +import uuid + +import morphlib + + +class NovaList: + def __init__(self): + self.output = [] + self.lines = [] + self.instance = [] + + def update(self): + self.output = cliapp.runcmd(['nova', 'list']) + self.lines = self.output.split('\n') + self.lines = self.lines[3:-2] + + def get_nova_details_for_instance(self, name): + self.update() + + for line in self.lines: + entries = line.split('|') + stripped_line = [entry.strip() for entry in entries] + if stripped_line.count(name) == 1: + self.instance = stripped_line + + def get_nova_state_for_instance(self, name): + self.get_nova_details_for_instance(name) + if not self.instance: + return + return self.instance[3] + + def get_nova_ip_for_instance(self, name): + self.get_nova_details_for_instance(name) + if not self.instance: + return + + if self.get_nova_state_for_instance(name) != 'ACTIVE': + return + + return self.instance[6] + + def get_nova_ip_for_instance_timeout(self, name, timeout=120): + start_time = time.time() + + while self.get_nova_state_for_instance(name) != 'ACTIVE': + + if time.time() > start_time + timeout: + print "%s not ACTIVE after %i seconds" % (name, timeout) + return + + time.sleep(1) + + ip_addr = self.get_nova_ip_for_instance(name) + if not ip_addr: + return + + if ip_addr.count('=') == 0: + return + + ip_addr = ip_addr[ip_addr.find('=') + 1:] + + if ip_addr.count(',') == 0: + return ip_addr + + return ip_addr[:ip_addr.find(',')] + + + +class MorphologyHelper(object): + + def __init__(self): + self.sb = sb = morphlib.sysbranchdir.open_from_within('.') + defs_repo_path = sb.get_git_directory_name(sb.root_repository_url) + self.defs_repo = morphlib.gitdir.GitDirectory(defs_repo_path) + self.loader = morphlib.morphloader.MorphologyLoader() + self.finder = morphlib.morphologyfinder.MorphologyFinder(self.defs_repo) + + def load_morphology(self, path): + text = self.finder.read_morphology(path) + return self.loader.load_from_string(text) + + @classmethod + def iterate_systems(cls, systems_list): + for system in systems_list: + yield morphlib.util.sanitise_morphology_path(system['morph']) + if 'subsystems' in system: + for subsystem in cls.iterate_systems(system['subsystems']): + yield subsystem + + def iterate_cluster_deployments(cls, cluster_morph): + for system in cluster_morph['systems']: + path = morphlib.util.sanitise_morphology_path(system['morph']) + defaults = system.get('deploy-defaults', {}) + for name, options in system['deploy'].iteritems(): + config = dict(defaults) + config.update(options) + yield path, name, config + + def load_cluster_systems(self, cluster_morph): + for system_path in set(self.iterate_systems(cluster_morph['systems'])): + system_morph = self.load_morphology(system_path) + yield system_path, system_morph + + +class TimeoutError(cliapp.AppException): + + """Error to be raised when a connection waits too long""" + + def __init__(self, msg): + super(TimeoutError, self).__init__(msg) + + +class VMHost(object): + + def __init__(self, user, address, disk_path): + self.user = user + self.address = address + self.disk_path = disk_path + + @property + def ssh_host(self): + return '{user}@{address}'.format(user=self.user, address=self.address) + + def runcmd(self, *args, **kwargs): + cliapp.ssh_runcmd(self.ssh_host, *args, **kwargs) + + +class DeployedSystemInstance(object): + + def __init__(self, deployment, config, host_machine, vm_id, rootfs_path, + ip_addr, hostname): + self.deployment = deployment + self.config = config + self.ip_address = ip_addr + self.host_machine = host_machine + self.vm_id = vm_id + self.rootfs_path = rootfs_path + self.hostname = hostname + + @property + def ssh_host(self): + # TODO: Stop assuming we ssh into test instances as root + return 'root@{host}'.format(host=self.ip_address) + + def runcmd(self, argv, chdir='.', **kwargs): + ssh_cmd = ['ssh', '-o', 'StrictHostKeyChecking=no', + '-o', 'UserKnownHostsFile=/dev/null', self.ssh_host] + cmd = ['sh', '-c', 'cd "$1" && shift && exec "$@"', '-', chdir] + cmd += argv + ssh_cmd.append(' '.join(map(pipes.quote, cmd))) + return cliapp.runcmd(ssh_cmd, **kwargs) + + def _wait_for_dhcp(self, timeout): + '''Block until given hostname resolves successfully. + + Raises TimeoutError if the hostname has not appeared in 'timeout' + seconds. + + ''' + start_time = time.time() + while True: + try: + socket.gethostbyname(self.ip_address) + return + except socket.gaierror: + pass + if time.time() > start_time + timeout: + raise TimeoutError("Host %s did not appear after %i seconds" % + (self.ip_address, timeout)) + time.sleep(0.5) + + def _wait_for_ssh(self, timeout): + """Wait until the deployed VM is responding via SSH""" + start_time = time.time() + while True: + try: + self.runcmd(['true'], stdin=None, stdout=None, stderr=None) + return + except cliapp.AppException: + # TODO: Stop assuming the ssh part of the command is what failed + if time.time() > start_time + timeout: + raise TimeoutError("%s sshd did not start after %i seconds" + % (self.ip_address, timeout)) + time.sleep(0.5) + + def _wait_for_cloud_init(self, timeout): + """Wait until cloud init has resized the disc""" + start_time = time.time() + while True: + try: + out = self.runcmd(['sh', '-c', + 'test -e "$1" && echo exists || echo does not exist', + '-', + '/root/cloud-init-finished']) + except: + import traceback + traceback.print_exc() + raise + if out.strip() == 'exists': + return + if time.time() > start_time + timeout: + raise TimeoutError("Disc size not increased after %i seconds" + % (timeout)) + time.sleep(3) + + def wait_until_online(self, timeout=120): + self._wait_for_dhcp(timeout) + self._wait_for_ssh(timeout) + self._wait_for_cloud_init(timeout) + print "Test system %s ready to run tests." % (self.hostname) + + def delete(self): + # Stop and remove VM + print "Deleting %s test instance" % (self.hostname) + try: + cliapp.runcmd(['nova', 'delete', self.hostname]) + except cliapp.AppException as e: + # TODO: Stop assuming that delete failed because the instance + # wasn't running + print "- Failed" + pass + print "Deleting %s test disc image" % (self.hostname) + try: + cliapp.runcmd(['nova', 'image-delete', self.hostname]) + except cliapp.AppException as e: + # TODO: Stop assuming that image-delete failed because it was + # already removed + print "- Failed" + pass + + +class Deployment(object): + + def __init__(self, cluster_path, name, deployment_config, + host_machine, net_id): + self.cluster_path = cluster_path + self.name = name + self.deployment_config = deployment_config + self.host_machine = host_machine + self.net_id = net_id + + @staticmethod + def _ssh_host_key_exists(hostname): + """Check if an ssh host key exists in known_hosts""" + if not os.path.exists('/root/.ssh/known_hosts'): + return False + with open('/root/.ssh/known_hosts', 'r') as known_hosts: + return any(line.startswith(hostname) for line in known_hosts) + + def _update_known_hosts(self): + if not self._ssh_host_key_exists(self.host_machine.address): + with open('/root/.ssh/known_hosts', 'a') as known_hosts: + cliapp.runcmd(['ssh-keyscan', self.host_machine.address], + stdout=known_hosts) + + @staticmethod + def _generate_sshkey_config(tempdir, config): + manifest = os.path.join(tempdir, 'manifest') + with open(manifest, 'w') as f: + f.write('0040700 0 0 /root/.ssh\n') + f.write('overwrite 0100600 0 0 /root/.ssh/authorized_keys\n') + authkeys = os.path.join(tempdir, 'root', '.ssh', 'authorized_keys') + os.makedirs(os.path.dirname(authkeys)) + with open(authkeys, 'w') as auth_f: + with open('/root/.ssh/id_rsa.pub', 'r') as key_f: + shutil.copyfileobj(key_f, auth_f) + + install_files = shlex.split(config.get('INSTALL_FILES', '')) + install_files.append(manifest) + yield 'INSTALL_FILES', ' '.join(pipes.quote(f) for f in install_files) + + def deploy(self): + self._update_known_hosts() + + hostname = str(uuid.uuid4()) + vm_id = hostname + image_base = self.host_machine.disk_path + rootpath = '{image_base}/{hostname}.img'.format(image_base=image_base, + hostname=hostname) + loc = 'http://{ssh_host}:5000/v2.0'.format( + ssh_host=self.host_machine.ssh_host, id=vm_id, path=rootpath) + + options = { + 'type': 'openstack', + 'location': loc, + 'HOSTNAME': hostname, + 'DISK_SIZE': '5G', + 'RAM_SIZE': '2G', + 'VERSION_LABEL': 'release-test', + 'OPENSTACK_USER': os.environ['OS_USERNAME'], + 'OPENSTACK_TENANT': os.environ['OS_TENANT_NAME'], + 'OPENSTACK_PASSWORD': os.environ['OS_PASSWORD'], + 'OPENSTACK_IMAGENAME': hostname, + 'CLOUD_INIT': 'yes', + 'KERNEL_ARGS': 'console=tty0 console=ttyS0', + } + + tempdir = tempfile.mkdtemp() + try: + options.update( + self._generate_sshkey_config(tempdir, + self.deployment_config)) + + # Deploy the image to openstack + args = ['morph', 'deploy', self.cluster_path, self.name] + for k, v in options.iteritems(): + args.append('%s.%s=%s' % (self.name, k, v)) + cliapp.runcmd(args, stdin=None, stdout=None, stderr=None) + + config = dict(self.deployment_config) + config.update(options) + + # Boot an instance from the image + args = ['nova', 'boot', + '--flavor', 'm1.medium', + '--image', hostname, + '--user-data', '/usr/lib/mason/os-init-script', + '--nic', "net-id=%s" % (self.net_id), + hostname] + output = cliapp.runcmd(args) + + # Print nova boot output, with adminPass line removed + output_lines = output.split('\n') + for line in output_lines: + if line.find('adminPass') != -1: + password_line = line + output_lines.remove(password_line) + output = '\n'.join(output_lines) + print output + + # Get ip address from nova list + nl = NovaList() + ip_addr = nl.get_nova_ip_for_instance_timeout(hostname) + print "IP address for instance %s: %s" % (hostname, ip_addr) + + return DeployedSystemInstance(self, config, self.host_machine, + vm_id, rootpath, ip_addr, hostname) + finally: + shutil.rmtree(tempdir) + + +class ReleaseApp(cliapp.Application): + + """Cliapp application which handles automatic builds and tests""" + + def add_settings(self): + """Add the command line options needed""" + group_main = 'Program Options' + self.settings.string_list(['deployment-host'], + 'ARCH:HOST:PATH that VMs can be deployed to', + default=None, + group=group_main) + self.settings.string(['trove-host'], + 'Address of Trove for test systems to build from', + default=None, + group=group_main) + self.settings.string(['trove-id'], + 'ID of Trove for test systems to build from', + default=None, + group=group_main) + self.settings.string(['build-ref-prefix'], + 'Prefix of build branches for test systems', + default=None, + group=group_main) + self.settings.string(['net-id'], + 'Openstack network ID', + default=None, + group=group_main) + self.settings.string(['test-ref'], + 'Ref of definitions to deploy test system from', + default=None, + group=group_main) + + @staticmethod + def _run_tests(instance, system_path, system_morph, + (trove_host, trove_id, build_ref_prefix), + morph_helper, systems, ref): + instance.wait_until_online() + + tests = [] + def baserock_build_test(instance): + instance.runcmd(['git', 'config', '--global', 'user.name', + 'Test Instance of %s' % instance.deployment.name]) + instance.runcmd(['git', 'config', '--global', 'user.email', + 'ci-test@%s' % instance.config['HOSTNAME']]) + instance.runcmd(['mkdir', '-p', '/src/ws', '/src/cache', + '/src/tmp']) + def morph_cmd(*args, **kwargs): + # TODO: decide whether to use cached artifacts or not by + # adding --artifact-cache-server= --cache-server= + argv = ['morph', '--log=/src/morph.log', '--cachedir=/src/cache', + '--tempdir=/src/tmp', '--log-max=100M', + '--trove-host', trove_host, '--trove-id', trove_id, + '--build-ref-prefix', build_ref_prefix] + argv.extend(args) + instance.runcmd(argv, **kwargs) + + repo = morph_helper.sb.root_repository_url + sha1 = morph_helper.defs_repo.resolve_ref_to_commit(ref) + morph_cmd('init', '/src/ws') + chdir = '/src/ws' + + morph_cmd('checkout', repo, ref, chdir=chdir) + # TODO: Add a morph subcommand that gives the path to the root repository. + repo_path = os.path.relpath( + morph_helper.sb.get_git_directory_name(repo), + morph_helper.sb.root_directory) + chdir = os.path.join(chdir, ref, repo_path) + + instance.runcmd(['git', 'reset', '--hard', sha1], chdir=chdir) + print 'Building test systems for {sys}'.format(sys=system_path) + for to_build_path, to_build_morph in systems.iteritems(): + if to_build_morph['arch'] == system_morph['arch']: + print 'Test building {path}'.format(path=to_build_path) + morph_cmd('build', to_build_path, chdir=chdir, + stdin=None, stdout=None, stderr=None) + print 'Finished Building test systems' + + def python_smoke_test(instance): + instance.runcmd(['python', '-c', 'print "Hello World"']) + + # TODO: Come up with a better way of determining which tests to run + if 'devel' in system_path: + tests.append(baserock_build_test) + else: + tests.append(python_smoke_test) + + for test in tests: + test(instance) + + def deploy_and_test_systems(self, cluster_path, + deployment_hosts, build_test_config, + net_id): + """Run the deployments and tests""" + + version = 'release-test' + + morph_helper = MorphologyHelper() + cluster_morph = morph_helper.load_morphology(cluster_path) + systems = dict(morph_helper.load_cluster_systems(cluster_morph)) + + for system_path, deployment_name, deployment_config in \ + morph_helper.iterate_cluster_deployments(cluster_morph): + + system_morph = systems[system_path] + # We can only test systems in KVM that have a BSP + if not any('bsp' in si['morph'] for si in system_morph['strata']): + continue + + # We can only test systems in KVM that we have a host for + if system_morph['arch'] not in deployment_hosts: + continue + host_machine = deployment_hosts[system_morph['arch']] + deployment = Deployment(cluster_path, deployment_name, + deployment_config, host_machine, + net_id) + + ref = self.settings['test-ref'] + instance = deployment.deploy() + try: + self._run_tests(instance, system_path, system_morph, + build_test_config, morph_helper, systems, ref) + finally: + instance.delete() + + def process_args(self, args): + """Process the command line args and kick off the builds/tests""" + if self.settings['build-ref-prefix'] is None: + self.settings['build-ref-prefix'] = ( + os.path.join(self.settings['trove-id'], 'builds')) + for setting in ('deployment-host', 'trove-host', + 'trove-id', 'build-ref-prefix', 'net-id'): + self.settings.require(setting) + + deployment_hosts = {} + for host_config in self.settings['deployment-host']: + arch, address = host_config.split(':', 1) + user, address = address.split('@', 1) + address, disk_path = address.split(':', 1) + if user == '': + user = 'root' + # TODO: Don't assume root is the user with deploy access + deployment_hosts[arch] = VMHost(user, address, disk_path) + + build_test_config = (self.settings['trove-host'], + self.settings['trove-id'], + self.settings['build-ref-prefix']) + + if len(args) != 1: + raise cliapp.AppException('Usage: release-test CLUSTER') + cluster_path = morphlib.util.sanitise_morphology_path(args[0]) + self.deploy_and_test_systems(cluster_path, deployment_hosts, + build_test_config, + self.settings['net-id']) + + +if __name__ == '__main__': + ReleaseApp().run() -- cgit v1.2.1