diff options
1 files changed, 673 insertions, 0 deletions
diff --git a/ b/
new file mode 100644
index 0000000..980ba81
--- /dev/null
+++ b/
@@ -0,0 +1,673 @@
+# Copyright (C) 2014 Codethink Limited
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''
+Baserock test suite for Trove upgrade
+
+Trove is the most complex system that we currently have in Baserock. Therefore
+we test the Baserock toolset's upgrade functionality using a Trove system.
+
+This will one day be a MUSTARD Loom Yarn.
+
+This test must be run on a Baserock devel system, which has passwordless SSH
+access to the KVM host specified as 'DEPLOY_URL' below. The tests will deploy a
+VM named 'brtests-$host', so a single KVM host can be used by multiple test
+machines, as long as each test machine has only one test running at a time.
+
+Ideas for improvement:
+ - use a dedicated SSH library for Python instead of running the 'ssh'
+   command-line client
+
+How to fit this into Yarn:
+ - I don't know! We need to pass state in a totally different way (via the
+   environment).
+ - Should be enough to break the tests into function calls that save state
+   via pickle or the environment. An annoying extra layer of indirection on
+   an already complex codebase, though.
+
+Helpful advice:
+ - There is a '--reuse-fixture' option which reuses 'GIVEN' state for a
+   test suite instead of deploying a new VM (which takes ~5 minutes).
+ - Deployment is broken into separate create_config(), run_build() and
+   run_deploy() steps, so that you can comment out calls to one or more of
+   these when iterating over a specific test.
+'''
+import cliapp
+import contextlib
+import os
+import shutil
+import socket
+import subprocess
+import sys
+import tempfile
+import time
+import urlparse
+import yaml
+# The test host must have passwordless access to this machine. The tests set
+# StrictHostKeyChecking=no for SSH connections, so it does not need to be in
+# '~/.ssh/known_hosts'.
+# The value is concatenated directly with the VM name to build the deployment
+# location, and its network-location part is used as the SSH target, so it
+# must have the form 'kvm+ssh://[user@]host/' (including the trailing '/').
+DEPLOY_URL = 'kvm+ssh://'
+
+# Directory on the KVM host in which the VM disk image is created. It is
+# concatenated between the VM name and the image file name, so it must begin
+# and end with '/'.
+# NOTE(review): this name is read by TroveInitialDeployment.create_config()
+# but no definition was present in the file; the value below is a placeholder
+# and must be set to a directory that exists on the deploy host.
+DEPLOY_PATH = '/var/lib/libvirt/images/'
+
+# Seconds to wait for machine to appear on network before assuming it didn't
+# boot or connect to network successfully.
+# NOTE(review): this name is used as a default argument in BaseTestSuite but
+# no definition was present in the file; confirm that the value suits the
+# deploy host.
+BOOT_TIMEOUT = 20
+
+# FIXME: building should automatically use the version of Morph from the system
+# branch, really ... but for now, if the installed Morph can't build
+# baserock:baserock/morphs 'master' branch, you can change this!
+MORPH_COMMAND = ['/src/morph/morph', '--no-git-update']
+#MORPH_COMMAND = '/src/morph/morph'
+#MORPH_COMMAND = 'morph'
+
+# Parent directory for the temporary Morph workspaces created by the runner.
+BUILD_TEMPDIR = '/src/tmp'
+
+# Ref in baserock:baserock/morphs that test system branches are created from.
+#BRANCH = 'master'
+BRANCH = 'baserock/sam/trove-upgrades'
+
+# For debugging. FIXME: would be better if cliapp's logging mechanism supported
+# logging to stdout, but .... :(
+def remote_runcmd(url, command, **kwargs):
+    '''
+    Execute a command on machine 'url' and return what cliapp.runcmd() returns.
+
+    'url' must use the 'ssh' or 'kvm+ssh' scheme; its network-location part
+    ('[user@]host') is used as the SSH target.
+
+    Command must be a list of arguments, not a single string. Each argument
+    is shell-quoted before being sent, so that the remote shell passes it
+    through as exactly one argument.
+
+    Extra keyword arguments are passed on to cliapp.runcmd().
+
+    Raises NotImplementedError for any other URL scheme.
+
+    FIXME: perhaps this functionality should be merged into cliapp.ssh_runcmd()
+    so that we can use that instead.
+    '''
+    # Imported locally so that this function does not depend on a change to
+    # the module-level imports.
+    import pipes
+
+    print "%s: %s" % (url, ' '.join(command))
+    url = urlparse.urlsplit(url)
+    if url[0] not in ['ssh', 'kvm+ssh']:
+        raise NotImplementedError("Remote machine must be an ssh:// URL")
+
+    ssh_host = url[1]
+    ssh_cmd = ['ssh']
+    # The identity of the newly-created test machine will never be in
+    # '~/.ssh/known_hosts'; this switch avoids seeing the 'do you want to
+    # connect' prompt that SSH would normally present in this situation.
+    ssh_cmd.extend(['-o', 'StrictHostKeyChecking=no'])
+
+    # ssh hands a single string to the remote shell, which splits it into
+    # words again. Without quoting, an argument containing whitespace or
+    # shell metacharacters would be split or interpreted remotely.
+    remote_command = ' '.join(pipes.quote(arg) for arg in command)
+    return cliapp.runcmd(ssh_cmd + [ssh_host, remote_command], **kwargs)
+def run_morph(args, **kwargs):
+    '''
+    Run Morph on the current machine.
+
+    'args' is the list of arguments appended to MORPH_COMMAND (which may be
+    a list or a space-separated string). The full command line is printed
+    before it is run. Unless the caller supplies 'stdout' or 'stderr', both
+    are directed to sys.stdout. Other keyword arguments are passed on to
+    cliapp.runcmd(), whose result is returned.
+    '''
+    base_command = MORPH_COMMAND
+    if isinstance(base_command, str):
+        base_command = base_command.split(' ')
+    full_command = base_command + args
+    print ' '.join(full_command)
+
+    # stderr deliberately defaults to sys.stdout as well, so that all of
+    # Morph's output ends up in a single stream.
+    for stream_name in ('stdout', 'stderr'):
+        kwargs.setdefault(stream_name, sys.stdout)
+    return cliapp.runcmd(full_command, **kwargs)
+def run_git(args, **kwargs):
+    '''
+    Run 'git' with the argument list 'args' through cliapp.runcmd().
+
+    Keyword arguments (such as 'cwd') are passed on unchanged, and the
+    result of cliapp.runcmd() is returned.
+    '''
+    git_command = ['git'] + args
+    return cliapp.runcmd(git_command, **kwargs)
+def read_file(file_path):
+    '''
+    Return the entire contents of the text file at 'file_path'.
+
+    Errors from open() (for example, a missing file) propagate to the caller.
+    '''
+    with open(file_path, 'r') as f:
+        # A bare 'return' here would hand None to callers, which pass the
+        # result straight to yaml.load().
+        return f.read()
+def write_file(file_path, text):
+    '''
+    Write 'text' to 'file_path', creating the file or replacing its contents.
+    '''
+    handle = open(file_path, 'w')
+    try:
+        handle.write(text)
+    finally:
+        handle.close()
+class Deployment(object):
+    '''
+    Base class for context of an initial deployment or upgrade.
+
+    Creating config, building the system and doing the deployment are
+    deliberately separated because building and deploying are currently
+    slow operations even when repeating an identical build or deployment,
+    so it's often necessary during development to comment out these steps
+    so that the edit-test cycle is not impossibly long.
+
+    Attributes:
+      branch             system branch object; its 'branch_dir' and
+                         'morphs_dir' attributes are used
+      vm_name            name of the VM, also used as the deployment label
+      system_morph_name  name of the system morphology to build
+      deploy_morph_name  name of the cluster morphology to deploy
+    '''
+
+    def __init__(self, system_morph_name, deploy_morph_name, systembranch,
+                 vm_name):
+        self.branch = systembranch
+        self.vm_name = vm_name
+        self.system_morph_name = system_morph_name
+        self.deploy_morph_name = deploy_morph_name
+
+    def create_config(self):
+        '''Write the deployment configuration; the base class does nothing.'''
+        pass
+
+    def run_build(self):
+        '''Run 'morph build' for the system morphology in the branch.'''
+        run_morph(
+            ['build', self.system_morph_name], cwd=self.branch.branch_dir)
+
+    def run_deploy(self):
+        '''Run 'morph deploy' for the cluster, passing AUTOSTART=yes.'''
+        run_morph(
+            ['deploy', self.deploy_morph_name,
+             '%s.AUTOSTART=yes' % self.vm_name],
+            cwd=self.branch.branch_dir)
+
+    def create_ssh_key(self, key_name):
+        '''
+        Generate a passphrase-less 2048-bit RSA key pair with ssh-keygen.
+
+        The private key is written to '<key_name>.key' in the branch's
+        morphs directory; ssh-keygen writes the public key next to it with
+        '.pub' appended.
+        '''
+        file_path = os.path.join(self.branch.morphs_dir, '%s.key' % key_name)
+
+        # The '-N ""' is very important here: otherwise ssh-keygen will block
+        # waiting for input. If you try to pass this command to cliapp.runcmd()
+        # as a list it will mangle the quotes.
+        comment = "Generated by Baserock automated tests for '%s'" % key_name
+        keygen_cmd = 'ssh-keygen -N "" -t rsa -b 2048 -f %s -C "%s"' % \
+            (file_path, comment)
+        cliapp.runcmd(['sh', '-c', keygen_cmd])
+
+    def ensure_configure_extension_enabled(self, extension_name):
+        '''
+        Add 'extension_name' to the system morphology's
+        'configuration-extensions' list unless it is already there.
+
+        The morphology file is only rewritten when the list changes.
+        '''
+        system_morph = os.path.join(
+            self.branch.morphs_dir, '%s.morph' % self.system_morph_name)
+
+        # NOTE(review): yaml.load() can construct arbitrary Python objects.
+        # The input is a morphology from the local system branch checkout;
+        # consider yaml.safe_load() if it can ever come from elsewhere.
+        morph = yaml.load(read_file(system_morph))
+
+        # A system morphology without any configuration extensions has no
+        # such key at all, so create the list on demand.
+        extensions = morph.setdefault('configuration-extensions', [])
+        if extension_name not in extensions:
+            extensions.append(extension_name)
+            write_file(system_morph, yaml.dump(morph))
+
+    def add_root_ssh_authorized_key(self, public_key_text):
+        '''
+        Arrange for 'public_key_text' to be installed as root's
+        authorized_keys file in the deployed system.
+
+        Enables the 'install-files' configuration extension, writes the key
+        under '<deploy_morph_name>-files/root/.ssh/' in the morphs directory
+        and appends the matching entries to that directory's 'manifest'.
+
+        os.makedirs() raises OSError if the '.ssh' directory already exists.
+        '''
+        self.ensure_configure_extension_enabled('install-files')
+        deploy_files_dir = os.path.join(
+            self.branch.morphs_dir, '%s-files' % self.deploy_morph_name)
+        root_ssh_dir = os.path.join(deploy_files_dir, 'root', '.ssh')
+        os.makedirs(root_ssh_dir)
+        with open(os.path.join(root_ssh_dir, 'authorized_keys'), 'w') as f:
+            f.write("# Added by Baserock automated test runner\n")
+            f.write(public_key_text)
+
+        # Append: 'wa' is not a valid mode, and where the C library accepts
+        # it the file is truncated, which would discard any manifest entries
+        # that are already present.
+        with open(os.path.join(deploy_files_dir, 'manifest'), 'a') as f:
+            f.write('0040700 0 0 /root/.ssh/\n')
+            f.write('0100644 0 0 /root/.ssh/authorized_keys\n')
+class TroveInitialDeployment(Deployment):
+    '''
+    Wraps creating necessary files for a Trove deployment.
+
+    Attribute 'deploy_morph_name' contains the value to be passed to
+    'morph deploy'. Once create_config() has run, attribute 'admin_id' is
+    the path of the private SSH key whose public half is authorised for
+    root on the deployed system.
+    '''
+
+    def __init__(self, systembranch, vm_name):
+        super(TroveInitialDeployment, self).__init__(
+            'trove-system-x86_64', 'trove-test-deploy', systembranch,
+            vm_name)
+
+    def create_config(self, initial_deploy_type='kvm'):
+        '''
+        Generate SSH keys and the cluster morphology for the deployment.
+
+        Only 'kvm' is implemented as 'initial_deploy_type'; anything else
+        raises NotImplementedError.
+        '''
+        self.create_ssh_key('lorry')
+        self.create_ssh_key('mason')
+        self.create_ssh_key('worker')
+        self.create_ssh_key('testuser')
+
+        if initial_deploy_type == 'kvm':
+            # The pieces are concatenated without separators, so DEPLOY_URL
+            # and DEPLOY_PATH have to supply the '/' characters themselves.
+            deploy_location = ''.join(
+                [DEPLOY_URL, self.vm_name, DEPLOY_PATH,
+                 '%s.img' % self.vm_name])
+        else:
+            raise NotImplementedError()
+
+        self.create_trove_deployment_morph(
+            deploy_type=initial_deploy_type, location=deploy_location)
+
+        # ssh-keygen stores the public half of 'testuser.key' alongside it
+        # with '.pub' appended.
+        testuser_public_key = read_file(
+            os.path.join(self.branch.morphs_dir, 'testuser.key.pub'))
+        self.add_root_ssh_authorized_key(testuser_public_key)
+
+        self.admin_id = os.path.join(self.branch.morphs_dir, 'testuser.key')
+
+    def create_trove_deployment_morph(self, deploy_type=None, location=None):
+        '''
+        Write '<deploy_morph_name>.morph', a cluster morphology that deploys
+        the Trove system once, labelled with the VM name.
+        '''
+        trove_config = dict(
+            type=deploy_type,
+            location=location,
+            VERSION_LABEL='trove-old',
+            INSTALL_FILES='%s-files/manifest' % self.deploy_morph_name,
+            TROVE_ID=self.vm_name,
+            TROVE_COMPANY='Codethink',
+            TROVE_ADMIN_USER='testuser',
+            TROVE_ADMIN_NAME='Automated Test Gitano Admin User',
+            LORRY_SSH_KEY='lorry.key',
+        )
+
+        cluster_morph = dict(
+            name=self.deploy_morph_name,
+            kind='cluster',
+            description='Generated by Baserock automated tests',
+            systems=[
+                dict(
+                    morph=self.system_morph_name,
+                    deploy={
+                        self.vm_name: trove_config
+                    }
+                )
+            ]
+        )
+
+        file_path = os.path.join(
+            self.branch.morphs_dir, '%s.morph' % self.deploy_morph_name)
+        write_file(file_path, yaml.dump(cluster_morph))
+class TroveUpgrade(Deployment):
+    '''
+    Wraps creating the files needed to upgrade an already-deployed Trove.
+
+    The configuration is derived from that of the initial deployment, which
+    is passed to create_config().
+    '''
+
+    def __init__(self, systembranch, vm_name):
+        super(TroveUpgrade, self).__init__(
+            'trove-system-x86_64', 'trove-test-upgrade', systembranch,
+            vm_name)
+
+    def create_config(self, initial_deployment, upgrade_method='ssh-rsync'):
+        '''
+        Create the upgrade cluster morphology in this deployment's branch.
+
+        Only 'ssh-rsync' is implemented as 'upgrade_method'; anything else
+        raises NotImplementedError.
+        '''
+        self.ensure_configure_extension_enabled('install-files')
+
+        if upgrade_method == 'ssh-rsync':
+            location = 'root@%s' % self.vm_name
+        else:
+            raise NotImplementedError()
+
+        self.create_trove_upgrade_morph(
+            initial_deployment,
+            upgrade_method=upgrade_method, location=location)
+
+    def create_trove_upgrade_morph(
+            self, initial_deployment, upgrade_method=None, location=None):
+        '''
+        Copy the initial deployment's keys, files directory and cluster
+        morphology into this branch, then retarget the morphology at the
+        upgrade method and location.
+
+        FIXME: this is totally wrong!
+
+        Instead of having to provide exactly the config that the initial
+        deployment used, we should avoid configuration extensions for upgrades
+        entirely and propagate the deploy-time configuration using
+        baserock-system-config-sync.
+        '''
+        source_dir = initial_deployment.branch.morphs_dir
+        target_dir = self.branch.morphs_dir
+
+        def copy_file_from_initial_deployment(filename, dest_filename=None):
+            src = os.path.join(source_dir, filename)
+            dest = os.path.join(target_dir, dest_filename or filename)
+            shutil.copyfile(src, dest)
+
+        def copy_dir_from_initial_deployment(dirname):
+            src = os.path.join(source_dir, dirname)
+            dest = os.path.join(target_dir, dirname)
+            shutil.copytree(src, dest)
+
+        # Each key pair is two files: the private key and, with '.pub'
+        # appended, the public key that ssh-keygen wrote beside it.
+        for key in ['testuser', 'lorry', 'worker', 'mason']:
+            copy_file_from_initial_deployment('%s.key' % key)
+            copy_file_from_initial_deployment('%s.key.pub' % key)
+
+        copy_file_from_initial_deployment(
+            '%s.morph' % initial_deployment.deploy_morph_name,
+            '%s.morph' % self.deploy_morph_name)
+
+        copy_dir_from_initial_deployment(
+            '%s-files' % initial_deployment.deploy_morph_name)
+
+        deploy_morph_file = os.path.join(
+            target_dir, '%s.morph' % self.deploy_morph_name)
+
+        # NOTE(review): yaml.load() can construct arbitrary Python objects.
+        # The input is the file copied above from the initial deployment's
+        # branch; consider yaml.safe_load() if it can ever come from
+        # elsewhere.
+        deploy_morph = yaml.load(read_file(deploy_morph_file))
+
+        deploy_morph['name'] = self.deploy_morph_name
+        system_config = deploy_morph['systems'][0]['deploy'][self.vm_name]
+        system_config['type'] = upgrade_method
+        system_config['location'] = location
+        system_config['VERSION_LABEL'] = 'trove-current'
+
+        write_file(deploy_morph_file, yaml.dump(deploy_morph))
+class SystemTestBranch(object):
+    '''
+    Paths belonging to a system branch inside a Morph workspace.
+
+    Attributes:
+      workspace_dir  the workspace directory, as given
+      branch_dir     '<workspace_dir>/<name>'
+      morphs_dir     '<branch_dir>/baserock:baserock/morphs'
+    '''
+
+    def __init__(self, workspace_dir, name):
+        branch_path = os.path.join(workspace_dir, name)
+        morphs_path = os.path.join(
+            branch_path, 'baserock:baserock', 'morphs')
+
+        self.workspace_dir = workspace_dir
+        self.branch_dir = branch_path
+        self.morphs_dir = morphs_path
+class TestInitialDeployment(object):
+ '''
+ FIXME: this is out of date! Make it use the 'BaseTestSuite' class instead!
+ '''
+ # NOTE(review): this class reads self.settings and calls
+ # self.create_system_branch(), neither of which it defines, and it refers
+ # to a 'TroveDeployment' class that is not defined in the visible part of
+ # this file. It presumably cannot run as written -- confirm before use.
+ def initial_deploy(self, branch, vm_name, **deploy_kwargs):
+ '''
+ Initial deployment of trove-system-x86_64 to a newly-created VM.
+ Returns a context with the following things tied to it:
+ - the VM itself (FIXME: isn't actually deleted when the context
+ exits)
+ - an SSH identity added to the machine's SSH agent that provides
+ root access to the deployed VM
+ '''
+ # NOTE(review): the visible code only creates 'trove_deploy'; no return
+ # or yield statement is present, so the description above looks like
+ # the intended behaviour rather than the current one.
+ # NOTE(review): the runner in this file registers a 'reuse-fixture'
+ # setting; no 'reuse-workspace' setting is registered in the visible
+ # code.
+ if self.settings['reuse-workspace'] is not None:
+ # Hack to reuse an existing workspace and running VM because `morph
+ # deploy` currently takes several minutes.
+ class ReuseTroveDeploy(TroveDeployment):
+ def __init__(self, systembranch):
+ self.branch = systembranch
+ self.set_conveniences()
+ trove_deploy = ReuseTroveDeploy(branch)
+ else:
+ trove_deploy = TroveDeployment(branch, vm_name, **deploy_kwargs)
+ def test_rawdisk_upgrade(self, workspace_dir):
+ # Builds trove-system-x86_64 in a new 'testbranch' system branch and
+ # deploys it to 'deployed-system.img' in the workspace directory.
+ branch = self.create_system_branch(workspace_dir, 'testbranch')
+ # Script should:
+ # - deploy trove
+ # - apply patch in system branch
+ # - deploy trove as an upgrade
+ # FIXME: doesn't perform an upgrade, yet. Should we keep the rawdisk
+ # upgrade path?
+ run_morph(
+ ['build', 'trove-system-x86_64'], cwd=branch.branch_dir)
+ image_path = os.path.join(
+ branch.workspace_dir, 'deployed-system.img')
+ trove_deploy = TroveDeployment(
+ branch, deploy_type='rawdisk', location=image_path)
+ run_morph(['deploy', trove_deploy.deploy_morph_name], cwd=branch.branch_dir)
+class TimeoutError(Exception):
+    '''Raised when something waited for did not happen within its timeout.'''
+class BaseTestSuite(object):
+    '''
+    Helpers shared by test suites: waiting for a deployed machine to come up
+    and creating system branches.
+    '''
+
+    def wait_for_hostname_to_appear(self, hostname, timeout=10):
+        '''
+        Block until given hostname resolves successfully.
+
+        Returns the number of seconds that were spent waiting. Resolution is
+        retried every half second.
+
+        Raises TimeoutError if the hostname has not appeared in 'timeout'
+        seconds.
+        '''
+        began = time.time()
+        deadline = began + timeout
+        while True:
+            try:
+                socket.gethostbyname(hostname)
+            except socket.gaierror:
+                # Not resolvable yet; fall through to the deadline check.
+                pass
+            else:
+                return time.time() - began
+            if time.time() > deadline:
+                raise TimeoutError(
+                    "Host %s did not appear after %i seconds" %
+                    (hostname, timeout))
+            time.sleep(0.5)
+
+    def wait_for_ssh(self, host_url, timeout=BOOT_TIMEOUT, **kwargs):
+        '''
+        Block until 'whoami' can be run on 'host_url' over SSH.
+
+        The command is retried every half second. Once 'timeout' seconds
+        have passed, the cliapp.AppException from the failed attempt is
+        re-raised. Extra keyword arguments go to remote_runcmd().
+        '''
+        print "Waiting for machine to respond over SSH ..."
+        deadline = time.time() + timeout
+        while True:
+            try:
+                print remote_runcmd(host_url, ['whoami'], **kwargs)
+                return
+            except cliapp.AppException:
+                # Before the deadline, assume that this is because sshd
+                # hasn't started yet and try again.
+                if time.time() >= deadline:
+                    print("Waited > %s seconds for host %s to respond over "
+                          "SSH" % (timeout, host_url))
+                    raise
+            time.sleep(0.5)
+
+    def wait_for_machine_to_boot(self, instance):
+        '''
+        Wait for the VM of 'instance' to resolve by name and then to accept
+        SSH connections as root, within BOOT_TIMEOUT seconds overall.
+        '''
+        hostname = instance.vm_name
+        wait_time = self.wait_for_hostname_to_appear(
+            hostname, timeout=BOOT_TIMEOUT)
+        print "Host %s appeared after %0.1f seconds" % \
+            (hostname, wait_time)
+
+        # Remove machine from 'known_hosts', as its identity has probably
+        # changed.
+        cliapp.runcmd(['ssh-keygen', '-R', hostname])
+
+        # Whatever is left of the boot timeout is the budget for sshd.
+        remaining = BOOT_TIMEOUT-wait_time
+        self.wait_for_ssh('ssh://root@%s/' % hostname, timeout=remaining)
+
+    def create_system_branch(self, workspace_dir, name, parent=BRANCH):
+        '''
+        Run 'morph branch' to create system branch 'name' from 'parent' of
+        baserock:baserock/morphs inside 'workspace_dir'.
+
+        Returns a SystemTestBranch describing the new branch.
+        '''
+        branch_args = ['branch', 'baserock:baserock/morphs', name, parent]
+        run_morph(branch_args, cwd=workspace_dir)
+        return SystemTestBranch(workspace_dir, name)
+class TestUpgrades(BaseTestSuite):
+    '''Scenario that upgrades a deliberately out-of-date Trove system.'''
+
+    def set_lighttpd_version(self, branch, tag='baserock/morph'):
+        '''
+        Use 'morph edit' and 'git reset' to force a specific lighttpd version.
+
+        The change that 'morph edit' makes to 'trove.morph' is committed in
+        the morphs repository, then the lighttpd checkout is hard-reset to
+        'tag'.
+        '''
+        morphs_repo = branch.morphs_dir
+        run_morph(['edit', 'trove-system-x86_64', 'trove', 'lighttpd'],
+                  cwd=morphs_repo)
+        run_git(['add', 'trove.morph'], cwd=morphs_repo)
+        run_git(['commit', '-m', 'Edit lighttpd chunk'], cwd=morphs_repo)
+
+        # Set the system branch's corresponding Git branch in the chunk repo
+        # to the specific ref. This chunk doesn't have a chunk morphology so
+        # there's no further work to do!
+        lighttpd_repo = os.path.join(branch.branch_dir, 'upstream:lighttpd')
+        run_git(['reset', '--hard', tag], cwd=lighttpd_repo)
+
+    @contextlib.contextmanager
+    def given_out_of_date_trove_instance(self, vm_name, fixture_dir, reuse=False):
+        '''
+        GIVEN a running current Trove system but with lighttpd version 1.3.14
+
+        Yields the TroveInitialDeployment once the machine has booted. The
+        admin SSH identity is added to the SSH agent for the lifetime of the
+        context. With 'reuse' set, the existing 'old' branch in
+        'fixture_dir' is used and nothing is built or deployed.
+        '''
+        if not reuse:
+            branch = self.create_system_branch(fixture_dir, 'old')
+            self.set_lighttpd_version(branch, tag='lighttpd-1.3.14')
+            instance = TroveInitialDeployment(branch, vm_name)
+            instance.create_config()
+            instance.run_build()
+            instance.run_deploy()
+        else:
+            branch = SystemTestBranch(fixture_dir, 'old')
+            instance = TroveInitialDeployment(branch, vm_name)
+            # create_config() is not run here, so point at the key that the
+            # earlier run left in the fixture.
+            instance.admin_id = os.path.join(
+                branch.morphs_dir, 'testuser.key')
+
+        admin_key = instance.admin_id
+        cliapp.runcmd(['ssh-add', admin_key])
+        try:
+            self.wait_for_machine_to_boot(instance)
+            yield instance
+        finally:
+            # Should pass the .pub file really ...
+            cliapp.runcmd(['ssh-add', '-d', admin_key])
+
+    def test_scenario_trove_upgrade(self,
+            vm_name, fixture_dir, workspace_dir, reuse_fixture=False):
+        '''
+        We want to be able to upgrade an old Trove system to the latest
+        Trove system. While in the real world the user would only want to
+        deploy a released Trove system, that would preclude using this test in
+        continuous integration as we would only notice breakages *after* we
+        had made a release, and the tests would need to be updated for every
+        release. Better to test that deploying 'master' of Trove still works.
+
+        We artificially create an 'out of date' Trove system because we need
+        to know what to test for (and there is only one public release of
+        Trove at the time of writing). This is more useful than just deploying
+        an upgrade and assuming that if there were no errors from the
+        Baserock deployment tool then it must have been successful.
+
+        Lighttpd is used in the test because it triggers very few rebuilds.
+
+        SCENARIO Bob upgrades his Trove (vague version)
+            GIVEN a running out-of-date Trove system
+            WHEN Bob builds and upgrades to the current version of Trove
+            THEN the Trove is at the new version
+
+        SCENARIO Bob upgrades his Trove (specific version)
+            GIVEN a running current Trove system but with lighttpd version 1.3.14
+            WHEN Bob upgrades to the current version of Trove and sets it to
+                 autostart immediately
+            THEN the Trove uses a newer version of lighttpd than 1.3.14
+
+        The 'lighttpd -v' output from before and after the upgrade is
+        printed; it is not compared.
+        '''
+        fixture = self.given_out_of_date_trove_instance(
+            vm_name, fixture_dir, reuse=reuse_fixture)
+        with fixture as instance:
+            current_branch = self.create_system_branch(
+                workspace_dir, 'current')
+
+            root_url = 'ssh://root@%s/' % vm_name
+            lighttpd_version_cmd = ['lighttpd', '-v']
+
+            old_lighttpd_output = remote_runcmd(root_url, lighttpd_version_cmd)
+            print "Lighttpd outout: %s" % old_lighttpd_output
+
+            upgrade = TroveUpgrade(current_branch, vm_name)
+            upgrade.create_config(instance, upgrade_method='ssh-rsync')
+            upgrade.run_build()
+            upgrade.run_deploy()
+
+            # FIXME: AUTOSTART=yes should do this
+            try:
+                remote_runcmd(root_url, ['reboot'])
+            except cliapp.AppException:
+                # Bit of a hack because we get disconnect before the command
+                # exits so SSH returns failure.
+                pass
+
+            self.wait_for_machine_to_boot(instance)
+
+            new_lighttpd_output = remote_runcmd(root_url, lighttpd_version_cmd)
+            for report in ("OLD Lighttpd outout: %s" % old_lighttpd_output,
+                           "NEW Lighttpd outout: %s" % new_lighttpd_output):
+                print report
+
+        # We have a machine!
+        # Initial tests to run:
+        # check system metadata against what should have been built
+        # Trove tests:
+        # check you can lorry something
+        # check you can 'git pull' something
+        # check you can issue Gitano commands ...
+        # perhaps crib from Gitano test suite
+class SimpleTestRunner(cliapp.Application):
+    '''
+    Run a Baserock system test suite.
+
+    There is a test suite-wide Morph workspace provided, which should be shared
+    by all prerequisites ('GIVEN') implementations. This is called the
+    'fixture_dir'. Multiple GIVEN implementations in a single test suite should
+    use differently-named system branches to avoid conflicting with each other.
+    It is up to the test suite's GIVEN implementations to deal with a
+    directory that already contains their data without failing.
+
+    Each test gets another, 'workspace_dir'. This is per-test and should be used
+    for the 'WHEN' implementations.
+
+    Since initial deployments currently take several minutes it is very useful
+    to reuse 'GIVEN' state instead of recreating it each time when working on a
+    specific test suite.
+    '''
+
+    def check_access_to_deploy_host(self):
+        '''
+        Check that the host named in DEPLOY_URL accepts SSH logins without
+        a password.
+
+        Raises cliapp.AppException if DEPLOY_URL is not a kvm+ssh:// URL or
+        if 'whoami' cannot be run on the host non-interactively.
+        '''
+        deploy_url = urlparse.urlsplit(DEPLOY_URL)
+        # An explicit check instead of 'assert', which is stripped when
+        # Python runs with -O.
+        if deploy_url[0] != 'kvm+ssh':
+            raise cliapp.AppException(
+                "DEPLOY_URL must be a kvm+ssh:// URL, not '%s'" % DEPLOY_URL)
+        try:
+            # NumberOfPasswordPrompts=0 stops ssh from asking for a password
+            # on the terminal, so the login fails instead.
+            cliapp.runcmd(
+                ['ssh', '-o', 'NumberOfPasswordPrompts=0', deploy_url[1],
+                 'whoami'])
+        except cliapp.AppException:
+            raise cliapp.AppException(
+                "No passwordless access to deploy host '%s'. Check the SSH "
+                "authorized keys for the remote account." % deploy_url[1])
+
+    def maybe_delete_vm(self, vm_name):
+        '''
+        Destroy and undefine VM 'vm_name' on the deploy host, ignoring any
+        failure of either command.
+        '''
+        # FIXME: Would be better if this would check if the machine was running
+        # before destroying it, and checked if it existed before undefining
+        # it, rather than just ignoring exceptions.
+        def run_virsh(args):
+            try:
+                remote_runcmd(
+                    DEPLOY_URL, ['virsh', '-c', 'qemu:///system'] + args)
+            except cliapp.AppException:
+                pass
+
+        # Act on the VM that the caller named, not on self.vm_name.
+        run_virsh(['destroy', vm_name])
+        run_virsh(['undefine', vm_name])
+
+    def add_settings(self):
+        '''Register the 'reuse-fixture' command-line setting.'''
+        self.settings.string(
+            ['reuse-fixture', 'r'],
+            'use an existing deployment from a test fixture instead of '
+            'building a clean one, to avoid repeating a slow morph deploy',
+            metavar='DIR',
+            default=None)
+
+    def process_args(self, args):
+        '''
+        Entry point called by cliapp: check access to the deploy host,
+        remove any previous test VM unless a fixture is being reused, then
+        run the test.
+        '''
+        self.check_access_to_deploy_host()
+
+        self.vm_name = 'brtests-%s' % (socket.gethostname())
+
+        if self.settings['reuse-fixture'] is None:
+            self.maybe_delete_vm(self.vm_name)
+
+        self.run_test()
+
+    def run_test(self):
+        '''
+        Run the Trove upgrade scenario in freshly-created Morph workspaces.
+
+        If the test raises, the exception is printed and a pdb post-mortem
+        session is started; the exception is re-raised afterwards so that
+        the failure is still reported. The fixture and workspace directories
+        are left in place and their paths are printed.
+        '''
+        test = TestUpgrades().test_scenario_trove_upgrade
+
+        reuse_dir = self.settings['reuse-fixture']
+        reuse_fixture = reuse_dir is not None
+
+        if reuse_fixture:
+            fixture_dir = reuse_dir
+        else:
+            fixture_dir = tempfile.mkdtemp(dir=BUILD_TEMPDIR)
+            run_morph(['init', fixture_dir])
+
+        try:
+            print "Running %s" % test
+
+            workspace_dir = tempfile.mkdtemp(dir=BUILD_TEMPDIR)
+            try:
+                run_morph(['init', workspace_dir])
+                test(self.vm_name, fixture_dir, workspace_dir,
+                     reuse_fixture=reuse_fixture)
+            finally:
+                print "Workspace kept in %s" % workspace_dir
+                #cliapp.runcmd(['rm', '-r', workspace_dir])
+        except Exception as e:
+            import pdb
+            print 'Exception: ', e
+            pdb.post_mortem(sys.exc_info()[2])
+            # Without this the runner would finish as if the test had
+            # passed.
+            raise
+        finally:
+            # Careful now!
+            print "Fixture kept in %s" % fixture_dir
+            #cliapp.runcmd(['rm', '-r', fixture_dir])
+# Run the test suite only when this file is executed as a script.
+if __name__ == '__main__':
+    runner = SimpleTestRunner()
+    runner.run()