diff options
author | Matt Clay <matt@mystile.com> | 2022-11-29 13:35:53 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-29 13:35:53 -0800 |
commit | cda16cc5e9aa8703fb4e1ac0a0be6b631d9076cc (patch) | |
tree | 0a84f8b7bff9cad558c0c988b5ce3e8119d4037a /test/integration/targets/ansible-test-container/runme.py | |
parent | 3bda4eae6f1273a42f14b3dedc0d4f5928b290f6 (diff) | |
download | ansible-cda16cc5e9aa8703fb4e1ac0a0be6b631d9076cc.tar.gz |
ansible-test - Improve container management. (#78550)
See changelogs/fragments/ansible-test-container-management.yml for details.
Diffstat (limited to 'test/integration/targets/ansible-test-container/runme.py')
-rwxr-xr-x | test/integration/targets/ansible-test-container/runme.py | 1041 |
1 files changed, 1041 insertions, 0 deletions
diff --git a/test/integration/targets/ansible-test-container/runme.py b/test/integration/targets/ansible-test-container/runme.py new file mode 100755 index 0000000000..1577b62286 --- /dev/null +++ b/test/integration/targets/ansible-test-container/runme.py @@ -0,0 +1,1041 @@ +#!/usr/bin/env python +"""Test suite used to verify ansible-test is able to run its containers on various container hosts.""" + +from __future__ import annotations + +import abc +import dataclasses +import datetime +import errno +import functools +import json +import os +import pathlib +import pwd +import re +import secrets +import shlex +import shutil +import signal +import subprocess +import sys +import time +import typing as t + +UNPRIVILEGED_USER_NAME = 'ansible-test' +CGROUP_SYSTEMD = pathlib.Path('/sys/fs/cgroup/systemd') +LOG_PATH = pathlib.Path('/tmp/results') + +# The value of /proc/*/loginuid when it is not set. +# It is a reserved UID, which is the maximum 32-bit unsigned integer value. +# See: https://access.redhat.com/solutions/25404 +LOGINUID_NOT_SET = 4294967295 + +UID = os.getuid() + +try: + LOGINUID = int(pathlib.Path('/proc/self/loginuid').read_text()) + LOGINUID_MISMATCH = LOGINUID != LOGINUID_NOT_SET and LOGINUID != UID +except FileNotFoundError: + LOGINUID = None + LOGINUID_MISMATCH = False + + +def main() -> None: + """Main program entry point.""" + display.section('Startup check') + + try: + bootstrap_type = pathlib.Path('/etc/ansible-test.bootstrap').read_text().strip() + except FileNotFoundError: + bootstrap_type = 'undefined' + + display.info(f'Bootstrap type: {bootstrap_type}') + + if bootstrap_type != 'remote': + display.warning('Skipping destructive test on system which is not an ansible-test remote provisioned instance.') + return + + display.info(f'UID: {UID} / {LOGINUID}') + + if UID != 0: + raise Exception('This test must be run as root.') + + if not LOGINUID_MISMATCH: + if LOGINUID is None: + display.warning('Tests involving loginuid mismatch will be skipped on this host since it does not have audit support.') + elif LOGINUID == LOGINUID_NOT_SET: + display.warning('Tests involving loginuid mismatch will be skipped on this host since it is not set.') + elif LOGINUID == 0: + raise Exception('Use sudo, su, etc. as a non-root user to become root before running this test.') + else: + raise Exception() + + display.section(f'Bootstrapping {os_release}') + + bootstrapper = Bootstrapper.init() + bootstrapper.run() + + result_dir = LOG_PATH + + if result_dir.exists(): + shutil.rmtree(result_dir) + + result_dir.mkdir() + result_dir.chmod(0o777) + + scenarios = get_test_scenarios() + results = [run_test(scenario) for scenario in scenarios] + error_total = 0 + + for name in sorted(result_dir.glob('*.log')): + lines = name.read_text().strip().splitlines() + error_count = len([line for line in lines if line.startswith('FAIL: ')]) + error_total += error_count + + display.section(f'Log ({error_count=}/{len(lines)}): {name.name}') + + for line in lines: + if line.startswith('FAIL: '): + display.show(line, display.RED) + else: + display.show(line) + + error_count = len([result for result in results if result.message]) + error_total += error_count + + duration = datetime.timedelta(seconds=int(sum(result.duration.total_seconds() for result in results))) + + display.section(f'Test Results ({error_count=}/{len(results)}) [{duration}]') + + for result in results: + notes = f' <cleanup: {", ".join(result.cleanup)}>' if result.cleanup else '' + + if result.cgroup_dirs: + notes += f' <cgroup_dirs: {len(result.cgroup_dirs)}>' + + notes += f' [{result.duration}]' + + if result.message: + display.show(f'FAIL: {result.scenario} {result.message}{notes}', display.RED) + elif result.duration.total_seconds() >= 90: + display.show(f'SLOW: {result.scenario}{notes}', display.YELLOW) + else: + display.show(f'PASS: {result.scenario}{notes}') + + if error_total: + sys.exit(1) + + +def get_test_scenarios() -> list[TestScenario]: + """Generate and return a list of test scenarios.""" + + supported_engines = ('docker', 'podman') + available_engines = [engine for engine in supported_engines if shutil.which(engine)] + + if not available_engines: + raise ApplicationError(f'No supported container engines found: {", ".join(supported_engines)}') + + completion_lines = pathlib.Path(os.environ['PYTHONPATH'], '../test/lib/ansible_test/_data/completion/docker.txt').read_text().splitlines() + + # TODO: consider including testing for the collection default image + entries = {name: value for name, value in (parse_completion_entry(line) for line in completion_lines) if name != 'default'} + + unprivileged_user = User.get(UNPRIVILEGED_USER_NAME) + + scenarios: list[TestScenario] = [] + + for container_name, settings in entries.items(): + image = settings['image'] + cgroup = settings.get('cgroup', 'v1-v2') + + for engine in available_engines: + # TODO: figure out how to get tests passing using docker without disabling selinux + disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none' + expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1 + + if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd(): + expose_cgroup_v1 = True # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support + + user_scenarios = [ + # TODO: test rootless docker + UserScenario(ssh=unprivileged_user), + ] + + if engine == 'podman': + user_scenarios.append(UserScenario(ssh=ROOT_USER)) + + # TODO: test podman remote on Alpine and Ubuntu hosts + # TODO: combine remote with ssh using different unprivileged users + if os_release.id not in ('alpine', 'ubuntu'): + user_scenarios.append(UserScenario(remote=unprivileged_user)) + + if LOGINUID_MISMATCH: + user_scenarios.append(UserScenario()) + + for user_scenario in user_scenarios: + scenarios.append( + TestScenario( + user_scenario=user_scenario, + engine=engine, + container_name=container_name, + image=image, + disable_selinux=disable_selinux, + expose_cgroup_v1=expose_cgroup_v1, + ) + ) + + return scenarios + + +def run_test(scenario: TestScenario) -> TestResult: + """Run a test scenario and return the test results.""" + display.section(f'Testing {scenario} Started') + + start = time.monotonic() + + integration = ['ansible-test', 'integration', 'split'] + integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v', '--dev-probe-cgroups', str(LOG_PATH), + '--dev-systemd-debug'] + + commands = [ + [*integration, *integration_options], + # For the split test we'll use alpine3 as the controller. There are two reasons for this: + # 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that. + # 2) It doesn't require disabling selinux, so we can test a target that doesn't need that. + [*integration, '--controller', 'docker:alpine3', *integration_options], + ] + + common_env: dict[str, str] = {} + test_env: dict[str, str] = {} + + if scenario.engine == 'podman': + if scenario.user_scenario.remote: + common_env.update( + # Podman 4.3.0 has a regression which requires a port for remote connections to work. + # See: https://github.com/containers/podman/issues/16509 + CONTAINER_HOST=f'ssh://{scenario.user_scenario.remote.name}@localhost:22' + f'/run/user/{scenario.user_scenario.remote.pwnam.pw_uid}/podman/podman.sock', + CONTAINER_SSHKEY=str(pathlib.Path('~/.ssh/id_rsa').expanduser()), # TODO: add support for ssh + remote when the ssh user is not root + ) + + test_env.update(ANSIBLE_TEST_PREFER_PODMAN='1') + + test_env.update(common_env) + + if scenario.user_scenario.ssh: + client_become_cmd = ['ssh', f'{scenario.user_scenario.ssh.name}@localhost'] + test_commands = [client_become_cmd + [f'cd ~/ansible; {format_env(test_env)}{sys.executable} bin/{shlex.join(command)}'] for command in commands] + else: + client_become_cmd = ['sh', '-c'] + test_commands = [client_become_cmd + [f'{format_env(test_env)}{shlex.join(command)}'] for command in commands] + + prime_storage_command = [] + + if scenario.engine == 'podman' and scenario.user_scenario.actual.name == UNPRIVILEGED_USER_NAME: + # When testing podman we need to make sure that the overlay filesystem is used instead of vfs. + # Using the vfs filesystem will result in running out of disk space during the tests. + # To change the filesystem used, the existing storage directory must be removed before "priming" the storage database. + # + # Without this change the following message may be displayed: + # + # User-selected graph driver "overlay" overwritten by graph driver "vfs" from database - delete libpod local files to resolve + # + # However, with this change it may be replaced with the following message: + # + # User-selected graph driver "vfs" overwritten by graph driver "overlay" from database - delete libpod local files to resolve + + actual_become_cmd = ['ssh', f'{scenario.user_scenario.actual.name}@localhost'] + prime_storage_command = actual_become_cmd + prepare_prime_podman_storage() + + message = '' + + if scenario.expose_cgroup_v1: + prepare_cgroup_systemd(scenario.user_scenario.actual.name, scenario.engine) + + try: + if prime_storage_command: + retry_command(lambda: run_command(*prime_storage_command)) + + if scenario.disable_selinux: + run_command('setenforce', 'permissive') + + for test_command in test_commands: + retry_command(lambda: run_command(*test_command)) + except SubprocessError as ex: + message = str(ex) + display.error(f'{scenario} {message}') + finally: + if scenario.disable_selinux: + run_command('setenforce', 'enforcing') + + if scenario.expose_cgroup_v1: + dirs = remove_cgroup_systemd() + else: + dirs = list_group_systemd() + + cleanup_command = [scenario.engine, 'rmi', '-f', scenario.image] + + try: + retry_command(lambda: run_command(*client_become_cmd + [f'{format_env(common_env)}{shlex.join(cleanup_command)}'])) + except SubprocessError as ex: + display.error(str(ex)) + + cleanup = cleanup_podman() if scenario.engine == 'podman' else tuple() + + finish = time.monotonic() + duration = datetime.timedelta(seconds=int(finish - start)) + + display.section(f'Testing {scenario} Completed in {duration}') + + return TestResult( + scenario=scenario, + message=message, + cleanup=cleanup, + duration=duration, + cgroup_dirs=tuple(str(path) for path in dirs), + ) + + +def prepare_prime_podman_storage() -> list[str]: + """Partially prime podman storage and return a command to complete the remainder.""" + prime_storage_command = ['rm -rf ~/.local/share/containers; STORAGE_DRIVER=overlay podman pull quay.io/bedrock/alpine:3.16.2'] + + test_containers = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local/share/containers').expanduser() + + if test_containers.is_dir(): + # First remove the directory as root, since the user may not have permissions on all the files. + # The directory will be removed again after login, before initializing the database. + rmtree(test_containers) + + return prime_storage_command + + +def cleanup_podman() -> tuple[str, ...]: + """Cleanup podman processes and files on disk.""" + cleanup = [] + + for remaining in range(3, -1, -1): + processes = [(int(item[0]), item[1]) for item in + [item.split(maxsplit=1) for item in run_command('ps', '-A', '-o', 'pid,comm', capture=True).stdout.splitlines()] + if pathlib.Path(item[1].split()[0]).name in ('catatonit', 'podman', 'conmon')] + + if not processes: + break + + for pid, name in processes: + display.info(f'Killing "{name}" ({pid}) ...') + + try: + os.kill(pid, signal.SIGTERM if remaining > 1 else signal.SIGKILL) + except ProcessLookupError: + pass + + cleanup.append(name) + + time.sleep(1) + else: + raise Exception('failed to kill all matching processes') + + uid = pwd.getpwnam(UNPRIVILEGED_USER_NAME).pw_uid + + container_tmp = pathlib.Path(f'/tmp/containers-user-{uid}') + podman_tmp = pathlib.Path(f'/tmp/podman-run-{uid}') + + user_config = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.config').expanduser() + user_local = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local').expanduser() + + if container_tmp.is_dir(): + rmtree(container_tmp) + + if podman_tmp.is_dir(): + rmtree(podman_tmp) + + if user_config.is_dir(): + rmtree(user_config) + + if user_local.is_dir(): + rmtree(user_local) + + return tuple(sorted(set(cleanup))) + + +def have_cgroup_systemd() -> bool: + """Return True if the container host has a systemd cgroup.""" + return pathlib.Path(CGROUP_SYSTEMD).is_dir() + + +def prepare_cgroup_systemd(username: str, engine: str) -> None: + """Prepare the systemd cgroup.""" + CGROUP_SYSTEMD.mkdir() + + run_command('mount', 'cgroup', '-t', 'cgroup', str(CGROUP_SYSTEMD), '-o', 'none,name=systemd,xattr', capture=True) + + if engine == 'podman': + run_command('chown', '-R', f'{username}:{username}', str(CGROUP_SYSTEMD)) + + run_command('find', str(CGROUP_SYSTEMD), '-type', 'd', '-exec', 'ls', '-l', '{}', ';') + + +def list_group_systemd() -> list[pathlib.Path]: + """List the systemd cgroup.""" + dirs = set() + + for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False): + for dirname in dirnames: + target_path = pathlib.Path(dirpath, dirname) + display.info(f'dir: {target_path}') + dirs.add(target_path) + + return sorted(dirs) + + +def remove_cgroup_systemd() -> list[pathlib.Path]: + """Remove the systemd cgroup.""" + dirs = set() + + for sleep_seconds in range(1, 10): + try: + for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False): + for dirname in dirnames: + target_path = pathlib.Path(dirpath, dirname) + display.info(f'rmdir: {target_path}') + dirs.add(target_path) + target_path.rmdir() + except OSError as ex: + if ex.errno != errno.EBUSY: + raise + + error = str(ex) + else: + break + + display.warning(f'{error} -- sleeping for {sleep_seconds} second(s) before trying again ...') # pylint: disable=used-before-assignment + + time.sleep(sleep_seconds) + + time.sleep(1) # allow time for cgroups to be fully removed before unmounting + + run_command('umount', str(CGROUP_SYSTEMD)) + + CGROUP_SYSTEMD.rmdir() + + time.sleep(1) # allow time for cgroup hierarchy to be removed after unmounting + + cgroup = pathlib.Path('/proc/self/cgroup').read_text() + + if 'systemd' in cgroup: + raise Exception('systemd hierarchy detected') + + return sorted(dirs) + + +def rmtree(path: pathlib.Path) -> None: + """Wrapper around shutil.rmtree with additional error handling.""" + for retries in range(10, -1, -1): + try: + display.info(f'rmtree: {path} ({retries} attempts remaining) ... ') + shutil.rmtree(path) + except Exception: + if not path.exists(): + display.info(f'rmtree: {path} (not found)') + return + + if not path.is_dir(): + display.info(f'rmtree: {path} (not a directory)') + return + + if retries: + continue + + raise + else: + display.info(f'rmtree: {path} (done)') + return + + +def format_env(env: dict[str, str]) -> str: + """Format an env dict for injection into a shell command and return the resulting string.""" + if env: + return ' '.join(f'{shlex.quote(key)}={shlex.quote(value)}' for key, value in env.items()) + ' ' + + return '' + + +class DockerInfo: + """The results of `docker info` for the container runtime.""" + + def __init__(self, data: dict[str, t.Any]) -> None: + self.data = data + + @property + def cgroup_version(self) -> int: + """The cgroup version of the container host.""" + data = self.data + host = data.get('host') + + if host: + version = int(host['cgroupVersion'].lstrip('v')) # podman + else: + version = int(data['CgroupVersion']) # docker + + return version + + +@functools.lru_cache +def get_docker_info(engine: str) -> DockerInfo: + """Return info for the current container runtime. The results are cached.""" + return DockerInfo(json.loads(run_command(engine, 'info', '--format', '{{ json . }}', capture=True).stdout)) + + +@dataclasses.dataclass(frozen=True) +class User: + name: str + pwnam: pwd.struct_passwd + + @classmethod + def get(cls, name: str) -> User: + return User( + name=name, + pwnam=pwd.getpwnam(name), + ) + + +@dataclasses.dataclass(frozen=True) +class UserScenario: + ssh: User = None + remote: User = None + + @property + def actual(self) -> User: + return self.remote or self.ssh or ROOT_USER + + +@dataclasses.dataclass(frozen=True) +class TestScenario: + user_scenario: UserScenario + engine: str + container_name: str + image: str + disable_selinux: bool + expose_cgroup_v1: bool + + @property + def tags(self) -> tuple[str, ...]: + tags = [] + + if self.user_scenario.ssh: + tags.append(f'ssh: {self.user_scenario.ssh.name}') + + if self.user_scenario.remote: + tags.append(f'remote: {self.user_scenario.remote.name}') + + if self.disable_selinux: + tags.append('selinux: permissive') + + if self.expose_cgroup_v1: + tags.append('cgroup: v1') + + return tuple(tags) + + @property + def tag_label(self) -> str: + return ' '.join(f'[{tag}]' for tag in self.tags) + + def __str__(self): + return f'[{self.container_name}] ({self.engine}) {self.tag_label}'.strip() + + +@dataclasses.dataclass(frozen=True) +class TestResult: + scenario: TestScenario + message: str + cleanup: tuple[str, ...] + duration: datetime.timedelta + cgroup_dirs: tuple[str, ...] + + +def parse_completion_entry(value: str) -> tuple[str, dict[str, str]]: + """Parse the given completion entry, returning the entry name and a dictionary of key/value settings.""" + values = value.split() + + name = values[0] + data = {kvp[0]: kvp[1] if len(kvp) > 1 else '' for kvp in [item.split('=', 1) for item in values[1:]]} + + return name, data + + +@dataclasses.dataclass(frozen=True) +class SubprocessResult: + """Result from execution of a subprocess.""" + + command: list[str] + stdout: str + stderr: str + status: int + + +class ApplicationError(Exception): + """An application error.""" + + def __init__(self, message: str) -> None: + self.message = message + + super().__init__(message) + + +class SubprocessError(ApplicationError): + """An error from executing a subprocess.""" + + def __init__(self, result: SubprocessResult) -> None: + self.result = result + + message = f'Command `{shlex.join(result.command)}` exited with status: {result.status}' + + stdout = (result.stdout or '').strip() + stderr = (result.stderr or '').strip() + + if stdout: + message += f'\n>>> Standard Output\n{stdout}' + + if stderr: + message += f'\n>>> Standard Error\n{stderr}' + + super().__init__(message) + + +class ProgramNotFoundError(ApplicationError): + """A required program was not found.""" + + def __init__(self, name: str) -> None: + self.name = name + + super().__init__(f'Missing program: {name}') + + +class Display: + """Display interface for sending output to the console.""" + + CLEAR = '\033[0m' + RED = '\033[31m' + GREEN = '\033[32m' + YELLOW = '\033[33m' + BLUE = '\033[34m' + PURPLE = '\033[35m' + CYAN = '\033[36m' + + def __init__(self) -> None: + self.sensitive: set[str] = set() + + def section(self, message: str) -> None: + """Print a section message to the console.""" + self.show(f'==> {message}', color=self.BLUE) + + def subsection(self, message: str) -> None: + """Print a subsection message to the console.""" + self.show(f'--> {message}', color=self.CYAN) + + def fatal(self, message: str) -> None: + """Print a fatal message to the console.""" + self.show(f'FATAL: {message}', color=self.RED) + + def error(self, message: str) -> None: + """Print an error message to the console.""" + self.show(f'ERROR: {message}', color=self.RED) + + def warning(self, message: str) -> None: + """Print a warning message to the console.""" + self.show(f'WARNING: {message}', color=self.PURPLE) + + def info(self, message: str) -> None: + """Print an info message to the console.""" + self.show(f'INFO: {message}', color=self.YELLOW) + + def show(self, message: str, color: str | None = None) -> None: + """Print a message to the console.""" + for item in self.sensitive: + message = message.replace(item, '*' * len(item)) + + print(f'{color or self.CLEAR}{message}{self.CLEAR}', flush=True) + + +def run_module( + module: str, + args: dict[str, t.Any], +) -> SubprocessResult: + """Run the specified Ansible module and return the result.""" + return run_command('ansible', '-m', module, '-v', '-a', json.dumps(args), 'localhost') + + +def retry_command(func: t.Callable[[], SubprocessResult], attempts: int = 3) -> SubprocessResult: + """Run the given command function up to the specified number of attempts when the failure is due to an SSH error.""" + for attempts_remaining in range(attempts - 1, -1, -1): + try: + return func() + except SubprocessError as ex: + if ex.result.command[0] == 'ssh' and ex.result.status == 255 and attempts_remaining: + # SSH connections on our Ubuntu 22.04 host sometimes fail for unknown reasons. + # This retry should allow the test suite to continue, maintaining CI stability. + # TODO: Figure out why local SSH connections sometimes fail during the test run. + display.warning('Command failed due to an SSH error. Waiting a few seconds before retrying.') + time.sleep(3) + continue + + raise + + +def run_command( + *command: str, + data: str | None = None, + stdin: int | t.IO[bytes] | None = None, + env: dict[str, str] | None = None, + capture: bool = False, +) -> SubprocessResult: + """Run the specified command and return the result.""" + stdin = subprocess.PIPE if data else stdin or subprocess.DEVNULL + stdout = subprocess.PIPE if capture else None + stderr = subprocess.PIPE if capture else None + + display.subsection(f'Run command: {shlex.join(command)}') + + try: + with subprocess.Popen(args=command, stdin=stdin, stdout=stdout, stderr=stderr, env=env, text=True) as process: + process_stdout, process_stderr = process.communicate(data) + process_status = process.returncode + except FileNotFoundError: + raise ProgramNotFoundError(command[0]) from None + + result = SubprocessResult( + command=list(command), + stdout=process_stdout, + stderr=process_stderr, + status=process_status, + ) + + if process.returncode != 0: + raise SubprocessError(result) + + return result + + +class Bootstrapper(metaclass=abc.ABCMeta): + """Bootstrapper for remote instances.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return False + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return False + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return False + + @classmethod + def init(cls) -> t.Type[Bootstrapper]: + """Return a bootstrapper type appropriate for the current system.""" + for bootstrapper in cls.__subclasses__(): + if bootstrapper.usable(): + return bootstrapper + + display.warning('No supported bootstrapper found.') + return Bootstrapper + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + cls.configure_root_user() + cls.configure_unprivileged_user() + cls.configure_source_trees() + cls.configure_ssh_keys() + cls.configure_podman_remote() + + @classmethod + def configure_root_user(cls) -> None: + """Configure the root user to run tests.""" + root_password_status = run_command('passwd', '--status', 'root', capture=True) + root_password_set = root_password_status.stdout.split()[1] + + if root_password_set not in ('P', 'PS'): + root_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip() + + run_module( + 'user', + dict( + user='root', + password=root_password, + ), + ) + + @classmethod + def configure_unprivileged_user(cls) -> None: + """Configure the unprivileged user to run tests.""" + unprivileged_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip() + + run_module( + 'user', + dict( + user=UNPRIVILEGED_USER_NAME, + password=unprivileged_password, + groups=['docker'] if cls.install_docker() else [], + append=True, + ), + ) + + if os_release.id == 'alpine': + # Most distros handle this automatically, but not Alpine. + # See: https://www.redhat.com/sysadmin/rootless-podman + start = 165535 + end = start + 65535 + id_range = f'{start}-{end}' + + run_command( + 'usermod', + '--add-subuids', + id_range, + '--add-subgids', + id_range, + UNPRIVILEGED_USER_NAME, + ) + + @classmethod + def configure_source_trees(cls): + """Configure the source trees needed to run tests for both root and the unprivileged user.""" + current_ansible = pathlib.Path(os.environ['PYTHONPATH']).parent + + root_ansible = pathlib.Path('~').expanduser() / 'ansible' + test_ansible = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}').expanduser() / 'ansible' + + if current_ansible != root_ansible: + display.info(f'copying {current_ansible} -> {root_ansible} ...') + rmtree(root_ansible) + shutil.copytree(current_ansible, root_ansible) + run_command('chown', '-R', 'root:root', str(root_ansible)) + + display.info(f'copying {current_ansible} -> {test_ansible} ...') + rmtree(test_ansible) + shutil.copytree(current_ansible, test_ansible) + run_command('chown', '-R', f'{UNPRIVILEGED_USER_NAME}:{UNPRIVILEGED_USER_NAME}', str(test_ansible)) + + paths = [pathlib.Path(test_ansible)] + + for root, dir_names, file_names in os.walk(test_ansible): + paths.extend(pathlib.Path(root, dir_name) for dir_name in dir_names) + paths.extend(pathlib.Path(root, file_name) for file_name in file_names) + + user = pwd.getpwnam(UNPRIVILEGED_USER_NAME) + uid = user.pw_uid + gid = user.pw_gid + + for path in paths: + os.chown(path, uid, gid) + + @classmethod + def configure_ssh_keys(cls) -> None: + """Configure SSH keys needed to run tests.""" + user = pwd.getpwnam(UNPRIVILEGED_USER_NAME) + uid = user.pw_uid + gid = user.pw_gid + + current_rsa_pub = pathlib.Path('~/.ssh/id_rsa.pub').expanduser() + + test_authorized_keys = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.ssh/authorized_keys').expanduser() + + test_authorized_keys.parent.mkdir(mode=0o755, parents=True, exist_ok=True) + os.chown(test_authorized_keys.parent, uid, gid) + + shutil.copyfile(current_rsa_pub, test_authorized_keys) + os.chown(test_authorized_keys, uid, gid) + test_authorized_keys.chmod(mode=0o644) + + @classmethod + def configure_podman_remote(cls) -> None: + """Configure podman remote support.""" + # TODO: figure out how to support remote podman without systemd (Alpine) + # TODO: figure out how to support remote podman on Ubuntu + if os_release.id in ('alpine', 'ubuntu'): + return + + # Support podman remote on any host with systemd available. + retry_command(lambda: run_command('ssh', f'{UNPRIVILEGED_USER_NAME}@localhost', 'systemctl', '--user', 'enable', '--now', 'podman.socket')) + run_command('loginctl', 'enable-linger', UNPRIVILEGED_USER_NAME) + + +class DnfBootstrapper(Bootstrapper): + """Bootstrapper for dnf based systems.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return True + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return os_release.id != 'rhel' + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return bool(shutil.which('dnf')) + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + # NOTE: Install crun to make it available to podman, otherwise installing moby-engine can cause podman to use runc instead. + packages = ['podman', 'crun'] + + if cls.install_docker(): + packages.append('moby-engine') + + if os_release.id == 'fedora' and os_release.version_id == '36': + # In Fedora 36 the current version of netavark, 1.2.0, causes TCP connect to hang between rootfull containers. + # The previously tested version, 1.1.0, did not have this issue. + # Unfortunately, with the release of 1.2.0 the 1.1.0 package was removed from the repositories. + # Thankfully the 1.0.2 version is available and also works, so we'll use that here until a fixed version is available. + # See: https://github.com/containers/netavark/issues/491 + packages.append('netavark-1.0.2') + + if os_release.id == 'rhel': + # As of the release of RHEL 9.1, installing podman on RHEL 9.0 results in a non-fatal error at install time: + # + # libsemanage.semanage_pipe_data: Child process /usr/libexec/selinux/hll/pp failed with code: 255. (No such file or directory). + # container: libsepol.policydb_read: policydb module version 21 does not match my version range 4-20 + # container: libsepol.sepol_module_package_read: invalid module in module package (at section 0) + # container: Failed to read policy package + # libsemanage.semanage_direct_commit: Failed to compile hll files into cil files. + # (No such file or directory). + # /usr/sbin/semodule: Failed! + # + # Unfortunately this is then fatal when running podman, resulting in no error message and a 127 return code. + # The solution is to update the policycoreutils package *before* installing podman. + # + # NOTE: This work-around can probably be removed once we're testing on RHEL 9.1, as the updated packages should already be installed. + # Unfortunately at this time there is no RHEL 9.1 AMI available (other than the Beta release). + + run_command('dnf', 'update', '-y', 'policycoreutils') + + run_command('dnf', 'install', '-y', *packages) + + if cls.install_docker(): + run_command('systemctl', 'start', 'docker') + + if os_release.id == 'rhel' and os_release.version_id.startswith('8.'): + # RHEL 8 defaults to using runc instead of crun. + # Unfortunately runc seems to have issues with podman remote. + # Specifically, it tends to cause conmon to burn CPU until it reaches the specified exit delay. + # So we'll just change the system default to crun instead. + # Unfortunately we can't do this with the `--runtime` option since that doesn't work with podman remote. + + conf = pathlib.Path('/usr/share/containers/containers.conf').read_text() + + conf = re.sub('^runtime .*', 'runtime = "crun"', conf, flags=re.MULTILINE) + + pathlib.Path('/etc/containers/containers.conf').write_text(conf) + + super().run() + + +class AptBootstrapper(Bootstrapper): + """Bootstrapper for apt based systems.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return not (os_release.id == 'ubuntu' and os_release.version_id == '20.04') + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return True + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return bool(shutil.which('apt-get')) + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + apt_env = os.environ.copy() + apt_env.update( + DEBIAN_FRONTEND='noninteractive', + ) + + packages = ['docker.io'] + + if cls.install_podman(): + # NOTE: Install crun to make it available to podman, otherwise installing docker.io can cause podman to use runc instead. + # Using podman rootless requires the `newuidmap` and `slirp4netns` commands. + packages.extend(('podman', 'crun', 'uidmap', 'slirp4netns')) + + run_command('apt-get', 'install', *packages, '-y', '--no-install-recommends', env=apt_env) + + super().run() + + +class ApkBootstrapper(Bootstrapper): + """Bootstrapper for apk based systems.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return True + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return True + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return bool(shutil.which('apk')) + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + # The `openssl` package is used to generate hashed passwords. + packages = ['docker', 'podman', 'openssl'] + + run_command('apk', 'add', *packages) + run_command('service', 'docker', 'start') + run_command('modprobe', 'tun') + + super().run() + + +@dataclasses.dataclass(frozen=True) +class OsRelease: + """Operating system identification.""" + + id: str + version_id: str + + @staticmethod + def init() -> OsRelease: + """Detect the current OS release and return the result.""" + lines = run_command('sh', '-c', '. /etc/os-release && echo $ID && echo $VERSION_ID', capture=True).stdout.splitlines() + + result = OsRelease( + id=lines[0], + version_id=lines[1], + ) + + display.show(f'Detected OS "{result.id}" version "{result.version_id}".') + + return result + + +display = Display() +os_release = OsRelease.init() + +ROOT_USER = User.get('root') + +if __name__ == '__main__': + main() |