ansible-test - Improve container management. (#78550)

See changelogs/fragments/ansible-test-container-management.yml for details.
author: Matt Clay <matt@mystile.com> 2022-11-29 13:35:53 -0800
committer: GitHub <noreply@github.com> 2022-11-29 13:35:53 -0800
commit: cda16cc5e9aa8703fb4e1ac0a0be6b631d9076cc (patch)
tree: 0a84f8b7bff9cad558c0c988b5ce3e8119d4037a /test/integration/targets/ansible-test-container/runme.py
parent: 3bda4eae6f1273a42f14b3dedc0d4f5928b290f6 (diff)
download: ansible-cda16cc5e9aa8703fb4e1ac0a0be6b631d9076cc.tar.gz
1 files changed, 1041 insertions, 0 deletions
diff --git a/test/integration/targets/ansible-test-container/runme.py b/test/integration/targets/ansible-test-container/runme.py
new file mode 100755
index 0000000000..1577b62286
--- /dev/null
+++ b/test/integration/targets/ansible-test-container/runme.py
@@ -0,0 +1,1041 @@
+#!/usr/bin/env python
+"""Test suite used to verify ansible-test is able to run its containers on various container hosts."""
+
+from __future__ import annotations
+
+import abc
+import dataclasses
+import datetime
+import errno
+import functools
+import json
+import os
+import pathlib
+import pwd
+import re
+import secrets
+import shlex
+import shutil
+import signal
+import subprocess
+import sys
+import time
+import typing as t
+
+UNPRIVILEGED_USER_NAME = 'ansible-test'
+CGROUP_SYSTEMD = pathlib.Path('/sys/fs/cgroup/systemd')
+LOG_PATH = pathlib.Path('/tmp/results')
+
+# The value of /proc/*/loginuid when it is not set.
+# It is a reserved UID, which is the maximum 32-bit unsigned integer value.
+# See: https://access.redhat.com/solutions/25404
+LOGINUID_NOT_SET = 4294967295
+
+UID = os.getuid()
+
+try:
+    LOGINUID = int(pathlib.Path('/proc/self/loginuid').read_text())
+    LOGINUID_MISMATCH = LOGINUID != LOGINUID_NOT_SET and LOGINUID != UID
+except FileNotFoundError:
+    LOGINUID = None
+    LOGINUID_MISMATCH = False
+
+
+def main() -> None:
+    """Main program entry point."""
+    display.section('Startup check')
+
+    try:
+        bootstrap_type = pathlib.Path('/etc/ansible-test.bootstrap').read_text().strip()
+    except FileNotFoundError:
+        bootstrap_type = 'undefined'
+
+    display.info(f'Bootstrap type: {bootstrap_type}')
+
+    if bootstrap_type != 'remote':
+        display.warning('Skipping destructive test on system which is not an ansible-test remote provisioned instance.')
+        return
+
+    display.info(f'UID: {UID} / {LOGINUID}')
+
+    if UID != 0:
+        raise Exception('This test must be run as root.')
+
+    if not LOGINUID_MISMATCH:
+        if LOGINUID is None:
+            display.warning('Tests involving loginuid mismatch will be skipped on this host since it does not have audit support.')
+        elif LOGINUID == LOGINUID_NOT_SET:
+            display.warning('Tests involving loginuid mismatch will be skipped on this host since it is not set.')
+        elif LOGINUID == 0:
+            raise Exception('Use sudo, su, etc. as a non-root user to become root before running this test.')
+        else:
+            raise Exception()
+
+    display.section(f'Bootstrapping {os_release}')
+
+    bootstrapper = Bootstrapper.init()
+    bootstrapper.run()
+
+    result_dir = LOG_PATH
+
+    if result_dir.exists():
+        shutil.rmtree(result_dir)
+
+    result_dir.mkdir()
+    result_dir.chmod(0o777)
+
+    scenarios = get_test_scenarios()
+    results = [run_test(scenario) for scenario in scenarios]
+    error_total = 0
+
+    for name in sorted(result_dir.glob('*.log')):
+        lines = name.read_text().strip().splitlines()
+        error_count = len([line for line in lines if line.startswith('FAIL: ')])
+        error_total += error_count
+
+        display.section(f'Log ({error_count=}/{len(lines)}): {name.name}')
+
+        for line in lines:
+            if line.startswith('FAIL: '):
+                display.show(line, display.RED)
+            else:
+                display.show(line)
+
+    error_count = len([result for result in results if result.message])
+    error_total += error_count
+
+    duration = datetime.timedelta(seconds=int(sum(result.duration.total_seconds() for result in results)))
+
+    display.section(f'Test Results ({error_count=}/{len(results)}) [{duration}]')
+
+    for result in results:
+        notes = f' <cleanup: {", ".join(result.cleanup)}>' if result.cleanup else ''
+
+        if result.cgroup_dirs:
+            notes += f' <cgroup_dirs: {len(result.cgroup_dirs)}>'
+
+        notes += f' [{result.duration}]'
+
+        if result.message:
+            display.show(f'FAIL: {result.scenario} {result.message}{notes}', display.RED)
+        elif result.duration.total_seconds() >= 90:
+            display.show(f'SLOW: {result.scenario}{notes}', display.YELLOW)
+        else:
+            display.show(f'PASS: {result.scenario}{notes}')
+
+    if error_total:
+        sys.exit(1)
+
+
+def get_test_scenarios() -> list[TestScenario]:
+    """Generate and return a list of test scenarios."""
+
+    supported_engines = ('docker', 'podman')
+    available_engines = [engine for engine in supported_engines if shutil.which(engine)]
+
+    if not available_engines:
+        raise ApplicationError(f'No supported container engines found: {", ".join(supported_engines)}')
+
+    completion_lines = pathlib.Path(os.environ['PYTHONPATH'], '../test/lib/ansible_test/_data/completion/docker.txt').read_text().splitlines()
+
+    # TODO: consider including testing for the collection default image
+    entries = {name: value for name, value in (parse_completion_entry(line) for line in completion_lines) if name != 'default'}
+
+    unprivileged_user = User.get(UNPRIVILEGED_USER_NAME)
+
+    scenarios: list[TestScenario] = []
+
+    for container_name, settings in entries.items():
+        image = settings['image']
+        cgroup = settings.get('cgroup', 'v1-v2')
+
+        for engine in available_engines:
+            # TODO: figure out how to get tests passing using docker without disabling selinux
+            disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none'
+            expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1
+
+            if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd():
+                expose_cgroup_v1 = True  # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support
+
+            user_scenarios = [
+                # TODO: test rootless docker
+                UserScenario(ssh=unprivileged_user),
+            ]
+
+            if engine == 'podman':
+                user_scenarios.append(UserScenario(ssh=ROOT_USER))
+
+                # TODO: test podman remote on Alpine and Ubuntu hosts
+                # TODO: combine remote with ssh using different unprivileged users
+                if os_release.id not in ('alpine', 'ubuntu'):
+                    user_scenarios.append(UserScenario(remote=unprivileged_user))
+
+                if LOGINUID_MISMATCH:
+                    user_scenarios.append(UserScenario())
+
+            for user_scenario in user_scenarios:
+                scenarios.append(
+                    TestScenario(
+                        user_scenario=user_scenario,
+                        engine=engine,
+                        container_name=container_name,
+                        image=image,
+                        disable_selinux=disable_selinux,
+                        expose_cgroup_v1=expose_cgroup_v1,
+                    )
+                )
+
+    return scenarios
+
+
+def run_test(scenario: TestScenario) -> TestResult:
+    """Run a test scenario and return the test results."""
+    display.section(f'Testing {scenario} Started')
+
+    start = time.monotonic()
+
+    integration = ['ansible-test', 'integration', 'split']
+    integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v', '--dev-probe-cgroups', str(LOG_PATH),
+                           '--dev-systemd-debug']
+
+    commands = [
+        [*integration, *integration_options],
+        # For the split test we'll use alpine3 as the controller. There are two reasons for this:
+        # 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that.
+        # 2) It doesn't require disabling selinux, so we can test a target that doesn't need that.
+        [*integration, '--controller', 'docker:alpine3', *integration_options],
+    ]
+
+    common_env: dict[str, str] = {}
+    test_env: dict[str, str] = {}
+
+    if scenario.engine == 'podman':
+        if scenario.user_scenario.remote:
+            common_env.update(
+                # Podman 4.3.0 has a regression which requires a port for remote connections to work.
+                # See: https://github.com/containers/podman/issues/16509
+                CONTAINER_HOST=f'ssh://{scenario.user_scenario.remote.name}@localhost:22'
+                               f'/run/user/{scenario.user_scenario.remote.pwnam.pw_uid}/podman/podman.sock',
+                CONTAINER_SSHKEY=str(pathlib.Path('~/.ssh/id_rsa').expanduser()),  # TODO: add support for ssh + remote when the ssh user is not root
+            )
+
+        test_env.update(ANSIBLE_TEST_PREFER_PODMAN='1')
+
+    test_env.update(common_env)
+
+    if scenario.user_scenario.ssh:
+        client_become_cmd = ['ssh', f'{scenario.user_scenario.ssh.name}@localhost']
+        test_commands = [client_become_cmd + [f'cd ~/ansible; {format_env(test_env)}{sys.executable} bin/{shlex.join(command)}'] for command in commands]
+    else:
+        client_become_cmd = ['sh', '-c']
+        test_commands = [client_become_cmd + [f'{format_env(test_env)}{shlex.join(command)}'] for command in commands]
+
+    prime_storage_command = []
+
+    if scenario.engine == 'podman' and scenario.user_scenario.actual.name == UNPRIVILEGED_USER_NAME:
+        # When testing podman we need to make sure that the overlay filesystem is used instead of vfs.
+        # Using the vfs filesystem will result in running out of disk space during the tests.
+        # To change the filesystem used, the existing storage directory must be removed before "priming" the storage database.
+        #
+        # Without this change the following message may be displayed:
+        #
+        #   User-selected graph driver "overlay" overwritten by graph driver "vfs" from database - delete libpod local files to resolve
+        #
+        # However, with this change it may be replaced with the following message:
+        #
+        #   User-selected graph driver "vfs" overwritten by graph driver "overlay" from database - delete libpod local files to resolve
+
+        actual_become_cmd = ['ssh', f'{scenario.user_scenario.actual.name}@localhost']
+        prime_storage_command = actual_become_cmd + prepare_prime_podman_storage()
+
+    message = ''
+
+    if scenario.expose_cgroup_v1:
+        prepare_cgroup_systemd(scenario.user_scenario.actual.name, scenario.engine)
+
+    try:
+        if prime_storage_command:
+            retry_command(lambda: run_command(*prime_storage_command))
+
+        if scenario.disable_selinux:
+            run_command('setenforce', 'permissive')
+
+        for test_command in test_commands:
+            retry_command(lambda: run_command(*test_command))
+    except SubprocessError as ex:
+        message = str(ex)
+        display.error(f'{scenario} {message}')
+    finally:
+        if scenario.disable_selinux:
+            run_command('setenforce', 'enforcing')
+
+        if scenario.expose_cgroup_v1:
+            dirs = remove_cgroup_systemd()
+        else:
+            dirs = list_group_systemd()
+
+        cleanup_command = [scenario.engine, 'rmi', '-f', scenario.image]
+
+        try:
+            retry_command(lambda: run_command(*client_become_cmd + [f'{format_env(common_env)}{shlex.join(cleanup_command)}']))
+        except SubprocessError as ex:
+            display.error(str(ex))
+
+        cleanup = cleanup_podman() if scenario.engine == 'podman' else tuple()
+
+    finish = time.monotonic()
+    duration = datetime.timedelta(seconds=int(finish - start))
+
+    display.section(f'Testing {scenario} Completed in {duration}')
+
+    return TestResult(
+        scenario=scenario,
+        message=message,
+        cleanup=cleanup,
+        duration=duration,
+        cgroup_dirs=tuple(str(path) for path in dirs),
+    )
+
+
+def prepare_prime_podman_storage() -> list[str]:
+    """Partially prime podman storage and return a command to complete the remainder."""
+    prime_storage_command = ['rm -rf ~/.local/share/containers; STORAGE_DRIVER=overlay podman pull quay.io/bedrock/alpine:3.16.2']
+
+    test_containers = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local/share/containers').expanduser()
+
+    if test_containers.is_dir():
+        # First remove the directory as root, since the user may not have permissions on all the files.
+        # The directory will be removed again after login, before initializing the database.
+        rmtree(test_containers)
+
+    return prime_storage_command
+
+
+def cleanup_podman() -> tuple[str, ...]:
+    """Cleanup podman processes and files on disk."""
+    cleanup = []
+
+    for remaining in range(3, -1, -1):
+        processes = [(int(item[0]), item[1]) for item in
+                     [item.split(maxsplit=1) for item in run_command('ps', '-A', '-o', 'pid,comm', capture=True).stdout.splitlines()]
+                     if pathlib.Path(item[1].split()[0]).name in ('catatonit', 'podman', 'conmon')]
+
+        if not processes:
+            break
+
+        for pid, name in processes:
+            display.info(f'Killing "{name}" ({pid}) ...')
+
+            try:
+                os.kill(pid, signal.SIGTERM if remaining > 1 else signal.SIGKILL)
+            except ProcessLookupError:
+                pass
+
+            cleanup.append(name)
+
+        time.sleep(1)
+    else:
+        raise Exception('failed to kill all matching processes')
+
+    uid = pwd.getpwnam(UNPRIVILEGED_USER_NAME).pw_uid
+
+    container_tmp = pathlib.Path(f'/tmp/containers-user-{uid}')
+    podman_tmp = pathlib.Path(f'/tmp/podman-run-{uid}')
+
+    user_config = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.config').expanduser()
+    user_local = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local').expanduser()
+
+    if container_tmp.is_dir():
+        rmtree(container_tmp)
+
+    if podman_tmp.is_dir():
+        rmtree(podman_tmp)
+
+    if user_config.is_dir():
+        rmtree(user_config)
+
+    if user_local.is_dir():
+        rmtree(user_local)
+
+    return tuple(sorted(set(cleanup)))
+
+
+def have_cgroup_systemd() -> bool:
+    """Return True if the container host has a systemd cgroup."""
+    return pathlib.Path(CGROUP_SYSTEMD).is_dir()
+
+
+def prepare_cgroup_systemd(username: str, engine: str) -> None:
+    """Prepare the systemd cgroup."""
+    CGROUP_SYSTEMD.mkdir()
+
+    run_command('mount', 'cgroup', '-t', 'cgroup', str(CGROUP_SYSTEMD), '-o', 'none,name=systemd,xattr', capture=True)
+
+    if engine == 'podman':
+        run_command('chown', '-R', f'{username}:{username}', str(CGROUP_SYSTEMD))
+
+    run_command('find', str(CGROUP_SYSTEMD), '-type', 'd', '-exec', 'ls', '-l', '{}', ';')
+
+
+def list_group_systemd() -> list[pathlib.Path]:
+    """List the systemd cgroup."""
+    dirs = set()
+
+    for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False):
+        for dirname in dirnames:
+            target_path = pathlib.Path(dirpath, dirname)
+            display.info(f'dir: {target_path}')
+            dirs.add(target_path)
+
+    return sorted(dirs)
+
+
+def remove_cgroup_systemd() -> list[pathlib.Path]:
+    """Remove the systemd cgroup."""
+    dirs = set()
+
+    for sleep_seconds in range(1, 10):
+        try:
+            for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False):
+                for dirname in dirnames:
+                    target_path = pathlib.Path(dirpath, dirname)
+                    display.info(f'rmdir: {target_path}')
+                    dirs.add(target_path)
+                    target_path.rmdir()
+        except OSError as ex:
+            if ex.errno != errno.EBUSY:
+                raise
+
+            error = str(ex)
+        else:
+            break
+
+        display.warning(f'{error} -- sleeping for {sleep_seconds} second(s) before trying again ...')  # pylint: disable=used-before-assignment
+
+        time.sleep(sleep_seconds)
+
+    time.sleep(1)  # allow time for cgroups to be fully removed before unmounting
+
+    run_command('umount', str(CGROUP_SYSTEMD))
+
+    CGROUP_SYSTEMD.rmdir()
+
+    time.sleep(1)  # allow time for cgroup hierarchy to be removed after unmounting
+
+    cgroup = pathlib.Path('/proc/self/cgroup').read_text()
+
+    if 'systemd' in cgroup:
+        raise Exception('systemd hierarchy detected')
+
+    return sorted(dirs)
+
+
+def rmtree(path: pathlib.Path) -> None:
+    """Wrapper around shutil.rmtree with additional error handling."""
+    for retries in range(10, -1, -1):
+        try:
+            display.info(f'rmtree: {path} ({retries} attempts remaining) ... ')
+            shutil.rmtree(path)
+        except Exception:
+            if not path.exists():
+                display.info(f'rmtree: {path} (not found)')
+                return
+
+            if not path.is_dir():
+                display.info(f'rmtree: {path} (not a directory)')
+                return
+
+            if retries:
+                continue
+
+            raise
+        else:
+            display.info(f'rmtree: {path} (done)')
+            return
+
+
+def format_env(env: dict[str, str]) -> str:
+    """Format an env dict for injection into a shell command and return the resulting string."""
+    if env:
+        return ' '.join(f'{shlex.quote(key)}={shlex.quote(value)}' for key, value in env.items()) + ' '
+
+    return ''
+
+
+class DockerInfo:
+    """The results of `docker info` for the container runtime."""
+
+    def __init__(self, data: dict[str, t.Any]) -> None:
+        self.data = data
+
+    @property
+    def cgroup_version(self) -> int:
+        """The cgroup version of the container host."""
+        data = self.data
+        host = data.get('host')
+
+        if host:
+            version = int(host['cgroupVersion'].lstrip('v'))  # podman
+        else:
+            version = int(data['CgroupVersion'])  # docker
+
+        return version
+
+
+@functools.lru_cache
+def get_docker_info(engine: str) -> DockerInfo:
+    """Return info for the current container runtime. The results are cached."""
+    return DockerInfo(json.loads(run_command(engine, 'info', '--format', '{{ json . }}', capture=True).stdout))
+
+
+@dataclasses.dataclass(frozen=True)
+class User:
+    name: str
+    pwnam: pwd.struct_passwd
+
+    @classmethod
+    def get(cls, name: str) -> User:
+        return User(
+            name=name,
+            pwnam=pwd.getpwnam(name),
+        )
+
+
+@dataclasses.dataclass(frozen=True)
+class UserScenario:
+    ssh: User = None
+    remote: User = None
+
+    @property
+    def actual(self) -> User:
+        return self.remote or self.ssh or ROOT_USER
+
+
+@dataclasses.dataclass(frozen=True)
+class TestScenario:
+    user_scenario: UserScenario
+    engine: str
+    container_name: str
+    image: str
+    disable_selinux: bool
+    expose_cgroup_v1: bool
+
+    @property
+    def tags(self) -> tuple[str, ...]:
+        tags = []
+
+        if self.user_scenario.ssh:
+            tags.append(f'ssh: {self.user_scenario.ssh.name}')
+
+        if self.user_scenario.remote:
+            tags.append(f'remote: {self.user_scenario.remote.name}')
+
+        if self.disable_selinux:
+            tags.append('selinux: permissive')
+
+        if self.expose_cgroup_v1:
+            tags.append('cgroup: v1')
+
+        return tuple(tags)
+
+    @property
+    def tag_label(self) -> str:
+        return ' '.join(f'[{tag}]' for tag in self.tags)
+
+    def __str__(self):
+        return f'[{self.container_name}] ({self.engine}) {self.tag_label}'.strip()
+
+
+@dataclasses.dataclass(frozen=True)
+class TestResult:
+    scenario: TestScenario
+    message: str
+    cleanup: tuple[str, ...]
+    duration: datetime.timedelta
+    cgroup_dirs: tuple[str, ...]
+
+
+def parse_completion_entry(value: str) -> tuple[str, dict[str, str]]:
+    """Parse the given completion entry, returning the entry name and a dictionary of key/value settings."""
+    values = value.split()
+
+    name = values[0]
+    data = {kvp[0]: kvp[1] if len(kvp) > 1 else '' for kvp in [item.split('=', 1) for item in values[1:]]}
+
+    return name, data
+
+
+@dataclasses.dataclass(frozen=True)
+class SubprocessResult:
+    """Result from execution of a subprocess."""
+
+    command: list[str]
+    stdout: str
+    stderr: str
+    status: int
+
+
+class ApplicationError(Exception):
+    """An application error."""
+
+    def __init__(self, message: str) -> None:
+        self.message = message
+
+        super().__init__(message)
+
+
+class SubprocessError(ApplicationError):
+    """An error from executing a subprocess."""
+
+    def __init__(self, result: SubprocessResult) -> None:
+        self.result = result
+
+        message = f'Command `{shlex.join(result.command)}` exited with status: {result.status}'
+
+        stdout = (result.stdout or '').strip()
+        stderr = (result.stderr or '').strip()
+
+        if stdout:
+            message += f'\n>>> Standard Output\n{stdout}'
+
+        if stderr:
+            message += f'\n>>> Standard Error\n{stderr}'
+
+        super().__init__(message)
+
+
+class ProgramNotFoundError(ApplicationError):
+    """A required program was not found."""
+
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+        super().__init__(f'Missing program: {name}')
+
+
+class Display:
+    """Display interface for sending output to the console."""
+
+    CLEAR = '\033[0m'
+    RED = '\033[31m'
+    GREEN = '\033[32m'
+    YELLOW = '\033[33m'
+    BLUE = '\033[34m'
+    PURPLE = '\033[35m'
+    CYAN = '\033[36m'
+
+    def __init__(self) -> None:
+        self.sensitive: set[str] = set()
+
+    def section(self, message: str) -> None:
+        """Print a section message to the console."""
+        self.show(f'==> {message}', color=self.BLUE)
+
+    def subsection(self, message: str) -> None:
+        """Print a subsection message to the console."""
+        self.show(f'--> {message}', color=self.CYAN)
+
+    def fatal(self, message: str) -> None:
+        """Print a fatal message to the console."""
+        self.show(f'FATAL: {message}', color=self.RED)
+
+    def error(self, message: str) -> None:
+        """Print an error message to the console."""
+        self.show(f'ERROR: {message}', color=self.RED)
+
+    def warning(self, message: str) -> None:
+        """Print a warning message to the console."""
+        self.show(f'WARNING: {message}', color=self.PURPLE)
+
+    def info(self, message: str) -> None:
+        """Print an info message to the console."""
+        self.show(f'INFO: {message}', color=self.YELLOW)
+
+    def show(self, message: str, color: str | None = None) -> None:
+        """Print a message to the console."""
+        for item in self.sensitive:
+            message = message.replace(item, '*' * len(item))
+
+        print(f'{color or self.CLEAR}{message}{self.CLEAR}', flush=True)
+
+
+def run_module(
+    module: str,
+    args: dict[str, t.Any],
+) -> SubprocessResult:
+    """Run the specified Ansible module and return the result."""
+    return run_command('ansible', '-m', module, '-v', '-a', json.dumps(args), 'localhost')
+
+
+def retry_command(func: t.Callable[[], SubprocessResult], attempts: int = 3) -> SubprocessResult:
+    """Run the given command function up to the specified number of attempts when the failure is due to an SSH error."""
+    for attempts_remaining in range(attempts - 1, -1, -1):
+        try:
+            return func()
+        except SubprocessError as ex:
+            if ex.result.command[0] == 'ssh' and ex.result.status == 255 and attempts_remaining:
+                # SSH connections on our Ubuntu 22.04 host sometimes fail for unknown reasons.
+                # This retry should allow the test suite to continue, maintaining CI stability.
+                # TODO: Figure out why local SSH connections sometimes fail during the test run.
+                display.warning('Command failed due to an SSH error. Waiting a few seconds before retrying.')
+                time.sleep(3)
+                continue
+
+            raise
+
+
+def run_command(
+    *command: str,
+    data: str | None = None,
+    stdin: int | t.IO[bytes] | None = None,
+    env: dict[str, str] | None = None,
+    capture: bool = False,
+) -> SubprocessResult:
+    """Run the specified command and return the result."""
+    stdin = subprocess.PIPE if data else stdin or subprocess.DEVNULL
+    stdout = subprocess.PIPE if capture else None
+    stderr = subprocess.PIPE if capture else None
+
+    display.subsection(f'Run command: {shlex.join(command)}')
+
+    try:
+        with subprocess.Popen(args=command, stdin=stdin, stdout=stdout, stderr=stderr, env=env, text=True) as process:
+            process_stdout, process_stderr = process.communicate(data)
+            process_status = process.returncode
+    except FileNotFoundError:
+        raise ProgramNotFoundError(command[0]) from None
+
+    result = SubprocessResult(
+        command=list(command),
+        stdout=process_stdout,
+        stderr=process_stderr,
+        status=process_status,
+    )
+
+    if process.returncode != 0:
+        raise SubprocessError(result)
+
+    return result
+
+
+class Bootstrapper(metaclass=abc.ABCMeta):
+    """Bootstrapper for remote instances."""
+
+    @classmethod
+    def install_podman(cls) -> bool:
+        """Return True if podman will be installed."""
+        return False
+
+    @classmethod
+    def install_docker(cls) -> bool:
+        """Return True if docker will be installed."""
+        return False
+
+    @classmethod
+    def usable(cls) -> bool:
+        """Return True if the bootstrapper can be used, otherwise False."""
+        return False
+
+    @classmethod
+    def init(cls) -> t.Type[Bootstrapper]:
+        """Return a bootstrapper type appropriate for the current system."""
+        for bootstrapper in cls.__subclasses__():
+            if bootstrapper.usable():
+                return bootstrapper
+
+        display.warning('No supported bootstrapper found.')
+        return Bootstrapper
+
+    @classmethod
+    def run(cls) -> None:
+        """Run the bootstrapper."""
+        cls.configure_root_user()
+        cls.configure_unprivileged_user()
+        cls.configure_source_trees()
+        cls.configure_ssh_keys()
+        cls.configure_podman_remote()
+
+    @classmethod
+    def configure_root_user(cls) -> None:
+        """Configure the root user to run tests."""
+        root_password_status = run_command('passwd', '--status', 'root', capture=True)
+        root_password_set = root_password_status.stdout.split()[1]
+
+        if root_password_set not in ('P', 'PS'):
+            root_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip()
+
+            run_module(
+                'user',
+                dict(
+                    user='root',
+                    password=root_password,
+                ),
+            )
+
+    @classmethod
+    def configure_unprivileged_user(cls) -> None:
+        """Configure the unprivileged user to run tests."""
+        unprivileged_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip()
+
+        run_module(
+            'user',
+            dict(
+                user=UNPRIVILEGED_USER_NAME,
+                password=unprivileged_password,
+                groups=['docker'] if cls.install_docker() else [],
+                append=True,
+            ),
+        )
+
+        if os_release.id == 'alpine':
+            # Most distros handle this automatically, but not Alpine.
+            # See: https://www.redhat.com/sysadmin/rootless-podman
+            start = 165535
+            end = start + 65535
+            id_range = f'{start}-{end}'
+
+            run_command(
+                'usermod',
+                '--add-subuids',
+                id_range,
+                '--add-subgids',
+                id_range,
+                UNPRIVILEGED_USER_NAME,
+            )
+
+    @classmethod
+    def configure_source_trees(cls):
+        """Configure the source trees needed to run tests for both root and the unprivileged user."""
+        current_ansible = pathlib.Path(os.environ['PYTHONPATH']).parent
+
+        root_ansible = pathlib.Path('~').expanduser() / 'ansible'
+        test_ansible = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}').expanduser() / 'ansible'
+
+        if current_ansible != root_ansible:
+            display.info(f'copying {current_ansible} -> {root_ansible} ...')
+            rmtree(root_ansible)
+            shutil.copytree(current_ansible, root_ansible)
+            run_command('chown', '-R', 'root:root', str(root_ansible))
+
+        display.info(f'copying {current_ansible} -> {test_ansible} ...')
+        rmtree(test_ansible)
+        shutil.copytree(current_ansible, test_ansible)
+        run_command('chown', '-R', f'{UNPRIVILEGED_USER_NAME}:{UNPRIVILEGED_USER_NAME}', str(test_ansible))
+
+        paths = [pathlib.Path(test_ansible)]
+
+        for root, dir_names, file_names in os.walk(test_ansible):
+            paths.extend(pathlib.Path(root, dir_name) for dir_name in dir_names)
+            paths.extend(pathlib.Path(root, file_name) for file_name in file_names)
+
+        user = pwd.getpwnam(UNPRIVILEGED_USER_NAME)
+        uid = user.pw_uid
+        gid = user.pw_gid
+
+        for path in paths:
+            os.chown(path, uid, gid)
+
+    @classmethod
+    def configure_ssh_keys(cls) -> None:
+        """Configure SSH keys needed to run tests."""
+        user = pwd.getpwnam(UNPRIVILEGED_USER_NAME)
+        uid = user.pw_uid
+        gid = user.pw_gid
+
+        current_rsa_pub = pathlib.Path('~/.ssh/id_rsa.pub').expanduser()
+
+        test_authorized_keys = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.ssh/authorized_keys').expanduser()
+
+        test_authorized_keys.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
+        os.chown(test_authorized_keys.parent, uid, gid)
+
+        shutil.copyfile(current_rsa_pub, test_authorized_keys)
+        os.chown(test_authorized_keys, uid, gid)
+        test_authorized_keys.chmod(mode=0o644)
+
+    @classmethod
+    def configure_podman_remote(cls) -> None:
+        """Configure podman remote support."""
+        # TODO: figure out how to support remote podman without systemd (Alpine)
+        # TODO: figure out how to support remote podman on Ubuntu
+        if os_release.id in ('alpine', 'ubuntu'):
+            return
+
+        # Support podman remote on any host with systemd available.
+        retry_command(lambda: run_command('ssh', f'{UNPRIVILEGED_USER_NAME}@localhost', 'systemctl', '--user', 'enable', '--now', 'podman.socket'))
+        run_command('loginctl', 'enable-linger', UNPRIVILEGED_USER_NAME)
+
+
+class DnfBootstrapper(Bootstrapper):
+    """Bootstrapper for dnf based systems."""
+
+    @classmethod
+    def install_podman(cls) -> bool:
+        """Return True if podman will be installed."""
+        return True
+
+    @classmethod
+    def install_docker(cls) -> bool:
+        """Return True if docker will be installed."""
+        return os_release.id != 'rhel'
+
+    @classmethod
+    def usable(cls) -> bool:
+        """Return True if the bootstrapper can be used, otherwise False."""
+        return bool(shutil.which('dnf'))
+
+    @classmethod
+    def run(cls) -> None:
+        """Run the bootstrapper."""
+        # NOTE: Install crun to make it available to podman, otherwise installing moby-engine can cause podman to use runc instead.
+        packages = ['podman', 'crun']
+
+        if cls.install_docker():
+            packages.append('moby-engine')
+
+        if os_release.id == 'fedora' and os_release.version_id == '36':
+            # In Fedora 36 the current version of netavark, 1.2.0, causes TCP connect to hang between rootfull containers.
+            # The previously tested version, 1.1.0, did not have this issue.
+            # Unfortunately, with the release of 1.2.0 the 1.1.0 package was removed from the repositories.
+            # Thankfully the 1.0.2 version is available and also works, so we'll use that here until a fixed version is available.
+            # See: https://github.com/containers/netavark/issues/491
+            packages.append('netavark-1.0.2')
+
+        if os_release.id == 'rhel':
+            # As of the release of RHEL 9.1, installing podman on RHEL 9.0 results in a non-fatal error at install time:
+            #
+            #   libsemanage.semanage_pipe_data: Child process /usr/libexec/selinux/hll/pp failed with code: 255. (No such file or directory).
+            #   container: libsepol.policydb_read: policydb module version 21 does not match my version range 4-20
+            #   container: libsepol.sepol_module_package_read: invalid module in module package (at section 0)
+            #   container: Failed to read policy package
+            #   libsemanage.semanage_direct_commit: Failed to compile hll files into cil files.
+            #    (No such file or directory).
+            #   /usr/sbin/semodule:  Failed!
+            #
+            # Unfortunately this is then fatal when running podman, resulting in no error message and a 127 return code.
+            # The solution is to update the policycoreutils package *before* installing podman.
+            #
+            # NOTE: This work-around can probably be removed once we're testing on RHEL 9.1, as the updated packages should already be installed.
+            #       Unfortunately at this time there is no RHEL 9.1 AMI available (other than the Beta release).
+
+            run_command('dnf', 'update', '-y', 'policycoreutils')
+
+        run_command('dnf', 'install', '-y', *packages)
+
+        if cls.install_docker():
+            run_command('systemctl', 'start', 'docker')
+
+        if os_release.id == 'rhel' and os_release.version_id.startswith('8.'):
+            # RHEL 8 defaults to using runc instead of crun.
+            # Unfortunately runc seems to have issues with podman remote.
+            # Specifically, it tends to cause conmon to burn CPU until it reaches the specified exit delay.
+            # So we'll just change the system default to crun instead.
+            # Unfortunately we can't do this with the `--runtime` option since that doesn't work with podman remote.
+
+            conf = pathlib.Path('/usr/share/containers/containers.conf').read_text()
+
+            conf = re.sub('^runtime .*', 'runtime = "crun"', conf, flags=re.MULTILINE)
+
+            pathlib.Path('/etc/containers/containers.conf').write_text(conf)
+
+        super().run()
+
+
+class AptBootstrapper(Bootstrapper):
+    """Bootstrapper for apt based systems."""
+
+    @classmethod
+    def install_podman(cls) -> bool:
+        """Return True if podman will be installed."""
+        return not (os_release.id == 'ubuntu' and os_release.version_id == '20.04')
+
+    @classmethod
+    def install_docker(cls) -> bool:
+        """Return True if docker will be installed."""
+        return True
+
+    @classmethod
+    def usable(cls) -> bool:
+        """Return True if the bootstrapper can be used, otherwise False."""
+        return bool(shutil.which('apt-get'))
+
+    @classmethod
+    def run(cls) -> None:
+        """Run the bootstrapper."""
+        apt_env = os.environ.copy()
+        apt_env.update(
+            DEBIAN_FRONTEND='noninteractive',
+        )
+
+        packages = ['docker.io']
+
+        if cls.install_podman():
+            # NOTE: Install crun to make it available to podman, otherwise installing docker.io can cause podman to use runc instead.
+            # Using podman rootless requires the `newuidmap` and `slirp4netns` commands.
+            packages.extend(('podman', 'crun', 'uidmap', 'slirp4netns'))
+
+        run_command('apt-get', 'install', *packages, '-y', '--no-install-recommends', env=apt_env)
+
+        super().run()
+
+
+class ApkBootstrapper(Bootstrapper):
+    """Bootstrapper for apk based systems."""
+
+    @classmethod
+    def install_podman(cls) -> bool:
+        """Return True if podman will be installed."""
+        return True
+
+    @classmethod
+    def install_docker(cls) -> bool:
+        """Return True if docker will be installed."""
+        return True
+
+    @classmethod
+    def usable(cls) -> bool:
+        """Return True if the bootstrapper can be used, otherwise False."""
+        return bool(shutil.which('apk'))
+
+    @classmethod
+    def run(cls) -> None:
+        """Run the bootstrapper."""
+        # The `openssl` package is used to generate hashed passwords.
+        packages = ['docker', 'podman', 'openssl']
+
+        run_command('apk', 'add', *packages)
+        run_command('service', 'docker', 'start')
+        run_command('modprobe', 'tun')
+
+        super().run()
+
+
+@dataclasses.dataclass(frozen=True)
+class OsRelease:
+    """Operating system identification."""
+
+    id: str
+    version_id: str
+
+    @staticmethod
+    def init() -> OsRelease:
+        """Detect the current OS release and return the result."""
+        lines = run_command('sh', '-c', '. /etc/os-release && echo $ID && echo $VERSION_ID', capture=True).stdout.splitlines()
+
+        result = OsRelease(
+            id=lines[0],
+            version_id=lines[1],
+        )
+
+        display.show(f'Detected OS "{result.id}" version "{result.version_id}".')
+
+        return result
+
+
+display = Display()
+os_release = OsRelease.init()
+
+ROOT_USER = User.get('root')
+
+if __name__ == '__main__':
+    main()
author	Matt Clay <matt@mystile.com>	2022-11-29 13:35:53 -0800
committer	GitHub <noreply@github.com>	2022-11-29 13:35:53 -0800
commit	cda16cc5e9aa8703fb4e1ac0a0be6b631d9076cc (patch)
tree	0a84f8b7bff9cad558c0c988b5ce3e8119d4037a /test/integration/targets/ansible-test-container/runme.py
parent	3bda4eae6f1273a42f14b3dedc0d4f5928b290f6 (diff)
download	ansible-cda16cc5e9aa8703fb4e1ac0a0be6b631d9076cc.tar.gz