diff options
author | Mikhail Shchatko <mikhail.shchatko@mongodb.com> | 2021-05-12 15:37:41 +0300 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-05-14 10:36:51 +0000 |
commit | 985e4fbf29d8b80101fa17cc00e28c6fa2eeba93 (patch) | |
tree | 4a309346d2621a4564683e16998388206bf3dcc5 | |
parent | 51a860a7644d5c769b47d5b814feeb3dd43dcec5 (diff) | |
download | mongo-985e4fbf29d8b80101fa17cc00e28c6fa2eeba93.tar.gz |
SERVER-56478 Run powercycle with extended host lifetime for easier debugging
-rwxr-xr-x | buildscripts/evergreen_gen_powercycle_tasks.py | 110 | ||||
-rwxr-xr-x | buildscripts/powercycle_sentinel.py | 119 | ||||
-rw-r--r-- | buildscripts/resmokelib/powercycle/__init__.py | 3 | ||||
-rw-r--r-- | buildscripts/resmokelib/powercycle/lib/__init__.py | 12 | ||||
-rw-r--r-- | buildscripts/resmokelib/powercycle/lib/remote_operations.py | 5 | ||||
-rwxr-xr-x | buildscripts/resmokelib/powercycle/powercycle.py | 15 | ||||
-rw-r--r-- | buildscripts/resmokelib/powercycle/setup/__init__.py | 18 | ||||
-rw-r--r-- | buildscripts/tests/test_powercycle_sentinel.py | 51 | ||||
-rw-r--r-- | etc/evergreen.yml | 102 | ||||
-rwxr-xr-x | evergreen/compiled_binaries_get.sh | 46 | ||||
-rwxr-xr-x | evergreen/move_multiversion_binaries.sh | 6 | ||||
-rw-r--r-- | evergreen/multiversion_setup.sh | 13 | ||||
-rw-r--r-- | evergreen/powercycle_run_test.sh | 5 | ||||
-rwxr-xr-x | evergreen/powercycle_sentinel_run.sh | 10 | ||||
-rwxr-xr-x | evergreen/powercycle_tasks_generate.sh | 10 |
15 files changed, 452 insertions, 73 deletions
diff --git a/buildscripts/evergreen_gen_powercycle_tasks.py b/buildscripts/evergreen_gen_powercycle_tasks.py new file mode 100755 index 00000000000..69c38ba54f3 --- /dev/null +++ b/buildscripts/evergreen_gen_powercycle_tasks.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +"""Generate multiple powercycle tasks to run in evergreen.""" +from collections import namedtuple +from typing import Any, List, Tuple, Set + +import click +from shrub.v2 import BuildVariant, FunctionCall, ShrubProject, Task, TaskDependency +from shrub.v2.command import BuiltInCommand + +from buildscripts.util.fileops import write_file +from buildscripts.util.read_config import read_config_file +from buildscripts.util.taskname import name_generated_task + +Config = namedtuple("config", [ + "task_names", + "num_tasks", + "timeout_params", + "remote_credentials_vars", + "set_up_ec2_instance_vars", + "run_powercycle_vars", + "build_variant", + "distro", +]) + + +def make_config(expansions_file: Any) -> Config: + """Group expansions into config.""" + expansions = read_config_file(expansions_file) + task_names = expansions.get("task_names", "powercycle_smoke_skip_compile") + # Avoid duplicated task names + task_names = {task_name for task_name in task_names.split(" ")} + num_tasks = int(expansions.get("num_tasks", 10)) + timeout_params = { + "exec_timeout_secs": int(expansions.get("exec_timeout_secs", 7200)), + "timeout_secs": int(expansions.get("timeout_secs", 1800)), + } + remote_credentials_vars = { + "private_key_file": "src/powercycle.pem", + "private_key_remote": "${__project_aws_ssh_key_value}", + } + set_up_ec2_instance_vars = { + "set_up_retry_count": int(expansions.get("set_up_retry_count", 2)), + } + run_powercycle_vars = { + "run_powercycle_args": expansions.get("run_powercycle_args"), + } + build_variant = expansions.get("build_variant") + distro = expansions.get("distro_id") + + return Config(task_names, num_tasks, timeout_params, remote_credentials_vars, + set_up_ec2_instance_vars, run_powercycle_vars, build_variant, distro) + + +def get_setup_commands() -> Tuple[List[FunctionCall], Set[TaskDependency]]: + """Return setup commands.""" + return [ + FunctionCall("do setup"), + ], {TaskDependency("archive_dist_test_debug")} + + +def get_skip_compile_setup_commands() -> Tuple[List[FunctionCall], set]: + """Return skip compile setup commands.""" + return [ + FunctionCall("set task expansion macros"), + FunctionCall("set up venv"), + FunctionCall("upload pip requirements"), + FunctionCall("f_expansions_write"), + FunctionCall("configure evergreen api credentials"), + FunctionCall("get compiled binaries"), + ], set() + + +@click.command() +@click.argument("expansions_file", type=str, default="expansions.yml") +@click.argument("output_file", type=str, default="powercycle_tasks.json") +def main(expansions_file: str = "expansions.yml", + output_file: str = "powercycle_tasks.json") -> None: + """Generate multiple powercycle tasks to run in evergreen.""" + + config = make_config(expansions_file) + build_variant = BuildVariant(config.build_variant) + for task_name in config.task_names: + if "skip_compile" in task_name: + commands, task_dependency = get_skip_compile_setup_commands() + else: + commands, task_dependency = get_setup_commands() + + commands.extend([ + FunctionCall("set up remote credentials", config.remote_credentials_vars), + BuiltInCommand("timeout.update", config.timeout_params), + FunctionCall("set up EC2 instance", config.set_up_ec2_instance_vars), + FunctionCall("run powercycle test", config.run_powercycle_vars), + ]) + + build_variant.display_task( + task_name, { + Task( + name_generated_task(task_name, index, config.num_tasks, config.build_variant), + commands, task_dependency) + for index in range(config.num_tasks) + }, distros=[config.distro]) + + shrub_project = ShrubProject.empty() + shrub_project.add_build_variant(build_variant) + + write_file(output_file, shrub_project.json()) + + +if __name__ == '__main__': + main() diff --git a/buildscripts/powercycle_sentinel.py b/buildscripts/powercycle_sentinel.py new file mode 100755 index 00000000000..bb276e2b712 --- /dev/null +++ b/buildscripts/powercycle_sentinel.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +"""Powercycle tasks sentinel. + +Error out when any powercycle task on the same buildvariant runs for more than 2 hours. +""" +import logging +import os +import sys +import time +from datetime import datetime, timezone +from typing import List + +import click +import structlog +from evergreen import RetryingEvergreenApi, EvergreenApi + +from buildscripts.util.read_config import read_config_file + +LOGGER = structlog.getLogger(__name__) + +EVERGREEN_HOST = "https://evergreen.mongodb.com" +EVERGREEN_CONFIG_LOCATIONS = ( + # Common for machines in Evergreen + os.path.join(os.getcwd(), ".evergreen.yml"), + # Common for local machines + os.path.expanduser(os.path.join("~", ".evergreen.yml")), +) +POWERCYCLE_TASK_EXEC_TIMEOUT_SECS = 2 * 60 * 60 +WATCH_INTERVAL_SECS = 5 * 60 + + +def get_evergreen_api() -> EvergreenApi: + """Return evergreen API.""" + # Pickup the first config file found in common locations. + for file in EVERGREEN_CONFIG_LOCATIONS: + if os.path.isfile(file): + evg_api = RetryingEvergreenApi.get_api(config_file=file) + return evg_api + + LOGGER.error("Evergreen config not found in locations.", locations=EVERGREEN_CONFIG_LOCATIONS) + sys.exit(1) + + +def watch_tasks(task_ids: List[str], evg_api: EvergreenApi, watch_interval_secs: int) -> List[str]: + """Watch tasks if they run longer than exec timeout.""" + watch_task_ids = task_ids[:] + long_running_task_ids = [] + + while watch_task_ids: + LOGGER.info("Looking if powercycle tasks are still running on the current buildvariant.") + powercycle_tasks = [evg_api.task_by_id(task_id) for task_id in watch_task_ids] + for task in powercycle_tasks: + if task.finish_time: + watch_task_ids.remove(task.task_id) + elif task.start_time and (datetime.now(timezone.utc) - task.start_time + ).total_seconds() > POWERCYCLE_TASK_EXEC_TIMEOUT_SECS: + long_running_task_ids.append(task.task_id) + watch_task_ids.remove(task.task_id) + if watch_task_ids: + time.sleep(watch_interval_secs) + + return long_running_task_ids + + +def get_links(task_ids: List[str]) -> str: + """Return evergreen task urls delimited by newline.""" + return "\n".join([f"{EVERGREEN_HOST}/task/{task_id}" for task_id in task_ids]) + + +@click.command() +@click.argument("expansions_file", type=str, default="expansions.yml") +def main(expansions_file: str = "expansions.yml") -> None: + """Implementation.""" + + logging.basicConfig( + format="[%(levelname)s] %(message)s", + level=logging.INFO, + stream=sys.stdout, + ) + structlog.configure(logger_factory=structlog.stdlib.LoggerFactory()) + + expansions = read_config_file(expansions_file) + build_id = expansions["build_id"] + current_task_id = expansions["task_id"] + gen_task_name = expansions["gen_task"] + + evg_api = get_evergreen_api() + + build_tasks = evg_api.tasks_by_build(build_id) + gen_task_id = [task.task_id for task in build_tasks if gen_task_name in task.task_id][0] + gen_task_url = f"{EVERGREEN_HOST}/task/{gen_task_id}" + + while evg_api.task_by_id(gen_task_id).is_active(): + LOGGER.info( + f"Waiting for '{gen_task_name}' task to generate powercycle tasks:\n{gen_task_url}") + time.sleep(WATCH_INTERVAL_SECS) + + build_tasks = evg_api.tasks_by_build(build_id) + powercycle_task_ids = [ + task.task_id for task in build_tasks + if not task.display_only and task.task_id != current_task_id and task.task_id != gen_task_id + and "powercycle" in task.task_id + ] + LOGGER.info(f"Watching powercycle tasks:\n{get_links(powercycle_task_ids)}") + + long_running_task_ids = watch_tasks(powercycle_task_ids, evg_api, WATCH_INTERVAL_SECS) + if long_running_task_ids: + LOGGER.error( + f"Found powercycle tasks that are running for more than {POWERCYCLE_TASK_EXEC_TIMEOUT_SECS} " + f"seconds and most likely something is going wrong in those tasks:\n{get_links(long_running_task_ids)}" + ) + LOGGER.error( + "Hopefully hosts from the tasks are still in run at the time you are seeing this " + "and the Build team is able to check them to diagnose the issue.") + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/buildscripts/resmokelib/powercycle/__init__.py b/buildscripts/resmokelib/powercycle/__init__.py index 2c734073a5f..943bdfc2d79 100644 --- a/buildscripts/resmokelib/powercycle/__init__.py +++ b/buildscripts/resmokelib/powercycle/__init__.py @@ -156,6 +156,9 @@ MongoDB Powercycle Tests. To run a powercycle test locally, use the following st f" config values will be used from '{powercycle_config.POWERCYCLE_TASKS_CONFIG}'." f" [default: '%(default)s']", default="powercycle") + test_options.add_argument("--sshAccessRetryCount", dest="ssh_access_retry_count", + help=argparse.SUPPRESS, type=int, default=5) + # MongoDB options mongodb_options.add_argument( "--downloadUrl", dest="tarball_url", diff --git a/buildscripts/resmokelib/powercycle/lib/__init__.py b/buildscripts/resmokelib/powercycle/lib/__init__.py index 01913512cf0..4cc3492d42f 100644 --- a/buildscripts/resmokelib/powercycle/lib/__init__.py +++ b/buildscripts/resmokelib/powercycle/lib/__init__.py @@ -25,9 +25,7 @@ class PowercycleCommand(Subcommand): def __init__(self): """Initialize PowercycleCommand.""" self.expansions = yaml.safe_load(open(powercycle_constants.EXPANSIONS_FILE)) - self.ssh_identity = self._get_ssh_identity() - self.ssh_connection_options = \ - f"{self.ssh_identity} {powercycle_constants.DEFAULT_SSH_CONNECTION_OPTIONS}" + self.ssh_connection_options = f"-i powercycle.pem {powercycle_constants.DEFAULT_SSH_CONNECTION_OPTIONS}" self.sudo = "" if self.is_windows() else "sudo" # The username on the Windows image that powercycle uses is currently the default user. self.user = "Administrator" if self.is_windows() else getpass.getuser() @@ -52,14 +50,6 @@ class PowercycleCommand(Subcommand): buff = buff_stdout.decode("utf-8", "replace") return process.poll(), buff - def _get_ssh_identity(self) -> str: - workdir = self.expansions['workdir'] - if self.is_windows(): - workdir = workdir.replace("\\", "/") - pem_file = '/'.join([workdir, 'src', 'powercycle.pem']) - - return f"-i {pem_file}" - def execute_cmd(cmd, use_file=False): """Execute command and returns return_code, output from command.""" diff --git a/buildscripts/resmokelib/powercycle/lib/remote_operations.py b/buildscripts/resmokelib/powercycle/lib/remote_operations.py index 8ae7d9caec7..a011e786f75 100644 --- a/buildscripts/resmokelib/powercycle/lib/remote_operations.py +++ b/buildscripts/resmokelib/powercycle/lib/remote_operations.py @@ -51,7 +51,7 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes def __init__( # pylint: disable=too-many-arguments self, user_host, ssh_connection_options=None, ssh_options=None, scp_options=None, - shell_binary="/bin/bash", use_shell=False, ignore_ret=False): + shell_binary="/bin/bash", use_shell=False, ignore_ret=False, access_retry_count=5): """Initialize RemoteOperations.""" self.user_host = user_host @@ -62,6 +62,7 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes self.ignore_ret = ignore_ret self.shell_binary = shell_binary self.use_shell = use_shell + self.access_retry_count = access_retry_count # Check if we can remotely access the host. self._access_code, self._access_buff = self._remote_access() @@ -99,7 +100,7 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes """Check if a remote session is possible.""" cmd = "ssh {} {} {} date".format(self.ssh_connection_options, self.ssh_options, self.user_host) - return self._call_retries(cmd, 5) + return self._call_retries(cmd, self.access_retry_count) def _perform_operation(self, cmd, retry, retry_count): if retry: diff --git a/buildscripts/resmokelib/powercycle/powercycle.py b/buildscripts/resmokelib/powercycle/powercycle.py index 36f5c79579c..43dada73b1d 100755 --- a/buildscripts/resmokelib/powercycle/powercycle.py +++ b/buildscripts/resmokelib/powercycle/powercycle.py @@ -724,13 +724,13 @@ class LocalToRemoteOperations(object): def __init__( # pylint: disable=too-many-arguments self, user_host, ssh_connection_options=None, ssh_options=None, - shell_binary="/bin/bash", use_shell=False): + shell_binary="/bin/bash", use_shell=False, access_retry_count=5): """Initialize LocalToRemoteOperations.""" self.remote_op = remote_operations.RemoteOperations( user_host=user_host, ssh_connection_options=ssh_connection_options, ssh_options=ssh_options, shell_binary=shell_binary, use_shell=use_shell, - ignore_ret=True) + ignore_ret=True, access_retry_count=access_retry_count) def shell(self, cmds, remote_dir=None): """Return tuple (ret, output) from performing remote shell operation.""" @@ -1332,7 +1332,7 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many LOGGER.info("powercycle invocation: %s", " ".join(sys.argv)) - task_name = options.task_name + task_name = re.sub(r"(_[0-9]+)(_[\w-]+)?$", "", options.task_name) task_config = powercycle_config.get_task_config(task_name, options.remote_operation) LOGGER.info("powercycle task config: %s", task_config) @@ -1450,9 +1450,9 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many ssh_options = "" if _IS_WINDOWS else "-tt" # Instantiate the local handler object. - local_ops = LocalToRemoteOperations(user_host=ssh_user_host, - ssh_connection_options=ssh_connection_options, - ssh_options=ssh_options, use_shell=True) + local_ops = LocalToRemoteOperations( + user_host=ssh_user_host, ssh_connection_options=ssh_connection_options, + ssh_options=ssh_options, use_shell=True, access_retry_count=options.ssh_access_retry_count) verify_remote_access(local_ops) # Pass client_args to the remote script invocation. @@ -1662,7 +1662,8 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many # Reestablish remote access after crash. local_ops = LocalToRemoteOperations(user_host=ssh_user_host, ssh_connection_options=ssh_connection_options, - ssh_options=ssh_options, use_shell=True) + ssh_options=ssh_options, use_shell=True, + access_retry_count=options.ssh_access_retry_count) verify_remote_access(local_ops) ret, output = call_remote_operation(local_ops, remote_python, script_name, client_args, "--remoteOperation noop") diff --git a/buildscripts/resmokelib/powercycle/setup/__init__.py b/buildscripts/resmokelib/powercycle/setup/__init__.py index f0b9be02b95..85ed66a5b88 100644 --- a/buildscripts/resmokelib/powercycle/setup/__init__.py +++ b/buildscripts/resmokelib/powercycle/setup/__init__.py @@ -15,6 +15,9 @@ class SetUpEC2Instance(PowercycleCommand): def execute(self) -> None: # pylint: disable=too-many-instance-attributes, too-many-locals, too-many-statements """:return: None.""" + default_retry_count = 2 + retry_count = int(self.expansions.get("set_up_retry_count", default_retry_count)) + # First operation - # Create remote_dir. group_cmd = f"id -Gn {self.user}" @@ -31,7 +34,7 @@ class SetUpEC2Instance(PowercycleCommand): cmds = f"{self.sudo} mkdir -p {remote_dir}; {self.sudo} chown -R {user_group} {remote_dir}; {set_permission_stmt} {remote_dir}; ls -ld {remote_dir}" cmds = f"{cmds}; {self.sudo} mkdir -p {db_path}; {self.sudo} chown -R {user_group} {db_path}; {set_permission_stmt} {db_path}; ls -ld {db_path}" - self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count) # Second operation - # Copy buildscripts and mongoDB executables to the remote host. @@ -41,7 +44,8 @@ class SetUpEC2Instance(PowercycleCommand): if os.path.isdir(shared_libs): files.append(shared_libs) - self.remote_op.operation(SSHOperation.COPY_TO, files, remote_dir, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.COPY_TO, files, remote_dir, retry=True, + retry_count=retry_count) # Third operation - # Set up virtualenv on remote. @@ -57,7 +61,7 @@ class SetUpEC2Instance(PowercycleCommand): cmds = f"{cmds}; . $activate" cmds = f"{cmds}; pip3 install -r $remote_dir/etc/pip/powercycle-requirements.txt" - self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count) # Fourth operation - # Enable core dumps on non-Windows remote hosts. @@ -81,7 +85,7 @@ class SetUpEC2Instance(PowercycleCommand): # https://unix.stackexchange.com/a/349558 in order to ensure the ssh client gets a # response from the remote machine before it restarts. cmds = f"{cmds}; nohup {self.sudo} reboot &>/dev/null & exit" - self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count) # Fifth operation - # Print the ulimit & kernel.core_pattern @@ -93,7 +97,7 @@ class SetUpEC2Instance(PowercycleCommand): cmds = f"{cmds}; then /sbin/sysctl kernel.core_pattern" cmds = f"{cmds}; fi" - self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count) # Sixth operation - # Set up curator to collect system & process stats on remote. @@ -120,7 +124,7 @@ class SetUpEC2Instance(PowercycleCommand): cmds = f"{cmds}; crontab -l" cmds = f"{cmds}; {{ {self.sudo} $HOME/curator stat system --file {monitor_system_file} > /dev/null 2>&1 & {self.sudo} $HOME/curator stat process-all --file {monitor_proc_file} > /dev/null 2>&1 & }} & disown" - self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count) # Seventh operation - # Install NotMyFault, used to crash Windows. @@ -132,4 +136,4 @@ class SetUpEC2Instance(PowercycleCommand): cmds = f"curl -s -o {windows_crash_zip} {windows_crash_dl}" cmds = f"{cmds}; unzip -q {windows_crash_zip} -d {windows_crash_dir}" cmds = f"{cmds}; chmod +x {windows_crash_dir}/*.exe" - self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2) + self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count) diff --git a/buildscripts/tests/test_powercycle_sentinel.py b/buildscripts/tests/test_powercycle_sentinel.py new file mode 100644 index 00000000000..3f2774d9b54 --- /dev/null +++ b/buildscripts/tests/test_powercycle_sentinel.py @@ -0,0 +1,51 @@ +"""Unit tests for powercycle_sentinel.py.""" +# pylint: disable=missing-docstring +import unittest +from datetime import datetime, timezone, timedelta +from unittest.mock import Mock + +from evergreen import EvergreenApi, Task + +from buildscripts.powercycle_sentinel import watch_tasks, POWERCYCLE_TASK_EXEC_TIMEOUT_SECS + + +def make_task_mock(evg_api, task_id, start_time, finish_time): + return Task({ + "task_id": task_id, + "start_time": start_time, + "finish_time": finish_time, + }, evg_api) + + +class TestWatchTasks(unittest.TestCase): + """Test watch_tasks.""" + + def test_no_long_running_tasks(self): + evg_api = EvergreenApi() + task_ids = ["1", "2"] + now = datetime.now(timezone.utc).isoformat() + task_1 = make_task_mock(evg_api, task_ids[0], now, now) + task_2 = make_task_mock(evg_api, task_ids[1], now, now) + evg_api.task_by_id = Mock( + side_effect=(lambda task_id: { + "1": task_1, + "2": task_2, + }[task_id])) + long_running_task_ids = watch_tasks(task_ids, evg_api, 0) + self.assertEqual([], long_running_task_ids) + + def test_found_long_running_tasks(self): + evg_api = EvergreenApi() + task_ids = ["1", "2"] + exec_timeout_seconds_ago = (datetime.now(timezone.utc) - + timedelta(hours=POWERCYCLE_TASK_EXEC_TIMEOUT_SECS)).isoformat() + now = datetime.now(timezone.utc).isoformat() + task_1 = make_task_mock(evg_api, task_ids[0], exec_timeout_seconds_ago, now) + task_2 = make_task_mock(evg_api, task_ids[1], exec_timeout_seconds_ago, None) + evg_api.task_by_id = Mock( + side_effect=(lambda task_id: { + "1": task_1, + "2": task_2, + }[task_id])) + long_running_task_ids = watch_tasks(task_ids, evg_api, 0) + self.assertEqual([task_2.task_id], long_running_task_ids) diff --git a/etc/evergreen.yml b/etc/evergreen.yml index a85622cbd5e..1bbee28e035 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -121,8 +121,6 @@ variables: - &powercycle_remote_credentials private_key_file: src/powercycle.pem private_key_remote: ${__project_aws_ssh_key_value} - aws_key_remote: ${powercycle_aws_key} - aws_secret_remote: ${powercycle_aws_secret} - &libfuzzertests name: libfuzzertests! @@ -891,12 +889,68 @@ functions: - *f_expansions_write - *do_multiversion_setup - "move multiversion binaries": &move_multiversion_binaries + # Used by generator + "get compiled binaries": command: subprocess.exec params: binary: bash args: - - "./src/evergreen/move_multiversion_binaries.sh" + - "./src/evergreen/compiled_binaries_get.sh" + env: + workdir: ${workdir} + + "generate powercycle tasks": + - *set_task_expansion_macros + - *f_expansions_write + - *set_up_venv + - *upload_pip_requirements + + - command: subprocess.exec + params: + binary: bash + args: + - "./src/evergreen/powercycle_tasks_generate.sh" + env: + workdir: ${workdir} + + - command: archive.targz_pack + params: + target: powercycle_tasks_config.tgz + source_dir: "./" + include: + - "powercycle_tasks.json" + + - command: s3.put + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: powercycle_tasks_config.tgz + remote_file: ${project}/${build_variant}/${revision}/powercycle_tasks/${task_name}-${build_id}.tgz + bucket: mciuploads + permissions: public-read + content_type: application/gzip + display_name: Generated Task Config - Execution ${execution} + + - command: generate.tasks + params: + files: + - powercycle_tasks.json + + "run powercycle sentinel": + - *set_task_expansion_macros + - *f_expansions_write + - *set_up_venv + - *upload_pip_requirements + - *configure_evergreen_api_credentials + + - command: subprocess.exec + type: system + params: + binary: bash + args: + - "./src/evergreen/powercycle_sentinel_run.sh" + env: + workdir: ${workdir} "execute resmoke tests": &execute_resmoke_tests command: subprocess.exec @@ -1607,6 +1661,7 @@ functions: params: provider: ec2 distro: ${distro_id} + timeout_teardown_secs: 604800 # 7 days security_group_ids: - sg-097bff6dd0d1d31d0 @@ -6075,26 +6130,24 @@ tasks: resmoke_args: --suites=json_schema --storageEngine=wiredTiger resmoke_jobs_max: 1 -- name: powercycle_smoke_skip_compile - exec_timeout_secs: 7200 # 2 hour timeout for the task overall +- name: powercycle_smoke_skip_compile_gen commands: - - *f_expansions_write - - func: "set task expansion macros" - - *f_expansions_write - - func: "set up venv" - - func: "upload pip requirements" - - *f_expansions_write - - func: "configure evergreen api credentials" - - func: "do multiversion setup" + - func: "generate powercycle tasks" vars: - install_master_bin: true - - *move_multiversion_binaries - - func: "set up remote credentials" + task_names: >- + powercycle_smoke_skip_compile + num_tasks: 20 + exec_timeout_secs: 604800 # 7 days + timeout_secs: 604800 # 7 days + set_up_retry_count: 1000000 + run_powercycle_args: --sshAccessRetryCount=1000000 + +- name: powercycle_sentinel + exec_timeout_secs: 604800 # 7 days + commands: + - func: "run powercycle sentinel" vars: - <<: *powercycle_remote_credentials - - func: "set up EC2 instance" - - func: "run powercycle test" - timeout_secs: 1800 # 30 minute timeout for no output + gen_task: powercycle_smoke_skip_compile_gen - name: powercycle_smoke exec_timeout_secs: 7200 # 2 hour timeout for the task overall @@ -8225,14 +8278,13 @@ buildvariants: run_on: - rhel80-small expansions: - multiversion_platform: amazon2 + multiversion_platform: rhel80 multiversion_edition: enterprise stepback: false tasks: - name: lint_fuzzer_sanity_all - - name: powercycle_smoke_skip_compile - distros: - - amazon2-test + - name: powercycle_sentinel + - name: powercycle_smoke_skip_compile_gen - name: security-daily-cron modules: diff --git a/evergreen/compiled_binaries_get.sh b/evergreen/compiled_binaries_get.sh new file mode 100755 index 00000000000..b007fd97709 --- /dev/null +++ b/evergreen/compiled_binaries_get.sh @@ -0,0 +1,46 @@ +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +. "$DIR/prelude.sh" + +cd src + +set -o errexit +set -o verbose + +activate_venv + +rm -rf /data/install dist-test/bin + +edition="${multiversion_edition}" +platform="${multiversion_platform}" +architecture="${multiversion_architecture}" + +if [ ! -z "${multiversion_edition_42_or_later}" ]; then + edition="${multiversion_edition_42_or_later}" +fi +if [ ! -z "${multiversion_platform_42_or_later}" ]; then + platform="${multiversion_platform_42_or_later}" +fi +if [ ! -z "${multiversion_architecture_42_or_later}" ]; then + architecture="${multiversion_architecture_42_or_later}" +fi + +if [ ! -z "${multiversion_edition_44_or_later}" ]; then + edition="${multiversion_edition_44_or_later}" +fi +if [ ! -z "${multiversion_platform_44_or_later}" ]; then + platform="${multiversion_platform_44_or_later}" +fi +if [ ! -z "${multiversion_architecture_44_or_later}" ]; then + architecture="${multiversion_architecture_44_or_later}" +fi + +# This is primarily for tests for infrastructure which don't always need the latest +# binaries. +$python buildscripts/resmoke.py setup-multiversion \ + --installDir /data/install \ + --linkDir dist-test/bin \ + --edition $edition \ + --platform $platform \ + --architecture $architecture \ + --githubOauthToken "${github_token}" \ + --useLatest master diff --git a/evergreen/move_multiversion_binaries.sh b/evergreen/move_multiversion_binaries.sh deleted file mode 100755 index 6429150bee2..00000000000 --- a/evergreen/move_multiversion_binaries.sh +++ /dev/null @@ -1,6 +0,0 @@ -set -o verbose - -cd src -# powercycle expects the binaries to be in dist-test/bin -mkdir -p dist-test/bin -mv /data/multiversion/* dist-test/bin/ diff --git a/evergreen/multiversion_setup.sh b/evergreen/multiversion_setup.sh index cd7f1d942f1..d287d4e4b10 100644 --- a/evergreen/multiversion_setup.sh +++ b/evergreen/multiversion_setup.sh @@ -74,16 +74,3 @@ $python buildscripts/resmoke.py setup-multiversion \ --architecture $architecture \ --githubOauthToken "${github_token}" \ --useLatest 4.4 4.7 4.8 4.9 - -# This is primarily for tests for infrastructure which don't always need the latest -# binaries. -if [ ! -z "${install_master_bin}" ]; then - $python buildscripts/resmoke.py setup-multiversion \ - --installDir /data/install \ - --linkDir /data/multiversion \ - --edition $edition \ - --platform $platform \ - --architecture $architecture \ - --githubOauthToken "${github_token}" \ - --useLatest master -fi diff --git a/evergreen/powercycle_run_test.sh b/evergreen/powercycle_run_test.sh index 6aa63c669e6..a0703faa161 100644 --- a/evergreen/powercycle_run_test.sh +++ b/evergreen/powercycle_run_test.sh @@ -18,5 +18,6 @@ trap 'echo $? > error_exit.txt; exit 0' EXIT set +o errexit eval $python -u buildscripts/resmoke.py powercycle run \ "--sshUserHost=$(printf "%s@%s" "$user" "${private_ip_address}") \ - --sshConnection=\"-i ${private_key_file}\" \ - --taskName=${task_name}" + --sshConnection=\"-i powercycle.pem\" \ + --taskName=${task_name} \ + ${run_powercycle_args}" diff --git a/evergreen/powercycle_sentinel_run.sh b/evergreen/powercycle_sentinel_run.sh new file mode 100755 index 00000000000..f36f94dda1d --- /dev/null +++ b/evergreen/powercycle_sentinel_run.sh @@ -0,0 +1,10 @@ +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +. "$DIR/prelude.sh" + +cd src + +set -o errexit +set -o verbose + +activate_venv +$python buildscripts/powercycle_sentinel.py ../expansions.yml diff --git a/evergreen/powercycle_tasks_generate.sh b/evergreen/powercycle_tasks_generate.sh new file mode 100755 index 00000000000..15b7555790e --- /dev/null +++ b/evergreen/powercycle_tasks_generate.sh @@ -0,0 +1,10 @@ +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +. "$DIR/prelude.sh" + +cd src + +set -o errexit +set -o verbose + +activate_venv +$python buildscripts/evergreen_gen_powercycle_tasks.py ../expansions.yml ../powercycle_tasks.json |