diff options
author | David Bradford <david.bradford@mongodb.com> | 2022-03-02 15:26:04 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-02 20:57:30 +0000 |
commit | d883e3e0c216f10f7a1368bdd9d59316c32d2cd6 (patch) | |
tree | f7543323cdfa346158df9d4c0de9caf6e807655d | |
parent | 5946ea0bf2aa23ae1021a626238a4d7809728bc2 (diff) | |
download | mongo-d883e3e0c216f10f7a1368bdd9d59316c32d2cd6.tar.gz |
Revert "SERVER-63827: Determine task timeout at the start of task execution"
This reverts commit 4080bd69384735eb49eb1799734d1bf8c93feb09.
-rwxr-xr-x | buildscripts/evergreen_task_timeout.py | 401 | ||||
-rw-r--r-- | buildscripts/task_generation/suite_split.py | 2 | ||||
-rw-r--r-- | buildscripts/task_generation/task_types/resmoke_tasks.py | 2 | ||||
-rw-r--r-- | buildscripts/task_generation/timeout.py (renamed from buildscripts/timeouts/timeout.py) | 3 | ||||
-rw-r--r-- | buildscripts/tests/task_generation/test_timeout.py (renamed from buildscripts/tests/timeouts/test_timeout.py) | 2 | ||||
-rw-r--r-- | buildscripts/tests/test_evergreen_task_timeout.py | 191 | ||||
-rw-r--r-- | buildscripts/tests/timeouts/__init__.py | 1 | ||||
-rw-r--r-- | buildscripts/tests/timeouts/test_timeout_service.py | 258 | ||||
-rw-r--r-- | buildscripts/tests/util/test_taskname.py | 28 | ||||
-rw-r--r-- | buildscripts/timeouts/__init__.py | 1 | ||||
-rw-r--r-- | buildscripts/timeouts/timeout_service.py | 189 | ||||
-rw-r--r-- | buildscripts/util/taskname.py | 23 | ||||
-rw-r--r-- | docs/evergreen-testing/index.md | 5 | ||||
-rw-r--r-- | docs/evergreen-testing/task_timeouts.md | 35 | ||||
-rw-r--r-- | etc/evergreen.yml | 6 | ||||
-rw-r--r-- | etc/evergreen_timeouts.yml | 93 | ||||
-rw-r--r-- | evergreen/functions/task_timeout_determine.sh | 16 |
17 files changed, 115 insertions, 1141 deletions
diff --git a/buildscripts/evergreen_task_timeout.py b/buildscripts/evergreen_task_timeout.py index cb3ec5653cb..d9e60a9fc87 100755 --- a/buildscripts/evergreen_task_timeout.py +++ b/buildscripts/evergreen_task_timeout.py @@ -3,31 +3,14 @@ import argparse import math -import os import sys -from datetime import datetime, timedelta -from pathlib import Path -from typing import Dict, List, Optional +from datetime import timedelta +from typing import Optional -import inject -import structlog import yaml -from pydantic import BaseModel -from evergreen import EvergreenApi, RetryingEvergreenApi - -from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file) -from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings) -from buildscripts.util.cmdutils import enable_logging -from buildscripts.util.taskname import determine_task_base_name - -LOGGER = structlog.get_logger(__name__) -DEFAULT_TIMEOUT_OVERRIDES = "etc/evergreen_timeouts.yml" -DEFAULT_EVERGREEN_CONFIG = "etc/evergreen.yml" -DEFAULT_EVERGREEN_AUTH_CONFIG = "~/.evergreen.yml" + COMMIT_QUEUE_ALIAS = "__commit_queue" UNITTEST_TASK = "run_unittests" -IGNORED_SUITES = {"mongos_test"} -HISTORY_LOOKBACK = timedelta(weeks=2) COMMIT_QUEUE_TIMEOUT = timedelta(minutes=40) DEFAULT_REQUIRED_BUILD_TIMEOUT = timedelta(hours=1, minutes=20) @@ -36,92 +19,45 @@ DEFAULT_NON_REQUIRED_BUILD_TIMEOUT = timedelta(hours=2) # which is 5 mins 47 secs, excluding outliers below UNITTESTS_TIMEOUT = timedelta(minutes=12) - -class TimeoutOverride(BaseModel): - """ - Specification for overriding a task timeout. - - * task: Name of task to overide. - * exec_timeout: Value to override exec timeout with. - * idle_timeout: Value to override idle timeout with. - """ - - task: str - exec_timeout: Optional[int] = None - idle_timeout: Optional[int] = None - - def get_exec_timeout(self) -> Optional[timedelta]: - """Get a timedelta of the exec timeout to use.""" - if self.exec_timeout is not None: - return timedelta(minutes=self.exec_timeout) - return None - - def get_idle_timeout(self) -> Optional[timedelta]: - """Get a timedelta of the idle timeout to use.""" - if self.idle_timeout is not None: - return timedelta(minutes=self.idle_timeout) - return None - - -class TimeoutOverrides(BaseModel): - """Collection of timeout overrides to apply.""" - - overrides: Dict[str, List[TimeoutOverride]] - - @classmethod - def from_yaml_file(cls, file_path: Path) -> "TimeoutOverrides": - """Read the timeout overrides from the given file.""" - with open(file_path) as file_handler: - return cls(**yaml.safe_load(file_handler)) - - def _lookup_override(self, build_variant: str, task_name: str) -> Optional[TimeoutOverride]: - """ - Check if the given task on the given build variant has an override defined. - - Note: If multiple overrides are found, an exception will be raised. - - :param build_variant: Build Variant to check. - :param task_name: Task name to check. - :return: Timeout override if found. - """ - overrides = [ - override for override in self.overrides.get(build_variant, []) - if override.task == task_name - ] - if overrides: - if len(overrides) > 1: - LOGGER.error("Found multiple overrides for the same task", - build_variant=build_variant, task=task_name, - overrides=[override.dict() for override in overrides]) - raise ValueError(f"Found multiple overrides for '{task_name}' on '{build_variant}'") - return overrides[0] - return None - - def lookup_exec_override(self, build_variant: str, task_name: str) -> Optional[timedelta]: - """ - Look up the exec timeout override of the given build variant/task. - - :param build_variant: Build Variant to check. - :param task_name: Task name to check. - :return: Exec timeout override if found. - """ - override = self._lookup_override(build_variant, task_name) - if override is not None: - return override.get_exec_timeout() - return None - - def lookup_idle_override(self, build_variant: str, task_name: str) -> Optional[timedelta]: - """ - Look up the idle timeout override of the given build variant/task. - - :param build_variant: Build Variant to check. - :param task_name: Task name to check. - :return: Idle timeout override if found. - """ - override = self._lookup_override(build_variant, task_name) - if override is not None: - return override.get_idle_timeout() - return None +SPECIFIC_TASK_OVERRIDES = { + "linux-64-debug": {"auth": timedelta(minutes=60)}, + "enterprise-windows-all-feature-flags-suggested": { + "replica_sets_jscore_passthrough": timedelta(hours=3), + "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30), + }, + "enterprise-windows-required": { + "replica_sets_jscore_passthrough": timedelta(hours=3), + "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30), + }, + "enterprise-windows-inmem": {"replica_sets_jscore_passthrough": timedelta(hours=3), }, + "enterprise-windows": {"replica_sets_jscore_passthrough": timedelta(hours=3), }, + "windows-debug-suggested": { + "replica_sets_initsync_jscore_passthrough": timedelta(hours=2, minutes=30), + "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), + "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30), + }, + "windows": { + "replica_sets": timedelta(hours=3), + "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), + }, + "ubuntu1804-debug-suggested": {"replica_sets_jscore_passthrough": timedelta(hours=3), }, + "enterprise-rhel-80-64-bit-coverage": { + "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), + }, + "macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), }, + "enterprise-macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), }, + + # unittests outliers + # repeated execution runs a suite 10 times + "linux-64-repeated-execution": {UNITTEST_TASK: 10 * UNITTESTS_TIMEOUT}, + # some of the a/ub/t san variants need a little extra time + "enterprise-ubuntu2004-debug-tsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT}, + "ubuntu1804-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT}, + "ubuntu1804-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT}, + "ubuntu1804-debug-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT}, + "ubuntu1804-debug-aubsan-lite": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT}, + "ubuntu1804-debug-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT}, +} def _is_required_build_variant(build_variant: str) -> bool: @@ -134,20 +70,63 @@ def _is_required_build_variant(build_variant: str) -> bool: return build_variant.endswith("-required") -def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta], - output_file: Optional[str]) -> None: +def _has_override(variant: str, task_name: str) -> bool: + """ + Determine if the given task has a timeout override. + + :param variant: Build Variant task is running on. + :param task_name: Task to check. + :return: True if override exists for task. + """ + return variant in SPECIFIC_TASK_OVERRIDES and task_name in SPECIFIC_TASK_OVERRIDES[variant] + + +def determine_timeout(task_name: str, variant: str, idle_timeout: Optional[timedelta] = None, + exec_timeout: Optional[timedelta] = None, evg_alias: str = '') -> timedelta: + """ + Determine what exec timeout should be used. + + :param task_name: Name of task being run. + :param variant: Name of build variant being run. + :param idle_timeout: Idle timeout if specified. + :param exec_timeout: Override to use for exec_timeout or 0 if no override. + :param evg_alias: Evergreen alias running the task. + :return: Exec timeout to use for running task. + """ + determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT + + if exec_timeout and exec_timeout.total_seconds() != 0: + determined_timeout = exec_timeout + + elif task_name == UNITTEST_TASK and not _has_override(variant, task_name): + determined_timeout = UNITTESTS_TIMEOUT + + elif evg_alias == COMMIT_QUEUE_ALIAS: + determined_timeout = COMMIT_QUEUE_TIMEOUT + + elif _has_override(variant, task_name): + determined_timeout = SPECIFIC_TASK_OVERRIDES[variant][task_name] + + elif _is_required_build_variant(variant): + determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT + + # The timeout needs to be at least as large as the idle timeout. + if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds(): + return idle_timeout + + return determined_timeout + + +def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None: """ Output timeout configuration to the specified location. - :param exec_timeout: Exec timeout to output. - :param idle_timeout: Idle timeout to output. + :param task_timeout: Timeout to output. :param output_file: Location of output file to write. """ output = { - "exec_timeout_secs": math.ceil(exec_timeout.total_seconds()), + "exec_timeout_secs": math.ceil(task_timeout.total_seconds()), } - if idle_timeout is not None: - output["timeout_secs"] = math.ceil(idle_timeout.total_seconds()) if output_file: with open(output_file, "w") as outfile: @@ -156,216 +135,28 @@ def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta], yaml.dump(output, stream=sys.stdout, default_flow_style=False) -class TaskTimeoutOrchestrator: - """An orchestrator for determining task timeouts.""" - - @inject.autoparams() - def __init__(self, timeout_service: TimeoutService, timeout_overrides: TimeoutOverrides, - evg_project_config: EvergreenProjectConfig) -> None: - """ - Initialize the orchestrator. - - :param timeout_service: Service for calculating historic timeouts. - :param timeout_overrides: Timeout overrides for specific tasks. - """ - self.timeout_service = timeout_service - self.timeout_overrides = timeout_overrides - self.evg_project_config = evg_project_config - - def determine_exec_timeout( - self, task_name: str, variant: str, idle_timeout: Optional[timedelta] = None, - exec_timeout: Optional[timedelta] = None, evg_alias: str = "") -> timedelta: - """ - Determine what exec timeout should be used. - - :param task_name: Name of task being run. - :param variant: Name of build variant being run. - :param idle_timeout: Idle timeout if specified. - :param exec_timeout: Override to use for exec_timeout or 0 if no override. - :param evg_alias: Evergreen alias running the task. - :return: Exec timeout to use for running task. - """ - determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT - - override = self.timeout_overrides.lookup_exec_override(variant, task_name) - - if exec_timeout and exec_timeout.total_seconds() != 0: - LOGGER.info("Using timeout from cmd line", - exec_timeout_secs=exec_timeout.total_seconds()) - determined_timeout = exec_timeout - - elif task_name == UNITTEST_TASK and override is None: - LOGGER.info("Overriding unittest timeout", - exec_timeout_secs=UNITTESTS_TIMEOUT.total_seconds()) - determined_timeout = UNITTESTS_TIMEOUT - - elif evg_alias == COMMIT_QUEUE_ALIAS: - LOGGER.info("Overriding commit-queue timeout", - exec_timeout_secs=COMMIT_QUEUE_TIMEOUT.total_seconds()) - determined_timeout = COMMIT_QUEUE_TIMEOUT - - elif override is not None: - LOGGER.info("Overriding configured timeout", exec_timeout_secs=override.total_seconds()) - determined_timeout = override - - elif _is_required_build_variant(variant): - LOGGER.info("Overriding required-builder timeout", - exec_timeout_secs=DEFAULT_REQUIRED_BUILD_TIMEOUT.total_seconds()) - determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT - - # The timeout needs to be at least as large as the idle timeout. - if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds(): - LOGGER.info("Making exec timeout as large as idle timeout", - exec_timeout_secs=idle_timeout.total_seconds()) - return idle_timeout - - return determined_timeout - - def determine_idle_timeout(self, task_name: str, variant: str, - idle_timeout: Optional[timedelta] = None) -> Optional[timedelta]: - """ - Determine what idle timeout should be used. - - :param task_name: Name of task being run. - :param variant: Name of build variant being run. - :param idle_timeout: Override to use for idle_timeout. - :return: Idle timeout to use for running task. - """ - determined_timeout = None - override = self.timeout_overrides.lookup_idle_override(variant, task_name) - - if idle_timeout and idle_timeout.total_seconds() != 0: - LOGGER.info("Using timeout from cmd line", - idle_timeout_secs=idle_timeout.total_seconds()) - determined_timeout = idle_timeout - - elif override is not None: - LOGGER.info("Overriding configured timeout", idle_timeout_secs=override.total_seconds()) - determined_timeout = override - - return determined_timeout - - def determine_historic_timeout(self, task: str, variant: str, suite_name: str, - exec_timeout_factor: Optional[float]) -> Optional[timedelta]: - """ - Calculate the timeout based on historic test results. - - :param task: Name of task to query. - :param variant: Name of build variant to query. - :param suite_name: Name of test suite being run. - :param exec_timeout_factor: Scaling factor to use when determining timeout. - """ - if suite_name in IGNORED_SUITES: - return None - - timeout_params = TimeoutParams( - evg_project="mongodb-mongo-master", - build_variant=variant, - task_name=task, - suite_name=suite_name, - is_asan=self.is_build_variant_asan(variant), - ) - timeout_estimate = self.timeout_service.get_timeout_estimate(timeout_params) - if timeout_estimate and timeout_estimate.is_specified(): - exec_timeout = timeout_estimate.calculate_task_timeout( - repeat_factor=1, scaling_factor=exec_timeout_factor) - if exec_timeout is not None: - LOGGER.info("Using historic based timeout", exec_timeout_secs=exec_timeout) - return timedelta(seconds=exec_timeout) - return None - - def is_build_variant_asan(self, build_variant: str) -> bool: - """ - Determine if the given build variant is an ASAN build variant. - - :param build_variant: Name of build variant to check. - :return: True if build variant is an ASAN build variant. - """ - bv = self.evg_project_config.get_variant(build_variant) - return bv.is_asan_build() - - def determine_timeouts(self, cli_idle_timeout: Optional[timedelta], - cli_exec_timeout: Optional[timedelta], outfile: Optional[str], task: str, - variant: str, evg_alias: str, suite_name: str, - exec_timeout_factor: Optional[float]) -> None: - """ - Determine the timeouts to use for the given task and write timeouts to expansion file. - - :param cli_idle_timeout: Idle timeout specified by the CLI. - :param cli_exec_timeout: Exec timeout specified by the CLI. - :param outfile: File to write timeout expansions to. - :param variant: Build variant task is being run on. - :param evg_alias: Evergreen alias that triggered task. - :param suite_name: Name of evergreen suite being run. - :param exec_timeout_factor: Scaling factor to use when determining timeout. - """ - idle_timeout = self.determine_idle_timeout(task, variant, cli_idle_timeout) - exec_timeout = self.determine_exec_timeout(task, variant, idle_timeout, cli_exec_timeout, - evg_alias) - - historic_timeout = self.determine_historic_timeout(task, variant, suite_name, - exec_timeout_factor) - if historic_timeout: - exec_timeout = historic_timeout - - output_timeout(exec_timeout, idle_timeout, outfile) - - def main(): """Determine the timeout value a task should use in evergreen.""" parser = argparse.ArgumentParser(description=main.__doc__) parser.add_argument("--task-name", dest="task", required=True, help="Task being executed.") - parser.add_argument("--suite-name", dest="suite_name", required=True, - help="Resmoke suite being run against.") parser.add_argument("--build-variant", dest="variant", required=True, help="Build variant task is being executed on.") parser.add_argument("--evg-alias", dest="evg_alias", required=True, help="Evergreen alias used to trigger build.") parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout to use (in sec).") parser.add_argument("--exec-timeout", dest="exec_timeout", type=int, - help="Exec timeout to use (in sec).") - parser.add_argument("--exec-timeout-factor", dest="exec_timeout_factor", type=float, - help="Exec timeout factor to use (in sec).") + help="Exec timeout ot use (in sec).") parser.add_argument("--out-file", dest="outfile", help="File to write configuration to.") - parser.add_argument("--timeout-overrides", dest="timeout_overrides_file", - default=DEFAULT_TIMEOUT_OVERRIDES, - help="File containing timeout overrides to use.") - parser.add_argument("--evg-api-config", dest="evg_api_config", - default=DEFAULT_EVERGREEN_AUTH_CONFIG, help="Evergreen API config file.") - parser.add_argument("--evg-project-config", dest="evg_project_config", - default=DEFAULT_EVERGREEN_CONFIG, help="Evergreen project config file.") options = parser.parse_args() - end_date = datetime.now() - start_date = end_date - HISTORY_LOOKBACK - timeout_override = timedelta(seconds=options.timeout) if options.timeout else None exec_timeout_override = timedelta( seconds=options.exec_timeout) if options.exec_timeout else None - - task_name = determine_task_base_name(options.task, options.variant) - timeout_overrides = TimeoutOverrides.from_yaml_file( - os.path.expanduser(options.timeout_overrides_file)) - - enable_logging(verbose=False) - - def dependencies(binder: inject.Binder) -> None: - binder.bind( - EvergreenApi, - RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config))) - binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date)) - binder.bind(TimeoutOverrides, timeout_overrides) - binder.bind(EvergreenProjectConfig, - parse_evergreen_file(os.path.expanduser(options.evg_project_config))) - - inject.configure(dependencies) - - task_timeout_orchestrator = inject.instance(TaskTimeoutOrchestrator) - task_timeout_orchestrator.determine_timeouts( - timeout_override, exec_timeout_override, options.outfile, task_name, options.variant, - options.evg_alias, options.suite_name, options.exec_timeout_factor) + task_timeout = determine_timeout(options.task, options.variant, timeout_override, + exec_timeout_override, options.evg_alias) + output_timeout(task_timeout, options.outfile) if __name__ == "__main__": diff --git a/buildscripts/task_generation/suite_split.py b/buildscripts/task_generation/suite_split.py index 5e1e9d32115..e0b3cfbb449 100644 --- a/buildscripts/task_generation/suite_split.py +++ b/buildscripts/task_generation/suite_split.py @@ -13,7 +13,7 @@ from evergreen import EvergreenApi from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy -from buildscripts.timeouts.timeout import TimeoutEstimate +from buildscripts.task_generation.timeout import TimeoutEstimate from buildscripts.util import taskname from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name diff --git a/buildscripts/task_generation/task_types/resmoke_tasks.py b/buildscripts/task_generation/task_types/resmoke_tasks.py index 5ddd4f9a541..7ceab680265 100644 --- a/buildscripts/task_generation/task_types/resmoke_tasks.py +++ b/buildscripts/task_generation/task_types/resmoke_tasks.py @@ -14,7 +14,7 @@ from buildscripts.task_generation.task_types.gentask_options import GenTaskOptio from buildscripts.task_generation.task_types.models.resmoke_task_model import ResmokeTask from buildscripts.task_generation.task_types.multiversion_decorator import MultiversionGenTaskDecorator, \ MultiversionDecoratorParams -from buildscripts.timeouts.timeout import TimeoutEstimate +from buildscripts.task_generation.timeout import TimeoutEstimate LOGGER = structlog.getLogger(__name__) diff --git a/buildscripts/timeouts/timeout.py b/buildscripts/task_generation/timeout.py index 3e3440f9c5b..261c2a8b82d 100644 --- a/buildscripts/timeouts/timeout.py +++ b/buildscripts/task_generation/timeout.py @@ -1,10 +1,11 @@ """Timeout information for generating tasks.""" import math from datetime import timedelta -from inspect import currentframe, getframeinfo +from inspect import getframeinfo, currentframe from typing import NamedTuple, Optional import structlog + from buildscripts.patch_builds.task_generation import TimeoutInfo LOGGER = structlog.getLogger(__name__) diff --git a/buildscripts/tests/timeouts/test_timeout.py b/buildscripts/tests/task_generation/test_timeout.py index 1d450aed913..5d9fb48c6e6 100644 --- a/buildscripts/tests/timeouts/test_timeout.py +++ b/buildscripts/tests/task_generation/test_timeout.py @@ -1,7 +1,7 @@ """Unit tests for timeout.py.""" import unittest -from buildscripts.timeouts import timeout as under_test +from buildscripts.task_generation import timeout as under_test # pylint: disable=missing-docstring,invalid-name,unused-argument,no-self-use,protected-access,no-value-for-parameter diff --git a/buildscripts/tests/test_evergreen_task_timeout.py b/buildscripts/tests/test_evergreen_task_timeout.py index 2043de695c7..3728885f479 100644 --- a/buildscripts/tests/test_evergreen_task_timeout.py +++ b/buildscripts/tests/test_evergreen_task_timeout.py @@ -1,213 +1,46 @@ """Unit tests for the evergreen_task_timeout script.""" -import unittest from datetime import timedelta -from unittest.mock import MagicMock +import unittest import buildscripts.evergreen_task_timeout as under_test -from buildscripts.ciconfig.evergreen import EvergreenProjectConfig -from buildscripts.timeouts.timeout_service import TimeoutService - -# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access - - -class TestTimeoutOverride(unittest.TestCase): - def test_exec_timeout_should_be_settable(self): - timeout_override = under_test.TimeoutOverride(task="my task", exec_timeout=42) - - timeout = timeout_override.get_exec_timeout() - - self.assertIsNotNone(timeout) - self.assertEqual(42 * 60, timeout.total_seconds()) - - def test_exec_timeout_should_default_to_none(self): - timeout_override = under_test.TimeoutOverride(task="my task") - - timeout = timeout_override.get_exec_timeout() - - self.assertIsNone(timeout) - - def test_idle_timeout_should_be_settable(self): - timeout_override = under_test.TimeoutOverride(task="my task", idle_timeout=42) - - timeout = timeout_override.get_idle_timeout() - - self.assertIsNotNone(timeout) - self.assertEqual(42 * 60, timeout.total_seconds()) - - def test_idle_timeout_should_default_to_none(self): - timeout_override = under_test.TimeoutOverride(task="my task") - - timeout = timeout_override.get_idle_timeout() - - self.assertIsNone(timeout) - - -class TestTimeoutOverrides(unittest.TestCase): - def test_looking_up_a_non_existing_override_should_return_none(self): - timeout_overrides = under_test.TimeoutOverrides(overrides={}) - self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task")) - self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task")) +# pylint: disable=missing-docstring,no-self-use - def test_looking_up_a_duplicate_override_should_raise_error(self): - timeout_overrides = under_test.TimeoutOverrides( - overrides={ - "bv": [{ - "task": "task_name", - "exec_timeout": 42, - "idle_timeout": 10, - }, { - "task": "task_name", - "exec_timeout": 314, - "idle_timeout": 20, - }] - }) - with self.assertRaises(ValueError): - self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task_name")) - - with self.assertRaises(ValueError): - self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task_name")) - - def test_looking_up_an_exec_override_should_work(self): - timeout_overrides = under_test.TimeoutOverrides( - overrides={ - "bv": [ - { - "task": "another_task", - "exec_timeout": 314, - "idle_timeout": 20, - }, - { - "task": "task_name", - "exec_timeout": 42, - }, - ] - }) - - self.assertEqual(42 * 60, - timeout_overrides.lookup_exec_override("bv", "task_name").total_seconds()) - - def test_looking_up_an_idle_override_should_work(self): - timeout_overrides = under_test.TimeoutOverrides( - overrides={ - "bv": [ - { - "task": "another_task", - "exec_timeout": 314, - "idle_timeout": 20, - }, - { - "task": "task_name", - "idle_timeout": 10, - }, - ] - }) - - self.assertEqual(10 * 60, - timeout_overrides.lookup_idle_override("bv", "task_name").total_seconds()) - - -class TestDetermineExecTimeout(unittest.TestCase): +class DetermineTimeoutTest(unittest.TestCase): def test_timeout_used_if_specified(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) timeout = timedelta(seconds=42) self.assertEqual( - orchestrator.determine_exec_timeout("task_name", "variant", None, timeout), timeout) + under_test.determine_timeout("task_name", "variant", None, timeout), timeout) def test_default_is_returned_with_no_timeout(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) self.assertEqual( - orchestrator.determine_exec_timeout("task_name", "variant"), + under_test.determine_timeout("task_name", "variant"), under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT) def test_default_is_returned_with_timeout_at_zero(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) self.assertEqual( - orchestrator.determine_exec_timeout("task_name", "variant", timedelta(seconds=0)), + under_test.determine_timeout("task_name", "variant", timedelta(seconds=0)), under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT) def test_default_required_returned_on_required_variants(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) self.assertEqual( - orchestrator.determine_exec_timeout("task_name", "variant-required"), + under_test.determine_timeout("task_name", "variant-required"), under_test.DEFAULT_REQUIRED_BUILD_TIMEOUT) def test_task_specific_timeout(self): - mock_timeout_overrides = under_test.TimeoutOverrides( - overrides={"linux-64-debug": [{"task": "auth", "exec_timeout": 60}]}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) self.assertEqual( - orchestrator.determine_exec_timeout("auth", "linux-64-debug"), timedelta(minutes=60)) + under_test.determine_timeout("auth", "linux-64-debug"), timedelta(minutes=60)) def test_commit_queue_items_use_commit_queue_timeout(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) - timeout = orchestrator.determine_exec_timeout("auth", "variant", - evg_alias=under_test.COMMIT_QUEUE_ALIAS) + timeout = under_test.determine_timeout("auth", "variant", + evg_alias=under_test.COMMIT_QUEUE_ALIAS) self.assertEqual(timeout, under_test.COMMIT_QUEUE_TIMEOUT) def test_use_idle_timeout_if_greater_than_exec_timeout(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) idle_timeout = timedelta(hours=2) exec_timeout = timedelta(minutes=10) - timeout = orchestrator.determine_exec_timeout( - "task_name", "variant", idle_timeout=idle_timeout, exec_timeout=exec_timeout) + timeout = under_test.determine_timeout("task_name", "variant", idle_timeout=idle_timeout, + exec_timeout=exec_timeout) self.assertEqual(timeout, idle_timeout) - - -class TestDetermineIdleTimeout(unittest.TestCase): - def test_timeout_used_if_specified(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) - timeout = timedelta(seconds=42) - self.assertEqual( - orchestrator.determine_idle_timeout("task_name", "variant", timeout), timeout) - - def test_default_is_returned_with_no_timeout(self): - mock_timeout_overrides = under_test.TimeoutOverrides(overrides={}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) - self.assertIsNone(orchestrator.determine_idle_timeout("task_name", "variant")) - - def test_task_specific_timeout(self): - mock_timeout_overrides = under_test.TimeoutOverrides( - overrides={"linux-64-debug": [{"task": "auth", "idle_timeout": 60}]}) - orchestrator = under_test.TaskTimeoutOrchestrator( - timeout_service=MagicMock(spec_set=TimeoutService), - timeout_overrides=mock_timeout_overrides, - evg_project_config=MagicMock(spec_set=EvergreenProjectConfig)) - self.assertEqual( - orchestrator.determine_idle_timeout("auth", "linux-64-debug"), timedelta(minutes=60)) diff --git a/buildscripts/tests/timeouts/__init__.py b/buildscripts/tests/timeouts/__init__.py deleted file mode 100644 index 4b7a2bb941b..00000000000 --- a/buildscripts/tests/timeouts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Empty.""" diff --git a/buildscripts/tests/timeouts/test_timeout_service.py b/buildscripts/tests/timeouts/test_timeout_service.py deleted file mode 100644 index bb0dd8a0c3e..00000000000 --- a/buildscripts/tests/timeouts/test_timeout_service.py +++ /dev/null @@ -1,258 +0,0 @@ -"""Unit tests for timeout_service.py.""" -import random -import unittest -from datetime import datetime, timedelta -from unittest.mock import MagicMock - -from requests.exceptions import HTTPError -from evergreen import EvergreenApi - -import buildscripts.timeouts.timeout_service as under_test -from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService -from buildscripts.util.teststats import HistoricTaskData - -# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access - - -def build_mock_service(evg_api=None, resmoke_proxy=None): - end_date = datetime.now() - start_date = end_date - timedelta(weeks=2) - timeout_settings = under_test.TimeoutSettings( - end_date=end_date, - start_date=start_date, - ) - return under_test.TimeoutService( - evg_api=evg_api if evg_api else MagicMock(spec_set=EvergreenApi), - resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService), - timeout_settings=timeout_settings) - - -def tst_stat_mock(file, duration, pass_count): - return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count) - - -class TestGetTimeoutEstimate(unittest.TestCase): - def test_no_stats_should_return_default_timeout(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - mock_evg_api.test_stats_by_project.return_value = [] - timeout_service = build_mock_service(evg_api=mock_evg_api) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - timeout = timeout_service.get_timeout_estimate(timeout_params) - - self.assertFalse(timeout.is_specified()) - - def test_a_test_with_missing_history_should_cause_a_default_timeout(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)] - mock_evg_api.test_stats_by_project.return_value = test_stats - mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService) - mock_resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"] - timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - timeout = timeout_service.get_timeout_estimate(timeout_params) - - self.assertFalse(timeout.is_specified()) - - def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)] - test_stats.append(tst_stat_mock("zero.js", 0.0, 1)) - mock_evg_api.test_stats_by_project.return_value = test_stats - mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService) - mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats] - timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - timeout = timeout_service.get_timeout_estimate(timeout_params) - - self.assertFalse(timeout.is_specified()) - - def test_all_tests_with_runtime_history_should_use_custom_timeout(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - n_tests = 30 - test_runtime = 600 - test_stats = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)] - mock_evg_api.test_stats_by_project.return_value = test_stats - mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService) - mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats] - timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - timeout = timeout_service.get_timeout_estimate(timeout_params) - - self.assertTrue(timeout.is_specified()) - self.assertEqual(1860, timeout.calculate_test_timeout(1)) - self.assertEqual(54180, timeout.calculate_task_timeout(1)) - - -class TestGetTaskHookOverhead(unittest.TestCase): - def test_no_stats_should_return_zero(self): - timeout_service = build_mock_service() - - overhead = timeout_service.get_task_hook_overhead("suite", is_asan=False, test_count=30, - historic_stats=None) - - self.assertEqual(0.0, overhead) - - def test_stats_with_no_clean_every_n_should_return_zero(self): - timeout_service = build_mock_service() - test_stats = HistoricTaskData.from_stats_list( - [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]) - - overhead = timeout_service.get_task_hook_overhead("suite", is_asan=False, test_count=30, - historic_stats=test_stats) - - self.assertEqual(0.0, overhead) - - def test_stats_with_clean_every_n_should_return_overhead(self): - test_count = 30 - runtime = 25 - timeout_service = build_mock_service() - test_stat_list = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(test_count)] - test_stat_list.extend([ - tst_stat_mock(f"test_{i}:{under_test.CLEAN_EVERY_N_HOOK}", runtime, 1) - for i in range(10) - ]) - random.shuffle(test_stat_list) - test_stats = HistoricTaskData.from_stats_list(test_stat_list) - - overhead = timeout_service.get_task_hook_overhead( - "suite", is_asan=True, test_count=test_count, historic_stats=test_stats) - - self.assertEqual(runtime * test_count, overhead) - - -class TestLookupHistoricStats(unittest.TestCase): - def test_no_stats_from_evergreen_should_return_none(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - mock_evg_api.test_stats_by_project.return_value = [] - timeout_service = build_mock_service(evg_api=mock_evg_api) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - stats = timeout_service.lookup_historic_stats(timeout_params) - - self.assertIsNone(stats) - - def test_errors_from_evergreen_should_return_none(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - mock_evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect") - timeout_service = build_mock_service(evg_api=mock_evg_api) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - stats = timeout_service.lookup_historic_stats(timeout_params) - - self.assertIsNone(stats) - - def test_stats_from_evergreen_should_return_the_stats(self): - mock_evg_api = MagicMock(spec_set=EvergreenApi) - test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)] - mock_evg_api.test_stats_by_project.return_value = test_stats - timeout_service = build_mock_service(evg_api=mock_evg_api) - timeout_params = under_test.TimeoutParams( - evg_project="my project", - build_variant="bv", - task_name="my task", - suite_name="my suite", - is_asan=False, - ) - - stats = timeout_service.lookup_historic_stats(timeout_params) - - self.assertIsNotNone(stats) - self.assertEqual(len(test_stats), len(stats.historic_test_results)) - - -class TestGetCleanEveryNCadence(unittest.TestCase): - def test_clean_every_n_cadence_on_asan(self): - timeout_service = build_mock_service() - - cadence = timeout_service._get_clean_every_n_cadence("suite", True) - - self.assertEqual(1, cadence) - - def test_clean_every_n_cadence_from_hook_config(self): - expected_n = 42 - mock_resmoke_proxy = MagicMock() - mock_resmoke_proxy.read_suite_config.return_value = { - "executor": { - "hooks": [{ - "class": "hook1", - }, { - "class": under_test.CLEAN_EVERY_N_HOOK, - "n": expected_n, - }] - } - } - timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy) - - cadence = timeout_service._get_clean_every_n_cadence("suite", False) - - self.assertEqual(expected_n, cadence) - - def test_clean_every_n_cadence_no_n_in_hook_config(self): - mock_resmoke_proxy = MagicMock() - mock_resmoke_proxy.read_suite_config.return_value = { - "executor": { - "hooks": [{ - "class": "hook1", - }, { - "class": under_test.CLEAN_EVERY_N_HOOK, - }] - } - } - timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy) - - cadence = timeout_service._get_clean_every_n_cadence("suite", False) - - self.assertEqual(1, cadence) - - def test_clean_every_n_cadence_no_hook_config(self): - mock_resmoke_proxy = MagicMock() - mock_resmoke_proxy.read_suite_config.return_value = { - "executor": {"hooks": [{ - "class": "hook1", - }, ]} - } - timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy) - - cadence = timeout_service._get_clean_every_n_cadence("suite", False) - - self.assertEqual(1, cadence) diff --git a/buildscripts/tests/util/test_taskname.py b/buildscripts/tests/util/test_taskname.py index 7f3296ca1aa..22ab279066a 100644 --- a/buildscripts/tests/util/test_taskname.py +++ b/buildscripts/tests/util/test_taskname.py @@ -4,7 +4,7 @@ import unittest import buildscripts.util.taskname as under_test -# pylint: disable=missing-docstring,protected-access,invalid-name +# pylint: disable=missing-docstring,protected-access class TestNameTask(unittest.TestCase): @@ -24,29 +24,3 @@ class TestRemoveGenSuffix(unittest.TestCase): input_task_name = "sharded_multi_stmt_txn_jscore_passthroug" self.assertEqual("sharded_multi_stmt_txn_jscore_passthroug", under_test.remove_gen_suffix(input_task_name)) - - -class TestDetermineTaskBaseName(unittest.TestCase): - def test_task_name_with_build_variant_should_strip_bv_and_sub_task_index(self): - bv = "enterprise-rhel-80-64-bit-dynamic-required" - task_name = f"auth_23_{bv}" - - base_task_name = under_test.determine_task_base_name(task_name, bv) - - self.assertEqual("auth", base_task_name) - - def test_task_name_without_build_variant_should_strip_sub_task_index(self): - bv = "enterprise-rhel-80-64-bit-dynamic-required" - task_name = "auth_314" - - base_task_name = under_test.determine_task_base_name(task_name, bv) - - self.assertEqual("auth", base_task_name) - - def test_task_name_without_build_variant_or_subtask_index_should_self(self): - bv = "enterprise-rhel-80-64-bit-dynamic-required" - task_name = "auth" - - base_task_name = under_test.determine_task_base_name(task_name, bv) - - self.assertEqual("auth", base_task_name) diff --git a/buildscripts/timeouts/__init__.py b/buildscripts/timeouts/__init__.py deleted file mode 100644 index 4b7a2bb941b..00000000000 --- a/buildscripts/timeouts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Empty.""" diff --git a/buildscripts/timeouts/timeout_service.py b/buildscripts/timeouts/timeout_service.py deleted file mode 100644 index 8c0d5ad58cd..00000000000 --- a/buildscripts/timeouts/timeout_service.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Service for determining task timeouts.""" -from datetime import datetime -from typing import Any, Dict, NamedTuple, Optional - -import inject -import structlog -from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService -from buildscripts.timeouts.timeout import TimeoutEstimate -from buildscripts.util.teststats import HistoricTaskData -from evergreen import EvergreenApi - -LOGGER = structlog.get_logger(__name__) -CLEAN_EVERY_N_HOOK = "CleanEveryN" - - -class TimeoutParams(NamedTuple): - """ - Parameters about task being run. - - * evg_project: Evergreen project. - * build_variant: Evergreen build variant. - * task_name: Evergreen task_name. - * suite_name: Test Suite being run. - * is_asan: Whether this run is part of an asan build. - """ - - evg_project: str - build_variant: str - task_name: str - suite_name: str - is_asan: bool - - -class TimeoutSettings(NamedTuple): - """Settings for determining timeouts.""" - - start_date: datetime - end_date: datetime - - -class TimeoutService: - """A service for determining task timeouts.""" - - @inject.autoparams() - def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService, - timeout_settings: TimeoutSettings) -> None: - """ - Initialize the service. - - :param evg_api: Evergreen API client. - :param resmoke_proxy: Proxy to query resmoke. - :param timeout_settings: Settings for how timeouts are calculated. - """ - self.evg_api = evg_api - self.resmoke_proxy = resmoke_proxy - self.timeout_settings = timeout_settings - - def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate: - """ - Calculate the timeout estimate for the given task based on historic test results. - - :param timeout_params: Details about the task to query. - :return: Timeouts to use based on historic test results. - """ - historic_stats = self.lookup_historic_stats(timeout_params) - if not historic_stats: - return TimeoutEstimate.no_timeouts() - - test_set = set(self.resmoke_proxy.list_tests(timeout_params.suite_name)) - test_runtimes = [ - stat for stat in historic_stats.get_tests_runtimes() if stat.test_name in test_set - ] - test_runtime_set = {test.test_name for test in test_runtimes} - for test in test_set: - if test not in test_runtime_set: - # If we don't have historic runtime information for all the tests, we cannot - # reliable determine a timeout, so fallback to a default timeout. - LOGGER.warning( - "Could not find historic runtime information for test, using default timeout", - test=test) - return TimeoutEstimate.no_timeouts() - - total_runtime = 0.0 - max_runtime = 0.0 - - for runtime in test_runtimes: - if runtime.runtime > 0.0: - total_runtime += runtime.runtime - max_runtime = max(max_runtime, runtime.runtime) - else: - LOGGER.warning("Found a test with 0 runtime, using default timeouts", - test=runtime.test_name) - # We found a test with a runtime of 0, which indicates that it does not have a - # proper runtime history, so fall back to a default timeout. - return TimeoutEstimate.no_timeouts() - - hook_overhead = self.get_task_hook_overhead( - timeout_params.suite_name, timeout_params.is_asan, len(test_set), historic_stats) - total_runtime += hook_overhead - - return TimeoutEstimate(max_test_runtime=max_runtime, expected_task_runtime=total_runtime) - - def get_task_hook_overhead(self, suite_name: str, is_asan: bool, test_count: int, - historic_stats: Optional[HistoricTaskData]) -> float: - """ - Add how much overhead task-level hooks each suite should account for. - - Certain test hooks need to be accounted for on the task level instead of the test level - in order to calculate accurate timeouts. So we will add details about those hooks to - each suite here. - - :param suite_name: Name of suite being generated. - :param is_asan: Whether ASAN is being used. - :param test_count: Number of tests in sub-suite. - :param historic_stats: Historic runtime data of the suite. - """ - # The CleanEveryN hook is run every 'N' tests. The runtime of the - # hook will be associated with whichever test happens to be running, which could be - # different every run. So we need to take its runtime into account at the task level. - if historic_stats is None: - return 0.0 - - clean_every_n_cadence = self._get_clean_every_n_cadence(suite_name, is_asan) - avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK) - LOGGER.debug("task hook overhead", cadence=clean_every_n_cadence, - runtime=avg_clean_every_n_runtime, is_asan=is_asan) - if avg_clean_every_n_runtime != 0: - n_expected_runs = test_count / clean_every_n_cadence - return n_expected_runs * avg_clean_every_n_runtime - return 0.0 - - def lookup_historic_stats(self, timeout_params: TimeoutParams) -> Optional[HistoricTaskData]: - """ - Lookup historic test results stats for the given task. - - :param timeout_params: Details about the task to lookup. - :return: Historic test results if they exist. - """ - try: - evg_stats = HistoricTaskData.from_evg( - self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date, - self.timeout_settings.end_date, timeout_params.task_name, - timeout_params.build_variant) - if not evg_stats: - LOGGER.warning("No historic runtime information available") - return None - return evg_stats - except Exception: # pylint: disable=broad-except - # If we have any trouble getting the historic runtime information, log the issue, but - # don't fall back to default timeouts instead of failing. - LOGGER.warning("Error querying history runtime information from evergreen", - exc_info=True) - return None - - def _get_clean_every_n_cadence(self, suite_name: str, is_asan: bool) -> int: - """ - Get the N value for the CleanEveryN hook. - - :param suite_name: Name of suite being generated. - :param is_asan: Whether ASAN is being used. - :return: How frequently clean every end is run. - """ - # Default to 1, which is the worst case meaning CleanEveryN would run for every test. - clean_every_n_cadence = 1 - if is_asan: - # ASAN runs hard-code N to 1. See `resmokelib/testing/hooks/cleanup.py`. - return clean_every_n_cadence - - clean_every_n_config = self._get_hook_config(suite_name, CLEAN_EVERY_N_HOOK) - if clean_every_n_config: - clean_every_n_cadence = clean_every_n_config.get("n", 1) - - return clean_every_n_cadence - - def _get_hook_config(self, suite_name: str, hook_name: str) -> Optional[Dict[str, Any]]: - """ - Get the configuration for the given hook. - - :param hook_name: Name of hook to query. - :return: Configuration for hook, if it exists. - """ - hooks_config = self.resmoke_proxy.read_suite_config(suite_name).get("executor", - {}).get("hooks") - if hooks_config: - for hook in hooks_config: - if hook.get("class") == hook_name: - return hook - - return None diff --git a/buildscripts/util/taskname.py b/buildscripts/util/taskname.py index 784fc6d6555..7dd3b12685b 100644 --- a/buildscripts/util/taskname.py +++ b/buildscripts/util/taskname.py @@ -1,7 +1,6 @@ """Functions for working with resmoke task names.""" import math -import re GEN_SUFFIX = "_gen" @@ -37,25 +36,3 @@ def remove_gen_suffix(task_name: str) -> str: if task_name.endswith(GEN_SUFFIX): return task_name[:-4] return task_name - - -def determine_task_base_name(task_name: str, build_variant: str) -> str: - """ - Determine the base name of a task. - - For generated tasks the base name will have the build variant and sub-task index - stripped off. For other tasks, it is the unmodified task_name. - - :param task_name: Name of task to get base name of. - :param build_variant: Build variant that may be included in task name. - :return: Base name of given task. - """ - match = re.match(f"(.*)_([0-9]+|misc)_{build_variant}", task_name) - if match: - return match.group(1) - - match = re.match(r"(.*)_([0-9]+|misc)", task_name) - if match: - return match.group(1) - - return task_name diff --git a/docs/evergreen-testing/index.md b/docs/evergreen-testing/index.md deleted file mode 100644 index f57692ade9a..00000000000 --- a/docs/evergreen-testing/index.md +++ /dev/null @@ -1,5 +0,0 @@ -# Testing in Evergreen - -Documentation about how MongoDB is tested in Evergreen. - -* [Task Timeouts](task_timeouts.md) diff --git a/docs/evergreen-testing/task_timeouts.md b/docs/evergreen-testing/task_timeouts.md deleted file mode 100644 index e370aad22c9..00000000000 --- a/docs/evergreen-testing/task_timeouts.md +++ /dev/null @@ -1,35 +0,0 @@ -# Evergreen Task Timeouts - -## Type of timeouts - -There are two types of timeouts that [evergreen supports](https://github.com/evergreen-ci/evergreen/wiki/Project-Commands#timeoutupdate): - -* **Exec timeout**: The _exec_ timeout is the overall timeout for a task. Once the total runtime for -a test hits this value, the timeout logic will be triggered. This value is specified by -**exec_timeout_secs** in the evergreen configuration. -* **Idle timeout**: The _idle_ timeout is the amount of time in which evergreen will wait for -output to be created before it considers the task hung and triggers timeout logic. This value -is specified by **timeout_secs** in the evergreen configuration. - -**Note**: In most cases, **exec_timeout** is usually the more useful of the timeouts. - -## Setting the timeout for a task - -There are a few ways in which the timeout can be determined for a task running in evergreen. - -* **Specified in 'etc/evergreen.yml'**: Timeout can be specified directly in the 'evergreen.yml' file, -both on tasks and build variants. This can be useful for setting default timeout values, but is limited -since different build variants frequently have different runtime characteristics and it is not possible -to set timeouts for a task running on a specific build variant. - -* **etc/evergreen_timeouts.yml**: The 'etc/evergreen_timeouts.yml' file for overriding timeouts -for specific tasks on specific build variants. This provides a work-around for the limitations of -specifying the timeouts directly in the 'evergreen.yml'. In order to use this method, the task -must run the "determine task timeout" and "update task timeout expansions" functions at the beginning -of the task evergreen definition. Most resmoke tasks already do this. - -* **buildscripts/evergreen_task_timeout.py**: This is the script that reads the 'etc/evergreen_timeouts.yml' -file and calculates the timeout to use. Additionally, it will check the historic test results of the -task being run and see if there is enough information to calculate timeouts based on that. It can -also be used for more advanced ways of determining timeouts (e.g. the script is used to set much -more aggressive timeouts on tasks that are run in the commit-queue). diff --git a/etc/evergreen.yml b/etc/evergreen.yml index bbd46888ca5..43f743dbfbc 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -1218,9 +1218,6 @@ functions: - *update_resmoke_jobs_expansions - *f_expansions_write - *configure_evergreen_api_credentials - - *determine_task_timeout - - *update_task_timeout_expansions - - *f_expansions_write - command: subprocess.exec params: binary: bash @@ -1246,7 +1243,6 @@ functions: "run tests": - *f_expansions_write - - *configure_evergreen_api_credentials - *determine_task_timeout - *update_task_timeout_expansions - *f_expansions_write @@ -2250,9 +2246,7 @@ tasks: - "./build/**.gcno" - "./etc/*san.suppressions" - "./etc/backports_required_for_multiversion_tests.yml" - - "./etc/evergreen_timeouts.yml" - "./etc/expansions.default.yml" - - "./etc/evergreen.yml" - "./etc/pip/**" - "./etc/repo_config.yaml" - "./etc/scons/**" diff --git a/etc/evergreen_timeouts.yml b/etc/evergreen_timeouts.yml deleted file mode 100644 index b25ce231d27..00000000000 --- a/etc/evergreen_timeouts.yml +++ /dev/null @@ -1,93 +0,0 @@ -# This file defines timeouts in evergreen that will override the default timeouts. -# -# Each key under `overrides` provides the build variant where the override will occur. The -# override should include the `task` that should have its timeout overridden and either the -# `exec_timeout` to override or the `idle_timeout` to override. -# -# The timeouts should be specified in minutes. - -# Note: In order to make it easier to find existing entries, please try to keep the build variants -# in alphabetical order. - -overrides: - enterprise-macos: - - task: replica_sets_jscore_passthrough - exec_timeout: 150 # 2.5 hours - - enterprise-rhel-80-64-bit-coverage: - - task: replica_sets_jscore_passthrough - exec_timeout: 150 # 2.5 hours. - - enterprise-ubuntu2004-debug-tsan: - - task: run_unittests - exec_timeout: 24 - - enterprise-windows: - - task: replica_sets_jscore_passthrough - exec_timeout: 180 # 3 hours. - - enterprise-windows-all-feature-flags-suggested: - - task: replica_sets_jscore_passthrough - exec_timeout: 180 # 3 hours. - - task: replica_sets_update_v1_oplog_jscore_passthrough - exec_timeout: 150 # 2.5 hours. - - enterprise-windows-inmem: - - task: replica_sets_jscore_passthrough - exec_timeout: 180 # 3 hours. - - enterprise-windows-required: - - task: replica_sets_jscore_passthrough - exec_timeout: 180 # 3 hours. - - task: replica_sets_update_v1_oplog_jscore_passthrough - exec_timeout: 150 # 2.5 hours. - - linux-64-debug: - - task: auth - exec_timeout: 60 # 1 hour. - - linux-64-debug-repeated-execution: - - task: run_unittests - exec_timeout: 120 # 2 hours. - - macos: - - task: replica_sets_jscore_passthrough - exec_timeout: 150 # 2.5 hours - - ubuntu1804-asan: - - task: run_unittests - exec_timeout: 24 - - ubuntu1804-debug-asan: - - task: run_unittests - exec_timeout: 24 - - ubuntu1804-debug-aubsan-lite: - - task: run_unittests - exec_timeout: 24 - - ubuntu1804-debug-ubsan: - - task: run_unittests - exec_timeout: 24 - - ubuntu1804-debug-suggested: - - task: replica_sets_jscore_passthrough - exec_timeout: 180 # 3 hours. - - ubuntu1804-ubsan: - - task: run_unittests - exec_timeout: 24 - - windows: - - task: replica_sets - exec_timeout: 180 # 3 hours. - - task: replica_sets_jscore_passthrough - exec_timeout: 150 # 2.5 hours. - - windows-debug-suggested: - - task: replica_sets_initsync_jscore_passthrough - exec_timeout: 150 # 2.5 hours. - - task: replica_sets_jscore_passthrough - exec_timeout: 180 # 3 hours. - - task: replica_sets_update_v1_oplog_jscore_passthrough - exec_timeout: 150 # 2.5 hours. diff --git a/evergreen/functions/task_timeout_determine.sh b/evergreen/functions/task_timeout_determine.sh index f63416b2374..645aedbc302 100644 --- a/evergreen/functions/task_timeout_determine.sh +++ b/evergreen/functions/task_timeout_determine.sh @@ -5,25 +5,11 @@ cd src set -o verbose set -o errexit - -# Set the suite name to be the task name by default; unless overridden with the `suite` expansion. -suite_name=${task_name} -if [[ -n ${suite} ]]; then - suite_name=${suite} -fi - -timeout_factor="" -if [[ -n "${exec_timeout_factor}" ]]; then - timeout_factor="--exec-timeout-factor ${exec_timeout_factor}" -fi - activate_venv -PATH=$PATH:$HOME:/ $python buildscripts/evergreen_task_timeout.py $timeout_factor \ +$python buildscripts/evergreen_task_timeout.py \ --task-name ${task_name} \ - --suite-name ${suite_name} \ --build-variant ${build_variant} \ --evg-alias '${alias}' \ --timeout ${timeout_secs} \ --exec-timeout ${exec_timeout_secs} \ - --evg-api-config ./.evergreen.yml \ --out-file task_timeout_expansions.yml |