summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuildscripts/evergreen_task_timeout.py401
-rw-r--r--buildscripts/task_generation/suite_split.py2
-rw-r--r--buildscripts/task_generation/task_types/resmoke_tasks.py2
-rw-r--r--buildscripts/tests/test_evergreen_task_timeout.py191
-rw-r--r--buildscripts/tests/timeouts/__init__.py1
-rw-r--r--buildscripts/tests/timeouts/test_timeout.py (renamed from buildscripts/tests/task_generation/test_timeout.py)2
-rw-r--r--buildscripts/tests/timeouts/test_timeout_service.py258
-rw-r--r--buildscripts/tests/util/test_taskname.py28
-rw-r--r--buildscripts/timeouts/__init__.py1
-rw-r--r--buildscripts/timeouts/timeout.py (renamed from buildscripts/task_generation/timeout.py)3
-rw-r--r--buildscripts/timeouts/timeout_service.py189
-rw-r--r--buildscripts/util/taskname.py23
-rw-r--r--docs/evergreen-testing/index.md5
-rw-r--r--docs/evergreen-testing/task_timeouts.md35
-rw-r--r--etc/evergreen.yml6
-rw-r--r--etc/evergreen_timeouts.yml93
-rw-r--r--evergreen/functions/task_timeout_determine.sh16
17 files changed, 1141 insertions, 115 deletions
diff --git a/buildscripts/evergreen_task_timeout.py b/buildscripts/evergreen_task_timeout.py
index d9e60a9fc87..cb3ec5653cb 100755
--- a/buildscripts/evergreen_task_timeout.py
+++ b/buildscripts/evergreen_task_timeout.py
@@ -3,14 +3,31 @@
import argparse
import math
+import os
import sys
-from datetime import timedelta
-from typing import Optional
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Dict, List, Optional
+import inject
+import structlog
import yaml
-
+from pydantic import BaseModel
+from evergreen import EvergreenApi, RetryingEvergreenApi
+
+from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file)
+from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings)
+from buildscripts.util.cmdutils import enable_logging
+from buildscripts.util.taskname import determine_task_base_name
+
+LOGGER = structlog.get_logger(__name__)
+DEFAULT_TIMEOUT_OVERRIDES = "etc/evergreen_timeouts.yml"
+DEFAULT_EVERGREEN_CONFIG = "etc/evergreen.yml"
+DEFAULT_EVERGREEN_AUTH_CONFIG = "~/.evergreen.yml"
COMMIT_QUEUE_ALIAS = "__commit_queue"
UNITTEST_TASK = "run_unittests"
+IGNORED_SUITES = {"mongos_test"}
+HISTORY_LOOKBACK = timedelta(weeks=2)
COMMIT_QUEUE_TIMEOUT = timedelta(minutes=40)
DEFAULT_REQUIRED_BUILD_TIMEOUT = timedelta(hours=1, minutes=20)
@@ -19,114 +36,118 @@ DEFAULT_NON_REQUIRED_BUILD_TIMEOUT = timedelta(hours=2)
# which is 5 mins 47 secs, excluding outliers below
UNITTESTS_TIMEOUT = timedelta(minutes=12)
-SPECIFIC_TASK_OVERRIDES = {
- "linux-64-debug": {"auth": timedelta(minutes=60)},
- "enterprise-windows-all-feature-flags-suggested": {
- "replica_sets_jscore_passthrough": timedelta(hours=3),
- "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
- },
- "enterprise-windows-required": {
- "replica_sets_jscore_passthrough": timedelta(hours=3),
- "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
- },
- "enterprise-windows-inmem": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
- "enterprise-windows": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
- "windows-debug-suggested": {
- "replica_sets_initsync_jscore_passthrough": timedelta(hours=2, minutes=30),
- "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
- "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
- },
- "windows": {
- "replica_sets": timedelta(hours=3),
- "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
- },
- "ubuntu1804-debug-suggested": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
- "enterprise-rhel-80-64-bit-coverage": {
- "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
- },
- "macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
- "enterprise-macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
-
- # unittests outliers
- # repeated execution runs a suite 10 times
- "linux-64-repeated-execution": {UNITTEST_TASK: 10 * UNITTESTS_TIMEOUT},
- # some of the a/ub/t san variants need a little extra time
- "enterprise-ubuntu2004-debug-tsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
- "ubuntu1804-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
- "ubuntu1804-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
- "ubuntu1804-debug-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
- "ubuntu1804-debug-aubsan-lite": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
- "ubuntu1804-debug-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-}
-
-def _is_required_build_variant(build_variant: str) -> bool:
+class TimeoutOverride(BaseModel):
"""
- Determine if the given build variants is a required build variant.
+ Specification for overriding a task timeout.
- :param build_variant: Name of build variant to check.
- :return: True if the given build variant is required.
+ * task: Name of task to override.
+ * exec_timeout: Value to override exec timeout with.
+ * idle_timeout: Value to override idle timeout with.
"""
- return build_variant.endswith("-required")
+ task: str
+ exec_timeout: Optional[int] = None
+ idle_timeout: Optional[int] = None
+
+ def get_exec_timeout(self) -> Optional[timedelta]:
+ """Get a timedelta of the exec timeout to use."""
+ if self.exec_timeout is not None:
+ return timedelta(minutes=self.exec_timeout)
+ return None
+
+ def get_idle_timeout(self) -> Optional[timedelta]:
+ """Get a timedelta of the idle timeout to use."""
+ if self.idle_timeout is not None:
+ return timedelta(minutes=self.idle_timeout)
+ return None
+
+
+class TimeoutOverrides(BaseModel):
+ """Collection of timeout overrides to apply."""
+
+ overrides: Dict[str, List[TimeoutOverride]]
+
+ @classmethod
+ def from_yaml_file(cls, file_path: Path) -> "TimeoutOverrides":
+ """Read the timeout overrides from the given file."""
+ with open(file_path) as file_handler:
+ return cls(**yaml.safe_load(file_handler))
+
+ def _lookup_override(self, build_variant: str, task_name: str) -> Optional[TimeoutOverride]:
+ """
+ Check if the given task on the given build variant has an override defined.
+
+ Note: If multiple overrides are found, an exception will be raised.
+
+ :param build_variant: Build Variant to check.
+ :param task_name: Task name to check.
+ :return: Timeout override if found.
+ """
+ overrides = [
+ override for override in self.overrides.get(build_variant, [])
+ if override.task == task_name
+ ]
+ if overrides:
+ if len(overrides) > 1:
+ LOGGER.error("Found multiple overrides for the same task",
+ build_variant=build_variant, task=task_name,
+ overrides=[override.dict() for override in overrides])
+ raise ValueError(f"Found multiple overrides for '{task_name}' on '{build_variant}'")
+ return overrides[0]
+ return None
+
+ def lookup_exec_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
+ """
+ Look up the exec timeout override of the given build variant/task.
+
+ :param build_variant: Build Variant to check.
+ :param task_name: Task name to check.
+ :return: Exec timeout override if found.
+ """
+ override = self._lookup_override(build_variant, task_name)
+ if override is not None:
+ return override.get_exec_timeout()
+ return None
+
+ def lookup_idle_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
+ """
+ Look up the idle timeout override of the given build variant/task.
+
+ :param build_variant: Build Variant to check.
+ :param task_name: Task name to check.
+ :return: Idle timeout override if found.
+ """
+ override = self._lookup_override(build_variant, task_name)
+ if override is not None:
+ return override.get_idle_timeout()
+ return None
-def _has_override(variant: str, task_name: str) -> bool:
- """
- Determine if the given task has a timeout override.
- :param variant: Build Variant task is running on.
- :param task_name: Task to check.
- :return: True if override exists for task.
+def _is_required_build_variant(build_variant: str) -> bool:
"""
- return variant in SPECIFIC_TASK_OVERRIDES and task_name in SPECIFIC_TASK_OVERRIDES[variant]
-
+ Determine if the given build variant is a required build variant.
-def determine_timeout(task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
- exec_timeout: Optional[timedelta] = None, evg_alias: str = '') -> timedelta:
- """
- Determine what exec timeout should be used.
-
- :param task_name: Name of task being run.
- :param variant: Name of build variant being run.
- :param idle_timeout: Idle timeout if specified.
- :param exec_timeout: Override to use for exec_timeout or 0 if no override.
- :param evg_alias: Evergreen alias running the task.
- :return: Exec timeout to use for running task.
+ :param build_variant: Name of build variant to check.
+ :return: True if the given build variant is required.
"""
- determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
-
- if exec_timeout and exec_timeout.total_seconds() != 0:
- determined_timeout = exec_timeout
-
- elif task_name == UNITTEST_TASK and not _has_override(variant, task_name):
- determined_timeout = UNITTESTS_TIMEOUT
-
- elif evg_alias == COMMIT_QUEUE_ALIAS:
- determined_timeout = COMMIT_QUEUE_TIMEOUT
-
- elif _has_override(variant, task_name):
- determined_timeout = SPECIFIC_TASK_OVERRIDES[variant][task_name]
-
- elif _is_required_build_variant(variant):
- determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT
-
- # The timeout needs to be at least as large as the idle timeout.
- if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds():
- return idle_timeout
-
- return determined_timeout
+ return build_variant.endswith("-required")
-def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
+def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta],
+ output_file: Optional[str]) -> None:
"""
Output timeout configuration to the specified location.
- :param task_timeout: Timeout to output.
+ :param exec_timeout: Exec timeout to output.
+ :param idle_timeout: Idle timeout to output.
:param output_file: Location of output file to write.
"""
output = {
- "exec_timeout_secs": math.ceil(task_timeout.total_seconds()),
+ "exec_timeout_secs": math.ceil(exec_timeout.total_seconds()),
}
+ if idle_timeout is not None:
+ output["timeout_secs"] = math.ceil(idle_timeout.total_seconds())
if output_file:
with open(output_file, "w") as outfile:
@@ -135,28 +156,216 @@ def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
yaml.dump(output, stream=sys.stdout, default_flow_style=False)
+class TaskTimeoutOrchestrator:
+ """An orchestrator for determining task timeouts."""
+
+ @inject.autoparams()
+ def __init__(self, timeout_service: TimeoutService, timeout_overrides: TimeoutOverrides,
+ evg_project_config: EvergreenProjectConfig) -> None:
+ """
+ Initialize the orchestrator.
+
+ :param timeout_service: Service for calculating historic timeouts.
+ :param timeout_overrides: Timeout overrides for specific tasks.
+ """
+ self.timeout_service = timeout_service
+ self.timeout_overrides = timeout_overrides
+ self.evg_project_config = evg_project_config
+
+ def determine_exec_timeout(
+ self, task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
+ exec_timeout: Optional[timedelta] = None, evg_alias: str = "") -> timedelta:
+ """
+ Determine what exec timeout should be used.
+
+ :param task_name: Name of task being run.
+ :param variant: Name of build variant being run.
+ :param idle_timeout: Idle timeout if specified.
+ :param exec_timeout: Override to use for exec_timeout or 0 if no override.
+ :param evg_alias: Evergreen alias running the task.
+ :return: Exec timeout to use for running task.
+ """
+ determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
+
+ override = self.timeout_overrides.lookup_exec_override(variant, task_name)
+
+ if exec_timeout and exec_timeout.total_seconds() != 0:
+ LOGGER.info("Using timeout from cmd line",
+ exec_timeout_secs=exec_timeout.total_seconds())
+ determined_timeout = exec_timeout
+
+ elif task_name == UNITTEST_TASK and override is None:
+ LOGGER.info("Overriding unittest timeout",
+ exec_timeout_secs=UNITTESTS_TIMEOUT.total_seconds())
+ determined_timeout = UNITTESTS_TIMEOUT
+
+ elif evg_alias == COMMIT_QUEUE_ALIAS:
+ LOGGER.info("Overriding commit-queue timeout",
+ exec_timeout_secs=COMMIT_QUEUE_TIMEOUT.total_seconds())
+ determined_timeout = COMMIT_QUEUE_TIMEOUT
+
+ elif override is not None:
+ LOGGER.info("Overriding configured timeout", exec_timeout_secs=override.total_seconds())
+ determined_timeout = override
+
+ elif _is_required_build_variant(variant):
+ LOGGER.info("Overriding required-builder timeout",
+ exec_timeout_secs=DEFAULT_REQUIRED_BUILD_TIMEOUT.total_seconds())
+ determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT
+
+ # The timeout needs to be at least as large as the idle timeout.
+ if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds():
+ LOGGER.info("Making exec timeout as large as idle timeout",
+ exec_timeout_secs=idle_timeout.total_seconds())
+ return idle_timeout
+
+ return determined_timeout
+
+ def determine_idle_timeout(self, task_name: str, variant: str,
+ idle_timeout: Optional[timedelta] = None) -> Optional[timedelta]:
+ """
+ Determine what idle timeout should be used.
+
+ :param task_name: Name of task being run.
+ :param variant: Name of build variant being run.
+ :param idle_timeout: Override to use for idle_timeout.
+ :return: Idle timeout to use for running task.
+ """
+ determined_timeout = None
+ override = self.timeout_overrides.lookup_idle_override(variant, task_name)
+
+ if idle_timeout and idle_timeout.total_seconds() != 0:
+ LOGGER.info("Using timeout from cmd line",
+ idle_timeout_secs=idle_timeout.total_seconds())
+ determined_timeout = idle_timeout
+
+ elif override is not None:
+ LOGGER.info("Overriding configured timeout", idle_timeout_secs=override.total_seconds())
+ determined_timeout = override
+
+ return determined_timeout
+
+ def determine_historic_timeout(self, task: str, variant: str, suite_name: str,
+ exec_timeout_factor: Optional[float]) -> Optional[timedelta]:
+ """
+ Calculate the timeout based on historic test results.
+
+ :param task: Name of task to query.
+ :param variant: Name of build variant to query.
+ :param suite_name: Name of test suite being run.
+ :param exec_timeout_factor: Scaling factor to use when determining timeout.
+ """
+ if suite_name in IGNORED_SUITES:
+ return None
+
+ timeout_params = TimeoutParams(
+ evg_project="mongodb-mongo-master",
+ build_variant=variant,
+ task_name=task,
+ suite_name=suite_name,
+ is_asan=self.is_build_variant_asan(variant),
+ )
+ timeout_estimate = self.timeout_service.get_timeout_estimate(timeout_params)
+ if timeout_estimate and timeout_estimate.is_specified():
+ exec_timeout = timeout_estimate.calculate_task_timeout(
+ repeat_factor=1, scaling_factor=exec_timeout_factor)
+ if exec_timeout is not None:
+ LOGGER.info("Using historic based timeout", exec_timeout_secs=exec_timeout)
+ return timedelta(seconds=exec_timeout)
+ return None
+
+ def is_build_variant_asan(self, build_variant: str) -> bool:
+ """
+ Determine if the given build variant is an ASAN build variant.
+
+ :param build_variant: Name of build variant to check.
+ :return: True if build variant is an ASAN build variant.
+ """
+ bv = self.evg_project_config.get_variant(build_variant)
+ return bv.is_asan_build()
+
+ def determine_timeouts(self, cli_idle_timeout: Optional[timedelta],
+ cli_exec_timeout: Optional[timedelta], outfile: Optional[str], task: str,
+ variant: str, evg_alias: str, suite_name: str,
+ exec_timeout_factor: Optional[float]) -> None:
+ """
+ Determine the timeouts to use for the given task and write timeouts to expansion file.
+
+ :param cli_idle_timeout: Idle timeout specified by the CLI.
+ :param cli_exec_timeout: Exec timeout specified by the CLI.
+ :param outfile: File to write timeout expansions to.
+ :param variant: Build variant task is being run on.
+ :param evg_alias: Evergreen alias that triggered task.
+ :param suite_name: Name of evergreen suite being run.
+ :param exec_timeout_factor: Scaling factor to use when determining timeout.
+ """
+ idle_timeout = self.determine_idle_timeout(task, variant, cli_idle_timeout)
+ exec_timeout = self.determine_exec_timeout(task, variant, idle_timeout, cli_exec_timeout,
+ evg_alias)
+
+ historic_timeout = self.determine_historic_timeout(task, variant, suite_name,
+ exec_timeout_factor)
+ if historic_timeout:
+ exec_timeout = historic_timeout
+
+ output_timeout(exec_timeout, idle_timeout, outfile)
+
+
def main():
"""Determine the timeout value a task should use in evergreen."""
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument("--task-name", dest="task", required=True, help="Task being executed.")
+ parser.add_argument("--suite-name", dest="suite_name", required=True,
+ help="Resmoke suite being run against.")
parser.add_argument("--build-variant", dest="variant", required=True,
help="Build variant task is being executed on.")
parser.add_argument("--evg-alias", dest="evg_alias", required=True,
help="Evergreen alias used to trigger build.")
parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout to use (in sec).")
parser.add_argument("--exec-timeout", dest="exec_timeout", type=int,
- help="Exec timeout ot use (in sec).")
+ help="Exec timeout to use (in sec).")
+ parser.add_argument("--exec-timeout-factor", dest="exec_timeout_factor", type=float,
+ help="Scaling factor to apply when determining the exec timeout.")
parser.add_argument("--out-file", dest="outfile", help="File to write configuration to.")
+ parser.add_argument("--timeout-overrides", dest="timeout_overrides_file",
+ default=DEFAULT_TIMEOUT_OVERRIDES,
+ help="File containing timeout overrides to use.")
+ parser.add_argument("--evg-api-config", dest="evg_api_config",
+ default=DEFAULT_EVERGREEN_AUTH_CONFIG, help="Evergreen API config file.")
+ parser.add_argument("--evg-project-config", dest="evg_project_config",
+ default=DEFAULT_EVERGREEN_CONFIG, help="Evergreen project config file.")
options = parser.parse_args()
+ end_date = datetime.now()
+ start_date = end_date - HISTORY_LOOKBACK
+
timeout_override = timedelta(seconds=options.timeout) if options.timeout else None
exec_timeout_override = timedelta(
seconds=options.exec_timeout) if options.exec_timeout else None
- task_timeout = determine_timeout(options.task, options.variant, timeout_override,
- exec_timeout_override, options.evg_alias)
- output_timeout(task_timeout, options.outfile)
+
+ task_name = determine_task_base_name(options.task, options.variant)
+ timeout_overrides = TimeoutOverrides.from_yaml_file(
+ os.path.expanduser(options.timeout_overrides_file))
+
+ enable_logging(verbose=False)
+
+ def dependencies(binder: inject.Binder) -> None:
+ binder.bind(
+ EvergreenApi,
+ RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config)))
+ binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date))
+ binder.bind(TimeoutOverrides, timeout_overrides)
+ binder.bind(EvergreenProjectConfig,
+ parse_evergreen_file(os.path.expanduser(options.evg_project_config)))
+
+ inject.configure(dependencies)
+
+ task_timeout_orchestrator = inject.instance(TaskTimeoutOrchestrator)
+ task_timeout_orchestrator.determine_timeouts(
+ timeout_override, exec_timeout_override, options.outfile, task_name, options.variant,
+ options.evg_alias, options.suite_name, options.exec_timeout_factor)
if __name__ == "__main__":
diff --git a/buildscripts/task_generation/suite_split.py b/buildscripts/task_generation/suite_split.py
index e0b3cfbb449..5e1e9d32115 100644
--- a/buildscripts/task_generation/suite_split.py
+++ b/buildscripts/task_generation/suite_split.py
@@ -13,7 +13,7 @@ from evergreen import EvergreenApi
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy
-from buildscripts.task_generation.timeout import TimeoutEstimate
+from buildscripts.timeouts.timeout import TimeoutEstimate
from buildscripts.util import taskname
from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name
diff --git a/buildscripts/task_generation/task_types/resmoke_tasks.py b/buildscripts/task_generation/task_types/resmoke_tasks.py
index 7ceab680265..5ddd4f9a541 100644
--- a/buildscripts/task_generation/task_types/resmoke_tasks.py
+++ b/buildscripts/task_generation/task_types/resmoke_tasks.py
@@ -14,7 +14,7 @@ from buildscripts.task_generation.task_types.gentask_options import GenTaskOptio
from buildscripts.task_generation.task_types.models.resmoke_task_model import ResmokeTask
from buildscripts.task_generation.task_types.multiversion_decorator import MultiversionGenTaskDecorator, \
MultiversionDecoratorParams
-from buildscripts.task_generation.timeout import TimeoutEstimate
+from buildscripts.timeouts.timeout import TimeoutEstimate
LOGGER = structlog.getLogger(__name__)
diff --git a/buildscripts/tests/test_evergreen_task_timeout.py b/buildscripts/tests/test_evergreen_task_timeout.py
index 3728885f479..2043de695c7 100644
--- a/buildscripts/tests/test_evergreen_task_timeout.py
+++ b/buildscripts/tests/test_evergreen_task_timeout.py
@@ -1,46 +1,213 @@
"""Unit tests for the evergreen_task_timeout script."""
-from datetime import timedelta
import unittest
+from datetime import timedelta
+from unittest.mock import MagicMock
import buildscripts.evergreen_task_timeout as under_test
+from buildscripts.ciconfig.evergreen import EvergreenProjectConfig
+from buildscripts.timeouts.timeout_service import TimeoutService
+
+# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
+
+
+class TestTimeoutOverride(unittest.TestCase):
+ def test_exec_timeout_should_be_settable(self):
+ timeout_override = under_test.TimeoutOverride(task="my task", exec_timeout=42)
+
+ timeout = timeout_override.get_exec_timeout()
+
+ self.assertIsNotNone(timeout)
+ self.assertEqual(42 * 60, timeout.total_seconds())
+
+ def test_exec_timeout_should_default_to_none(self):
+ timeout_override = under_test.TimeoutOverride(task="my task")
+
+ timeout = timeout_override.get_exec_timeout()
+
+ self.assertIsNone(timeout)
+
+ def test_idle_timeout_should_be_settable(self):
+ timeout_override = under_test.TimeoutOverride(task="my task", idle_timeout=42)
+
+ timeout = timeout_override.get_idle_timeout()
+
+ self.assertIsNotNone(timeout)
+ self.assertEqual(42 * 60, timeout.total_seconds())
+
+ def test_idle_timeout_should_default_to_none(self):
+ timeout_override = under_test.TimeoutOverride(task="my task")
+
+ timeout = timeout_override.get_idle_timeout()
+
+ self.assertIsNone(timeout)
+
+
+class TestTimeoutOverrides(unittest.TestCase):
+ def test_looking_up_a_non_existing_override_should_return_none(self):
+ timeout_overrides = under_test.TimeoutOverrides(overrides={})
-# pylint: disable=missing-docstring,no-self-use
+ self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task"))
+ self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task"))
+ def test_looking_up_a_duplicate_override_should_raise_error(self):
+ timeout_overrides = under_test.TimeoutOverrides(
+ overrides={
+ "bv": [{
+ "task": "task_name",
+ "exec_timeout": 42,
+ "idle_timeout": 10,
+ }, {
+ "task": "task_name",
+ "exec_timeout": 314,
+ "idle_timeout": 20,
+ }]
+ })
-class DetermineTimeoutTest(unittest.TestCase):
+ with self.assertRaises(ValueError):
+ self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task_name"))
+
+ with self.assertRaises(ValueError):
+ self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task_name"))
+
+ def test_looking_up_an_exec_override_should_work(self):
+ timeout_overrides = under_test.TimeoutOverrides(
+ overrides={
+ "bv": [
+ {
+ "task": "another_task",
+ "exec_timeout": 314,
+ "idle_timeout": 20,
+ },
+ {
+ "task": "task_name",
+ "exec_timeout": 42,
+ },
+ ]
+ })
+
+ self.assertEqual(42 * 60,
+ timeout_overrides.lookup_exec_override("bv", "task_name").total_seconds())
+
+ def test_looking_up_an_idle_override_should_work(self):
+ timeout_overrides = under_test.TimeoutOverrides(
+ overrides={
+ "bv": [
+ {
+ "task": "another_task",
+ "exec_timeout": 314,
+ "idle_timeout": 20,
+ },
+ {
+ "task": "task_name",
+ "idle_timeout": 10,
+ },
+ ]
+ })
+
+ self.assertEqual(10 * 60,
+ timeout_overrides.lookup_idle_override("bv", "task_name").total_seconds())
+
+
+class TestDetermineExecTimeout(unittest.TestCase):
def test_timeout_used_if_specified(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
timeout = timedelta(seconds=42)
self.assertEqual(
- under_test.determine_timeout("task_name", "variant", None, timeout), timeout)
+ orchestrator.determine_exec_timeout("task_name", "variant", None, timeout), timeout)
def test_default_is_returned_with_no_timeout(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
- under_test.determine_timeout("task_name", "variant"),
+ orchestrator.determine_exec_timeout("task_name", "variant"),
under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)
def test_default_is_returned_with_timeout_at_zero(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
- under_test.determine_timeout("task_name", "variant", timedelta(seconds=0)),
+ orchestrator.determine_exec_timeout("task_name", "variant", timedelta(seconds=0)),
under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)
def test_default_required_returned_on_required_variants(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
- under_test.determine_timeout("task_name", "variant-required"),
+ orchestrator.determine_exec_timeout("task_name", "variant-required"),
under_test.DEFAULT_REQUIRED_BUILD_TIMEOUT)
def test_task_specific_timeout(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(
+ overrides={"linux-64-debug": [{"task": "auth", "exec_timeout": 60}]})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
- under_test.determine_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
+ orchestrator.determine_exec_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
def test_commit_queue_items_use_commit_queue_timeout(self):
- timeout = under_test.determine_timeout("auth", "variant",
- evg_alias=under_test.COMMIT_QUEUE_ALIAS)
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+ timeout = orchestrator.determine_exec_timeout("auth", "variant",
+ evg_alias=under_test.COMMIT_QUEUE_ALIAS)
self.assertEqual(timeout, under_test.COMMIT_QUEUE_TIMEOUT)
def test_use_idle_timeout_if_greater_than_exec_timeout(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
idle_timeout = timedelta(hours=2)
exec_timeout = timedelta(minutes=10)
- timeout = under_test.determine_timeout("task_name", "variant", idle_timeout=idle_timeout,
- exec_timeout=exec_timeout)
+ timeout = orchestrator.determine_exec_timeout(
+ "task_name", "variant", idle_timeout=idle_timeout, exec_timeout=exec_timeout)
self.assertEqual(timeout, idle_timeout)
+
+
+class TestDetermineIdleTimeout(unittest.TestCase):
+ def test_timeout_used_if_specified(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+ timeout = timedelta(seconds=42)
+ self.assertEqual(
+ orchestrator.determine_idle_timeout("task_name", "variant", timeout), timeout)
+
+ def test_default_is_returned_with_no_timeout(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+ self.assertIsNone(orchestrator.determine_idle_timeout("task_name", "variant"))
+
+ def test_task_specific_timeout(self):
+ mock_timeout_overrides = under_test.TimeoutOverrides(
+ overrides={"linux-64-debug": [{"task": "auth", "idle_timeout": 60}]})
+ orchestrator = under_test.TaskTimeoutOrchestrator(
+ timeout_service=MagicMock(spec_set=TimeoutService),
+ timeout_overrides=mock_timeout_overrides,
+ evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+ self.assertEqual(
+ orchestrator.determine_idle_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
diff --git a/buildscripts/tests/timeouts/__init__.py b/buildscripts/tests/timeouts/__init__.py
new file mode 100644
index 00000000000..4b7a2bb941b
--- /dev/null
+++ b/buildscripts/tests/timeouts/__init__.py
@@ -0,0 +1 @@
+"""Empty."""
diff --git a/buildscripts/tests/task_generation/test_timeout.py b/buildscripts/tests/timeouts/test_timeout.py
index 5d9fb48c6e6..1d450aed913 100644
--- a/buildscripts/tests/task_generation/test_timeout.py
+++ b/buildscripts/tests/timeouts/test_timeout.py
@@ -1,7 +1,7 @@
"""Unit tests for timeout.py."""
import unittest
-from buildscripts.task_generation import timeout as under_test
+from buildscripts.timeouts import timeout as under_test
# pylint: disable=missing-docstring,invalid-name,unused-argument,no-self-use,protected-access,no-value-for-parameter
diff --git a/buildscripts/tests/timeouts/test_timeout_service.py b/buildscripts/tests/timeouts/test_timeout_service.py
new file mode 100644
index 00000000000..bb0dd8a0c3e
--- /dev/null
+++ b/buildscripts/tests/timeouts/test_timeout_service.py
@@ -0,0 +1,258 @@
+"""Unit tests for timeout_service.py."""
+import random
+import unittest
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock
+
+from requests.exceptions import HTTPError
+from evergreen import EvergreenApi
+
+import buildscripts.timeouts.timeout_service as under_test
+from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
+from buildscripts.util.teststats import HistoricTaskData
+
+# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
+
+
+def build_mock_service(evg_api=None, resmoke_proxy=None):
+ end_date = datetime.now()
+ start_date = end_date - timedelta(weeks=2)
+ timeout_settings = under_test.TimeoutSettings(
+ end_date=end_date,
+ start_date=start_date,
+ )
+ return under_test.TimeoutService(
+ evg_api=evg_api if evg_api else MagicMock(spec_set=EvergreenApi),
+ resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService),
+ timeout_settings=timeout_settings)
+
+
+def tst_stat_mock(file, duration, pass_count):
+ return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count)
+
+
+class TestGetTimeoutEstimate(unittest.TestCase):
+ def test_no_stats_should_return_default_timeout(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ mock_evg_api.test_stats_by_project.return_value = []
+ timeout_service = build_mock_service(evg_api=mock_evg_api)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+ self.assertFalse(timeout.is_specified())
+
+ def test_a_test_with_missing_history_should_cause_a_default_timeout(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
+ mock_evg_api.test_stats_by_project.return_value = test_stats
+ mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
+ mock_resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"]
+ timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+ self.assertFalse(timeout.is_specified())
+
+ def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
+ test_stats.append(tst_stat_mock("zero.js", 0.0, 1))
+ mock_evg_api.test_stats_by_project.return_value = test_stats
+ mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
+ mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats]
+ timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+ self.assertFalse(timeout.is_specified())
+
+ def test_all_tests_with_runtime_history_should_use_custom_timeout(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ n_tests = 30
+ test_runtime = 600
+ test_stats = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)]
+ mock_evg_api.test_stats_by_project.return_value = test_stats
+ mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
+ mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats]
+ timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+ self.assertTrue(timeout.is_specified())
+ self.assertEqual(1860, timeout.calculate_test_timeout(1))
+ self.assertEqual(54180, timeout.calculate_task_timeout(1))
+
+
+class TestGetTaskHookOverhead(unittest.TestCase):
+ def test_no_stats_should_return_zero(self):
+ timeout_service = build_mock_service()
+
+ overhead = timeout_service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
+ historic_stats=None)
+
+ self.assertEqual(0.0, overhead)
+
+ def test_stats_with_no_clean_every_n_should_return_zero(self):
+ timeout_service = build_mock_service()
+ test_stats = HistoricTaskData.from_stats_list(
+ [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)])
+
+ overhead = timeout_service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
+ historic_stats=test_stats)
+
+ self.assertEqual(0.0, overhead)
+
+ def test_stats_with_clean_every_n_should_return_overhead(self):
+ test_count = 30
+ runtime = 25
+ timeout_service = build_mock_service()
+ test_stat_list = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(test_count)]
+ test_stat_list.extend([
+ tst_stat_mock(f"test_{i}:{under_test.CLEAN_EVERY_N_HOOK}", runtime, 1)
+ for i in range(10)
+ ])
+ random.shuffle(test_stat_list)
+ test_stats = HistoricTaskData.from_stats_list(test_stat_list)
+
+ overhead = timeout_service.get_task_hook_overhead(
+ "suite", is_asan=True, test_count=test_count, historic_stats=test_stats)
+
+ self.assertEqual(runtime * test_count, overhead)
+
+
+class TestLookupHistoricStats(unittest.TestCase):
+ def test_no_stats_from_evergreen_should_return_none(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ mock_evg_api.test_stats_by_project.return_value = []
+ timeout_service = build_mock_service(evg_api=mock_evg_api)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ stats = timeout_service.lookup_historic_stats(timeout_params)
+
+ self.assertIsNone(stats)
+
+ def test_errors_from_evergreen_should_return_none(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ mock_evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect")
+ timeout_service = build_mock_service(evg_api=mock_evg_api)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ stats = timeout_service.lookup_historic_stats(timeout_params)
+
+ self.assertIsNone(stats)
+
+ def test_stats_from_evergreen_should_return_the_stats(self):
+ mock_evg_api = MagicMock(spec_set=EvergreenApi)
+ test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)]
+ mock_evg_api.test_stats_by_project.return_value = test_stats
+ timeout_service = build_mock_service(evg_api=mock_evg_api)
+ timeout_params = under_test.TimeoutParams(
+ evg_project="my project",
+ build_variant="bv",
+ task_name="my task",
+ suite_name="my suite",
+ is_asan=False,
+ )
+
+ stats = timeout_service.lookup_historic_stats(timeout_params)
+
+ self.assertIsNotNone(stats)
+ self.assertEqual(len(test_stats), len(stats.historic_test_results))
+
+
+class TestGetCleanEveryNCadence(unittest.TestCase):
+ def test_clean_every_n_cadence_on_asan(self):
+ timeout_service = build_mock_service()
+
+ cadence = timeout_service._get_clean_every_n_cadence("suite", True)
+
+ self.assertEqual(1, cadence)
+
+ def test_clean_every_n_cadence_from_hook_config(self):
+ expected_n = 42
+ mock_resmoke_proxy = MagicMock()
+ mock_resmoke_proxy.read_suite_config.return_value = {
+ "executor": {
+ "hooks": [{
+ "class": "hook1",
+ }, {
+ "class": under_test.CLEAN_EVERY_N_HOOK,
+ "n": expected_n,
+ }]
+ }
+ }
+ timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
+
+ cadence = timeout_service._get_clean_every_n_cadence("suite", False)
+
+ self.assertEqual(expected_n, cadence)
+
+ def test_clean_every_n_cadence_no_n_in_hook_config(self):
+ mock_resmoke_proxy = MagicMock()
+ mock_resmoke_proxy.read_suite_config.return_value = {
+ "executor": {
+ "hooks": [{
+ "class": "hook1",
+ }, {
+ "class": under_test.CLEAN_EVERY_N_HOOK,
+ }]
+ }
+ }
+ timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
+
+ cadence = timeout_service._get_clean_every_n_cadence("suite", False)
+
+ self.assertEqual(1, cadence)
+
+ def test_clean_every_n_cadence_no_hook_config(self):
+ mock_resmoke_proxy = MagicMock()
+ mock_resmoke_proxy.read_suite_config.return_value = {
+ "executor": {"hooks": [{
+ "class": "hook1",
+ }, ]}
+ }
+ timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
+
+ cadence = timeout_service._get_clean_every_n_cadence("suite", False)
+
+ self.assertEqual(1, cadence)
diff --git a/buildscripts/tests/util/test_taskname.py b/buildscripts/tests/util/test_taskname.py
index 22ab279066a..7f3296ca1aa 100644
--- a/buildscripts/tests/util/test_taskname.py
+++ b/buildscripts/tests/util/test_taskname.py
@@ -4,7 +4,7 @@ import unittest
import buildscripts.util.taskname as under_test
-# pylint: disable=missing-docstring,protected-access
+# pylint: disable=missing-docstring,protected-access,invalid-name
class TestNameTask(unittest.TestCase):
@@ -24,3 +24,29 @@ class TestRemoveGenSuffix(unittest.TestCase):
input_task_name = "sharded_multi_stmt_txn_jscore_passthroug"
self.assertEqual("sharded_multi_stmt_txn_jscore_passthroug",
under_test.remove_gen_suffix(input_task_name))
+
+
+class TestDetermineTaskBaseName(unittest.TestCase):
+ def test_task_name_with_build_variant_should_strip_bv_and_sub_task_index(self):
+ bv = "enterprise-rhel-80-64-bit-dynamic-required"
+ task_name = f"auth_23_{bv}"
+
+ base_task_name = under_test.determine_task_base_name(task_name, bv)
+
+ self.assertEqual("auth", base_task_name)
+
+ def test_task_name_without_build_variant_should_strip_sub_task_index(self):
+ bv = "enterprise-rhel-80-64-bit-dynamic-required"
+ task_name = "auth_314"
+
+ base_task_name = under_test.determine_task_base_name(task_name, bv)
+
+ self.assertEqual("auth", base_task_name)
+
+ def test_task_name_without_build_variant_or_subtask_index_should_self(self):
+ bv = "enterprise-rhel-80-64-bit-dynamic-required"
+ task_name = "auth"
+
+ base_task_name = under_test.determine_task_base_name(task_name, bv)
+
+ self.assertEqual("auth", base_task_name)
diff --git a/buildscripts/timeouts/__init__.py b/buildscripts/timeouts/__init__.py
new file mode 100644
index 00000000000..4b7a2bb941b
--- /dev/null
+++ b/buildscripts/timeouts/__init__.py
@@ -0,0 +1 @@
+"""Empty."""
diff --git a/buildscripts/task_generation/timeout.py b/buildscripts/timeouts/timeout.py
index 261c2a8b82d..3e3440f9c5b 100644
--- a/buildscripts/task_generation/timeout.py
+++ b/buildscripts/timeouts/timeout.py
@@ -1,11 +1,10 @@
"""Timeout information for generating tasks."""
import math
from datetime import timedelta
-from inspect import getframeinfo, currentframe
+from inspect import currentframe, getframeinfo
from typing import NamedTuple, Optional
import structlog
-
from buildscripts.patch_builds.task_generation import TimeoutInfo
LOGGER = structlog.getLogger(__name__)
diff --git a/buildscripts/timeouts/timeout_service.py b/buildscripts/timeouts/timeout_service.py
new file mode 100644
index 00000000000..8c0d5ad58cd
--- /dev/null
+++ b/buildscripts/timeouts/timeout_service.py
@@ -0,0 +1,189 @@
+"""Service for determining task timeouts."""
+from datetime import datetime
+from typing import Any, Dict, NamedTuple, Optional
+
+import inject
+import structlog
+from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
+from buildscripts.timeouts.timeout import TimeoutEstimate
+from buildscripts.util.teststats import HistoricTaskData
+from evergreen import EvergreenApi
+
+LOGGER = structlog.get_logger(__name__)
+CLEAN_EVERY_N_HOOK = "CleanEveryN"
+
+
+class TimeoutParams(NamedTuple):
+ """
+ Parameters about task being run.
+
+ * evg_project: Evergreen project.
+ * build_variant: Evergreen build variant.
+ * task_name: Evergreen task_name.
+ * suite_name: Test Suite being run.
+ * is_asan: Whether this run is part of an asan build.
+ """
+
+ evg_project: str
+ build_variant: str
+ task_name: str
+ suite_name: str
+ is_asan: bool
+
+
+class TimeoutSettings(NamedTuple):
+ """Settings for determining timeouts."""
+
+ start_date: datetime
+ end_date: datetime
+
+
+class TimeoutService:
+ """A service for determining task timeouts."""
+
+ @inject.autoparams()
+ def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService,
+ timeout_settings: TimeoutSettings) -> None:
+ """
+ Initialize the service.
+
+ :param evg_api: Evergreen API client.
+ :param resmoke_proxy: Proxy to query resmoke.
+ :param timeout_settings: Settings for how timeouts are calculated.
+ """
+ self.evg_api = evg_api
+ self.resmoke_proxy = resmoke_proxy
+ self.timeout_settings = timeout_settings
+
+ def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate:
+ """
+ Calculate the timeout estimate for the given task based on historic test results.
+
+ :param timeout_params: Details about the task to query.
+ :return: Timeouts to use based on historic test results.
+ """
+ historic_stats = self.lookup_historic_stats(timeout_params)
+ if not historic_stats:
+ return TimeoutEstimate.no_timeouts()
+
+ test_set = set(self.resmoke_proxy.list_tests(timeout_params.suite_name))
+ test_runtimes = [
+ stat for stat in historic_stats.get_tests_runtimes() if stat.test_name in test_set
+ ]
+ test_runtime_set = {test.test_name for test in test_runtimes}
+ for test in test_set:
+ if test not in test_runtime_set:
+ # If we don't have historic runtime information for all the tests, we cannot
+ # reliably determine a timeout, so fall back to a default timeout.
+ LOGGER.warning(
+ "Could not find historic runtime information for test, using default timeout",
+ test=test)
+ return TimeoutEstimate.no_timeouts()
+
+ total_runtime = 0.0
+ max_runtime = 0.0
+
+ for runtime in test_runtimes:
+ if runtime.runtime > 0.0:
+ total_runtime += runtime.runtime
+ max_runtime = max(max_runtime, runtime.runtime)
+ else:
+ LOGGER.warning("Found a test with 0 runtime, using default timeouts",
+ test=runtime.test_name)
+ # We found a test with a runtime of 0, which indicates that it does not have a
+ # proper runtime history, so fall back to a default timeout.
+ return TimeoutEstimate.no_timeouts()
+
+ hook_overhead = self.get_task_hook_overhead(
+ timeout_params.suite_name, timeout_params.is_asan, len(test_set), historic_stats)
+ total_runtime += hook_overhead
+
+ return TimeoutEstimate(max_test_runtime=max_runtime, expected_task_runtime=total_runtime)
+
+ def get_task_hook_overhead(self, suite_name: str, is_asan: bool, test_count: int,
+ historic_stats: Optional[HistoricTaskData]) -> float:
+ """
+ Calculate how much overhead from task-level hooks a suite should account for.
+
+ Certain test hooks need to be accounted for on the task level instead of the test level
+ in order to calculate accurate timeouts. So we will add details about those hooks to
+ each suite here.
+
+ :param suite_name: Name of suite being generated.
+ :param is_asan: Whether ASAN is being used.
+ :param test_count: Number of tests in sub-suite.
+ :param historic_stats: Historic runtime data of the suite.
+ """
+ # The CleanEveryN hook is run every 'N' tests. The runtime of the
+ # hook will be associated with whichever test happens to be running, which could be
+ # different every run. So we need to take its runtime into account at the task level.
+ if historic_stats is None:
+ return 0.0
+
+ clean_every_n_cadence = self._get_clean_every_n_cadence(suite_name, is_asan)
+ avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
+ LOGGER.debug("task hook overhead", cadence=clean_every_n_cadence,
+ runtime=avg_clean_every_n_runtime, is_asan=is_asan)
+ if avg_clean_every_n_runtime != 0:
+ n_expected_runs = test_count / clean_every_n_cadence
+ return n_expected_runs * avg_clean_every_n_runtime
+ return 0.0
+
+ def lookup_historic_stats(self, timeout_params: TimeoutParams) -> Optional[HistoricTaskData]:
+ """
+ Lookup historic test results stats for the given task.
+
+ :param timeout_params: Details about the task to lookup.
+ :return: Historic test results if they exist.
+ """
+ try:
+ evg_stats = HistoricTaskData.from_evg(
+ self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date,
+ self.timeout_settings.end_date, timeout_params.task_name,
+ timeout_params.build_variant)
+ if not evg_stats:
+ LOGGER.warning("No historic runtime information available")
+ return None
+ return evg_stats
+ except Exception: # pylint: disable=broad-except
+ # If we have any trouble getting the historic runtime information, log the issue, but
+ # fall back to default timeouts instead of failing.
+ LOGGER.warning("Error querying history runtime information from evergreen",
+ exc_info=True)
+ return None
+
+ def _get_clean_every_n_cadence(self, suite_name: str, is_asan: bool) -> int:
+ """
+ Get the N value for the CleanEveryN hook.
+
+ :param suite_name: Name of suite being generated.
+ :param is_asan: Whether ASAN is being used.
+ :return: How frequently the CleanEveryN hook is run.
+ """
+ # Default to 1, which is the worst case meaning CleanEveryN would run for every test.
+ clean_every_n_cadence = 1
+ if is_asan:
+ # ASAN runs hard-code N to 1. See `resmokelib/testing/hooks/cleanup.py`.
+ return clean_every_n_cadence
+
+ clean_every_n_config = self._get_hook_config(suite_name, CLEAN_EVERY_N_HOOK)
+ if clean_every_n_config:
+ clean_every_n_cadence = clean_every_n_config.get("n", 1)
+
+ return clean_every_n_cadence
+
+ def _get_hook_config(self, suite_name: str, hook_name: str) -> Optional[Dict[str, Any]]:
+ """
+ Get the configuration for the given hook.
+
+ :param hook_name: Name of hook to query.
+ :return: Configuration for hook, if it exists.
+ """
+ hooks_config = self.resmoke_proxy.read_suite_config(suite_name).get("executor",
+ {}).get("hooks")
+ if hooks_config:
+ for hook in hooks_config:
+ if hook.get("class") == hook_name:
+ return hook
+
+ return None
diff --git a/buildscripts/util/taskname.py b/buildscripts/util/taskname.py
index 7dd3b12685b..784fc6d6555 100644
--- a/buildscripts/util/taskname.py
+++ b/buildscripts/util/taskname.py
@@ -1,6 +1,7 @@
"""Functions for working with resmoke task names."""
import math
+import re
GEN_SUFFIX = "_gen"
@@ -36,3 +37,25 @@ def remove_gen_suffix(task_name: str) -> str:
if task_name.endswith(GEN_SUFFIX):
return task_name[:-4]
return task_name
+
+
+def determine_task_base_name(task_name: str, build_variant: str) -> str:
+ """
+ Determine the base name of a task.
+
+ For generated tasks the base name will have the build variant and sub-task index
+ stripped off. For other tasks, it is the unmodified task_name.
+
+ :param task_name: Name of task to get base name of.
+ :param build_variant: Build variant that may be included in task name.
+ :return: Base name of given task.
+ """
+ match = re.match(f"(.*)_([0-9]+|misc)_{build_variant}", task_name)
+ if match:
+ return match.group(1)
+
+ match = re.match(r"(.*)_([0-9]+|misc)", task_name)
+ if match:
+ return match.group(1)
+
+ return task_name
diff --git a/docs/evergreen-testing/index.md b/docs/evergreen-testing/index.md
new file mode 100644
index 00000000000..f57692ade9a
--- /dev/null
+++ b/docs/evergreen-testing/index.md
@@ -0,0 +1,5 @@
+# Testing in Evergreen
+
+Documentation about how MongoDB is tested in Evergreen.
+
+* [Task Timeouts](task_timeouts.md)
diff --git a/docs/evergreen-testing/task_timeouts.md b/docs/evergreen-testing/task_timeouts.md
new file mode 100644
index 00000000000..e370aad22c9
--- /dev/null
+++ b/docs/evergreen-testing/task_timeouts.md
@@ -0,0 +1,35 @@
+# Evergreen Task Timeouts
+
+## Types of timeouts
+
+There are two types of timeouts that [evergreen supports](https://github.com/evergreen-ci/evergreen/wiki/Project-Commands#timeoutupdate):
+
+* **Exec timeout**: The _exec_ timeout is the overall timeout for a task. Once the total runtime for
+a task hits this value, the timeout logic will be triggered. This value is specified by
+**exec_timeout_secs** in the evergreen configuration.
+* **Idle timeout**: The _idle_ timeout is the amount of time in which evergreen will wait for
+output to be created before it considers the task hung and triggers timeout logic. This value
+is specified by **timeout_secs** in the evergreen configuration.
+
+**Note**: In most cases, **exec_timeout** is the more useful of the two timeouts.
+
+## Setting the timeout for a task
+
+There are a few ways in which the timeout can be determined for a task running in evergreen.
+
+* **Specified in 'etc/evergreen.yml'**: Timeout can be specified directly in the 'evergreen.yml' file,
+both on tasks and build variants. This can be useful for setting default timeout values, but is limited
+since different build variants frequently have different runtime characteristics and it is not possible
+to set timeouts for a task running on a specific build variant.
+
+* **etc/evergreen_timeouts.yml**: The 'etc/evergreen_timeouts.yml' file allows overriding timeouts
+for specific tasks on specific build variants. This provides a work-around for the limitations of
+specifying the timeouts directly in the 'evergreen.yml'. In order to use this method, the task
+must run the "determine task timeout" and "update task timeout expansions" functions at the beginning
+of the task evergreen definition. Most resmoke tasks already do this.
+
+* **buildscripts/evergreen_task_timeout.py**: This is the script that reads the 'etc/evergreen_timeouts.yml'
+file and calculates the timeout to use. Additionally, it will check the historic test results of the
+task being run and see if there is enough information to calculate timeouts based on that. It can
+also be used for more advanced ways of determining timeouts (e.g. the script is used to set much
+more aggressive timeouts on tasks that are run in the commit-queue).
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 43f743dbfbc..bbd46888ca5 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -1218,6 +1218,9 @@ functions:
- *update_resmoke_jobs_expansions
- *f_expansions_write
- *configure_evergreen_api_credentials
+ - *determine_task_timeout
+ - *update_task_timeout_expansions
+ - *f_expansions_write
- command: subprocess.exec
params:
binary: bash
@@ -1243,6 +1246,7 @@ functions:
"run tests":
- *f_expansions_write
+ - *configure_evergreen_api_credentials
- *determine_task_timeout
- *update_task_timeout_expansions
- *f_expansions_write
@@ -2246,7 +2250,9 @@ tasks:
- "./build/**.gcno"
- "./etc/*san.suppressions"
- "./etc/backports_required_for_multiversion_tests.yml"
+ - "./etc/evergreen_timeouts.yml"
- "./etc/expansions.default.yml"
+ - "./etc/evergreen.yml"
- "./etc/pip/**"
- "./etc/repo_config.yaml"
- "./etc/scons/**"
diff --git a/etc/evergreen_timeouts.yml b/etc/evergreen_timeouts.yml
new file mode 100644
index 00000000000..b25ce231d27
--- /dev/null
+++ b/etc/evergreen_timeouts.yml
@@ -0,0 +1,93 @@
+# This file defines timeouts in evergreen that will override the default timeouts.
+#
+# Each key under `overrides` provides the build variant where the override will occur. The
+# override should include the `task` that should have its timeout overridden and either the
+# `exec_timeout` to override or the `idle_timeout` to override.
+#
+# The timeouts should be specified in minutes.
+
+# Note: In order to make it easier to find existing entries, please try to keep the build variants
+# in alphabetical order.
+
+overrides:
+ enterprise-macos:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours
+
+ enterprise-rhel-80-64-bit-coverage:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours.
+
+ enterprise-ubuntu2004-debug-tsan:
+ - task: run_unittests
+ exec_timeout: 24
+
+ enterprise-windows:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 180 # 3 hours.
+
+ enterprise-windows-all-feature-flags-suggested:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 180 # 3 hours.
+ - task: replica_sets_update_v1_oplog_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours.
+
+ enterprise-windows-inmem:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 180 # 3 hours.
+
+ enterprise-windows-required:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 180 # 3 hours.
+ - task: replica_sets_update_v1_oplog_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours.
+
+ linux-64-debug:
+ - task: auth
+ exec_timeout: 60 # 1 hour.
+
+ linux-64-debug-repeated-execution:
+ - task: run_unittests
+ exec_timeout: 120 # 2 hours.
+
+ macos:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours
+
+ ubuntu1804-asan:
+ - task: run_unittests
+ exec_timeout: 24
+
+ ubuntu1804-debug-asan:
+ - task: run_unittests
+ exec_timeout: 24
+
+ ubuntu1804-debug-aubsan-lite:
+ - task: run_unittests
+ exec_timeout: 24
+
+ ubuntu1804-debug-ubsan:
+ - task: run_unittests
+ exec_timeout: 24
+
+ ubuntu1804-debug-suggested:
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 180 # 3 hours.
+
+ ubuntu1804-ubsan:
+ - task: run_unittests
+ exec_timeout: 24
+
+ windows:
+ - task: replica_sets
+ exec_timeout: 180 # 3 hours.
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours.
+
+ windows-debug-suggested:
+ - task: replica_sets_initsync_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours.
+ - task: replica_sets_jscore_passthrough
+ exec_timeout: 180 # 3 hours.
+ - task: replica_sets_update_v1_oplog_jscore_passthrough
+ exec_timeout: 150 # 2.5 hours.
diff --git a/evergreen/functions/task_timeout_determine.sh b/evergreen/functions/task_timeout_determine.sh
index 645aedbc302..f63416b2374 100644
--- a/evergreen/functions/task_timeout_determine.sh
+++ b/evergreen/functions/task_timeout_determine.sh
@@ -5,11 +5,25 @@ cd src
set -o verbose
set -o errexit
+
+# Set the suite name to be the task name by default, unless overridden with the `suite` expansion.
+suite_name=${task_name}
+if [[ -n ${suite} ]]; then
+ suite_name=${suite}
+fi
+
+timeout_factor=""
+if [[ -n "${exec_timeout_factor}" ]]; then
+ timeout_factor="--exec-timeout-factor ${exec_timeout_factor}"
+fi
+
activate_venv
-$python buildscripts/evergreen_task_timeout.py \
+PATH=$PATH:$HOME:/ $python buildscripts/evergreen_task_timeout.py $timeout_factor \
--task-name ${task_name} \
+ --suite-name ${suite_name} \
--build-variant ${build_variant} \
--evg-alias '${alias}' \
--timeout ${timeout_secs} \
--exec-timeout ${exec_timeout_secs} \
+ --evg-api-config ./.evergreen.yml \
--out-file task_timeout_expansions.yml