From 9dc322154b1a1d6b3b5910843bbd87e690a46995 Mon Sep 17 00:00:00 2001 From: Mikhail Shchatko Date: Wed, 23 Nov 2022 18:25:29 +0200 Subject: Revert "SERVER-71473: Begin using the new test stats location" This reverts commit af474b1d0c342c0f9959c6e866f297f6d32731eb. --- buildscripts/evergreen_task_timeout.py | 8 +- buildscripts/resmoke_tests_runtime_validate.py | 25 +++-- .../tests/timeouts/test_timeout_service.py | 112 +++++++++------------ buildscripts/tests/util/test_teststats.py | 2 +- buildscripts/timeouts/timeout_service.py | 24 ++++- buildscripts/util/teststats.py | 78 +++++--------- evergreen/resmoke_tests_runtime_validate.sh | 1 + 7 files changed, 119 insertions(+), 131 deletions(-) diff --git a/buildscripts/evergreen_task_timeout.py b/buildscripts/evergreen_task_timeout.py index f35a6c6c897..5c0eabf7aef 100755 --- a/buildscripts/evergreen_task_timeout.py +++ b/buildscripts/evergreen_task_timeout.py @@ -7,7 +7,7 @@ import math import os import shlex import sys -from datetime import timedelta +from datetime import datetime, timedelta from pathlib import Path from typing import Dict, List, Optional @@ -19,7 +19,7 @@ from evergreen import EvergreenApi, RetryingEvergreenApi from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file) from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService -from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService) +from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings) from buildscripts.util.cmdutils import enable_logging from buildscripts.util.taskname import determine_task_base_name @@ -369,6 +369,9 @@ def main(): options = parser.parse_args() + end_date = datetime.now() + start_date = end_date - HISTORY_LOOKBACK + timeout_override = timedelta(seconds=options.timeout) if options.timeout else None exec_timeout_override = timedelta( seconds=options.exec_timeout) if options.exec_timeout else None @@ -383,6 +386,7 @@ def main(): binder.bind( EvergreenApi, RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config))) + binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date)) binder.bind(TimeoutOverrides, timeout_overrides) binder.bind(EvergreenProjectConfig, parse_evergreen_file(os.path.expanduser(options.evg_project_config))) diff --git a/buildscripts/resmoke_tests_runtime_validate.py b/buildscripts/resmoke_tests_runtime_validate.py index 5c17d1ee5f8..cffcbf48e9d 100644 --- a/buildscripts/resmoke_tests_runtime_validate.py +++ b/buildscripts/resmoke_tests_runtime_validate.py @@ -3,6 +3,7 @@ import json import sys from collections import namedtuple +from datetime import datetime, timedelta from statistics import mean from typing import Dict, List @@ -12,8 +13,7 @@ import structlog from buildscripts.resmokelib.testing.report import TestInfo, TestReport from buildscripts.resmokelib.utils import get_task_name_without_suffix from buildscripts.util.cmdutils import enable_logging - -from buildscripts.util.teststats import HistoricTaskData, HistoricalTestInformation +from evergreen import RetryingEvergreenApi, TestStats LOGGER = structlog.get_logger("buildscripts.resmoke_tests_runtime_validate") @@ -34,12 +34,17 @@ def parse_resmoke_report(report_file: str) -> List[TestInfo]: return [test_info for test_info in test_report.test_infos if "jstests" in test_info.test_file] -def get_historic_stats(project_id: str, task_name: str, - build_variant: str) -> List[HistoricalTestInformation]: +def get_historic_stats(evg_api_config: str, project_id: str, test_files: List[str], task_name: str, + build_variant: str) -> List[TestStats]: """Get historic test stats.""" + evg_api = RetryingEvergreenApi.get_api(config_file=evg_api_config) + before_date = datetime.today() + after_date = before_date - timedelta(days=LOOK_BACK_NUM_DAYS) base_task_name = get_task_name_without_suffix(task_name, build_variant).replace( BURN_IN_PREFIX, "") - return HistoricTaskData.get_stats_from_s3(project_id, base_task_name, build_variant) + return evg_api.test_stats_by_project(project_id=project_id, after_date=after_date, + before_date=before_date, tests=test_files, + tasks=[base_task_name], variants=[build_variant]) def make_stats_map(stats: List[_TestData]) -> Dict[str, List[float]]: @@ -58,10 +63,13 @@ def make_stats_map(stats: List[_TestData]) -> Dict[str, List[float]]: @click.command() @click.option("--resmoke-report-file", type=str, required=True, help="Location of resmoke's report JSON file.") +@click.option("--evg-api-config", type=str, required=True, + help="Location of evergreen api configuration.") @click.option("--project-id", type=str, required=True, help="Evergreen project id.") @click.option("--build-variant", type=str, required=True, help="Evergreen build variant name.") @click.option("--task-name", type=str, required=True, help="Evergreen task name.") -def main(resmoke_report_file: str, project_id: str, build_variant: str, task_name: str) -> None: +def main(resmoke_report_file: str, evg_api_config: str, project_id: str, build_variant: str, + task_name: str) -> None: """Compare resmoke tests runtime with historic stats.""" enable_logging(verbose=False) @@ -71,9 +79,10 @@ def main(resmoke_report_file: str, project_id: str, build_variant: str, task_nam for test_info in current_test_infos ]) - historic_stats = get_historic_stats(project_id, task_name, build_variant) + historic_stats = get_historic_stats(evg_api_config, project_id, list(current_stats_map.keys()), + task_name, build_variant) historic_stats_map = make_stats_map([ - _TestData(test_stats.test_name, test_stats.avg_duration_pass) + _TestData(test_stats.test_file, test_stats.avg_duration_pass) for test_stats in historic_stats ]) diff --git a/buildscripts/tests/timeouts/test_timeout_service.py b/buildscripts/tests/timeouts/test_timeout_service.py index bb0550659c6..bb0dd8a0c3e 100644 --- a/buildscripts/tests/timeouts/test_timeout_service.py +++ b/buildscripts/tests/timeouts/test_timeout_service.py @@ -1,42 +1,41 @@ """Unit tests for timeout_service.py.""" import random import unittest -from unittest.mock import MagicMock, patch +from datetime import datetime, timedelta +from unittest.mock import MagicMock from requests.exceptions import HTTPError +from evergreen import EvergreenApi import buildscripts.timeouts.timeout_service as under_test from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService -from buildscripts.util.teststats import HistoricTaskData, HistoricTestInfo +from buildscripts.util.teststats import HistoricTaskData # pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access -NS = "buildscripts.timeouts.timeout_service" - -def ns(relative_name): # pylint: disable=invalid-name - """Return a full name from a name relative to the test module"s name space.""" - return NS + "." + relative_name - - -def build_mock_service(resmoke_proxy=None): +def build_mock_service(evg_api=None, resmoke_proxy=None): + end_date = datetime.now() + start_date = end_date - timedelta(weeks=2) + timeout_settings = under_test.TimeoutSettings( + end_date=end_date, + start_date=start_date, + ) return under_test.TimeoutService( - resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService)) + evg_api=evg_api if evg_api else MagicMock(spec_set=EvergreenApi), + resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService), + timeout_settings=timeout_settings) def tst_stat_mock(file, duration, pass_count): - return MagicMock(test_name=file, avg_duration_pass=duration, num_pass=pass_count, hooks=[]) - - -def tst_runtime_mock(file, duration, pass_count): - return MagicMock(test_name=file, avg_duration_pass=duration, num_pass=pass_count) + return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count) class TestGetTimeoutEstimate(unittest.TestCase): - @patch(ns("HistoricTaskData.from_s3")) - def test_no_stats_should_return_default_timeout(self, from_s3_mock: MagicMock): - timeout_service = build_mock_service() - from_s3_mock.return_value = [] + def test_no_stats_should_return_default_timeout(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) + mock_evg_api.test_stats_by_project.return_value = [] + timeout_service = build_mock_service(evg_api=mock_evg_api) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", @@ -49,17 +48,13 @@ class TestGetTimeoutEstimate(unittest.TestCase): self.assertFalse(timeout.is_specified()) - @patch(ns("HistoricTaskData.from_s3")) - def test_a_test_with_missing_history_should_cause_a_default_timeout( - self, from_s3_mock: MagicMock): - test_stats = [ - HistoricTestInfo(test_name=f"test_{i}.js", avg_duration=60, num_pass=1, hooks=[]) - for i in range(30) - ] - from_s3_mock.return_value = HistoricTaskData(test_stats) + def test_a_test_with_missing_history_should_cause_a_default_timeout(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) + test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)] + mock_evg_api.test_stats_by_project.return_value = test_stats mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService) mock_resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"] - timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy) + timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", @@ -72,19 +67,14 @@ class TestGetTimeoutEstimate(unittest.TestCase): self.assertFalse(timeout.is_specified()) - @patch(ns("HistoricTaskData.from_s3")) - def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout( - self, from_s3_mock: MagicMock): - test_stats = [ - HistoricTestInfo(test_name=f"test_{i}.js", avg_duration=60, num_pass=1, hooks=[]) - for i in range(30) - ] - test_stats.append( - HistoricTestInfo(test_name="zero.js", avg_duration=0.0, num_pass=1, hooks=[])) - from_s3_mock.return_value = HistoricTaskData(test_stats) + def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) + test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)] + test_stats.append(tst_stat_mock("zero.js", 0.0, 1)) + mock_evg_api.test_stats_by_project.return_value = test_stats mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService) - mock_resmoke_proxy.list_tests.return_value = [ts.test_name for ts in test_stats] - timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy) + mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats] + timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", @@ -97,19 +87,15 @@ class TestGetTimeoutEstimate(unittest.TestCase): self.assertFalse(timeout.is_specified()) - @patch(ns("HistoricTaskData.from_s3")) - def test_all_tests_with_runtime_history_should_use_custom_timeout(self, - from_s3_mock: MagicMock): + def test_all_tests_with_runtime_history_should_use_custom_timeout(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) n_tests = 30 test_runtime = 600 - test_stats = [ - HistoricTestInfo(test_name=f"test_{i}.js", avg_duration=test_runtime, num_pass=1, - hooks=[]) for i in range(n_tests) - ] - from_s3_mock.return_value = HistoricTaskData(test_stats) + test_stats = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)] + mock_evg_api.test_stats_by_project.return_value = test_stats mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService) - mock_resmoke_proxy.list_tests.return_value = [ts.test_name for ts in test_stats] - timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy) + mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats] + timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", @@ -163,10 +149,10 @@ class TestGetTaskHookOverhead(unittest.TestCase): class TestLookupHistoricStats(unittest.TestCase): - @patch(ns("HistoricTaskData.from_s3")) - def test_no_stats_from_evergreen_should_return_none(self, from_s3_mock: MagicMock): - from_s3_mock.return_value = None - timeout_service = build_mock_service() + def test_no_stats_from_evergreen_should_return_none(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) + mock_evg_api.test_stats_by_project.return_value = [] + timeout_service = build_mock_service(evg_api=mock_evg_api) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", @@ -179,10 +165,10 @@ class TestLookupHistoricStats(unittest.TestCase): self.assertIsNone(stats) - @patch(ns("HistoricTaskData.from_s3")) - def test_errors_from_evergreen_should_return_none(self, from_s3_mock: MagicMock): - from_s3_mock.side_effect = HTTPError("failed to connect") - timeout_service = build_mock_service() + def test_errors_from_evergreen_should_return_none(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) + mock_evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect") + timeout_service = build_mock_service(evg_api=mock_evg_api) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", @@ -195,11 +181,11 @@ class TestLookupHistoricStats(unittest.TestCase): self.assertIsNone(stats) - @patch(ns("HistoricTaskData.from_s3")) - def test_stats_from_evergreen_should_return_the_stats(self, from_s3_mock: MagicMock): + def test_stats_from_evergreen_should_return_the_stats(self): + mock_evg_api = MagicMock(spec_set=EvergreenApi) test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)] - from_s3_mock.return_value = HistoricTaskData(test_stats) - timeout_service = build_mock_service() + mock_evg_api.test_stats_by_project.return_value = test_stats + timeout_service = build_mock_service(evg_api=mock_evg_api) timeout_params = under_test.TimeoutParams( evg_project="my project", build_variant="bv", diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py index 4c54952368c..ebba930d032 100644 --- a/buildscripts/tests/util/test_teststats.py +++ b/buildscripts/tests/util/test_teststats.py @@ -80,7 +80,7 @@ class TestHistoricTaskData(unittest.TestCase): @staticmethod def _make_evg_result(test_file="dir/test1.js", num_pass=0, duration=0): return Mock( - test_name=test_file, + test_file=test_file, task_name="task1", variant="variant1", distro="distro1", diff --git a/buildscripts/timeouts/timeout_service.py b/buildscripts/timeouts/timeout_service.py index 68238010092..8c0d5ad58cd 100644 --- a/buildscripts/timeouts/timeout_service.py +++ b/buildscripts/timeouts/timeout_service.py @@ -1,4 +1,5 @@ """Service for determining task timeouts.""" +from datetime import datetime from typing import Any, Dict, NamedTuple, Optional import inject @@ -30,17 +31,29 @@ class TimeoutParams(NamedTuple): is_asan: bool +class TimeoutSettings(NamedTuple): + """Settings for determining timeouts.""" + + start_date: datetime + end_date: datetime + + class TimeoutService: """A service for determining task timeouts.""" @inject.autoparams() - def __init__(self, resmoke_proxy: ResmokeProxyService) -> None: + def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService, + timeout_settings: TimeoutSettings) -> None: """ Initialize the service. + :param evg_api: Evergreen API client. :param resmoke_proxy: Proxy to query resmoke. + :param timeout_settings: Settings for how timeouts are calculated. """ + self.evg_api = evg_api self.resmoke_proxy = resmoke_proxy + self.timeout_settings = timeout_settings def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate: """ @@ -116,8 +129,7 @@ class TimeoutService: return n_expected_runs * avg_clean_every_n_runtime return 0.0 - @staticmethod - def lookup_historic_stats(timeout_params: TimeoutParams) -> Optional[HistoricTaskData]: + def lookup_historic_stats(self, timeout_params: TimeoutParams) -> Optional[HistoricTaskData]: """ Lookup historic test results stats for the given task. @@ -125,8 +137,10 @@ class TimeoutService: :return: Historic test results if they exist. """ try: - evg_stats = HistoricTaskData.from_s3( - timeout_params.evg_project, timeout_params.task_name, timeout_params.build_variant) + evg_stats = HistoricTaskData.from_evg( + self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date, + self.timeout_settings.end_date, timeout_params.task_name, + timeout_params.build_variant) if not evg_stats: LOGGER.warning("No historic runtime information available") return None diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py index c76d66f514c..a52fa3c79a4 100644 --- a/buildscripts/util/teststats.py +++ b/buildscripts/util/teststats.py @@ -1,31 +1,15 @@ """Utility to support parsing a TestStat.""" from collections import defaultdict from dataclasses import dataclass +from datetime import datetime from itertools import chain from typing import NamedTuple, List, Callable, Optional -import requests -from requests.adapters import HTTPAdapter, Retry + +from evergreen import EvergreenApi, TestStats from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file TASK_LEVEL_HOOKS = {"CleanEveryN"} -TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com" - - -class HistoricalTestInformation(NamedTuple): - """ - Container for information about the historical runtime of a test. - - test_name: Name of test. - avg_duration_pass: Average of runtime of test that passed. - num_pass: Number of times the test has passed. - num_fail: Number of times the test has failed. - """ - - test_name: str - num_pass: int - num_fail: int - avg_duration_pass: float class TestRuntime(NamedTuple): @@ -90,9 +74,9 @@ class HistoricHookInfo(NamedTuple): avg_duration: float @classmethod - def from_test_stats(cls, test_stats: HistoricalTestInformation) -> "HistoricHookInfo": + def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo": """Create an instance from a test_stats object.""" - return cls(hook_id=test_stats.test_name, num_pass=test_stats.num_pass, + return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass, avg_duration=test_stats.avg_duration_pass) def test_name(self) -> str: @@ -117,10 +101,10 @@ class HistoricTestInfo(NamedTuple): hooks: List[HistoricHookInfo] @classmethod - def from_test_stats(cls, test_stats: HistoricalTestInformation, + def from_test_stats(cls, test_stats: TestStats, hooks: List[HistoricHookInfo]) -> "HistoricTestInfo": """Create an instance from a test_stats object.""" - return cls(test_name=test_stats.test_name, num_pass=test_stats.num_pass, + return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass, avg_duration=test_stats.avg_duration_pass, hooks=hooks) def normalized_test_name(self) -> str: @@ -153,56 +137,46 @@ class HistoricTaskData(object): """Initialize the TestStats with raw results from the Evergreen API.""" self.historic_test_results = historic_test_results - @staticmethod - def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]: - """ - Retrieve test stats from s3 for a given task. - - :param project: Project to query. - :param task: Task to query. - :param variant: Build variant to query. - :return: A list of the Test stats for the specified task. - """ - session = requests.Session() - retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504]) - session.mount('https://', HTTPAdapter(max_retries=retries)) - - response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}") - data = response.json() - - return [HistoricalTestInformation(**item) for item in data] - + # pylint: disable=too-many-arguments @classmethod - def from_s3(cls, project: str, task: str, variant: str) -> "HistoricTaskData": + def from_evg(cls, evg_api: EvergreenApi, project: str, start_date: datetime, end_date: datetime, + task: str, variant: str) -> "HistoricTaskData": """ - Retrieve test stats from s3 for a given task. + Retrieve test stats from evergreen for a given task. + :param evg_api: Evergreen API client. :param project: Project to query. + :param start_date: Start date to query. + :param end_date: End date to query. :param task: Task to query. :param variant: Build variant to query. :return: Test stats for the specified task. """ - historical_test_data = cls.get_stats_from_s3(project, task, variant) - return cls.from_stats_list(historical_test_data) + days = (end_date - start_date).days + historic_stats = evg_api.test_stats_by_project( + project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant], + group_by="test", group_num_days=days) + + return cls.from_stats_list(historic_stats) @classmethod - def from_stats_list( - cls, historical_test_data: List[HistoricalTestInformation]) -> "HistoricTaskData": + def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData": """ Build historic task data from a list of historic stats. - :param historical_test_data: A list of information about the runtime of a test. + :param historic_stats: List of historic stats to build from. :return: Historic task data from the list of stats. """ + hooks = defaultdict(list) - for hook in [stat for stat in historical_test_data if is_resmoke_hook(stat.test_name)]: + for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]: historical_hook = HistoricHookInfo.from_test_stats(hook) hooks[historical_hook.test_name()].append(historical_hook) return cls([ HistoricTestInfo.from_test_stats(stat, - hooks[get_short_name_from_test_file(stat.test_name)]) - for stat in historical_test_data if not is_resmoke_hook(stat.test_name) + hooks[get_short_name_from_test_file(stat.test_file)]) + for stat in historic_stats if not is_resmoke_hook(stat.test_file) ]) def get_tests_runtimes(self) -> List[TestRuntime]: diff --git a/evergreen/resmoke_tests_runtime_validate.sh b/evergreen/resmoke_tests_runtime_validate.sh index aee2111ff92..1ccfc71c225 100644 --- a/evergreen/resmoke_tests_runtime_validate.sh +++ b/evergreen/resmoke_tests_runtime_validate.sh @@ -9,6 +9,7 @@ set -o errexit activate_venv $python buildscripts/resmoke_tests_runtime_validate.py \ --resmoke-report-file ./report.json \ + --evg-api-config ./.evergreen.yml \ --project-id ${project_id} \ --build-variant ${build_variant} \ --task-name ${task_name} -- cgit v1.2.1