author     David Bradford <david.bradford@mongodb.com>       2020-12-02 08:47:14 -0500
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-12-03 20:54:14 +0000
commit     59b50edbde7b2c9a926d5890f767387330fbadab
tree       10df60b560a2cfa0a165bdffa7b9e53713e0fb84
parent     fb67f77bd073a31d0511f9b60b7850df64b1321b
download   mongo-59b50edbde7b2c9a926d5890f767387330fbadab.tar.gz
SERVER-53058: Better account for CleanEveryN runtime for dynamically split tasks
(cherry picked from commit 3cb926298aaa7b55d4e0f51daf7c176b65ccc32b)
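In short: the CleanEveryN hook runs once every N (almost always 20) tests, and its runtime gets attributed to whichever test it happens to interrupt, which varies from run to run. This patch charges that runtime to the task instead. The per-suite arithmetic reduces to the sketch below; the figures are invented for illustration, only the formula comes from the patch:

    # Sketch of the overhead math in add_task_hook_overhead(); hypothetical numbers.
    CLEAN_EVERY_N_VALUE = 20            # the hook fires once every 20 tests

    avg_clean_every_n_runtime = 30.0    # avg hook runtime from historic stats, in seconds
    test_count = 50                     # tests assigned to one generated sub-suite

    n_expected_runs = test_count / CLEAN_EVERY_N_VALUE            # 2.5 expected hook runs
    task_overhead = n_expected_runs * avg_clean_every_n_runtime   # 75.0 seconds

The overhead is added to the suite's expected runtime before the generated task's exec_timeout is derived, so the timeout better reflects what the task will actually spend.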
-rwxr-xr-x  buildscripts/evergreen_generate_resmoke_tasks.py              189
-rw-r--r--  buildscripts/tests/test_evergreen_generate_resmoke_tasks.py    24
-rw-r--r--  buildscripts/tests/util/test_teststats.py                      24
-rw-r--r--  buildscripts/util/teststats.py                                 152
4 files changed, 263 insertions(+), 126 deletions(-)
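Most of the line count below comes from restructuring buildscripts/util/teststats.py around two NamedTuples, HistoricHookInfo and HistoricTestInfo, plus a from_stats_list() constructor. A rough sketch of the resulting data flow, using stand-ins for Evergreen TestStats documents (only the three fields the code reads are mimicked; the expected results follow the unit tests in this commit):

    from unittest.mock import MagicMock

    def make_stat(test_file, num_pass, avg_duration_pass):
        # Stand-in for an Evergreen TestStats document returned by
        # evg_api.test_stats_by_project().
        return MagicMock(test_file=test_file, num_pass=num_pass,
                         avg_duration_pass=avg_duration_pass)

    stats = [
        make_stat("dir/test1.js", 10, 60.0),
        make_stat("test1:CheckReplDBHash", 10, 5.0),  # test-level hook
        make_stat("test1:CleanEveryN", 10, 30.0),     # task-level hook
    ]

    task_data = HistoricTaskData.from_stats_list(stats)

    # test1's runtime includes its test-level hook (60 + 5 = 65s) but not
    # CleanEveryN, which is surfaced separately so it can be charged to the task:
    task_data.get_tests_runtimes()                 # [TestRuntime('dir/test1.js', 65.0)]
    task_data.get_avg_hook_runtime("CleanEveryN")  # 30.0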
diff --git a/buildscripts/evergreen_generate_resmoke_tasks.py b/buildscripts/evergreen_generate_resmoke_tasks.py
index e85ae2cd5c0..e6bb26f6ca4 100755
--- a/buildscripts/evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/evergreen_generate_resmoke_tasks.py
@@ -5,6 +5,7 @@ Resmoke Test Suite Generator.
 Analyze the evergreen history for tests run under the given task and create new
 evergreen tasks to attempt to keep the task runtime under a specified amount.
 """
+# pylint: disable=too-many-lines
 from copy import deepcopy
 import datetime
 from datetime import timedelta
@@ -15,7 +16,7 @@ import os
 import re
 import sys
 from distutils.util import strtobool  # pylint: disable=no-name-in-module
-from typing import Dict, List, Set, Sequence, Optional, Any, Match
+from typing import Dict, List, Set, Sequence, Optional, Any, Match, NamedTuple
 
 import click
 import requests
@@ -49,6 +50,8 @@ MIN_TIMEOUT_SECONDS = int(timedelta(minutes=5).total_seconds())
 MAX_EXPECTED_TIMEOUT = int(timedelta(hours=48).total_seconds())
 LOOKBACK_DURATION_DAYS = 14
 GEN_SUFFIX = "_gen"
+CLEAN_EVERY_N_HOOK = "CleanEveryN"
+CLEAN_EVERY_N_VALUE = 20
 
 HEADER_TEMPLATE = """# DO NOT EDIT THIS FILE. All manual edits will be lost.
 # This file was generated by {file} from
@@ -419,7 +422,7 @@ def render_suite_files(suites: List, suite_name: str, test_list: List[str], suit
     return suite_configs
 
 
-def calculate_timeout(avg_runtime, scaling_factor):
+def calculate_timeout(avg_runtime: float, scaling_factor: int) -> int:
     """
     Determine how long a runtime to set based on average runtime and a scaling factor.
 
@@ -458,6 +461,79 @@ def should_tasks_be_generated(evg_api, task_id):
     return True
 
 
+class TimeoutEstimate(NamedTuple):
+    """Runtime estimates used to calculate timeouts."""
+
+    max_test_runtime: Optional[float]
+    expected_task_runtime: Optional[float]
+
+    @classmethod
+    def no_timeouts(cls) -> "TimeoutEstimate":
+        """Create an instance with no estimation data."""
+        return cls(max_test_runtime=None, expected_task_runtime=None)
+
+    def calculate_test_timeout(self, repeat_factor: int) -> Optional[int]:
+        """
+        Calculate the timeout to use for tests.
+
+        :param repeat_factor: How many times the suite will be repeated.
+        :return: Timeout value to use for tests.
+        """
+        if self.max_test_runtime is None:
+            return None
+
+        timeout = calculate_timeout(self.max_test_runtime, 3) * repeat_factor
+        LOGGER.debug("Setting timeout", timeout=timeout, max_runtime=self.max_test_runtime,
+                     factor=repeat_factor)
+        return timeout
+
+    def calculate_task_timeout(self, repeat_factor: int) -> Optional[int]:
+        """
+        Calculate the timeout to use for tasks.
+
+        :param repeat_factor: How many times the suite will be repeated.
+        :return: Timeout value to use for tasks.
+        """
+        if self.expected_task_runtime is None:
+            return None
+
+        exec_timeout = calculate_timeout(self.expected_task_runtime, 3) * repeat_factor
+        LOGGER.debug("Setting exec_timeout", exec_timeout=exec_timeout,
+                     suite_runtime=self.expected_task_runtime, factor=repeat_factor)
+        return exec_timeout
+
+    def generate_timeout_cmd(self, is_patch: bool, repeat_factor: int,
+                             use_default: bool = False) -> TimeoutInfo:
+        """
+        Create the timeout info to use to create a timeout shrub command.
+
+        :param is_patch: Whether the command is being created in a patch build.
+        :param repeat_factor: How many times the suite will be repeated.
+        :param use_default: Should the default timeout be used.
+        :return: Timeout info for the task.
+        """
+        if (self.max_test_runtime is None and self.expected_task_runtime is None) or use_default:
+            return TimeoutInfo.default_timeout()
+
+        test_timeout = self.calculate_test_timeout(repeat_factor)
+        task_timeout = self.calculate_task_timeout(repeat_factor)
+
+        if is_patch and (test_timeout > MAX_EXPECTED_TIMEOUT
+                         or task_timeout > MAX_EXPECTED_TIMEOUT):
+            frameinfo = getframeinfo(currentframe())
+            LOGGER.error(
+                "This task looks like it is expected to run far longer than normal. This is "
+                "likely due to setting the suite 'repeat' value very high. If you are sure "
+                "this is something you want to do, comment this check out in your patch build "
+                "and resubmit", repeat_value=repeat_factor, timeout=test_timeout,
+                exec_timeout=task_timeout, code_file=frameinfo.filename,
+                code_line=frameinfo.lineno, max_timeout=MAX_EXPECTED_TIMEOUT)
+            raise ValueError("Failing due to expected runtime.")
+
+        return TimeoutInfo.overridden(timeout=test_timeout, exec_timeout=task_timeout)
+
+
 class Suite(object):
     """A suite of tests that can be run by evergreen."""
 
@@ -474,6 +550,7 @@ class Suite(object):
         self.max_runtime = 0
         self.tests_with_runtime_info = 0
         self.source_name = source_name
+        self.task_overhead = 0
 
         self.index = Suite._current_index
         Suite._current_index += 1
@@ -495,7 +572,7 @@ class Suite(object):
         if runtime > self.max_runtime:
             self.max_runtime = runtime
 
-    def should_overwrite_timeout(self):
+    def should_overwrite_timeout(self) -> bool:
         """
         Whether the timeout for this suite should be overwritten.
 
@@ -503,14 +580,19 @@ class Suite(object):
         """
         return len(self.tests) == self.tests_with_runtime_info
 
+    def get_timeout_estimate(self) -> TimeoutEstimate:
+        """Get the estimated runtime of this task for timeouts."""
+        if self.should_overwrite_timeout():
+            return TimeoutEstimate(max_test_runtime=self.max_runtime,
+                                   expected_task_runtime=self.total_runtime + self.task_overhead)
+        return TimeoutEstimate.no_timeouts()
+
     def get_runtime(self):
         """Get the current average runtime of all the tests currently in this suite."""
-
         return self.total_runtime
 
     def get_test_count(self):
         """Get the number of tests currently in this suite."""
-
         return len(self.tests)
 
     @property
@@ -593,44 +675,6 @@ class EvergreenConfigGenerator(object):
 
         return variables
 
-    def _get_timeout_command(self, max_test_runtime: int, expected_suite_runtime: int,
-                             use_default: bool) -> TimeoutInfo:
-        """
-        Add an evergreen command to override the default timeouts to the list of commands.
-
-        :param max_test_runtime: Maximum runtime of any test in the sub-suite.
-        :param expected_suite_runtime: Expected runtime of the entire sub-suite.
-        :param use_default: Use default timeouts.
-        :return: Timeout information.
-        """
-        repeat_factor = self.options.repeat_suites
-        if (max_test_runtime or expected_suite_runtime) and not use_default:
-            timeout = None
-            exec_timeout = None
-            if max_test_runtime:
-                timeout = calculate_timeout(max_test_runtime, 3) * repeat_factor
-                LOGGER.debug("Setting timeout", timeout=timeout, max_runtime=max_test_runtime,
-                             factor=repeat_factor)
-            if expected_suite_runtime:
-                exec_timeout = calculate_timeout(expected_suite_runtime, 3) * repeat_factor
-                LOGGER.debug("Setting exec_timeout", exec_timeout=exec_timeout,
-                             suite_runtime=expected_suite_runtime, factor=repeat_factor)
-
-            if self.options.is_patch and \
-                    (timeout > MAX_EXPECTED_TIMEOUT or exec_timeout > MAX_EXPECTED_TIMEOUT):
-                frameinfo = getframeinfo(currentframe())
-                LOGGER.error(
-                    "This task looks like it is expected to run far longer than normal. This is "
-                    "likely due to setting the suite 'repeat' value very high. If you are sure "
-                    "this is something you want to do, comment this check out in your patch build "
-                    "and resubmit", repeat_value=repeat_factor, timeout=timeout,
-                    exec_timeout=exec_timeout, code_file=frameinfo.filename,
-                    code_line=frameinfo.lineno, max_timeout=MAX_EXPECTED_TIMEOUT)
-                raise ValueError("Failing due to expected runtime.")
-            return TimeoutInfo.overridden(timeout=timeout, exec_timeout=exec_timeout)
-
-        return TimeoutInfo.default_timeout()
-
     @staticmethod
     def _is_task_dependency(task: str, possible_dependency: str) -> Optional[Match[str]]:
         """
@@ -668,16 +712,14 @@ class EvergreenConfigGenerator(object):
 
         return dependencies
 
     def _generate_task(self, sub_suite_name: str, sub_task_name: str, target_dir: str,
-                       max_test_runtime: Optional[int] = None,
-                       expected_suite_runtime: Optional[int] = None) -> Task:
+                       timeout_est: TimeoutEstimate) -> Task:
         """
         Generate a shrub evergreen config for a resmoke task.
 
         :param sub_suite_name: Name of suite being generated.
        :param sub_task_name: Name of task to generate.
         :param target_dir: Directory containing generated suite files.
-        :param max_test_runtime: Runtime of the longest test in this sub suite.
-        :param expected_suite_runtime: Expected total runtime of this suite.
+        :param timeout_est: Estimated runtime to use for calculating timeouts.
         :return: Shrub configuration for the described task.
         """
         # pylint: disable=too-many-arguments
@@ -690,9 +732,9 @@ class EvergreenConfigGenerator(object):
         run_tests_vars = self._get_run_tests_vars(target_suite_file)
 
         use_multiversion = self.options.use_multiversion
-        timeout_info = self._get_timeout_command(max_test_runtime, expected_suite_runtime,
-                                                 self.options.use_default_timeouts)
-        commands = resmoke_commands("run generated tests", run_tests_vars, timeout_info,
+        timeout_cmd = timeout_est.generate_timeout_cmd(
+            self.options.is_patch, self.options.repeat_suites, self.options.use_default_timeouts)
+        commands = resmoke_commands("run generated tests", run_tests_vars, timeout_cmd,
                                     use_multiversion)
 
         return Task(sub_task_name, commands, self._get_dependencies())
@@ -707,13 +749,8 @@ class EvergreenConfigGenerator(object):
         """
         sub_task_name = taskname.name_generated_task(self.options.task, idx, len(self.suites),
                                                      self.options.variant)
-        max_runtime = None
-        total_runtime = None
-        if suite.should_overwrite_timeout():
-            max_runtime = suite.max_runtime
-            total_runtime = suite.get_runtime()
         return self._generate_task(suite.name, sub_task_name, self.options.generated_config_dir,
-                                   max_runtime, total_runtime)
+                                   suite.get_timeout_estimate())
 
     def _generate_all_tasks(self) -> Set[Task]:
         """Get a set of shrub task for all the sub tasks."""
@@ -724,8 +761,9 @@
         misc_suite_name = f"{os.path.basename(self.options.suite)}_misc"
         misc_task_name = f"{self.options.task}_misc_{self.options.variant}"
         tasks.add(
-            self._generate_task(misc_suite_name, misc_task_name,
-                                self.options.generated_config_dir))
+            self._generate_task(misc_suite_name,
+                                misc_task_name, self.options.generated_config_dir,
+                                TimeoutEstimate.no_timeouts()))
 
         return tasks
 
@@ -773,7 +811,7 @@ class GenerateSubSuites(object):
         evg_stats = HistoricTaskData.from_evg(self.evergreen_api, self.config_options.project,
                                               start_date, end_date, self.config_options.task,
                                               self.config_options.variant)
-        if not evg_stats.get_tests_runtimes():
+        if not evg_stats:
             LOGGER.debug("No test history, using fallback suites")
             # This is probably a new suite, since there is no test history, just use the
             # fallback values.
@@ -803,10 +841,37 @@ class GenerateSubSuites(object):
         if not tests_runtimes:
             LOGGER.debug("No test runtimes after filter, using fallback")
             return self.calculate_fallback_suites()
+
         self.test_list = [info.test_name for info in tests_runtimes]
-        return divide_tests_into_suites(self.config_options.suite, tests_runtimes,
-                                        execution_time_secs, self.config_options.max_sub_suites,
-                                        self.config_options.max_tests_per_suite)
+
+        suites = divide_tests_into_suites(self.config_options.suite, tests_runtimes,
+                                          execution_time_secs, self.config_options.max_sub_suites,
+                                          self.config_options.max_tests_per_suite)
+
+        self.add_task_hook_overhead(suites, test_stats)
+
+        return suites
+
+    @staticmethod
+    def add_task_hook_overhead(suites: List[Suite], historic_stats: HistoricTaskData) -> None:
+        """
+        Add how much overhead task-level hooks each suite should account for.
+
+        Certain test hooks need to be accounted for on the task level instead of the test level
+        in order to calculate accurate timeouts. So we will add details about those hooks to
+        each suite here.
+
+        :param suites: List of suites that were created.
+        :param historic_stats: Historic runtime data of the suite.
+        """
+        # The CleanEveryN hook is run every 'N' tests. N is almost always 20. The runtime of the
+        # hook will be associated with whichever test happens to be running, which could be
+        # different every run. So we need to take its runtime into account at the task level.
+        avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
+        if avg_clean_every_n_runtime != 0:
+            for suite in suites:
+                n_expected_runs = suite.get_test_count() / CLEAN_EVERY_N_VALUE
+                suite.task_overhead += n_expected_runs * avg_clean_every_n_runtime
 
     def filter_tests(self, tests_runtimes: List[TestRuntime]) -> List[TestRuntime]:
         """
diff --git a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
index f93c2e7c3b2..004527dec4a 100644
--- a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
@@ -563,6 +563,15 @@ class CalculateTimeoutTest(unittest.TestCase):
                          under_test.calculate_timeout(30, scaling_factor))
 
 
+class TimeoutEstimateTest(unittest.TestCase):
+    def test_too_high_a_timeout_raises_errors(self):
+        timeout_est = under_test.TimeoutEstimate(
+            max_test_runtime=5, expected_task_runtime=under_test.MAX_EXPECTED_TIMEOUT)
+
+        with self.assertRaises(ValueError):
+            timeout_est.generate_timeout_cmd(is_patch=True, repeat_factor=1)
+
+
 class EvergreenConfigGeneratorTest(unittest.TestCase):
     @staticmethod
     def generate_mock_suites(count):
@@ -572,6 +581,8 @@
             suite.name = "suite {0}".format(idx)
             suite.max_runtime = 5.28
             suite.get_runtime = lambda: 100.874
+            suite.get_timeout_estimate.return_value = under_test.TimeoutEstimate(
+                max_test_runtime=5.28, expected_task_runtime=100.874)
             suites.append(suite)
 
         return suites
@@ -723,15 +734,6 @@ class EvergreenConfigGeneratorTest(unittest.TestCase):
         expected_exec_timeout = under_test.calculate_timeout(suites[0].get_runtime(), 3) * 5
         self.assertEqual(expected_exec_timeout, timeout_cmd["params"]["exec_timeout_secs"])
 
-    def test_evg_config_has_fails_if_timeout_too_high(self):
-        options = self.generate_mock_options()
-        options.repeat_suites = under_test.MAX_EXPECTED_TIMEOUT
-        suites = self.generate_mock_suites(3)
-
-        with self.assertRaises(ValueError):
-            generator = under_test.EvergreenConfigGenerator(suites, options, MagicMock())
-            generator.generate_config(MagicMock())
-
     def test_evg_config_does_not_fails_if_timeout_too_high_on_mainline(self):
         options = self.generate_mock_options()
         options.is_patch = False
@@ -780,7 +782,9 @@ class EvergreenConfigGeneratorTest(unittest.TestCase):
         suite_without_timing_info = 1
         options = self.generate_mock_options()
         suites = self.generate_mock_suites(3)
-        suites[suite_without_timing_info].should_overwrite_timeout.return_value = False
+        suites[
+            suite_without_timing_info].get_timeout_estimate.return_value = under_test.TimeoutEstimate.no_timeouts(
+            )
 
         build_variant = BuildVariant("variant")
         generator = under_test.EvergreenConfigGenerator(suites, options, MagicMock())
diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py
index 7d0f04f2600..b3405ecc00d 100644
--- a/buildscripts/tests/util/test_teststats.py
+++ b/buildscripts/tests/util/test_teststats.py
@@ -24,11 +24,10 @@ class NormalizeTestNameTest(unittest.TestCase):
 
 class TestHistoricTaskData(unittest.TestCase):
     def test_no_hooks(self):
         evg_results = [
-            self._make_evg_result("dir/test1.js", 1, 10),
             self._make_evg_result("dir/test2.js", 1, 30),
-            self._make_evg_result("dir/test1.js", 2, 25),
+            self._make_evg_result("dir/test1.js", 2, 20),
         ]
-        test_stats = under_test.HistoricTaskData(evg_results)
+        test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
         expected_runtimes = [
             under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
             under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
         ]
@@ -37,44 +36,41 @@ class TestHistoricTaskData(unittest.TestCase):
     def test_hooks(self):
         evg_results = [
-            self._make_evg_result("dir/test1.js", 1, 10),
             self._make_evg_result("dir/test2.js", 1, 30),
-            self._make_evg_result("dir/test1.js", 2, 25),
+            self._make_evg_result("dir/test1.js", 2, 30),
             self._make_evg_result("dir/test3.js", 5, 10),
-            self._make_evg_result("test3:CleanEveryN", 10, 30),
+            self._make_evg_result("test3:Validate", 10, 30),
             self._make_evg_result("test3:CheckReplDBHash", 10, 35),
         ]
-        test_stats = under_test.HistoricTaskData(evg_results)
+        test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
         expected_runtimes = [
             under_test.TestRuntime(test_name="dir/test3.js", runtime=75),
             under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
-            under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
+            under_test.TestRuntime(test_name="dir/test1.js", runtime=30),
         ]
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
     def test_hook_first(self):
         evg_results = [
-            self._make_evg_result("test3:CleanEveryN", 10, 35),
-            self._make_evg_result("dir/test1.js", 1, 10),
+            self._make_evg_result("test3:Validate", 10, 35),
             self._make_evg_result("dir/test2.js", 1, 30),
             self._make_evg_result("dir/test1.js", 2, 25),
             self._make_evg_result("dir/test3.js", 5, 10),
             self._make_evg_result("test3:CheckReplDBHash", 10, 35),
         ]
-        test_stats = under_test.HistoricTaskData(evg_results)
+        test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
        expected_runtimes = [
             under_test.TestRuntime(test_name="dir/test3.js", runtime=80),
             under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
-            under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
+            under_test.TestRuntime(test_name="dir/test1.js", runtime=25),
         ]
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
     def test_zero_runs(self):
         evg_results = [
             self._make_evg_result("dir/test1.js", 0, 0),
-            self._make_evg_result("dir/test1.js", 0, 0),
         ]
-        test_stats = under_test.HistoricTaskData(evg_results)
+        test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
         expected_runtimes = [
             under_test.TestRuntime(test_name="dir/test1.js", runtime=0),
         ]
diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py
index de485965f67..b8c0578c19e 100644
--- a/buildscripts/util/teststats.py
+++ b/buildscripts/util/teststats.py
@@ -2,11 +2,14 @@
 from collections import defaultdict
 from dataclasses import dataclass
 from datetime import datetime
-from typing import NamedTuple, List
+from itertools import chain
+from typing import NamedTuple, List, Callable, Optional
 
 from evergreen import EvergreenApi, TestStats
 
-import buildscripts.util.testname as testname  # pylint: disable=wrong-import-position
+from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file
+
+TASK_LEVEL_HOOKS = {"CleanEveryN"}
 
 
 class TestRuntime(NamedTuple):
@@ -63,16 +66,73 @@ def _average(value_a: float, num_a: int, value_b: float, num_b: int) -> float:
     return float(value_a * num_a + value_b * num_b) / divisor
 
 
+class HistoricHookInfo(NamedTuple):
+    """Historic information about a test hook."""
+
+    hook_id: str
+    num_pass: int
+    avg_duration: float
+
+    @classmethod
+    def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo":
+        """Create an instance from a test_stats object."""
+        return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass,
+                   avg_duration=test_stats.avg_duration_pass)
+
+    def test_name(self) -> str:
+        """Get the name of the test associated with this hook."""
+        return split_test_hook_name(self.hook_id)[0]
+
+    def hook_name(self) -> str:
+        """Get the name of this hook."""
+        return split_test_hook_name(self.hook_id)[-1]
+
+    def is_task_level_hook(self) -> bool:
+        """Determine if this hook should be counted against the task not the test."""
+        return self.hook_name() in TASK_LEVEL_HOOKS
+
+
+class HistoricTestInfo(NamedTuple):
+    """Historic information about a test."""
+
+    test_name: str
+    num_pass: int
+    avg_duration: float
+    hooks: List[HistoricHookInfo]
+
+    @classmethod
+    def from_test_stats(cls, test_stats: TestStats,
+                        hooks: List[HistoricHookInfo]) -> "HistoricTestInfo":
+        """Create an instance from a test_stats object."""
+        return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass,
+                   avg_duration=test_stats.avg_duration_pass, hooks=hooks)
+
+    def normalized_test_name(self) -> str:
+        """Get the normalized version of the test name."""
+        return normalize_test_name(self.test_name)
+
+    def total_hook_runtime(self,
+                           predicate: Optional[Callable[[HistoricHookInfo], bool]] = None) -> float:
+        """Get the average runtime of all the hooks associated with this test."""
+        if not predicate:
+            predicate = lambda _: True
+        return sum([hook.avg_duration for hook in self.hooks if predicate(hook)])
+
+    def total_test_runtime(self) -> float:
+        """Get the average runtime of this test and its non-task-level hooks."""
+        return self.avg_duration + self.total_hook_runtime(lambda h: not h.is_task_level_hook())
+
+    def get_hook_overhead(self) -> float:
+        """Get the average runtime of this test's task-level hooks."""
+        return self.total_hook_runtime(lambda h: h.is_task_level_hook())
+
+
 class HistoricTaskData(object):
     """Represent the test statistics for the task that is being analyzed."""
 
-    def __init__(self, evg_test_stats_results: List[TestStats]) -> None:
+    def __init__(self, historic_test_results: List[HistoricTestInfo]) -> None:
         """Initialize the TestStats with raw results from the Evergreen API."""
-        self._runtime_by_test = defaultdict(_RuntimeHistory.empty)
-        self._hook_runtime_by_test = defaultdict(lambda: defaultdict(_RuntimeHistory.empty))
-
-        for doc in evg_test_stats_results:
-            self._add_stats(doc)
+        self.historic_test_results = historic_test_results
 
     # pylint: disable=too-many-arguments
     @classmethod
@@ -90,39 +150,51 @@ class HistoricTaskData(object):
         :return: Test stats for the specified task.
         """
         days = (end_date - start_date).days
-        return cls(
-            evg_api.test_stats_by_project(project, after_date=start_date, before_date=end_date,
-                                          tasks=[task], variants=[variant], group_by="test",
-                                          group_num_days=days))
-
-    def _add_stats(self, test_stats: TestStats) -> None:
-        """Add the statistics found in a document returned by the Evergreen test_stats/ endpoint."""
-        test_file = testname.normalize_test_file(test_stats.test_file)
-        duration = test_stats.avg_duration_pass
-        num_run = test_stats.num_pass
-        is_hook = testname.is_resmoke_hook(test_file)
-        if is_hook:
-            self._add_test_hook_stats(test_file, duration, num_run)
-        else:
-            self._add_test_stats(test_file, duration, num_run)
-
-    def _add_test_stats(self, test_file: str, duration: float, num_run: int) -> None:
-        """Add the statistics for a test."""
-        self._runtime_by_test[test_file].add_runtimes(duration, num_run)
-
-    def _add_test_hook_stats(self, test_file: str, duration: float, num_run: int) -> None:
-        """Add the statistics for a hook."""
-        test_name, hook_name = testname.split_test_hook_name(test_file)
-        self._hook_runtime_by_test[test_name][hook_name].add_runtimes(duration, num_run)
+        historic_stats = evg_api.test_stats_by_project(
+            project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant],
+            group_by="test", group_num_days=days)
+
+        return cls.from_stats_list(historic_stats)
+
+    @classmethod
+    def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData":
+        """
+        Build historic task data from a list of historic stats.
+
+        :param historic_stats: List of historic stats to build from.
+        :return: Historic task data from the list of stats.
+        """
+        hooks = defaultdict(list)
+        for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]:
+            historical_hook = HistoricHookInfo.from_test_stats(hook)
+            hooks[historical_hook.test_name()].append(historical_hook)
+
+        return cls([
+            HistoricTestInfo.from_test_stats(stat,
+                                             hooks[get_short_name_from_test_file(stat.test_file)])
+            for stat in historic_stats if not is_resmoke_hook(stat.test_file)
+        ])
 
     def get_tests_runtimes(self) -> List[TestRuntime]:
         """Return the list of (test_file, runtime_in_secs) tuples ordered by decreasing runtime."""
-        tests = []
-        for test_file, runtime_info in list(self._runtime_by_test.items()):
-            duration = runtime_info.duration
-            test_name = testname.get_short_name_from_test_file(test_file)
-            for _, hook_runtime_info in self._hook_runtime_by_test[test_name].items():
-                duration += hook_runtime_info.duration
-            test = TestRuntime(test_name=normalize_test_name(test_file), runtime=duration)
-            tests.append(test)
+        tests = [
+            TestRuntime(test_name=test_stats.normalized_test_name(),
+                        runtime=test_stats.total_test_runtime())
+            for test_stats in self.historic_test_results
+        ]
         return sorted(tests, key=lambda x: x.runtime, reverse=True)
+
+    def get_avg_hook_runtime(self, hook_name: str) -> float:
+        """Get the average runtime for the specified hook."""
+        hook_instances = list(
+            chain.from_iterable([[hook for hook in test.hooks if hook.hook_name() == hook_name]
+                                 for test in self.historic_test_results]))
+
+        if not hook_instances:
+            return 0
+        return sum([hook.avg_duration for hook in hook_instances]) / len(hook_instances)
+
+    def __len__(self) -> int:
+        """Get the number of historical entries."""
+        return len(self.historic_test_results)