diff options
author | David Bradford <david.bradford@mongodb.com> | 2020-12-02 08:47:14 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-12-02 23:25:43 +0000 |
commit | 3cb926298aaa7b55d4e0f51daf7c176b65ccc32b (patch) | |
tree | 228c581f2edfc933e32b870b19007676b4eb6e60 /buildscripts/util | |
parent | b8cefcd121371a04b32696fe690e38309c984803 (diff) | |
download | mongo-3cb926298aaa7b55d4e0f51daf7c176b65ccc32b.tar.gz |
SERVER-53058: Better account for CleanEveryN runtime for dynamically split tasks
Diffstat (limited to 'buildscripts/util')
-rw-r--r-- | buildscripts/util/teststats.py | 152 |
1 file changed, 112 insertions, 40 deletions
diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py index de485965f67..b8c0578c19e 100644 --- a/buildscripts/util/teststats.py +++ b/buildscripts/util/teststats.py @@ -2,11 +2,14 @@ from collections import defaultdict from dataclasses import dataclass from datetime import datetime -from typing import NamedTuple, List +from itertools import chain +from typing import NamedTuple, List, Callable, Optional from evergreen import EvergreenApi, TestStats -import buildscripts.util.testname as testname # pylint: disable=wrong-import-position +from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file + +TASK_LEVEL_HOOKS = {"CleanEveryN"} class TestRuntime(NamedTuple): @@ -63,16 +66,73 @@ def _average(value_a: float, num_a: int, value_b: float, num_b: int) -> float: return float(value_a * num_a + value_b * num_b) / divisor +class HistoricHookInfo(NamedTuple): + """Historic information about a test hook.""" + + hook_id: str + num_pass: int + avg_duration: float + + @classmethod + def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo": + """Create an instance from a test_stats object.""" + return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass, + avg_duration=test_stats.avg_duration_pass) + + def test_name(self) -> str: + """Get the name of the test associated with this hook.""" + return split_test_hook_name(self.hook_id)[0] + + def hook_name(self) -> str: + """Get the name of this hook.""" + return split_test_hook_name(self.hook_id)[-1] + + def is_task_level_hook(self) -> bool: + """Determine if this hook should be counted against the task not the test.""" + return self.hook_name() in TASK_LEVEL_HOOKS + + +class HistoricTestInfo(NamedTuple): + """Historic information about a test.""" + + test_name: str + num_pass: int + avg_duration: float + hooks: List[HistoricHookInfo] + + @classmethod + def from_test_stats(cls, test_stats: TestStats, + hooks: List[HistoricHookInfo]) -> 
"HistoricTestInfo": + """Create an instance from a test_stats object.""" + return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass, + avg_duration=test_stats.avg_duration_pass, hooks=hooks) + + def normalized_test_name(self) -> str: + """Get the normalized version of the test name.""" + return normalize_test_name(self.test_name) + + def total_hook_runtime(self, + predicate: Optional[Callable[[HistoricHookInfo], bool]] = None) -> float: + """Get the average runtime of all the hooks associated with this test.""" + if not predicate: + predicate = lambda _: True + return sum([hook.avg_duration for hook in self.hooks if predicate(hook)]) + + def total_test_runtime(self) -> float: + """Get the average runtime of this test and it's non-task level hooks.""" + return self.avg_duration + self.total_hook_runtime(lambda h: not h.is_task_level_hook()) + + def get_hook_overhead(self) -> float: + """Get the average runtime of this test and it's non-task level hooks.""" + return self.total_hook_runtime(lambda h: h.is_task_level_hook()) + + class HistoricTaskData(object): """Represent the test statistics for the task that is being analyzed.""" - def __init__(self, evg_test_stats_results: List[TestStats]) -> None: + def __init__(self, historic_test_results: List[HistoricTestInfo]) -> None: """Initialize the TestStats with raw results from the Evergreen API.""" - self._runtime_by_test = defaultdict(_RuntimeHistory.empty) - self._hook_runtime_by_test = defaultdict(lambda: defaultdict(_RuntimeHistory.empty)) - - for doc in evg_test_stats_results: - self._add_stats(doc) + self.historic_test_results = historic_test_results # pylint: disable=too-many-arguments @classmethod @@ -90,39 +150,51 @@ class HistoricTaskData(object): :return: Test stats for the specified task. 
""" days = (end_date - start_date).days - return cls( - evg_api.test_stats_by_project(project, after_date=start_date, before_date=end_date, - tasks=[task], variants=[variant], group_by="test", - group_num_days=days)) - - def _add_stats(self, test_stats: TestStats) -> None: - """Add the statistics found in a document returned by the Evergreen test_stats/ endpoint.""" - test_file = testname.normalize_test_file(test_stats.test_file) - duration = test_stats.avg_duration_pass - num_run = test_stats.num_pass - is_hook = testname.is_resmoke_hook(test_file) - if is_hook: - self._add_test_hook_stats(test_file, duration, num_run) - else: - self._add_test_stats(test_file, duration, num_run) - - def _add_test_stats(self, test_file: str, duration: float, num_run: int) -> None: - """Add the statistics for a test.""" - self._runtime_by_test[test_file].add_runtimes(duration, num_run) - - def _add_test_hook_stats(self, test_file: str, duration: float, num_run: int) -> None: - """Add the statistics for a hook.""" - test_name, hook_name = testname.split_test_hook_name(test_file) - self._hook_runtime_by_test[test_name][hook_name].add_runtimes(duration, num_run) + historic_stats = evg_api.test_stats_by_project( + project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant], + group_by="test", group_num_days=days) + + return cls.from_stats_list(historic_stats) + + @classmethod + def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData": + """ + Build historic task data from a list of historic stats. + + :param historic_stats: List of historic stats to build from. + :return: Historic task data from the list of stats. 
+ """ + + hooks = defaultdict(list) + for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]: + historical_hook = HistoricHookInfo.from_test_stats(hook) + hooks[historical_hook.test_name()].append(historical_hook) + + return cls([ + HistoricTestInfo.from_test_stats(stat, + hooks[get_short_name_from_test_file(stat.test_file)]) + for stat in historic_stats if not is_resmoke_hook(stat.test_file) + ]) def get_tests_runtimes(self) -> List[TestRuntime]: """Return the list of (test_file, runtime_in_secs) tuples ordered by decreasing runtime.""" - tests = [] - for test_file, runtime_info in list(self._runtime_by_test.items()): - duration = runtime_info.duration - test_name = testname.get_short_name_from_test_file(test_file) - for _, hook_runtime_info in self._hook_runtime_by_test[test_name].items(): - duration += hook_runtime_info.duration - test = TestRuntime(test_name=normalize_test_name(test_file), runtime=duration) - tests.append(test) + tests = [ + TestRuntime(test_name=test_stats.normalized_test_name(), + runtime=test_stats.total_test_runtime()) + for test_stats in self.historic_test_results + ] return sorted(tests, key=lambda x: x.runtime, reverse=True) + + def get_avg_hook_runtime(self, hook_name: str) -> float: + """Get the average runtime for the specified hook.""" + hook_instances = list( + chain.from_iterable([[hook for hook in test.hooks if hook.hook_name() == hook_name] + for test in self.historic_test_results])) + + if not hook_instances: + return 0 + return sum([hook.avg_duration for hook in hook_instances]) / len(hook_instances) + + def __len__(self) -> int: + """Get the number of historical entries.""" + return len(self.historic_test_results) |