author     David Bradford <david.bradford@mongodb.com>       2020-12-02 08:47:14 -0500
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-12-03 20:54:14 +0000
commit     59b50edbde7b2c9a926d5890f767387330fbadab (patch)
tree       10df60b560a2cfa0a165bdffa7b9e53713e0fb84
parent     fb67f77bd073a31d0511f9b60b7850df64b1321b (diff)
download   mongo-59b50edbde7b2c9a926d5890f767387330fbadab.tar.gz
SERVER-53058: Better account for CleanEveryN runtime for dynamically split tasks
(cherry picked from commit 3cb926298aaa7b55d4e0f51daf7c176b65ccc32b)
-rwxr-xr-x  buildscripts/evergreen_generate_resmoke_tasks.py              189
-rw-r--r--  buildscripts/tests/test_evergreen_generate_resmoke_tasks.py    24
-rw-r--r--  buildscripts/tests/util/test_teststats.py                      24
-rw-r--r--  buildscripts/util/teststats.py                                 152
4 files changed, 263 insertions(+), 126 deletions(-)
diff --git a/buildscripts/evergreen_generate_resmoke_tasks.py b/buildscripts/evergreen_generate_resmoke_tasks.py
index e85ae2cd5c0..e6bb26f6ca4 100755
--- a/buildscripts/evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/evergreen_generate_resmoke_tasks.py
@@ -5,6 +5,7 @@ Resmoke Test Suite Generator.
Analyze the evergreen history for tests run under the given task and create new evergreen tasks
to attempt to keep the task runtime under a specified amount.
"""
+# pylint: disable=too-many-lines
from copy import deepcopy
import datetime
from datetime import timedelta
@@ -15,7 +16,7 @@ import os
import re
import sys
from distutils.util import strtobool # pylint: disable=no-name-in-module
-from typing import Dict, List, Set, Sequence, Optional, Any, Match
+from typing import Dict, List, Set, Sequence, Optional, Any, Match, NamedTuple
import click
import requests
@@ -49,6 +50,8 @@ MIN_TIMEOUT_SECONDS = int(timedelta(minutes=5).total_seconds())
MAX_EXPECTED_TIMEOUT = int(timedelta(hours=48).total_seconds())
LOOKBACK_DURATION_DAYS = 14
GEN_SUFFIX = "_gen"
+CLEAN_EVERY_N_HOOK = "CleanEveryN"
+CLEAN_EVERY_N_VALUE = 20
HEADER_TEMPLATE = """# DO NOT EDIT THIS FILE. All manual edits will be lost.
# This file was generated by {file} from
@@ -419,7 +422,7 @@ def render_suite_files(suites: List, suite_name: str, test_list: List[str], suit
return suite_configs
-def calculate_timeout(avg_runtime, scaling_factor):
+def calculate_timeout(avg_runtime: float, scaling_factor: int) -> int:
"""
Determine how long a runtime to set based on average runtime and a scaling factor.
@@ -458,6 +461,79 @@ def should_tasks_be_generated(evg_api, task_id):
return True
+class TimeoutEstimate(NamedTuple):
+ """Runtime estimates used to calculate timeouts."""
+
+ max_test_runtime: Optional[float]
+ expected_task_runtime: Optional[float]
+
+ @classmethod
+ def no_timeouts(cls) -> "TimeoutEstimate":
+ """Create an instance with no estimation data."""
+ return cls(max_test_runtime=None, expected_task_runtime=None)
+
+ def calculate_test_timeout(self, repeat_factor: int) -> Optional[int]:
+ """
+ Calculate the timeout to use for tests.
+
+ :param repeat_factor: How many times the suite will be repeated.
+ :return: Timeout value to use for tests.
+ """
+ if self.max_test_runtime is None:
+ return None
+
+ timeout = calculate_timeout(self.max_test_runtime, 3) * repeat_factor
+ LOGGER.debug("Setting timeout", timeout=timeout, max_runtime=self.max_test_runtime,
+ factor=repeat_factor)
+ return timeout
+
+ def calculate_task_timeout(self, repeat_factor: int) -> Optional[int]:
+ """
+ Calculate the timeout to use for tasks.
+
+ :param repeat_factor: How many times the suite will be repeated.
+ :return: Timeout value to use for tasks.
+ """
+ if self.expected_task_runtime is None:
+ return None
+
+ exec_timeout = calculate_timeout(self.expected_task_runtime, 3) * repeat_factor
+ LOGGER.debug("Setting exec_timeout", exec_timeout=exec_timeout,
+ suite_runtime=self.expected_task_runtime, factor=repeat_factor)
+ return exec_timeout
+
+ def generate_timeout_cmd(self, is_patch: bool, repeat_factor: int,
+ use_default: bool = False) -> TimeoutInfo:
+ """
+ Create the timeout info to use to create a timeout shrub command.
+
+ :param is_patch: Whether the command is being created in a patch build.
+ :param repeat_factor: How many times the suite will be repeated.
+ :param use_default: Should the default timeout be used.
+ :return: Timeout info for the task.
+ """
+
+ if (self.max_test_runtime is None and self.expected_task_runtime is None) or use_default:
+ return TimeoutInfo.default_timeout()
+
+ test_timeout = self.calculate_test_timeout(repeat_factor)
+ task_timeout = self.calculate_task_timeout(repeat_factor)
+
+ if is_patch and (test_timeout > MAX_EXPECTED_TIMEOUT
+ or task_timeout > MAX_EXPECTED_TIMEOUT):
+ frameinfo = getframeinfo(currentframe())
+ LOGGER.error(
+ "This task looks like it is expected to run far longer than normal. This is "
+ "likely due to setting the suite 'repeat' value very high. If you are sure "
+ "this is something you want to do, comment this check out in your patch build "
+ "and resubmit", repeat_value=repeat_factor, timeout=test_timeout,
+ exec_timeout=task_timeout, code_file=frameinfo.filename, code_line=frameinfo.lineno,
+ max_timeout=MAX_EXPECTED_TIMEOUT)
+ raise ValueError("Failing due to expected runtime.")
+
+ return TimeoutInfo.overridden(timeout=test_timeout, exec_timeout=task_timeout)
+
+
class Suite(object):
"""A suite of tests that can be run by evergreen."""
@@ -474,6 +550,7 @@ class Suite(object):
self.max_runtime = 0
self.tests_with_runtime_info = 0
self.source_name = source_name
+ self.task_overhead = 0
self.index = Suite._current_index
Suite._current_index += 1
@@ -495,7 +572,7 @@ class Suite(object):
if runtime > self.max_runtime:
self.max_runtime = runtime
- def should_overwrite_timeout(self):
+ def should_overwrite_timeout(self) -> bool:
"""
Whether the timeout for this suite should be overwritten.
@@ -503,14 +580,19 @@ class Suite(object):
"""
return len(self.tests) == self.tests_with_runtime_info
+ def get_timeout_estimate(self) -> TimeoutEstimate:
+ """Get the estimated runtime of this task to for timeouts."""
+ if self.should_overwrite_timeout():
+ return TimeoutEstimate(max_test_runtime=self.max_runtime,
+ expected_task_runtime=self.total_runtime + self.task_overhead)
+ return TimeoutEstimate.no_timeouts()
+
def get_runtime(self):
"""Get the current average runtime of all the tests currently in this suite."""
-
return self.total_runtime
def get_test_count(self):
"""Get the number of tests currently in this suite."""
-
return len(self.tests)
@property
@@ -593,44 +675,6 @@ class EvergreenConfigGenerator(object):
return variables
- def _get_timeout_command(self, max_test_runtime: int, expected_suite_runtime: int,
- use_default: bool) -> TimeoutInfo:
- """
- Add an evergreen command to override the default timeouts to the list of commands.
-
- :param max_test_runtime: Maximum runtime of any test in the sub-suite.
- :param expected_suite_runtime: Expected runtime of the entire sub-suite.
- :param use_default: Use default timeouts.
- :return: Timeout information.
- """
- repeat_factor = self.options.repeat_suites
- if (max_test_runtime or expected_suite_runtime) and not use_default:
- timeout = None
- exec_timeout = None
- if max_test_runtime:
- timeout = calculate_timeout(max_test_runtime, 3) * repeat_factor
- LOGGER.debug("Setting timeout", timeout=timeout, max_runtime=max_test_runtime,
- factor=repeat_factor)
- if expected_suite_runtime:
- exec_timeout = calculate_timeout(expected_suite_runtime, 3) * repeat_factor
- LOGGER.debug("Setting exec_timeout", exec_timeout=exec_timeout,
- suite_runtime=expected_suite_runtime, factor=repeat_factor)
-
- if self.options.is_patch and \
- (timeout > MAX_EXPECTED_TIMEOUT or exec_timeout > MAX_EXPECTED_TIMEOUT):
- frameinfo = getframeinfo(currentframe())
- LOGGER.error(
- "This task looks like it is expected to run far longer than normal. This is "
- "likely due to setting the suite 'repeat' value very high. If you are sure "
- "this is something you want to do, comment this check out in your patch build "
- "and resubmit", repeat_value=repeat_factor, timeout=timeout,
- exec_timeout=exec_timeout, code_file=frameinfo.filename,
- code_line=frameinfo.lineno, max_timeout=MAX_EXPECTED_TIMEOUT)
- raise ValueError("Failing due to expected runtime.")
- return TimeoutInfo.overridden(timeout=timeout, exec_timeout=exec_timeout)
-
- return TimeoutInfo.default_timeout()
-
@staticmethod
def _is_task_dependency(task: str, possible_dependency: str) -> Optional[Match[str]]:
"""
@@ -668,16 +712,14 @@ class EvergreenConfigGenerator(object):
return dependencies
def _generate_task(self, sub_suite_name: str, sub_task_name: str, target_dir: str,
- max_test_runtime: Optional[int] = None,
- expected_suite_runtime: Optional[int] = None) -> Task:
+ timeout_est: TimeoutEstimate) -> Task:
"""
Generate a shrub evergreen config for a resmoke task.
:param sub_suite_name: Name of suite being generated.
:param sub_task_name: Name of task to generate.
:param target_dir: Directory containing generated suite files.
- :param max_test_runtime: Runtime of the longest test in this sub suite.
- :param expected_suite_runtime: Expected total runtime of this suite.
+ :param timeout_est: Estimated runtime to use for calculating timeouts.
:return: Shrub configuration for the described task.
"""
# pylint: disable=too-many-arguments
@@ -690,9 +732,9 @@ class EvergreenConfigGenerator(object):
run_tests_vars = self._get_run_tests_vars(target_suite_file)
use_multiversion = self.options.use_multiversion
- timeout_info = self._get_timeout_command(max_test_runtime, expected_suite_runtime,
- self.options.use_default_timeouts)
- commands = resmoke_commands("run generated tests", run_tests_vars, timeout_info,
+ timeout_cmd = timeout_est.generate_timeout_cmd(
+ self.options.is_patch, self.options.repeat_suites, self.options.use_default_timeouts)
+ commands = resmoke_commands("run generated tests", run_tests_vars, timeout_cmd,
use_multiversion)
return Task(sub_task_name, commands, self._get_dependencies())
@@ -707,13 +749,8 @@ class EvergreenConfigGenerator(object):
"""
sub_task_name = taskname.name_generated_task(self.options.task, idx, len(self.suites),
self.options.variant)
- max_runtime = None
- total_runtime = None
- if suite.should_overwrite_timeout():
- max_runtime = suite.max_runtime
- total_runtime = suite.get_runtime()
return self._generate_task(suite.name, sub_task_name, self.options.generated_config_dir,
- max_runtime, total_runtime)
+ suite.get_timeout_estimate())
def _generate_all_tasks(self) -> Set[Task]:
"""Get a set of shrub task for all the sub tasks."""
@@ -724,8 +761,9 @@ class EvergreenConfigGenerator(object):
misc_suite_name = f"{os.path.basename(self.options.suite)}_misc"
misc_task_name = f"{self.options.task}_misc_{self.options.variant}"
tasks.add(
- self._generate_task(misc_suite_name, misc_task_name,
- self.options.generated_config_dir))
+ self._generate_task(misc_suite_name,
+ misc_task_name, self.options.generated_config_dir,
+ TimeoutEstimate.no_timeouts()))
return tasks
@@ -773,7 +811,7 @@ class GenerateSubSuites(object):
evg_stats = HistoricTaskData.from_evg(self.evergreen_api, self.config_options.project,
start_date, end_date, self.config_options.task,
self.config_options.variant)
- if not evg_stats.get_tests_runtimes():
+ if not evg_stats:
LOGGER.debug("No test history, using fallback suites")
# This is probably a new suite, since there is no test history, just use the
# fallback values.
@@ -803,10 +841,37 @@ class GenerateSubSuites(object):
if not tests_runtimes:
LOGGER.debug("No test runtimes after filter, using fallback")
return self.calculate_fallback_suites()
+
self.test_list = [info.test_name for info in tests_runtimes]
- return divide_tests_into_suites(self.config_options.suite, tests_runtimes,
- execution_time_secs, self.config_options.max_sub_suites,
- self.config_options.max_tests_per_suite)
+
+ suites = divide_tests_into_suites(self.config_options.suite, tests_runtimes,
+ execution_time_secs, self.config_options.max_sub_suites,
+ self.config_options.max_tests_per_suite)
+
+ self.add_task_hook_overhead(suites, test_stats)
+
+ return suites
+
+ @staticmethod
+ def add_task_hook_overhead(suites: List[Suite], historic_stats: HistoricTaskData) -> None:
+ """
+ Add the task-level hook overhead that each suite should account for.
+
+ Certain test hooks need to be accounted for on the task level instead of the test level
+ in order to calculate accurate timeouts. So we will add details about those hooks to
+ each suite here.
+
+ :param suites: List of suites that were created.
+ :param historic_stats: Historic runtime data of the suite.
+ """
+ # The CleanEveryN hook is run every 'N' tests. N is almost always 20. The runtime of the
+ # hook will be associated with whichever test happens to be running, which could be
+ # different every run. So we need to take its runtime into account at the task level.
+ avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
+ if avg_clean_every_n_runtime != 0:
+ for suite in suites:
+ n_expected_runs = suite.get_test_count() / CLEAN_EVERY_N_VALUE
+ suite.task_overhead += n_expected_runs * avg_clean_every_n_runtime
def filter_tests(self, tests_runtimes: List[TestRuntime]) -> List[TestRuntime]:
"""
diff --git a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
index f93c2e7c3b2..004527dec4a 100644
--- a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
@@ -563,6 +563,15 @@ class CalculateTimeoutTest(unittest.TestCase):
under_test.calculate_timeout(30, scaling_factor))
+class TimeoutEstimateTest(unittest.TestCase):
+ def test_too_high_a_timeout_raises_errors(self):
+ timeout_est = under_test.TimeoutEstimate(
+ max_test_runtime=5, expected_task_runtime=under_test.MAX_EXPECTED_TIMEOUT)
+
+ with self.assertRaises(ValueError):
+ timeout_est.generate_timeout_cmd(is_patch=True, repeat_factor=1)
+
+
class EvergreenConfigGeneratorTest(unittest.TestCase):
@staticmethod
def generate_mock_suites(count):
@@ -572,6 +581,8 @@ class EvergreenConfigGeneratorTest(unittest.TestCase):
suite.name = "suite {0}".format(idx)
suite.max_runtime = 5.28
suite.get_runtime = lambda: 100.874
+ suite.get_timeout_estimate.return_value = under_test.TimeoutEstimate(
+ max_test_runtime=5.28, expected_task_runtime=100.874)
suites.append(suite)
return suites
@@ -723,15 +734,6 @@ class EvergreenConfigGeneratorTest(unittest.TestCase):
expected_exec_timeout = under_test.calculate_timeout(suites[0].get_runtime(), 3) * 5
self.assertEqual(expected_exec_timeout, timeout_cmd["params"]["exec_timeout_secs"])
- def test_evg_config_has_fails_if_timeout_too_high(self):
- options = self.generate_mock_options()
- options.repeat_suites = under_test.MAX_EXPECTED_TIMEOUT
- suites = self.generate_mock_suites(3)
-
- with self.assertRaises(ValueError):
- generator = under_test.EvergreenConfigGenerator(suites, options, MagicMock())
- generator.generate_config(MagicMock())
-
def test_evg_config_does_not_fails_if_timeout_too_high_on_mainline(self):
options = self.generate_mock_options()
options.is_patch = False
@@ -780,7 +782,9 @@ class EvergreenConfigGeneratorTest(unittest.TestCase):
suite_without_timing_info = 1
options = self.generate_mock_options()
suites = self.generate_mock_suites(3)
- suites[suite_without_timing_info].should_overwrite_timeout.return_value = False
+ suites[
+ suite_without_timing_info].get_timeout_estimate.return_value = under_test.TimeoutEstimate.no_timeouts(
+ )
build_variant = BuildVariant("variant")
generator = under_test.EvergreenConfigGenerator(suites, options, MagicMock())
diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py
index 7d0f04f2600..b3405ecc00d 100644
--- a/buildscripts/tests/util/test_teststats.py
+++ b/buildscripts/tests/util/test_teststats.py
@@ -24,11 +24,10 @@ class NormalizeTestNameTest(unittest.TestCase):
class TestHistoricTaskData(unittest.TestCase):
def test_no_hooks(self):
evg_results = [
- self._make_evg_result("dir/test1.js", 1, 10),
self._make_evg_result("dir/test2.js", 1, 30),
- self._make_evg_result("dir/test1.js", 2, 25),
+ self._make_evg_result("dir/test1.js", 2, 20),
]
- test_stats = under_test.HistoricTaskData(evg_results)
+ test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
expected_runtimes = [
under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
@@ -37,44 +36,41 @@ class TestHistoricTaskData(unittest.TestCase):
def test_hooks(self):
evg_results = [
- self._make_evg_result("dir/test1.js", 1, 10),
self._make_evg_result("dir/test2.js", 1, 30),
- self._make_evg_result("dir/test1.js", 2, 25),
+ self._make_evg_result("dir/test1.js", 2, 30),
self._make_evg_result("dir/test3.js", 5, 10),
- self._make_evg_result("test3:CleanEveryN", 10, 30),
+ self._make_evg_result("test3:Validate", 10, 30),
self._make_evg_result("test3:CheckReplDBHash", 10, 35),
]
- test_stats = under_test.HistoricTaskData(evg_results)
+ test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
expected_runtimes = [
under_test.TestRuntime(test_name="dir/test3.js", runtime=75),
under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
- under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
+ under_test.TestRuntime(test_name="dir/test1.js", runtime=30),
]
self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
def test_hook_first(self):
evg_results = [
- self._make_evg_result("test3:CleanEveryN", 10, 35),
- self._make_evg_result("dir/test1.js", 1, 10),
+ self._make_evg_result("test3:Validate", 10, 35),
self._make_evg_result("dir/test2.js", 1, 30),
self._make_evg_result("dir/test1.js", 2, 25),
self._make_evg_result("dir/test3.js", 5, 10),
self._make_evg_result("test3:CheckReplDBHash", 10, 35),
]
- test_stats = under_test.HistoricTaskData(evg_results)
+ test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
expected_runtimes = [
under_test.TestRuntime(test_name="dir/test3.js", runtime=80),
under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
- under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
+ under_test.TestRuntime(test_name="dir/test1.js", runtime=25),
]
self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
def test_zero_runs(self):
evg_results = [
self._make_evg_result("dir/test1.js", 0, 0),
- self._make_evg_result("dir/test1.js", 0, 0),
]
- test_stats = under_test.HistoricTaskData(evg_results)
+ test_stats = under_test.HistoricTaskData.from_stats_list(evg_results)
expected_runtimes = [
under_test.TestRuntime(test_name="dir/test1.js", runtime=0),
]
diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py
index de485965f67..b8c0578c19e 100644
--- a/buildscripts/util/teststats.py
+++ b/buildscripts/util/teststats.py
@@ -2,11 +2,14 @@
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
-from typing import NamedTuple, List
+from itertools import chain
+from typing import NamedTuple, List, Callable, Optional
from evergreen import EvergreenApi, TestStats
-import buildscripts.util.testname as testname # pylint: disable=wrong-import-position
+from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file
+
+TASK_LEVEL_HOOKS = {"CleanEveryN"}
class TestRuntime(NamedTuple):
@@ -63,16 +66,73 @@ def _average(value_a: float, num_a: int, value_b: float, num_b: int) -> float:
return float(value_a * num_a + value_b * num_b) / divisor
+class HistoricHookInfo(NamedTuple):
+ """Historic information about a test hook."""
+
+ hook_id: str
+ num_pass: int
+ avg_duration: float
+
+ @classmethod
+ def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo":
+ """Create an instance from a test_stats object."""
+ return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass,
+ avg_duration=test_stats.avg_duration_pass)
+
+ def test_name(self) -> str:
+ """Get the name of the test associated with this hook."""
+ return split_test_hook_name(self.hook_id)[0]
+
+ def hook_name(self) -> str:
+ """Get the name of this hook."""
+ return split_test_hook_name(self.hook_id)[-1]
+
+ def is_task_level_hook(self) -> bool:
+ """Determine if this hook should be counted against the task not the test."""
+ return self.hook_name() in TASK_LEVEL_HOOKS
+
+
+class HistoricTestInfo(NamedTuple):
+ """Historic information about a test."""
+
+ test_name: str
+ num_pass: int
+ avg_duration: float
+ hooks: List[HistoricHookInfo]
+
+ @classmethod
+ def from_test_stats(cls, test_stats: TestStats,
+ hooks: List[HistoricHookInfo]) -> "HistoricTestInfo":
+ """Create an instance from a test_stats object."""
+ return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass,
+ avg_duration=test_stats.avg_duration_pass, hooks=hooks)
+
+ def normalized_test_name(self) -> str:
+ """Get the normalized version of the test name."""
+ return normalize_test_name(self.test_name)
+
+ def total_hook_runtime(self,
+ predicate: Optional[Callable[[HistoricHookInfo], bool]] = None) -> float:
+ """Get the average runtime of all the hooks associated with this test."""
+ if not predicate:
+ predicate = lambda _: True
+ return sum([hook.avg_duration for hook in self.hooks if predicate(hook)])
+
+ def total_test_runtime(self) -> float:
+ """Get the average runtime of this test and it's non-task level hooks."""
+ return self.avg_duration + self.total_hook_runtime(lambda h: not h.is_task_level_hook())
+
+ def get_hook_overhead(self) -> float:
+ """Get the average runtime of this test and it's non-task level hooks."""
+ return self.total_hook_runtime(lambda h: h.is_task_level_hook())
+
+
class HistoricTaskData(object):
"""Represent the test statistics for the task that is being analyzed."""
- def __init__(self, evg_test_stats_results: List[TestStats]) -> None:
+ def __init__(self, historic_test_results: List[HistoricTestInfo]) -> None:
"""Initialize the TestStats with raw results from the Evergreen API."""
- self._runtime_by_test = defaultdict(_RuntimeHistory.empty)
- self._hook_runtime_by_test = defaultdict(lambda: defaultdict(_RuntimeHistory.empty))
-
- for doc in evg_test_stats_results:
- self._add_stats(doc)
+ self.historic_test_results = historic_test_results
# pylint: disable=too-many-arguments
@classmethod
@@ -90,39 +150,51 @@ class HistoricTaskData(object):
:return: Test stats for the specified task.
"""
days = (end_date - start_date).days
- return cls(
- evg_api.test_stats_by_project(project, after_date=start_date, before_date=end_date,
- tasks=[task], variants=[variant], group_by="test",
- group_num_days=days))
-
- def _add_stats(self, test_stats: TestStats) -> None:
- """Add the statistics found in a document returned by the Evergreen test_stats/ endpoint."""
- test_file = testname.normalize_test_file(test_stats.test_file)
- duration = test_stats.avg_duration_pass
- num_run = test_stats.num_pass
- is_hook = testname.is_resmoke_hook(test_file)
- if is_hook:
- self._add_test_hook_stats(test_file, duration, num_run)
- else:
- self._add_test_stats(test_file, duration, num_run)
-
- def _add_test_stats(self, test_file: str, duration: float, num_run: int) -> None:
- """Add the statistics for a test."""
- self._runtime_by_test[test_file].add_runtimes(duration, num_run)
-
- def _add_test_hook_stats(self, test_file: str, duration: float, num_run: int) -> None:
- """Add the statistics for a hook."""
- test_name, hook_name = testname.split_test_hook_name(test_file)
- self._hook_runtime_by_test[test_name][hook_name].add_runtimes(duration, num_run)
+ historic_stats = evg_api.test_stats_by_project(
+ project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant],
+ group_by="test", group_num_days=days)
+
+ return cls.from_stats_list(historic_stats)
+
+ @classmethod
+ def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData":
+ """
+ Build historic task data from a list of historic stats.
+
+ :param historic_stats: List of historic stats to build from.
+ :return: Historic task data from the list of stats.
+ """
+
+ hooks = defaultdict(list)
+ for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]:
+ historical_hook = HistoricHookInfo.from_test_stats(hook)
+ hooks[historical_hook.test_name()].append(historical_hook)
+
+ return cls([
+ HistoricTestInfo.from_test_stats(stat,
+ hooks[get_short_name_from_test_file(stat.test_file)])
+ for stat in historic_stats if not is_resmoke_hook(stat.test_file)
+ ])
def get_tests_runtimes(self) -> List[TestRuntime]:
"""Return the list of (test_file, runtime_in_secs) tuples ordered by decreasing runtime."""
- tests = []
- for test_file, runtime_info in list(self._runtime_by_test.items()):
- duration = runtime_info.duration
- test_name = testname.get_short_name_from_test_file(test_file)
- for _, hook_runtime_info in self._hook_runtime_by_test[test_name].items():
- duration += hook_runtime_info.duration
- test = TestRuntime(test_name=normalize_test_name(test_file), runtime=duration)
- tests.append(test)
+ tests = [
+ TestRuntime(test_name=test_stats.normalized_test_name(),
+ runtime=test_stats.total_test_runtime())
+ for test_stats in self.historic_test_results
+ ]
return sorted(tests, key=lambda x: x.runtime, reverse=True)
+
+ def get_avg_hook_runtime(self, hook_name: str) -> float:
+ """Get the average runtime for the specified hook."""
+ hook_instances = list(
+ chain.from_iterable([[hook for hook in test.hooks if hook.hook_name() == hook_name]
+ for test in self.historic_test_results]))
+
+ if not hook_instances:
+ return 0
+ return sum([hook.avg_duration for hook in hook_instances]) / len(hook_instances)
+
+ def __len__(self) -> int:
+ """Get the number of historical entries."""
+ return len(self.historic_test_results)