Diffstat
-rw-r--r--  buildscripts/burn_in_tests.py                     |  26
-rwxr-xr-x  buildscripts/evergreen_generate_resmoke_tasks.py  |  51
-rw-r--r--  buildscripts/tests/test_burn_in_tests.py          |   5
-rw-r--r--  buildscripts/tests/util/test_teststats.py         |  35
-rw-r--r--  buildscripts/util/teststats.py                    | 128
-rw-r--r--  etc/pip/components/evergreen.req                  |   1
-rw-r--r--  etc/pip/components/resmoke.req                    |   2
7 files changed, 135 insertions, 113 deletions
diff --git a/buildscripts/burn_in_tests.py b/buildscripts/burn_in_tests.py
index fe8780cb203..cb808a1f6a3 100644
--- a/buildscripts/burn_in_tests.py
+++ b/buildscripts/burn_in_tests.py
@@ -34,7 +34,7 @@ from buildscripts.resmokelib.utils import default_if_none, globstar
 from buildscripts.ciconfig.evergreen import parse_evergreen_file, ResmokeArgs, \
     EvergreenProjectConfig, VariantTask
 from buildscripts.util.fileops import write_file
-from buildscripts.util.teststats import TestStats
+from buildscripts.util.teststats import HistoricTaskData, TestRuntime
 from buildscripts.util.taskname import name_generated_task
 from buildscripts.patch_builds.task_generation import (resmoke_commands, TimeoutInfo,
                                                        validate_task_generation_limit)
@@ -436,11 +436,12 @@ def _set_resmoke_cmd(repeat_config: RepeatConfig, resmoke_args: [str]) -> [str]:
     return new_args
 
 
-def _parse_avg_test_runtime(test: str, task_avg_test_runtime_stats: [TestStats]) -> Optional[float]:
+def _parse_avg_test_runtime(test: str,
+                            task_avg_test_runtime_stats: List[TestRuntime]) -> Optional[float]:
     """
-    Parse list of teststats to find runtime for particular test.
+    Parse list of test runtimes to find runtime for particular test.
 
-    :param task_avg_test_runtime_stats: Teststat data.
+    :param task_avg_test_runtime_stats: List of average historic runtimes of tests.
     :param test: Test name.
     :return: Historical average runtime of the test.
     """
@@ -486,13 +487,13 @@ def _calculate_exec_timeout(repeat_config: RepeatConfig, avg_test_runtime: float
 
 
 def _generate_timeouts(repeat_config: RepeatConfig, test: str,
-                       task_avg_test_runtime_stats: [TestStats]) -> TimeoutInfo:
+                       task_avg_test_runtime_stats: [TestRuntime]) -> TimeoutInfo:
     """
     Add timeout.update command to list of commands for a burn in execution task.
 
     :param repeat_config: Information on how the test will repeat.
     :param test: Test name.
-    :param task_avg_test_runtime_stats: Teststat data.
+    :param task_avg_test_runtime_stats: Average historic runtimes of tests.
     :return: TimeoutInfo to use.
     """
     if task_avg_test_runtime_stats:
@@ -512,7 +513,7 @@ def _generate_timeouts(repeat_config: RepeatConfig, test: str,
 
 
 def _get_task_runtime_history(evg_api: Optional[EvergreenApi], project: str, task: str,
-                              variant: str):
+                              variant: str) -> List[TestRuntime]:
     """
     Fetch historical average runtime for all tests in a task from Evergreen API.
 
@@ -528,12 +529,9 @@ def _get_task_runtime_history(evg_api: Optional[EvergreenApi], project: str, tas
     try:
         end_date = datetime.datetime.utcnow().replace(microsecond=0)
         start_date = end_date - datetime.timedelta(days=AVG_TEST_RUNTIME_ANALYSIS_DAYS)
-        data = evg_api.test_stats_by_project(project, after_date=start_date.strftime("%Y-%m-%d"),
-                                             before_date=end_date.strftime("%Y-%m-%d"),
-                                             tasks=[task], variants=[variant], group_by="test",
-                                             group_num_days=AVG_TEST_RUNTIME_ANALYSIS_DAYS)
-        test_runtimes = TestStats(data).get_tests_runtimes()
-        return test_runtimes
+        test_stats = HistoricTaskData.from_evg(evg_api, project, start_date=start_date,
+                                               end_date=end_date, task=task, variant=variant)
+        return test_stats.get_tests_runtimes()
     except requests.HTTPError as err:
         if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
             # Evergreen may return a 503 when the service is degraded.
@@ -544,7 +542,7 @@ def _get_task_runtime_history(evg_api: Optional[EvergreenApi], project: str, tas
 
 
 def _create_task(index: int, test_count: int, test: str, task_data: Dict,
-                 task_runtime_stats: List[TestStats], generate_config: GenerateConfig,
+                 task_runtime_stats: List[TestRuntime], generate_config: GenerateConfig,
                  repeat_config: RepeatConfig, task_prefix: str) -> Task:
     # pylint: disable=too-many-arguments,too-many-locals
     """
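The burn_in_tests.py hunks above replace the inline test_stats_by_project query (plus the old TestStats wrapping) with a single HistoricTaskData.from_evg call. A minimal sketch of the resulting call pattern, assuming a client configured from the standard ~/.evergreen.yml; the project, task, and variant values are placeholders, not taken from this change:

    import datetime

    from evergreen.api import RetryingEvergreenApi

    from buildscripts.util.teststats import HistoricTaskData

    # Assumption: API credentials live in the usual ~/.evergreen.yml.
    evg_api = RetryingEvergreenApi.get_api(use_config_file=True)
    end_date = datetime.datetime.utcnow().replace(microsecond=0)
    start_date = end_date - datetime.timedelta(days=14)  # mirrors AVG_TEST_RUNTIME_ANALYSIS_DAYS

    # One constructor call now hides both the Evergreen query and the aggregation.
    history = HistoricTaskData.from_evg(evg_api, "mongodb-mongo-master", start_date=start_date,
                                        end_date=end_date, task="jsCore", variant="linux-64")
    for test in history.get_tests_runtimes():
        print(test.test_name, test.runtime)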
diff --git a/buildscripts/evergreen_generate_resmoke_tasks.py b/buildscripts/evergreen_generate_resmoke_tasks.py
index bb3e77fd01d..259e0eed6d3 100755
--- a/buildscripts/evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/evergreen_generate_resmoke_tasks.py
@@ -23,7 +23,6 @@ import structlog
 import yaml
 
 from evergreen.api import EvergreenApi, RetryingEvergreenApi
-from evergreen.stats import TestStats
 from shrub.v2 import Task, TaskDependency, BuildVariant, ExistingTask, ShrubProject
 
@@ -37,7 +36,7 @@ import buildscripts.resmokelib.suitesconfig as suitesconfig
 from buildscripts.util.fileops import write_file_to_dir
 import buildscripts.util.read_config as read_config
 import buildscripts.util.taskname as taskname
-import buildscripts.util.teststats as teststats
+from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name
 from buildscripts.patch_builds.task_generation import TimeoutInfo, resmoke_commands
 
 # pylint: enable=wrong-import-position
@@ -301,8 +300,8 @@ def _new_suite_needed(current_suite, test_runtime, max_suite_runtime, max_tests_
     return False
 
 
-def divide_tests_into_suites(suite_name, tests_runtimes, max_time_seconds, max_suites=None,
-                             max_tests_per_suite=None):
+def divide_tests_into_suites(suite_name, tests_runtimes: List[TestRuntime], max_time_seconds,
+                             max_suites=None, max_tests_per_suite=None):
     """
     Divide the given tests into suites.
 
@@ -778,9 +777,10 @@ class GenerateSubSuites(object):
         :return: List of sub suites to be generated.
         """
         try:
-            evg_stats = self.get_evg_stats(self.config_options.project, start_date, end_date,
-                                           self.config_options.task, self.config_options.variant)
-            if not evg_stats:
+            evg_stats = HistoricTaskData.from_evg(self.evergreen_api, self.config_options.project,
+                                                  start_date, end_date, self.config_options.task,
+                                                  self.config_options.variant)
+            if not evg_stats.get_tests_runtimes():
                 LOGGER.debug("No test history, using fallback suites")
                 # This is probably a new suite, since there is no test history, just use the
                 # fallback values.
@@ -797,36 +797,15 @@ class GenerateSubSuites(object):
             else:
                 raise
 
-    def get_evg_stats(self, project: str, start_date: datetime, end_date: datetime, task: str,
-                      variant: str) -> List[TestStats]:
-        """
-        Collect test execution statistics data from Evergreen.
-
-        :param project: Evergreen project to query.
-        :param start_date: Time to start historical analysis.
-        :param end_date: Time to end historical analysis.
-        :param task: Task to query.
-        :param variant: Build variant to query.
-        :return: List of test stats for specified task.
-        """
-        # pylint: disable=too-many-arguments
-
-        days = (end_date - start_date).days
-        return self.evergreen_api.test_stats_by_project(
-            project, after_date=start_date.strftime("%Y-%m-%d"),
-            before_date=end_date.strftime("%Y-%m-%d"), tasks=[task], variants=[variant],
-            group_by="test", group_num_days=days)
-
-    def calculate_suites_from_evg_stats(self, data: List[TestStats],
+    def calculate_suites_from_evg_stats(self, test_stats: HistoricTaskData,
                                         execution_time_secs: int) -> List[Suite]:
         """
         Divide tests into suites that can be run in less than the specified execution time.
 
-        :param data: Historical test results for task being split.
+        :param test_stats: Historical test results for task being split.
         :param execution_time_secs: Target execution time of each suite (in seconds).
         :return: List of sub suites calculated.
         """
-        test_stats = teststats.TestStats(data)
         tests_runtimes = self.filter_tests(test_stats.get_tests_runtimes())
         if not tests_runtimes:
             LOGGER.debug("No test runtimes after filter, using fallback")
@@ -836,8 +815,7 @@ class GenerateSubSuites(object):
             self.config_options.generated_suite_filename, tests_runtimes, execution_time_secs,
             self.config_options.max_sub_suites, self.config_options.max_tests_per_suite)
 
-    def filter_tests(self,
-                     tests_runtimes: List[teststats.TestRuntime]) -> List[teststats.TestRuntime]:
+    def filter_tests(self, tests_runtimes: List[TestRuntime]) -> List[TestRuntime]:
         """
         Filter relevant tests.
 
@@ -850,10 +828,9 @@ class GenerateSubSuites(object):
                                                     tests_runtimes)
         return tests_runtimes
 
-    def filter_existing_tests(self, tests_runtimes: List[teststats.TestRuntime]) \
-            -> List[teststats.TestRuntime]:
+    def filter_existing_tests(self, tests_runtimes: List[TestRuntime]) -> List[TestRuntime]:
         """Filter out tests that do not exist in the filesystem."""
-        all_tests = [teststats.normalize_test_name(test) for test in self.list_tests()]
+        all_tests = [normalize_test_name(test) for test in self.list_tests()]
         return [
             info for info in tests_runtimes
             if os.path.exists(info.test_name) and info.test_name in all_tests
@@ -871,7 +848,7 @@ class GenerateSubSuites(object):
             suites[idx % num_suites].add_test(test_file, 0)
         return suites
 
-    def list_tests(self) -> List[Dict]:
+    def list_tests(self) -> List[str]:
         """List the test files that are part of the suite being split."""
         return suitesconfig.get_suite(self.config_options.suite).tests
 
@@ -936,7 +913,7 @@ class GenerateSubSuites(object):
         write_file_dict(self.config_options.generated_config_dir, config_dict_of_suites)
 
 
-def filter_specified_tests(specified_tests: Set[str], tests_runtimes: List[teststats.TestRuntime]):
+def filter_specified_tests(specified_tests: Set[str], tests_runtimes: List[TestRuntime]):
     """
     Filter out tests that have not been specified in the specified tests config option.
diff --git a/buildscripts/tests/test_burn_in_tests.py b/buildscripts/tests/test_burn_in_tests.py
index 61962d61ffa..a1eb2e55c95 100644
--- a/buildscripts/tests/test_burn_in_tests.py
+++ b/buildscripts/tests/test_burn_in_tests.py
@@ -367,9 +367,8 @@ class TestGetTaskRuntimeHistory(unittest.TestCase):
                                               "variant1")
         self.assertEqual(result, [("dir/test2.js", 10.1)])
         evergreen_api.test_stats_by_project.assert_called_with(
-            "project1", after_date=start_date.strftime("%Y-%m-%d"),
-            before_date=end_date.strftime("%Y-%m-%d"), group_by="test", group_num_days=14,
-            tasks=["task1"], variants=["variant1"])
+            "project1", after_date=start_date, before_date=end_date, group_by="test",
+            group_num_days=14, tasks=["task1"], variants=["variant1"])
 
     def test__get_task_runtime_history_evg_degraded_mode_error(self):  # pylint: disable=invalid-name
         response = Mock()
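The rewritten assertion follows from the evergreen.py upgrade at the bottom of this diff: from_evg now hands datetime objects straight to test_stats_by_project instead of pre-formatting them. A before/after sketch; that the 2.1.0 client serializes the dates itself is an assumption inferred from this test, not shown in the diff:

    import datetime

    end_date = datetime.datetime.utcnow().replace(microsecond=0)
    start_date = end_date - datetime.timedelta(days=14)

    # evergreen.py 1.4.7 era: callers formatted the dates themselves.
    old_kwargs = dict(after_date=start_date.strftime("%Y-%m-%d"),
                      before_date=end_date.strftime("%Y-%m-%d"))

    # evergreen.py 2.1.0: pass the datetimes through unchanged.
    new_kwargs = dict(after_date=start_date, before_date=end_date)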
diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py
index 4da8d6942d8..7d0f04f2600 100644
--- a/buildscripts/tests/util/test_teststats.py
+++ b/buildscripts/tests/util/test_teststats.py
@@ -5,7 +5,7 @@ import unittest
 
 from mock import Mock
 
-import buildscripts.util.teststats as teststats_utils
+import buildscripts.util.teststats as under_test
 
 # pylint: disable=missing-docstring
 
@@ -14,25 +14,24 @@ _DATE = datetime.datetime(2018, 7, 15)
 
 class NormalizeTestNameTest(unittest.TestCase):
     def test_unix_names(self):
-        self.assertEqual("/home/user/test.js",
-                         teststats_utils.normalize_test_name("/home/user/test.js"))
+        self.assertEqual("/home/user/test.js", under_test.normalize_test_name("/home/user/test.js"))
 
     def test_windows_names(self):
         self.assertEqual("/home/user/test.js",
-                         teststats_utils.normalize_test_name("\\home\\user\\test.js"))
+                         under_test.normalize_test_name("\\home\\user\\test.js"))
 
 
-class TestTestStats(unittest.TestCase):
+class TestHistoricTaskData(unittest.TestCase):
     def test_no_hooks(self):
         evg_results = [
             self._make_evg_result("dir/test1.js", 1, 10),
             self._make_evg_result("dir/test2.js", 1, 30),
             self._make_evg_result("dir/test1.js", 2, 25),
         ]
-        test_stats = teststats_utils.TestStats(evg_results)
+        test_stats = under_test.HistoricTaskData(evg_results)
         expected_runtimes = [
-            teststats_utils.TestRuntime(test_name="dir/test2.js", runtime=30),
-            teststats_utils.TestRuntime(test_name="dir/test1.js", runtime=20),
+            under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
+            under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
         ]
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
@@ -45,11 +44,11 @@ class TestTestStats(unittest.TestCase):
             self._make_evg_result("test3:CleanEveryN", 10, 30),
             self._make_evg_result("test3:CheckReplDBHash", 10, 35),
         ]
-        test_stats = teststats_utils.TestStats(evg_results)
+        test_stats = under_test.HistoricTaskData(evg_results)
         expected_runtimes = [
-            teststats_utils.TestRuntime(test_name="dir/test3.js", runtime=75),
-            teststats_utils.TestRuntime(test_name="dir/test2.js", runtime=30),
-            teststats_utils.TestRuntime(test_name="dir/test1.js", runtime=20),
+            under_test.TestRuntime(test_name="dir/test3.js", runtime=75),
+            under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
+            under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
        ]
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
@@ -62,11 +61,11 @@ class TestTestStats(unittest.TestCase):
             self._make_evg_result("dir/test3.js", 5, 10),
             self._make_evg_result("test3:CheckReplDBHash", 10, 35),
         ]
-        test_stats = teststats_utils.TestStats(evg_results)
+        test_stats = under_test.HistoricTaskData(evg_results)
         expected_runtimes = [
-            teststats_utils.TestRuntime(test_name="dir/test3.js", runtime=80),
-            teststats_utils.TestRuntime(test_name="dir/test2.js", runtime=30),
-            teststats_utils.TestRuntime(test_name="dir/test1.js", runtime=20),
+            under_test.TestRuntime(test_name="dir/test3.js", runtime=80),
+            under_test.TestRuntime(test_name="dir/test2.js", runtime=30),
+            under_test.TestRuntime(test_name="dir/test1.js", runtime=20),
         ]
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
@@ -75,9 +74,9 @@ class TestTestStats(unittest.TestCase):
             self._make_evg_result("dir/test1.js", 0, 0),
             self._make_evg_result("dir/test1.js", 0, 0),
         ]
-        test_stats = teststats_utils.TestStats(evg_results)
+        test_stats = under_test.HistoricTaskData(evg_results)
         expected_runtimes = [
-            teststats_utils.TestRuntime(test_name="dir/test1.js", runtime=0),
+            under_test.TestRuntime(test_name="dir/test1.js", runtime=0),
         ]
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
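The expected runtimes asserted above follow from the weighted average that teststats.py applies below, with hook time folded into the owning test. For dir/test1.js in test_no_hooks: one run averaging 10s combined with two runs averaging 25s gives (10*1 + 25*2) / (1 + 2) = 20. A standalone restatement of that arithmetic:

    def weighted_average(value_a: float, num_a: int, value_b: float, num_b: int) -> float:
        """Mirror of the private _average helper introduced in teststats.py below."""
        divisor = num_a + num_b
        if divisor == 0:  # guards the zero-runs case in the last test above
            return 0
        return float(value_a * num_a + value_b * num_b) / divisor

    assert weighted_average(10, 1, 25, 2) == 20.0  # dir/test1.js in test_no_hooks
    assert weighted_average(0, 0, 0, 0) == 0       # runtime stays 0 when a test never ran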
+ """ + self.duration = _average(self.duration, self.num_runs, duration, num_runs) + self.num_runs += num_runs + + +def normalize_test_name(test_name: str) -> str: """Normalize test names that may have been run on windows or unix.""" return test_name.replace("\\", "/") -class TestStats(object): +def _average(value_a: float, num_a: int, value_b: float, num_b: int) -> float: + """Compute a weighted average of 2 values with associated numbers.""" + divisor = num_a + num_b + if divisor == 0: + return 0 + else: + return float(value_a * num_a + value_b * num_b) / divisor + + +class HistoricTaskData(object): """Represent the test statistics for the task that is being analyzed.""" - def __init__(self, evg_test_stats_results): + def __init__(self, evg_test_stats_results: List[TestStats]) -> None: """Initialize the TestStats with raw results from the Evergreen API.""" - # Mapping from test_file to {"num_run": X, "duration": Y} for tests - self._runtime_by_test = defaultdict(dict) - # Mapping from 'test_name:hook_name' to - # {'test_name': {'hook_name': {"num_run": X, "duration": Y}}} - self._hook_runtime_by_test = defaultdict(lambda: defaultdict(dict)) + self._runtime_by_test = defaultdict(_RuntimeHistory.empty) + self._hook_runtime_by_test = defaultdict(lambda: defaultdict(_RuntimeHistory.empty)) for doc in evg_test_stats_results: self._add_stats(doc) - def _add_stats(self, test_stats): + # pylint: disable=too-many-arguments + @classmethod + def from_evg(cls, evg_api: EvergreenApi, project: str, start_date: datetime, end_date: datetime, + task: str, variant: str) -> "HistoricTaskData": + """ + Retrieve test stats from evergreen for a given task. + + :param evg_api: Evergreen API client. + :param project: Project to query. + :param start_date: Start date to query. + :param end_date: End date to query. + :param task: Task to query. + :param variant: Build variant to query. + :return: Test stats for the specified task. 
+ """ + days = (end_date - start_date).days + return cls( + evg_api.test_stats_by_project(project, after_date=start_date, before_date=end_date, + tasks=[task], variants=[variant], group_by="test", + group_num_days=days)) + + def _add_stats(self, test_stats: TestStats) -> None: """Add the statistics found in a document returned by the Evergreen test_stats/ endpoint.""" test_file = testname.normalize_test_file(test_stats.test_file) duration = test_stats.avg_duration_pass @@ -37,44 +106,23 @@ class TestStats(object): else: self._add_test_stats(test_file, duration, num_run) - def _add_test_stats(self, test_file, duration, num_run): + def _add_test_stats(self, test_file: str, duration: float, num_run: int) -> None: """Add the statistics for a test.""" - runtime_info = self._runtime_by_test[test_file] - self._add_runtime_info(runtime_info, duration, num_run) + self._runtime_by_test[test_file].add_runtimes(duration, num_run) - def _add_test_hook_stats(self, test_file, duration, num_run): + def _add_test_hook_stats(self, test_file: str, duration: float, num_run: int) -> None: """Add the statistics for a hook.""" test_name, hook_name = testname.split_test_hook_name(test_file) - runtime_info = self._hook_runtime_by_test[test_name][hook_name] - self._add_runtime_info(runtime_info, duration, num_run) - - @staticmethod - def _add_runtime_info(runtime_info, duration, num_run): - if not runtime_info: - runtime_info["duration"] = duration - runtime_info["num_run"] = num_run - else: - runtime_info["duration"] = TestStats._average( - runtime_info["duration"], runtime_info["num_run"], duration, num_run) - runtime_info["num_run"] += num_run - - @staticmethod - def _average(value_a, num_a, value_b, num_b): - """Compute a weighted average of 2 values with associated numbers.""" - divisor = num_a + num_b - if divisor == 0: - return 0 - else: - return float(value_a * num_a + value_b * num_b) / divisor + self._hook_runtime_by_test[test_name][hook_name].add_runtimes(duration, num_run) - def get_tests_runtimes(self): + def get_tests_runtimes(self) -> List[TestRuntime]: """Return the list of (test_file, runtime_in_secs) tuples ordered by decreasing runtime.""" tests = [] for test_file, runtime_info in list(self._runtime_by_test.items()): - duration = runtime_info["duration"] + duration = runtime_info.duration test_name = testname.get_short_name_from_test_file(test_file) for _, hook_runtime_info in self._hook_runtime_by_test[test_name].items(): - duration += hook_runtime_info["duration"] + duration += hook_runtime_info.duration test = TestRuntime(test_name=normalize_test_name(test_file), runtime=duration) tests.append(test) return sorted(tests, key=lambda x: x.runtime, reverse=True) diff --git a/etc/pip/components/evergreen.req b/etc/pip/components/evergreen.req index da5fac8bea1..12d8c78ec6e 100644 --- a/etc/pip/components/evergreen.req +++ b/etc/pip/components/evergreen.req @@ -1,4 +1,5 @@ click ~= 7.0 +dataclasses; python_version < "3.7" GitPython ~= 3.1.7 psutil structlog ~= 19.2.0 diff --git a/etc/pip/components/resmoke.req b/etc/pip/components/resmoke.req index 0a63f5105eb..0457d28259d 100644 --- a/etc/pip/components/resmoke.req +++ b/etc/pip/components/resmoke.req @@ -1,5 +1,5 @@ PyKMIP == 0.4.0 # It's now 0.8.0. We're far enough back to have API conflicts. -evergreen.py == 1.4.7 +evergreen.py == 2.1.0 jinja2 mock shrub.py == 1.1.0 |