| author    | Mikhail Shchatko <mikhail.shchatko@mongodb.com>  | 2022-11-25 16:19:14 +0000 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-25 16:32:31 +0000 |
| commit    | 933ff4b1f401dca11ea688c2fa1f6c4e7899ca7e         |                           |
| tree      | 713942ddfc194a7fc64184a65edcb670b69017da         |                           |
| parent    | 40de085f67b7494357ffd5219e25ed4d15a61a76         |                           |
SERVER-71533 Update legacy task generation to use new test stats location
-rw-r--r-- | buildscripts/burn_in_tags.py                                | 2
-rw-r--r-- | buildscripts/burn_in_tests.py                               | 50
-rwxr-xr-x | buildscripts/evergreen_gen_multiversion_tests.py            | 4
-rwxr-xr-x | buildscripts/evergreen_generate_resmoke_tasks.py            | 34
-rw-r--r-- | buildscripts/tests/test_burn_in_tests.py                    | 44
-rw-r--r-- | buildscripts/tests/test_evergreen_generate_resmoke_tasks.py | 95
-rw-r--r-- | buildscripts/tests/util/test_teststats.py                   | 2
-rw-r--r-- | buildscripts/util/teststats.py                              | 82
8 files changed, 136 insertions, 177 deletions
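For orientation before the patch body: the core of SERVER-71533 is that historic test runtimes are now fetched from a public S3 bucket instead of the Evergreen `test_stats_by_project` API. The sketch below condenses the new `HistoricTaskData.get_stats_from_s3` helper added in `buildscripts/util/teststats.py`; it is a simplified illustration, not the exact patched code (retry and JSON-error handling are trimmed).

```python
# Condensed sketch of the S3-based stats lookup this patch introduces.
# Names mirror the patch (TESTS_STATS_S3_LOCATION, HistoricalTestInformation,
# get_stats_from_s3); error handling is simplified for illustration.
from typing import List, NamedTuple

import requests
from requests.adapters import HTTPAdapter, Retry

TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"


class HistoricalTestInformation(NamedTuple):
    test_name: str
    num_pass: int
    num_fail: int
    avg_duration_pass: float


def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
    """Fetch historic stats for one project/variant/task; empty list if none are published."""
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount("https://", HTTPAdapter(max_retries=retries))
    response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")
    try:
        return [HistoricalTestInformation(**item) for item in response.json()]
    except ValueError:  # no JSON document published for this task -> treat as "no history"
        return []
```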
diff --git a/buildscripts/burn_in_tags.py b/buildscripts/burn_in_tags.py
index ba25fe37dfd..a58de6234bd 100644
--- a/buildscripts/burn_in_tags.py
+++ b/buildscripts/burn_in_tags.py
@@ -155,7 +155,7 @@ def _generate_evg_tasks(evergreen_api: EvergreenApi, shrub_project: ShrubProject
         repeat_tests_secs=config_options.repeat_tests_secs)
 
     create_generate_tasks_config(shrub_build_variant, tests_by_task, gen_config,
-                                 repeat_config, evergreen_api, include_gen_task=False)
+                                 repeat_config, include_gen_task=False)
 
     shrub_project.add_build_variant(shrub_build_variant)
diff --git a/buildscripts/burn_in_tests.py b/buildscripts/burn_in_tests.py
index 525a3253fc6..612ef19603b 100644
--- a/buildscripts/burn_in_tests.py
+++ b/buildscripts/burn_in_tests.py
@@ -515,33 +515,18 @@ def _generate_timeouts(repeat_config: RepeatConfig, test: str,
     return TimeoutInfo.default_timeout()
 
 
-def _get_task_runtime_history(evg_api: Optional[EvergreenApi], project: str, task: str,
-                              variant: str) -> List[TestRuntime]:
+def _get_task_runtime_history(project: str, task: str, variant: str) -> List[TestRuntime]:
     """
-    Fetch historical average runtime for all tests in a task from Evergreen API.
+    Fetch historical average runtime for all tests in a task from S3.
 
-    :param evg_api: Evergreen API.
     :param project: Project name.
     :param task: Task name.
     :param variant: Variant name.
     :return: Test historical runtimes, parsed into teststat objects.
     """
-    if not evg_api:
-        return []
-
-    try:
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=AVG_TEST_RUNTIME_ANALYSIS_DAYS)
-        test_stats = HistoricTaskData.from_evg(evg_api, project, start_date=start_date,
-                                               end_date=end_date, task=task, variant=variant)
-        return test_stats.get_tests_runtimes()
-    except requests.HTTPError as err:
-        if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
-            # Evergreen may return a 503 when the service is degraded.
-            # We fall back to returning no test history
-            return []
-        else:
-            raise
+    test_stats = HistoricTaskData.from_s3(project, task, variant)
+    test_runtimes = test_stats.get_tests_runtimes()
+    return test_runtimes
 
 
 def _create_task(index: int, test_count: int, test: str, task_data: Dict,
@@ -584,7 +569,7 @@ def _create_task(index: int, test_count: int, test: str, task_data: Dict,
 
 
 def create_generated_tasks(tests_by_task: Dict, task_prefix: str, generate_config: GenerateConfig,
-                           repeat_config: RepeatConfig, evg_api: EvergreenApi) -> Set[Task]:
+                           repeat_config: RepeatConfig) -> Set[Task]:
     """
     Create the set of tasks to run the given tests_by_task.
 
@@ -592,16 +577,14 @@ def create_generated_tasks(tests_by_task: Dict, task_prefix: str, generate_confi
     :param task_prefix: Prefix all task names with this.
     :param generate_config: Configuration of what to generate.
     :param repeat_config: Configuration of how to repeat tests.
-    :param evg_api: Evergreen API.
     :return: Set of shrub tasks to run tests_by_task.
     """
     tasks: Set[Task] = set()
     for task in sorted(tests_by_task):
         task_info = tests_by_task[task]
         test_list = task_info["tests"]
-        task_runtime_stats = _get_task_runtime_history(evg_api, generate_config.project,
-                                                       task_info["display_task_name"],
-                                                       generate_config.build_variant)
+        task_runtime_stats = _get_task_runtime_history(
+            generate_config.project, task_info["display_task_name"], generate_config.build_variant)
         test_count = len(test_list)
         for index, test in enumerate(test_list):
             tasks.add(
@@ -613,7 +596,7 @@ def create_generate_tasks_config(build_variant: BuildVariant, tests_by_task: Dic
                                  generate_config: GenerateConfig, repeat_config: RepeatConfig,
-                                 evg_api: Optional[EvergreenApi], include_gen_task: bool = True,
+                                 include_gen_task: bool = True,
                                  task_prefix: str = "burn_in") -> None:
     # pylint: disable=too-many-arguments,too-many-locals
     """
@@ -623,12 +606,10 @@ def create_generate_tasks_config(build_variant: BuildVariant, tests_by_task: Dic
     :param tests_by_task: Dictionary of tests to generate tasks for.
     :param generate_config: Configuration of what to generate.
     :param repeat_config: Configuration of how to repeat tests.
-    :param evg_api: Evergreen API.
     :param include_gen_task: Should generating task be include in display task.
     :param task_prefix: Prefix all task names with this.
     """
-    tasks = create_generated_tasks(tests_by_task, task_prefix, generate_config, repeat_config,
-                                   evg_api)
+    tasks = create_generated_tasks(tests_by_task, task_prefix, generate_config, repeat_config)
 
     existing_tasks = {ExistingTask(BURN_IN_TESTS_GEN_TASK)} if include_gen_task else None
     build_variant.display_task(BURN_IN_TESTS_TASK, tasks, execution_existing_tasks=existing_tasks)
@@ -686,23 +667,21 @@ def create_tests_by_task(build_variant: str, evg_conf: EvergreenProjectConfig,
 
 # pylint: disable=too-many-arguments
 def create_generate_tasks_file(tests_by_task: Dict, generate_config: GenerateConfig,
-                               repeat_config: RepeatConfig, evg_api: Optional[EvergreenApi],
-                               task_prefix: str = 'burn_in', include_gen_task: bool = True) -> str:
+                               repeat_config: RepeatConfig, task_prefix: str = 'burn_in',
+                               include_gen_task: bool = True) -> str:
     """
     Create an Evergreen generate.tasks file to run the given tasks and tests.
 
     :param tests_by_task: Dictionary of tests and tasks to run.
     :param generate_config: Information about how burn_in should generate tasks.
     :param repeat_config: Information about how burn_in should repeat tests.
-    :param evg_api: Evergreen api.
     :param task_prefix: Prefix to start generated task's name with.
     :param include_gen_task: Should the generating task be included in the display task.
     :returns: Configuration to pass to 'generate.tasks'.
     """
     build_variant = BuildVariant(generate_config.run_build_variant)
     create_generate_tasks_config(build_variant, tests_by_task, generate_config, repeat_config,
-                                 evg_api, include_gen_task=include_gen_task,
-                                 task_prefix=task_prefix)
+                                 include_gen_task=include_gen_task, task_prefix=task_prefix)
 
     shrub_project = ShrubProject.empty()
     shrub_project.add_build_variant(build_variant)
@@ -793,8 +772,7 @@ def burn_in(repeat_config: RepeatConfig, generate_config: GenerateConfig, resmok
     LOGGER.debug("tests and tasks found", tests_by_task=tests_by_task)
 
     if generate_tasks_file:
-        json_text = create_generate_tasks_file(tests_by_task, generate_config, repeat_config,
-                                               evg_api)
+        json_text = create_generate_tasks_file(tests_by_task, generate_config, repeat_config)
         write_file(generate_tasks_file, json_text)
     elif not no_exec:
         run_tests(tests_by_task, resmoke_cmd)
diff --git a/buildscripts/evergreen_gen_multiversion_tests.py b/buildscripts/evergreen_gen_multiversion_tests.py
index 323de188446..1bd90c94fd6 100755
--- a/buildscripts/evergreen_gen_multiversion_tests.py
+++ b/buildscripts/evergreen_gen_multiversion_tests.py
@@ -252,9 +252,7 @@ class EvergreenMultiversionConfigGenerator(object):
         # LOOKBACK_DURATION_DAYS. Tests without enough run-time statistics will be placed
         # in the misc suite.
         gen_suites = generate_resmoke.GenerateSubSuites(self.evg_api, self.options)
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=generate_resmoke.LOOKBACK_DURATION_DAYS)
-        suites = gen_suites.calculate_suites(start_date, end_date)
+        suites = gen_suites.calculate_suites()
 
         # Render the given suites into yml files that can be used by resmoke.py.
         config_file_dict = generate_resmoke.render_suite_files(suites, self.options.suite,
                                                                gen_suites.test_list, TEST_SUITE_DIR,
diff --git a/buildscripts/evergreen_generate_resmoke_tasks.py b/buildscripts/evergreen_generate_resmoke_tasks.py
index bea913fce12..de00e3a566d 100755
--- a/buildscripts/evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/evergreen_generate_resmoke_tasks.py
@@ -51,7 +51,6 @@ EVG_CONFIG_FILE = "./.evergreen.yml"
 GENERATE_CONFIG_FILE = "etc/generate_subtasks_config.yml"
 MIN_TIMEOUT_SECONDS = int(timedelta(minutes=5).total_seconds())
 MAX_EXPECTED_TIMEOUT = int(timedelta(hours=48).total_seconds())
-LOOKBACK_DURATION_DAYS = 14
 GEN_SUFFIX = "_gen"
 CLEAN_EVERY_N_HOOK = "CleanEveryN"
 ASAN_SIGNATURE = "detect_leaks=1"
@@ -857,34 +856,21 @@ class GenerateSubSuites(object):
         """Get the configuration of the suite being generated."""
         return read_suite_config(self.config_options.test_suites_dir, self.config_options.suite)
 
-    def calculate_suites(self, start_date: datetime, end_date: datetime) -> List[Suite]:
+    def calculate_suites(self) -> List[Suite]:
         """
         Divide tests into suites based on statistics for the provided period.
 
-        :param start_date: Time to start historical analysis.
-        :param end_date: Time to end historical analysis.
         :return: List of sub suites to be generated.
         """
-        try:
-            evg_stats = HistoricTaskData.from_evg(self.evergreen_api, self.config_options.project,
-                                                  start_date, end_date, self.config_options.task,
-                                                  self.config_options.variant)
-            if not evg_stats:
-                LOGGER.debug("No test history, using fallback suites")
-                # This is probably a new suite, since there is no test history, just use the
-                # fallback values.
-                return self.calculate_fallback_suites()
+        evg_stats = HistoricTaskData.from_s3(self.config_options.project, self.config_options.task,
+                                             self.config_options.variant)
+
+        if evg_stats:
             target_execution_time_secs = self.config_options.target_resmoke_time * 60
             return self.calculate_suites_from_evg_stats(evg_stats, target_execution_time_secs)
-        except requests.HTTPError as err:
-            if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
-                # Evergreen may return a 503 when the service is degraded.
-                # We fall back to splitting the tests into a fixed number of suites.
-                LOGGER.warning("Received 503 from Evergreen, "
-                               "dividing the tests evenly among suites")
-                return self.calculate_fallback_suites()
-            else:
-                raise
+
+        # Since there is no test history this is probably a new suite, just use the fallback values.
+        return self.calculate_fallback_suites()
 
     def calculate_suites_from_evg_stats(self, test_stats: HistoricTaskData,
                                         execution_time_secs: int) -> List[Suite]:
@@ -1041,9 +1027,7 @@ class GenerateSubSuites(object):
         :return: The suites files and evergreen configuration for the generated task.
         """
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=LOOKBACK_DURATION_DAYS)
-        return self.calculate_suites(start_date, end_date)
+        return self.calculate_suites()
 
     def run(self):
         """Generate resmoke suites that run within a target execution time and write to disk."""
diff --git a/buildscripts/tests/test_burn_in_tests.py b/buildscripts/tests/test_burn_in_tests.py
index 493da8d406e..f48ee2ee742 100644
--- a/buildscripts/tests/test_burn_in_tests.py
+++ b/buildscripts/tests/test_burn_in_tests.py
@@ -338,44 +338,26 @@ class TestGenerateTimeouts(unittest.TestCase):
 
 
 class TestGetTaskRuntimeHistory(unittest.TestCase):
-    def test_get_task_runtime_history_with_no_api(self):
-        self.assertListEqual([],
-                             under_test._get_task_runtime_history(None, "project", "task",
-                                                                  "variant"))
-
-    def test__get_task_runtime_history(self):
-        evergreen_api = Mock()
-        evergreen_api.test_stats_by_project.return_value = [
-            Mock(
-                test_file="dir/test2.js",
-                task_name="task1",
-                variant="variant1",
-                distro="distro1",
-                date=_DATE,
+    @patch(ns("HistoricTaskData.get_stats_from_s3"))
+    def test__get_task_runtime_history(self, get_stats_from_s3_mock):
+        test_stats = [
+            teststats_utils.HistoricalTestInformation(
+                test_name="dir/test2.js",
                 num_pass=1,
                 num_fail=0,
                 avg_duration_pass=10.1,
             )
         ]
-        analysis_duration = under_test.AVG_TEST_RUNTIME_ANALYSIS_DAYS
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=analysis_duration)
+        get_stats_from_s3_mock.return_value = test_stats
 
-        result = under_test._get_task_runtime_history(evergreen_api, "project1", "task1",
-                                                      "variant1")
+        result = under_test._get_task_runtime_history("project1", "task1", "variant1")
         self.assertEqual(result, [("dir/test2.js", 10.1)])
-        evergreen_api.test_stats_by_project.assert_called_with(
-            "project1", after_date=start_date, before_date=end_date, group_by="test",
-            group_num_days=14, tasks=["task1"], variants=["variant1"])
-
-    def test__get_task_runtime_history_evg_degraded_mode_error(self):  # pylint: disable=invalid-name
-        response = Mock()
-        response.status_code = requests.codes.SERVICE_UNAVAILABLE
-        evergreen_api = Mock()
-        evergreen_api.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-
-        result = under_test._get_task_runtime_history(evergreen_api, "project1", "task1",
-                                                      "variant1")
+
+    @patch(ns("HistoricTaskData.get_stats_from_s3"))
+    def test__get_task_runtime_history_when_s3_has_no_data(self, get_stats_from_s3_mock):  # pylint: disable=invalid-name
+        get_stats_from_s3_mock.return_value = []
+
+        result = under_test._get_task_runtime_history("project1", "task1", "variant1")
         self.assertEqual(result, [])
diff --git a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
index 1cc59ef0aba..42d185b2830 100644
--- a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
@@ -13,7 +13,7 @@ from mock import patch, MagicMock
 from shrub.v2 import BuildVariant, ShrubProject
 from shrub.variant import DisplayTaskDefinition
 
-from buildscripts.util.teststats import TestRuntime
+from buildscripts.util.teststats import TestRuntime, HistoricalTestInformation
 from buildscripts import evergreen_generate_resmoke_tasks as under_test
@@ -31,7 +31,12 @@ def ns(relative_name):  # pylint: disable=invalid-name
 
 
 def tst_stat_mock(file, duration, pass_count):
-    return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count)
+    return HistoricalTestInformation(
+        test_name=file,
+        num_pass=pass_count,
+        num_fail=0,
+        avg_duration_pass=duration,
+    )
 
 
 def mock_test_stats_unavailable(evg_api_mock):
@@ -108,10 +113,10 @@ class TestAcceptance(unittest.TestCase):
         return target_directory, source_directory
 
     @staticmethod
-    def _mock_test_files(directory, n_tests, runtime, evg_api_mock, suites_config_mock):
+    def _mock_test_files(directory, n_tests, runtime, get_stats_from_s3_mock, suites_config_mock):
         test_list = [os.path.join(directory, f"test_name_{i}.js") for i in range(n_tests)]
         mock_test_stats = [tst_stat_mock(file, runtime, 5) for file in test_list]
-        evg_api_mock.test_stats_by_project.return_value = mock_test_stats
+        get_stats_from_s3_mock.return_value = mock_test_stats
         suites_config_mock.return_value.tests = test_list
         for test in test_list:
             open(test, "w").close()
@@ -136,7 +141,8 @@ class TestAcceptance(unittest.TestCase):
             self.assertEqual(0, len(os.listdir(tmpdir)))
 
     @patch(ns("suitesconfig.get_suite"))
-    def test_when_evg_test_stats_is_down(self, suites_config_mock):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_when_evg_test_stats_is_down(self, get_stats_from_s3_mock, suites_config_mock):
         """
         Given Evergreen historic test stats endpoint is disabled,
         When evergreen_generate_resmoke_tasks attempts to generate suites,
@@ -154,8 +160,9 @@ class TestAcceptance(unittest.TestCase):
             target_directory, source_directory = self._prep_dirs(tmpdir, mock_config)
             suite_path = os.path.join(source_directory, task)
             mock_config["suite"] = suite_path
-            test_list = self._mock_test_files(source_directory, n_tests, 5, evg_api_mock,
+            test_list = self._mock_test_files(source_directory, n_tests, 5, get_stats_from_s3_mock,
                                               suites_config_mock)
+            get_stats_from_s3_mock.return_value = []
             mock_resmoke_config_file(test_list, suite_path + ".yml")
 
             under_test.GenerateSubSuites(evg_api_mock, config).run()
@@ -181,7 +188,8 @@ class TestAcceptance(unittest.TestCase):
         sys.platform.startswith("win"), "Since this test is messing with directories, "
         "windows does not handle test generation correctly")
     @patch(ns("suitesconfig.get_suite"))
-    def test_with_each_test_in_own_task(self, suites_config_mock):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_with_each_test_in_own_task(self, get_stats_from_s3_mock, suites_config_mock):
         """
         Given a task with all tests having a historic runtime over the target,
         When evergreen_generate_resmoke_tasks attempts to generate suites,
@@ -200,8 +208,8 @@ class TestAcceptance(unittest.TestCase):
             target_directory, source_directory = self._prep_dirs(tmpdir, mock_config)
             suite_path = os.path.join(source_directory, task)
             mock_config["suite"] = suite_path
-            test_list = self._mock_test_files(source_directory, n_tests, 15 * 60, evg_api_mock,
-                                              suites_config_mock)
+            test_list = self._mock_test_files(source_directory, n_tests, 15 * 60,
+                                              get_stats_from_s3_mock, suites_config_mock)
             mock_resmoke_config_file(test_list, suite_path + ".yml")
 
             under_test.enable_logging(True)
@@ -857,10 +865,11 @@ class GenerateSubSuitesTest(unittest.TestCase):
         return [f"test{i}.js" for i in range(n_tests)]
 
     @patch(ns("read_suite_config"))
-    def test_calculate_suites(self, mock_read_suite_config):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites(self, mock_get_stats_from_s3, mock_read_suite_config):
         mock_read_suite_config.return_value = {}
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = [
+        mock_get_stats_from_s3.return_value = [
             tst_stat_mock(f"test{i}.js", 60, 1) for i in range(100)
         ]
         config_options = self.get_mock_options()
@@ -872,40 +881,26 @@ class GenerateSubSuitesTest(unittest.TestCase):
         with patch("os.path.exists") as exists_mock, patch(ns("suitesconfig")) as suitesconfig_mock:
             exists_mock.return_value = True
             suitesconfig_mock.get_suite.return_value.tests = \
-                [stat.test_file for stat in evg.test_stats_by_project.return_value]
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+                [stat.test_name for stat in mock_get_stats_from_s3.return_value]
+            suites = gen_sub_suites.calculate_suites()
 
         # There are 100 tests taking 1 minute, with a target of 10 min we expect 10 suites.
         self.assertEqual(10, len(suites))
         for suite in suites:
             self.assertEqual(10, len(suite.tests))
 
-    def test_calculate_suites_fallback(self):
-        n_tests = 100
-        evg = mock_test_stats_unavailable(MagicMock())
-        config_options = self.get_mock_options()
-
-        gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
-        gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
-
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
-
-        self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
-        for suite in suites:
-            self.assertEqual(50, len(suite.tests))
-
-        self.assertEqual(n_tests, len(gen_sub_suites.test_list))
-
-    def test_calculate_suites_fallback_with_fewer_tests_than_max(self):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_fallback_with_fewer_tests_than_max(self, mock_get_stats_from_s3):
         n_tests = 2
         evg = mock_test_stats_unavailable(MagicMock())
         config_options = self.get_mock_options()
         config_options.fallback_num_sub_suites = 5
+        mock_get_stats_from_s3.return_value = []
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
 
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+        suites = gen_sub_suites.calculate_suites()
 
         self.assertEqual(n_tests, len(suites))
         for suite in suites:
@@ -913,15 +908,16 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
         self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_uses_fallback_for_no_results(self):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_uses_fallback_for_no_results(self, mock_get_stats_from_s3):
         n_tests = 100
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = []
+        mock_get_stats_from_s3.return_value = []
         config_options = self.get_mock_options()
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+        suites = gen_sub_suites.calculate_suites()
 
         self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
         for suite in suites:
@@ -929,10 +925,12 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
         self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_uses_fallback_if_only_results_are_filtered(self):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_uses_fallback_if_only_results_are_filtered(
+            self, mock_get_stats_from_s3):
         n_tests = 100
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = [
+        mock_get_stats_from_s3.return_value = [
             tst_stat_mock(f"test{i}.js", 60, 1) for i in range(100)
         ]
         config_options = self.get_mock_options()
@@ -941,7 +939,7 @@ class GenerateSubSuitesTest(unittest.TestCase):
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
         with patch("os.path.exists") as exists_mock:
             exists_mock.return_value = False
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+            suites = gen_sub_suites.calculate_suites()
 
         self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
         for suite in suites:
@@ -949,24 +947,13 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
         self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_error(self):
-        response = MagicMock()
-        response.status_code = requests.codes.INTERNAL_SERVER_ERROR
-        evg = MagicMock()
-        evg.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-        config_options = self.get_mock_options()
-
-        gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
-        gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(100))
-
-        with self.assertRaises(requests.HTTPError):
-            gen_sub_suites.calculate_suites(_DATE, _DATE)
-
     @patch(ns("read_suite_config"))
-    def test_calculate_suites_with_selected_tests_to_run(self, mock_read_suite_config):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_with_selected_tests_to_run(self, mock_get_stats_from_s3,
+                                                         mock_read_suite_config):
         mock_read_suite_config.return_value = {}
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = [
+        mock_get_stats_from_s3.return_value = [
             tst_stat_mock(f"test{i}.js", 60, 1) for i in range(100)
         ]
         config_options = self.get_mock_options()
@@ -978,8 +965,8 @@ class GenerateSubSuitesTest(unittest.TestCase):
         with patch("os.path.exists") as exists_mock, patch(ns("suitesconfig")) as suitesconfig_mock:
             exists_mock.return_value = True
             suitesconfig_mock.get_suite.return_value.tests = \
-                [stat.test_file for stat in evg.test_stats_by_project.return_value]
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+                [stat.test_name for stat in mock_get_stats_from_s3.return_value]
+            suites = gen_sub_suites.calculate_suites()
 
         # There are 100 tests taking 1 minute, with a target of 10 min we expect 10 suites.
         # However, since we have selected only 2 tests to run, test1.js and
diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py
index b3405ecc00d..5949758559e 100644
--- a/buildscripts/tests/util/test_teststats.py
+++ b/buildscripts/tests/util/test_teststats.py
@@ -79,7 +79,7 @@ class TestHistoricTaskData(unittest.TestCase):
     @staticmethod
     def _make_evg_result(test_file="dir/test1.js", num_pass=0, duration=0):
         return Mock(
-            test_file=test_file,
+            test_name=test_file,
             task_name="task1",
             variant="variant1",
             distro="distro1",
diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py
index b8c0578c19e..5336aa8cd5c 100644
--- a/buildscripts/util/teststats.py
+++ b/buildscripts/util/teststats.py
@@ -1,15 +1,32 @@
 """Utility to support parsing a TestStat."""
 from collections import defaultdict
 from dataclasses import dataclass
-from datetime import datetime
 from itertools import chain
+from json import JSONDecodeError
 from typing import NamedTuple, List, Callable, Optional
-
-from evergreen import EvergreenApi, TestStats
+import requests
+from requests.adapters import HTTPAdapter, Retry
 
 from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file
 
 TASK_LEVEL_HOOKS = {"CleanEveryN"}
+TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"
+
+
+class HistoricalTestInformation(NamedTuple):
+    """
+    Container for information about the historical runtime of a test.
+
+    test_name: Name of test.
+    avg_duration_pass: Average of runtime of test that passed.
+    num_pass: Number of times the test has passed.
+    num_fail: Number of times the test has failed.
+    """
+
+    test_name: str
+    num_pass: int
+    num_fail: int
+    avg_duration_pass: float
 
 
 class TestRuntime(NamedTuple):
@@ -74,9 +91,9 @@ class HistoricHookInfo(NamedTuple):
     avg_duration: float
 
     @classmethod
-    def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo":
+    def from_test_stats(cls, test_stats: HistoricalTestInformation) -> "HistoricHookInfo":
         """Create an instance from a test_stats object."""
-        return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass,
+        return cls(hook_id=test_stats.test_name, num_pass=test_stats.num_pass,
                    avg_duration=test_stats.avg_duration_pass)
 
     def test_name(self) -> str:
@@ -101,10 +118,10 @@ class HistoricTestInfo(NamedTuple):
     hooks: List[HistoricHookInfo]
 
     @classmethod
-    def from_test_stats(cls, test_stats: TestStats,
+    def from_test_stats(cls, test_stats: HistoricalTestInformation,
                         hooks: List[HistoricHookInfo]) -> "HistoricTestInfo":
         """Create an instance from a test_stats object."""
-        return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass,
+        return cls(test_name=test_stats.test_name, num_pass=test_stats.num_pass,
                    avg_duration=test_stats.avg_duration_pass, hooks=hooks)
 
     def normalized_test_name(self) -> str:
@@ -134,46 +151,59 @@ class HistoricTaskData(object):
         """Initialize the TestStats with raw results from the Evergreen API."""
         self.historic_test_results = historic_test_results
 
-    # pylint: disable=too-many-arguments
+    @staticmethod
+    def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
+        """
+        Retrieve test stats from s3 for a given task.
+
+        :param project: Project to query.
+        :param task: Task to query.
+        :param variant: Build variant to query.
+        :return: A list of the Test stats for the specified task.
+        """
+        session = requests.Session()
+        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
+        session.mount('https://', HTTPAdapter(max_retries=retries))
+
+        response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")
+
+        try:
+            data = response.json()
+            return [HistoricalTestInformation(**item) for item in data]
+        except JSONDecodeError:
+            return []
+
     @classmethod
-    def from_evg(cls, evg_api: EvergreenApi, project: str, start_date: datetime, end_date: datetime,
-                 task: str, variant: str) -> "HistoricTaskData":
+    def from_s3(cls, project: str, task: str, variant: str) -> "HistoricTaskData":
         """
-        Retrieve test stats from evergreen for a given task.
+        Retrieve test stats from s3 for a given task.
 
-        :param evg_api: Evergreen API client.
         :param project: Project to query.
-        :param start_date: Start date to query.
-        :param end_date: End date to query.
         :param task: Task to query.
         :param variant: Build variant to query.
        :return: Test stats for the specified task.
         """
-        days = (end_date - start_date).days
-        historic_stats = evg_api.test_stats_by_project(
-            project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant],
-            group_by="test", group_num_days=days)
-
-        return cls.from_stats_list(historic_stats)
+        historical_test_data = cls.get_stats_from_s3(project, task, variant)
+        return cls.from_stats_list(historical_test_data)
 
     @classmethod
-    def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData":
+    def from_stats_list(
+            cls, historical_test_data: List[HistoricalTestInformation]) -> "HistoricTaskData":
         """
         Build historic task data from a list of historic stats.
 
-        :param historic_stats: List of historic stats to build from.
+        :param historical_test_data: A list of information about the runtime of a test.
         :return: Historic task data from the list of stats.
         """
-
         hooks = defaultdict(list)
-        for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]:
+        for hook in [stat for stat in historical_test_data if is_resmoke_hook(stat.test_name)]:
             historical_hook = HistoricHookInfo.from_test_stats(hook)
             hooks[historical_hook.test_name()].append(historical_hook)
 
         return cls([
             HistoricTestInfo.from_test_stats(stat,
-                                             hooks[get_short_name_from_test_file(stat.test_file)])
-            for stat in historic_stats if not is_resmoke_hook(stat.test_file)
+                                             hooks[get_short_name_from_test_file(stat.test_name)])
+            for stat in historical_test_data if not is_resmoke_hook(stat.test_name)
         ])
 
     def get_tests_runtimes(self) -> List[TestRuntime]:
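Taken together, callers that previously needed an `EvergreenApi` handle plus a date range now only pass a project, task, and build variant. A hypothetical usage snippet, mirroring `_get_task_runtime_history` in `buildscripts/burn_in_tests.py` (the project/task/variant values below are illustrative, not taken from the patch):

```python
# Hypothetical caller-side usage after this patch; assumes the mongo
# buildscripts package is importable from the repository root.
from buildscripts.util.teststats import HistoricTaskData


def average_runtimes(project: str, task: str, variant: str):
    """Return (test_name, avg_duration) pairs, or [] when no stats exist in S3."""
    task_data = HistoricTaskData.from_s3(project, task, variant)
    return task_data.get_tests_runtimes()


if __name__ == "__main__":
    # Example values only; real project/task/variant names come from the Evergreen config.
    for test_name, duration in average_runtimes("mongodb-mongo-master", "jsCore", "linux-64"):
        print(f"{test_name}: {duration:.1f}s")
```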