| author    | Mikhail Shchatko <mikhail.shchatko@mongodb.com>  | 2022-11-25 16:19:14 +0000 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-25 16:32:31 +0000 |
| commit    | 933ff4b1f401dca11ea688c2fa1f6c4e7899ca7e         |                           |
| tree      | 713942ddfc194a7fc64184a65edcb670b69017da         |                           |
| parent    | 40de085f67b7494357ffd5219e25ed4d15a61a76         |                           |
SERVER-71533 Update legacy task generation to use new test stats location
-rw-r--r-- | buildscripts/burn_in_tags.py                                | 2
-rw-r--r-- | buildscripts/burn_in_tests.py                               | 50
-rwxr-xr-x | buildscripts/evergreen_gen_multiversion_tests.py            | 4
-rwxr-xr-x | buildscripts/evergreen_generate_resmoke_tasks.py            | 34
-rw-r--r-- | buildscripts/tests/test_burn_in_tests.py                    | 44
-rw-r--r-- | buildscripts/tests/test_evergreen_generate_resmoke_tasks.py | 95
-rw-r--r-- | buildscripts/tests/util/test_teststats.py                   | 2
-rw-r--r-- | buildscripts/util/teststats.py                              | 82
8 files changed, 136 insertions, 177 deletions
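For orientation before the patch body: the core of SERVER-71533 is that historic test runtimes are now fetched from a public S3 bucket instead of the Evergreen `test_stats_by_project` API. The sketch below condenses the new `HistoricTaskData.get_stats_from_s3` helper added in `buildscripts/util/teststats.py`; it is a simplified illustration, not the exact patched code (retry and JSON-error handling are trimmed).

```python
# Condensed sketch of the S3-based stats lookup this patch introduces.
# Names mirror the patch (TESTS_STATS_S3_LOCATION, HistoricalTestInformation,
# get_stats_from_s3); error handling is simplified for illustration.
from typing import List, NamedTuple

import requests
from requests.adapters import HTTPAdapter, Retry

TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"


class HistoricalTestInformation(NamedTuple):
    test_name: str
    num_pass: int
    num_fail: int
    avg_duration_pass: float


def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
    """Fetch historic stats for one project/variant/task; empty list if none are published."""
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount("https://", HTTPAdapter(max_retries=retries))
    response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")
    try:
        return [HistoricalTestInformation(**item) for item in response.json()]
    except ValueError:  # no JSON document published for this task -> treat as "no history"
        return []
```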
diff --git a/buildscripts/burn_in_tags.py b/buildscripts/burn_in_tags.py
index ba25fe37dfd..a58de6234bd 100644
--- a/buildscripts/burn_in_tags.py
+++ b/buildscripts/burn_in_tags.py
@@ -155,7 +155,7 @@ def _generate_evg_tasks(evergreen_api: EvergreenApi, shrub_project: ShrubProject
         repeat_tests_secs=config_options.repeat_tests_secs)
 
     create_generate_tasks_config(shrub_build_variant, tests_by_task, gen_config,
-                                 repeat_config, evergreen_api, include_gen_task=False)
+                                 repeat_config, include_gen_task=False)
 
     shrub_project.add_build_variant(shrub_build_variant)
diff --git a/buildscripts/burn_in_tests.py b/buildscripts/burn_in_tests.py
index 525a3253fc6..612ef19603b 100644
--- a/buildscripts/burn_in_tests.py
+++ b/buildscripts/burn_in_tests.py
@@ -515,33 +515,18 @@ def _generate_timeouts(repeat_config: RepeatConfig, test: str,
     return TimeoutInfo.default_timeout()
 
 
-def _get_task_runtime_history(evg_api: Optional[EvergreenApi], project: str, task: str,
-                              variant: str) -> List[TestRuntime]:
+def _get_task_runtime_history(project: str, task: str, variant: str) -> List[TestRuntime]:
     """
-    Fetch historical average runtime for all tests in a task from Evergreen API.
+    Fetch historical average runtime for all tests in a task from S3.
 
-    :param evg_api: Evergreen API.
     :param project: Project name.
     :param task: Task name.
     :param variant: Variant name.
     :return: Test historical runtimes, parsed into teststat objects.
     """
-    if not evg_api:
-        return []
-
-    try:
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=AVG_TEST_RUNTIME_ANALYSIS_DAYS)
-        test_stats = HistoricTaskData.from_evg(evg_api, project, start_date=start_date,
-                                               end_date=end_date, task=task, variant=variant)
-        return test_stats.get_tests_runtimes()
-    except requests.HTTPError as err:
-        if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
-            # Evergreen may return a 503 when the service is degraded.
-            # We fall back to returning no test history
-            return []
-        else:
-            raise
+    test_stats = HistoricTaskData.from_s3(project, task, variant)
+    test_runtimes = test_stats.get_tests_runtimes()
+    return test_runtimes
 
 
 def _create_task(index: int, test_count: int, test: str, task_data: Dict,
@@ -584,7 +569,7 @@ def _create_task(index: int, test_count: int, test: str, task_data: Dict,
 
 
 def create_generated_tasks(tests_by_task: Dict, task_prefix: str, generate_config: GenerateConfig,
-                           repeat_config: RepeatConfig, evg_api: EvergreenApi) -> Set[Task]:
+                           repeat_config: RepeatConfig) -> Set[Task]:
     """
     Create the set of tasks to run the given tests_by_task.
 
@@ -592,16 +577,14 @@ def create_generated_tasks(tests_by_task: Dict, task_prefix: str, generate_confi
     :param task_prefix: Prefix all task names with this.
     :param generate_config: Configuration of what to generate.
     :param repeat_config: Configuration of how to repeat tests.
-    :param evg_api: Evergreen API.
     :return: Set of shrub tasks to run tests_by_task.
     """
     tasks: Set[Task] = set()
     for task in sorted(tests_by_task):
         task_info = tests_by_task[task]
         test_list = task_info["tests"]
-        task_runtime_stats = _get_task_runtime_history(evg_api, generate_config.project,
-                                                       task_info["display_task_name"],
-                                                       generate_config.build_variant)
+        task_runtime_stats = _get_task_runtime_history(
+            generate_config.project, task_info["display_task_name"], generate_config.build_variant)
         test_count = len(test_list)
         for index, test in enumerate(test_list):
             tasks.add(
@@ -613,7 +596,7 @@ def create_generate_tasks_config(build_variant: BuildVariant, tests_by_task: Dic
                                  generate_config: GenerateConfig, repeat_config: RepeatConfig,
-                                 evg_api: Optional[EvergreenApi], include_gen_task: bool = True,
+                                 include_gen_task: bool = True,
                                  task_prefix: str = "burn_in") -> None:
     # pylint: disable=too-many-arguments,too-many-locals
     """
@@ -623,12 +606,10 @@ def create_generate_tasks_config(build_variant: BuildVariant, tests_by_task: Dic
     :param tests_by_task: Dictionary of tests to generate tasks for.
     :param generate_config: Configuration of what to generate.
     :param repeat_config: Configuration of how to repeat tests.
-    :param evg_api: Evergreen API.
     :param include_gen_task: Should generating task be include in display task.
     :param task_prefix: Prefix all task names with this.
     """
-    tasks = create_generated_tasks(tests_by_task, task_prefix, generate_config, repeat_config,
-                                   evg_api)
+    tasks = create_generated_tasks(tests_by_task, task_prefix, generate_config, repeat_config)
 
     existing_tasks = {ExistingTask(BURN_IN_TESTS_GEN_TASK)} if include_gen_task else None
     build_variant.display_task(BURN_IN_TESTS_TASK, tasks, execution_existing_tasks=existing_tasks)
@@ -686,23 +667,21 @@ def create_tests_by_task(build_variant: str, evg_conf: EvergreenProjectConfig,
 
 # pylint: disable=too-many-arguments
 def create_generate_tasks_file(tests_by_task: Dict, generate_config: GenerateConfig,
-                               repeat_config: RepeatConfig, evg_api: Optional[EvergreenApi],
-                               task_prefix: str = 'burn_in', include_gen_task: bool = True) -> str:
+                               repeat_config: RepeatConfig, task_prefix: str = 'burn_in',
+                               include_gen_task: bool = True) -> str:
     """
     Create an Evergreen generate.tasks file to run the given tasks and tests.
 
     :param tests_by_task: Dictionary of tests and tasks to run.
     :param generate_config: Information about how burn_in should generate tasks.
     :param repeat_config: Information about how burn_in should repeat tests.
-    :param evg_api: Evergreen api.
     :param task_prefix: Prefix to start generated task's name with.
     :param include_gen_task: Should the generating task be included in the display task.
     :returns: Configuration to pass to 'generate.tasks'.
     """
     build_variant = BuildVariant(generate_config.run_build_variant)
     create_generate_tasks_config(build_variant, tests_by_task, generate_config, repeat_config,
-                                 evg_api, include_gen_task=include_gen_task,
-                                 task_prefix=task_prefix)
+                                 include_gen_task=include_gen_task, task_prefix=task_prefix)
 
     shrub_project = ShrubProject.empty()
     shrub_project.add_build_variant(build_variant)
@@ -793,8 +772,7 @@ def burn_in(repeat_config: RepeatConfig, generate_config: GenerateConfig, resmok
     LOGGER.debug("tests and tasks found", tests_by_task=tests_by_task)
 
     if generate_tasks_file:
-        json_text = create_generate_tasks_file(tests_by_task, generate_config, repeat_config,
-                                               evg_api)
+        json_text = create_generate_tasks_file(tests_by_task, generate_config, repeat_config)
         write_file(generate_tasks_file, json_text)
     elif not no_exec:
         run_tests(tests_by_task, resmoke_cmd)
diff --git a/buildscripts/evergreen_gen_multiversion_tests.py b/buildscripts/evergreen_gen_multiversion_tests.py
index 323de188446..1bd90c94fd6 100755
--- a/buildscripts/evergreen_gen_multiversion_tests.py
+++ b/buildscripts/evergreen_gen_multiversion_tests.py
@@ -252,9 +252,7 @@ class EvergreenMultiversionConfigGenerator(object):
         # LOOKBACK_DURATION_DAYS. Tests without enough run-time statistics will be placed
         # in the misc suite.
         gen_suites = generate_resmoke.GenerateSubSuites(self.evg_api, self.options)
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=generate_resmoke.LOOKBACK_DURATION_DAYS)
-        suites = gen_suites.calculate_suites(start_date, end_date)
+        suites = gen_suites.calculate_suites()
 
         # Render the given suites into yml files that can be used by resmoke.py.
         config_file_dict = generate_resmoke.render_suite_files(suites, self.options.suite,
                                                                gen_suites.test_list, TEST_SUITE_DIR,
diff --git a/buildscripts/evergreen_generate_resmoke_tasks.py b/buildscripts/evergreen_generate_resmoke_tasks.py
index bea913fce12..de00e3a566d 100755
--- a/buildscripts/evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/evergreen_generate_resmoke_tasks.py
@@ -51,7 +51,6 @@ EVG_CONFIG_FILE = "./.evergreen.yml"
 GENERATE_CONFIG_FILE = "etc/generate_subtasks_config.yml"
 MIN_TIMEOUT_SECONDS = int(timedelta(minutes=5).total_seconds())
 MAX_EXPECTED_TIMEOUT = int(timedelta(hours=48).total_seconds())
-LOOKBACK_DURATION_DAYS = 14
 GEN_SUFFIX = "_gen"
 CLEAN_EVERY_N_HOOK = "CleanEveryN"
 ASAN_SIGNATURE = "detect_leaks=1"
@@ -857,34 +856,21 @@ class GenerateSubSuites(object):
         """Get the configuration of the suite being generated."""
         return read_suite_config(self.config_options.test_suites_dir, self.config_options.suite)
 
-    def calculate_suites(self, start_date: datetime, end_date: datetime) -> List[Suite]:
+    def calculate_suites(self) -> List[Suite]:
         """
         Divide tests into suites based on statistics for the provided period.
 
-        :param start_date: Time to start historical analysis.
-        :param end_date: Time to end historical analysis.
         :return: List of sub suites to be generated.
         """
-        try:
-            evg_stats = HistoricTaskData.from_evg(self.evergreen_api, self.config_options.project,
-                                                  start_date, end_date, self.config_options.task,
-                                                  self.config_options.variant)
-            if not evg_stats:
-                LOGGER.debug("No test history, using fallback suites")
-                # This is probably a new suite, since there is no test history, just use the
-                # fallback values.
-                return self.calculate_fallback_suites()
+        evg_stats = HistoricTaskData.from_s3(self.config_options.project, self.config_options.task,
+                                             self.config_options.variant)
+
+        if evg_stats:
             target_execution_time_secs = self.config_options.target_resmoke_time * 60
             return self.calculate_suites_from_evg_stats(evg_stats, target_execution_time_secs)
-        except requests.HTTPError as err:
-            if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
-                # Evergreen may return a 503 when the service is degraded.
-                # We fall back to splitting the tests into a fixed number of suites.
-                LOGGER.warning("Received 503 from Evergreen, "
-                               "dividing the tests evenly among suites")
-                return self.calculate_fallback_suites()
-            else:
-                raise
+
+        # Since there is no test history this is probably a new suite, just use the fallback values.
+        return self.calculate_fallback_suites()
 
     def calculate_suites_from_evg_stats(self, test_stats: HistoricTaskData,
                                         execution_time_secs: int) -> List[Suite]:
@@ -1041,9 +1027,7 @@ class GenerateSubSuites(object):
         :return: The suites files and evergreen configuration for the generated task.
         """
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=LOOKBACK_DURATION_DAYS)
-        return self.calculate_suites(start_date, end_date)
+        return self.calculate_suites()
 
     def run(self):
         """Generate resmoke suites that run within a target execution time and write to disk."""
diff --git a/buildscripts/tests/test_burn_in_tests.py b/buildscripts/tests/test_burn_in_tests.py
index 493da8d406e..f48ee2ee742 100644
--- a/buildscripts/tests/test_burn_in_tests.py
+++ b/buildscripts/tests/test_burn_in_tests.py
@@ -338,44 +338,26 @@ class TestGenerateTimeouts(unittest.TestCase):
 
 
 class TestGetTaskRuntimeHistory(unittest.TestCase):
-    def test_get_task_runtime_history_with_no_api(self):
-        self.assertListEqual([],
-                             under_test._get_task_runtime_history(None, "project", "task",
-                                                                  "variant"))
-
-    def test__get_task_runtime_history(self):
-        evergreen_api = Mock()
-        evergreen_api.test_stats_by_project.return_value = [
-            Mock(
-                test_file="dir/test2.js",
-                task_name="task1",
-                variant="variant1",
-                distro="distro1",
-                date=_DATE,
+    @patch(ns("HistoricTaskData.get_stats_from_s3"))
+    def test__get_task_runtime_history(self, get_stats_from_s3_mock):
+        test_stats = [
+            teststats_utils.HistoricalTestInformation(
+                test_name="dir/test2.js",
                 num_pass=1,
                 num_fail=0,
                 avg_duration_pass=10.1,
             )
         ]
-        analysis_duration = under_test.AVG_TEST_RUNTIME_ANALYSIS_DAYS
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=analysis_duration)
+        get_stats_from_s3_mock.return_value = test_stats
 
-        result = under_test._get_task_runtime_history(evergreen_api, "project1", "task1",
-                                                      "variant1")
+        result = under_test._get_task_runtime_history("project1", "task1", "variant1")
         self.assertEqual(result, [("dir/test2.js", 10.1)])
-        evergreen_api.test_stats_by_project.assert_called_with(
-            "project1", after_date=start_date, before_date=end_date, group_by="test",
-            group_num_days=14, tasks=["task1"], variants=["variant1"])
-
-    def test__get_task_runtime_history_evg_degraded_mode_error(self):  # pylint: disable=invalid-name
-        response = Mock()
-        response.status_code = requests.codes.SERVICE_UNAVAILABLE
-        evergreen_api = Mock()
-        evergreen_api.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-
-        result = under_test._get_task_runtime_history(evergreen_api, "project1", "task1",
-                                                      "variant1")
+
+    @patch(ns("HistoricTaskData.get_stats_from_s3"))
+    def test__get_task_runtime_history_when_s3_has_no_data(self, get_stats_from_s3_mock):  # pylint: disable=invalid-name
+        get_stats_from_s3_mock.return_value = []
+
+        result = under_test._get_task_runtime_history("project1", "task1", "variant1")
         self.assertEqual(result, [])
diff --git a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
index 1cc59ef0aba..42d185b2830 100644
--- a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
@@ -13,7 +13,7 @@ from mock import patch, MagicMock
 from shrub.v2 import BuildVariant, ShrubProject
 from shrub.variant import DisplayTaskDefinition
 
-from buildscripts.util.teststats import TestRuntime
+from buildscripts.util.teststats import TestRuntime, HistoricalTestInformation
 from buildscripts import evergreen_generate_resmoke_tasks as under_test
@@ -31,7 +31,12 @@ def ns(relative_name):  # pylint: disable=invalid-name
 
 
 def tst_stat_mock(file, duration, pass_count):
-    return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count)
+    return HistoricalTestInformation(
+        test_name=file,
+        num_pass=pass_count,
+        num_fail=0,
+        avg_duration_pass=duration,
+    )
 
 
 def mock_test_stats_unavailable(evg_api_mock):
@@ -108,10 +113,10 @@ class TestAcceptance(unittest.TestCase):
         return target_directory, source_directory
 
     @staticmethod
-    def _mock_test_files(directory, n_tests, runtime, evg_api_mock, suites_config_mock):
+    def _mock_test_files(directory, n_tests, runtime, get_stats_from_s3_mock, suites_config_mock):
         test_list = [os.path.join(directory, f"test_name_{i}.js") for i in range(n_tests)]
         mock_test_stats = [tst_stat_mock(file, runtime, 5) for file in test_list]
-        evg_api_mock.test_stats_by_project.return_value = mock_test_stats
+        get_stats_from_s3_mock.return_value = mock_test_stats
         suites_config_mock.return_value.tests = test_list
         for test in test_list:
             open(test, "w").close()
@@ -136,7 +141,8 @@ class TestAcceptance(unittest.TestCase):
             self.assertEqual(0, len(os.listdir(tmpdir)))
 
     @patch(ns("suitesconfig.get_suite"))
-    def test_when_evg_test_stats_is_down(self, suites_config_mock):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_when_evg_test_stats_is_down(self, get_stats_from_s3_mock, suites_config_mock):
         """
         Given Evergreen historic test stats endpoint is disabled,
         When evergreen_generate_resmoke_tasks attempts to generate suites,
@@ -154,8 +160,9 @@ class TestAcceptance(unittest.TestCase):
             target_directory, source_directory = self._prep_dirs(tmpdir, mock_config)
             suite_path = os.path.join(source_directory, task)
             mock_config["suite"] = suite_path
-            test_list = self._mock_test_files(source_directory, n_tests, 5, evg_api_mock,
+            test_list = self._mock_test_files(source_directory, n_tests, 5, get_stats_from_s3_mock,
                                               suites_config_mock)
+            get_stats_from_s3_mock.return_value = []
             mock_resmoke_config_file(test_list, suite_path + ".yml")
 
             under_test.GenerateSubSuites(evg_api_mock, config).run()
@@ -181,7 +188,8 @@ class TestAcceptance(unittest.TestCase):
         sys.platform.startswith("win"), "Since this test is messing with directories, "
         "windows does not handle test generation correctly")
     @patch(ns("suitesconfig.get_suite"))
-    def test_with_each_test_in_own_task(self, suites_config_mock):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_with_each_test_in_own_task(self, get_stats_from_s3_mock, suites_config_mock):
         """
         Given a task with all tests having a historic runtime over the target,
         When evergreen_generate_resmoke_tasks attempts to generate suites,
@@ -200,8 +208,8 @@ class TestAcceptance(unittest.TestCase):
             target_directory, source_directory = self._prep_dirs(tmpdir, mock_config)
             suite_path = os.path.join(source_directory, task)
             mock_config["suite"] = suite_path
-            test_list = self._mock_test_files(source_directory, n_tests, 15 * 60, evg_api_mock,
-                                              suites_config_mock)
+            test_list = self._mock_test_files(source_directory, n_tests, 15 * 60,
+                                              get_stats_from_s3_mock, suites_config_mock)
             mock_resmoke_config_file(test_list, suite_path + ".yml")
 
             under_test.enable_logging(True)
@@ -857,10 +865,11 @@ class GenerateSubSuitesTest(unittest.TestCase):
         return [f"test{i}.js" for i in range(n_tests)]
 
     @patch(ns("read_suite_config"))
-    def test_calculate_suites(self, mock_read_suite_config):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites(self, mock_get_stats_from_s3, mock_read_suite_config):
         mock_read_suite_config.return_value = {}
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = [
+        mock_get_stats_from_s3.return_value = [
             tst_stat_mock(f"test{i}.js", 60, 1) for i in range(100)
         ]
         config_options = self.get_mock_options()
@@ -872,40 +881,26 @@ class GenerateSubSuitesTest(unittest.TestCase):
         with patch("os.path.exists") as exists_mock, patch(ns("suitesconfig")) as suitesconfig_mock:
             exists_mock.return_value = True
             suitesconfig_mock.get_suite.return_value.tests = \
-                [stat.test_file for stat in evg.test_stats_by_project.return_value]
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+                [stat.test_name for stat in mock_get_stats_from_s3.return_value]
+            suites = gen_sub_suites.calculate_suites()
 
         # There are 100 tests taking 1 minute, with a target of 10 min we expect 10 suites.
         self.assertEqual(10, len(suites))
         for suite in suites:
             self.assertEqual(10, len(suite.tests))
 
-    def test_calculate_suites_fallback(self):
-        n_tests = 100
-        evg = mock_test_stats_unavailable(MagicMock())
-        config_options = self.get_mock_options()
-
-        gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
-        gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
-
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
-
-        self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
-        for suite in suites:
-            self.assertEqual(50, len(suite.tests))
-
-        self.assertEqual(n_tests, len(gen_sub_suites.test_list))
-
-    def test_calculate_suites_fallback_with_fewer_tests_than_max(self):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_fallback_with_fewer_tests_than_max(self, mock_get_stats_from_s3):
         n_tests = 2
         evg = mock_test_stats_unavailable(MagicMock())
         config_options = self.get_mock_options()
         config_options.fallback_num_sub_suites = 5
+        mock_get_stats_from_s3.return_value = []
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
 
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+        suites = gen_sub_suites.calculate_suites()
 
         self.assertEqual(n_tests, len(suites))
         for suite in suites:
@@ -913,15 +908,16 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
         self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_uses_fallback_for_no_results(self):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_uses_fallback_for_no_results(self, mock_get_stats_from_s3):
         n_tests = 100
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = []
+        mock_get_stats_from_s3.return_value = []
         config_options = self.get_mock_options()
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+        suites = gen_sub_suites.calculate_suites()
 
         self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
         for suite in suites:
@@ -929,10 +925,12 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
         self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_uses_fallback_if_only_results_are_filtered(self):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_uses_fallback_if_only_results_are_filtered(
+            self, mock_get_stats_from_s3):
         n_tests = 100
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = [
+        mock_get_stats_from_s3.return_value = [
             tst_stat_mock(f"test{i}.js", 60, 1) for i in range(100)
         ]
         config_options = self.get_mock_options()
@@ -941,7 +939,7 @@ class GenerateSubSuitesTest(unittest.TestCase):
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
         with patch("os.path.exists") as exists_mock:
             exists_mock.return_value = False
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+            suites = gen_sub_suites.calculate_suites()
 
         self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
         for suite in suites:
@@ -949,24 +947,13 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
         self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_error(self):
-        response = MagicMock()
-        response.status_code = requests.codes.INTERNAL_SERVER_ERROR
-        evg = MagicMock()
-        evg.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-        config_options = self.get_mock_options()
-
-        gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
-        gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(100))
-
-        with self.assertRaises(requests.HTTPError):
-            gen_sub_suites.calculate_suites(_DATE, _DATE)
-
     @patch(ns("read_suite_config"))
-    def test_calculate_suites_with_selected_tests_to_run(self, mock_read_suite_config):
+    @patch("buildscripts.util.teststats.HistoricTaskData.get_stats_from_s3")
+    def test_calculate_suites_with_selected_tests_to_run(self, mock_get_stats_from_s3,
+                                                         mock_read_suite_config):
         mock_read_suite_config.return_value = {}
         evg = MagicMock()
-        evg.test_stats_by_project.return_value = [
+        mock_get_stats_from_s3.return_value = [
             tst_stat_mock(f"test{i}.js", 60, 1) for i in range(100)
         ]
         config_options = self.get_mock_options()
@@ -978,8 +965,8 @@ class GenerateSubSuitesTest(unittest.TestCase):
         with patch("os.path.exists") as exists_mock, patch(ns("suitesconfig")) as suitesconfig_mock:
             exists_mock.return_value = True
             suitesconfig_mock.get_suite.return_value.tests = \
-                [stat.test_file for stat in evg.test_stats_by_project.return_value]
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+                [stat.test_name for stat in mock_get_stats_from_s3.return_value]
+            suites = gen_sub_suites.calculate_suites()
 
         # There are 100 tests taking 1 minute, with a target of 10 min we expect 10 suites.
         # However, since we have selected only 2 tests to run, test1.js and
diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py
index b3405ecc00d..5949758559e 100644
--- a/buildscripts/tests/util/test_teststats.py
+++ b/buildscripts/tests/util/test_teststats.py
@@ -79,7 +79,7 @@ class TestHistoricTaskData(unittest.TestCase):
     @staticmethod
     def _make_evg_result(test_file="dir/test1.js", num_pass=0, duration=0):
         return Mock(
-            test_file=test_file,
+            test_name=test_file,
             task_name="task1",
             variant="variant1",
             distro="distro1",
diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py
index b8c0578c19e..5336aa8cd5c 100644
--- a/buildscripts/util/teststats.py
+++ b/buildscripts/util/teststats.py
@@ -1,15 +1,32 @@
 """Utility to support parsing a TestStat."""
 from collections import defaultdict
 from dataclasses import dataclass
-from datetime import datetime
 from itertools import chain
+from json import JSONDecodeError
 from typing import NamedTuple, List, Callable, Optional
-
-from evergreen import EvergreenApi, TestStats
+import requests
+from requests.adapters import HTTPAdapter, Retry
 
 from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file
 
 TASK_LEVEL_HOOKS = {"CleanEveryN"}
+TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"
+
+
+class HistoricalTestInformation(NamedTuple):
+    """
+    Container for information about the historical runtime of a test.
+
+    test_name: Name of test.
+    avg_duration_pass: Average of runtime of test that passed.
+    num_pass: Number of times the test has passed.
+    num_fail: Number of times the test has failed.
+    """
+
+    test_name: str
+    num_pass: int
+    num_fail: int
+    avg_duration_pass: float
 
 
 class TestRuntime(NamedTuple):
@@ -74,9 +91,9 @@ class HistoricHookInfo(NamedTuple):
     avg_duration: float
 
     @classmethod
-    def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo":
+    def from_test_stats(cls, test_stats: HistoricalTestInformation) -> "HistoricHookInfo":
         """Create an instance from a test_stats object."""
-        return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass,
+        return cls(hook_id=test_stats.test_name, num_pass=test_stats.num_pass,
                    avg_duration=test_stats.avg_duration_pass)
 
     def test_name(self) -> str:
@@ -101,10 +118,10 @@ class HistoricTestInfo(NamedTuple):
     hooks: List[HistoricHookInfo]
 
     @classmethod
-    def from_test_stats(cls, test_stats: TestStats,
+    def from_test_stats(cls, test_stats: HistoricalTestInformation,
                         hooks: List[HistoricHookInfo]) -> "HistoricTestInfo":
         """Create an instance from a test_stats object."""
-        return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass,
+        return cls(test_name=test_stats.test_name, num_pass=test_stats.num_pass,
                    avg_duration=test_stats.avg_duration_pass, hooks=hooks)
 
     def normalized_test_name(self) -> str:
@@ -134,46 +151,59 @@ class HistoricTaskData(object):
         """Initialize the TestStats with raw results from the Evergreen API."""
         self.historic_test_results = historic_test_results
 
-    # pylint: disable=too-many-arguments
+    @staticmethod
+    def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
+        """
+        Retrieve test stats from s3 for a given task.
+
+        :param project: Project to query.
+        :param task: Task to query.
+        :param variant: Build variant to query.
+        :return: A list of the Test stats for the specified task.
+        """
+        session = requests.Session()
+        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
+        session.mount('https://', HTTPAdapter(max_retries=retries))
+
+        response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")
+
+        try:
+            data = response.json()
+            return [HistoricalTestInformation(**item) for item in data]
+        except JSONDecodeError:
+            return []
+
     @classmethod
-    def from_evg(cls, evg_api: EvergreenApi, project: str, start_date: datetime, end_date: datetime,
-                 task: str, variant: str) -> "HistoricTaskData":
+    def from_s3(cls, project: str, task: str, variant: str) -> "HistoricTaskData":
         """
-        Retrieve test stats from evergreen for a given task.
+        Retrieve test stats from s3 for a given task.
 
-        :param evg_api: Evergreen API client.
         :param project: Project to query.
-        :param start_date: Start date to query.
-        :param end_date: End date to query.
         :param task: Task to query.
         :param variant: Build variant to query.
        :return: Test stats for the specified task.
         """
-        days = (end_date - start_date).days
-        historic_stats = evg_api.test_stats_by_project(
-            project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant],
-            group_by="test", group_num_days=days)
-
-        return cls.from_stats_list(historic_stats)
+        historical_test_data = cls.get_stats_from_s3(project, task, variant)
+        return cls.from_stats_list(historical_test_data)
 
     @classmethod
-    def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData":
+    def from_stats_list(
+            cls, historical_test_data: List[HistoricalTestInformation]) -> "HistoricTaskData":
         """
         Build historic task data from a list of historic stats.
 
-        :param historic_stats: List of historic stats to build from.
+        :param historical_test_data: A list of information about the runtime of a test.
         :return: Historic task data from the list of stats.
         """
-
         hooks = defaultdict(list)
-        for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]:
+        for hook in [stat for stat in historical_test_data if is_resmoke_hook(stat.test_name)]:
             historical_hook = HistoricHookInfo.from_test_stats(hook)
             hooks[historical_hook.test_name()].append(historical_hook)
 
         return cls([
             HistoricTestInfo.from_test_stats(stat,
-                                             hooks[get_short_name_from_test_file(stat.test_file)])
-            for stat in historic_stats if not is_resmoke_hook(stat.test_file)
+                                             hooks[get_short_name_from_test_file(stat.test_name)])
+            for stat in historical_test_data if not is_resmoke_hook(stat.test_name)
         ])
 
     def get_tests_runtimes(self) -> List[TestRuntime]:
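Taken together, callers that previously needed an `EvergreenApi` handle plus a date range now only pass a project, task, and build variant. A hypothetical usage snippet, mirroring `_get_task_runtime_history` in `buildscripts/burn_in_tests.py` (the project/task/variant values below are illustrative, not taken from the patch):

```python
# Hypothetical caller-side usage after this patch; assumes the mongo
# buildscripts package is importable from the repository root.
from buildscripts.util.teststats import HistoricTaskData


def average_runtimes(project: str, task: str, variant: str):
    """Return (test_name, avg_duration) pairs, or [] when no stats exist in S3."""
    task_data = HistoricTaskData.from_s3(project, task, variant)
    return task_data.get_tests_runtimes()


if __name__ == "__main__":
    # Example values only; real project/task/variant names come from the Evergreen config.
    for test_name, duration in average_runtimes("mongodb-mongo-master", "jsCore", "linux-64"):
        print(f"{test_name}: {duration:.1f}s")
```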