SERVER-71533 Update legacy task generation to use new test stats location

author: Mikhail Shchatko <mikhail.shchatko@mongodb.com> 2022-11-23 08:39:36 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-23 09:41:47 +0000
commit: c651e9267af95ae9f8addd1783f1c3600acfe6c3 (patch)
tree: 768f4e3a4a4921adaac0e919c31159e0d00b33f6
parent: 40553a02ad7a218009fd01989894e6f44a38fe8c (diff)
download: mongo-c651e9267af95ae9f8addd1783f1c3600acfe6c3.tar.gz
8 files changed, 191 insertions, 248 deletions
diff --git a/buildscripts/burn_in_tags.py b/buildscripts/burn_in_tags.py
index 1d6197aedc6..1b97ea97dbf 100644
--- a/buildscripts/burn_in_tags.py
+++ b/buildscripts/burn_in_tags.py
@@ -123,12 +123,10 @@ def _generate_evg_build_variant(shrub_config, build_variant, run_build_variant,
 
 
 # pylint: disable=too-many-arguments
-def _generate_evg_tasks(evergreen_api, shrub_config, expansions_file_data, build_variant_map, repo,
-                        evg_conf):
+def _generate_evg_tasks(shrub_config, expansions_file_data, build_variant_map, repo, evg_conf):
     """
     Generate burn in tests tasks for a given shrub config and group of buildvariants.
 
-    :param evergreen_api: Evergreen.py object.
     :param shrub_config: Shrub config object that the build variants will be built upon.
     :param expansions_file_data: Config data file to use.
     :param build_variant_map: Map of base buildvariants to their generated buildvariant.
@@ -147,7 +145,7 @@ def _generate_evg_tasks(evergreen_api, shrub_config, expansions_file_data, build
                                          repeat_tests_secs=config_options.repeat_tests_secs)
 
             create_generate_tasks_config(shrub_config, tests_by_task, gen_config, repeat_config,
-                                         evergreen_api, include_gen_task=False)
+                                         include_gen_task=False)
 
 
 def _write_to_file(shrub_config):
@@ -163,7 +161,7 @@ def _write_to_file(shrub_config):
         file_handle.write(shrub_config.to_json())
 
 
-def main(evergreen_api, repo):
+def main(repo):
     """Execute Main program."""
 
     parser = argparse.ArgumentParser(description=main.__doc__)
@@ -175,10 +173,9 @@ def main(evergreen_api, repo):
     shrub_config = Configuration()
     evg_conf = evergreen.parse_evergreen_file(EVERGREEN_FILE)
     build_variant_map = _create_evg_build_variant_map(expansions_file_data, evg_conf)
-    _generate_evg_tasks(evergreen_api, shrub_config, expansions_file_data, build_variant_map, repo,
-                        evg_conf)
+    _generate_evg_tasks(shrub_config, expansions_file_data, build_variant_map, repo, evg_conf)
     _write_to_file(shrub_config)
 
 
 if __name__ == '__main__':
-    main(RetryingEvergreenApi.get_api(config_file=EVG_CONFIG_FILE), Repo("."))
+    main(Repo("."))
diff --git a/buildscripts/burn_in_tests.py b/buildscripts/burn_in_tests.py
index d160d9491b7..b453a2da6c8 100644
--- a/buildscripts/burn_in_tests.py
+++ b/buildscripts/burn_in_tests.py
@@ -2,7 +2,6 @@
 """Command line utility for determining what jstests have been added or modified."""
 
 import copy
-import datetime
 import json
 import logging
 import os.path
@@ -15,7 +14,6 @@ from collections import defaultdict
 from typing import Optional, Set, Tuple, List, Dict
 
 import click
-import requests
 import structlog
 from structlog.stdlib import LoggerFactory
 import yaml
@@ -23,8 +21,6 @@ import yaml
 from git import Repo
 from shrub.config import Configuration
 
-from evergreen.api import RetryingEvergreenApi, EvergreenApi
-
 # Get relative imports to work when the package is not installed on the PYTHONPATH.
 if __name__ == "__main__" and __package__ is None:
     sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -36,7 +32,7 @@ from buildscripts.resmokelib.suitesconfig import create_test_membership_map, get
 from buildscripts.resmokelib.utils import default_if_none, globstar
 from buildscripts.ciconfig.evergreen import parse_evergreen_file, ResmokeArgs, \
     EvergreenProjectConfig, VariantTask
-from buildscripts.util.teststats import TestStats
+from buildscripts.util.teststats import TestStats, get_stats_from_s3
 from buildscripts.util.taskname import name_generated_task
 from buildscripts.patch_builds.task_generation import resmoke_commands, TimeoutInfo, TaskList
 
@@ -50,7 +46,6 @@ EXTERNAL_LOGGERS = {
     "urllib3",
 }
 
-AVG_TEST_RUNTIME_ANALYSIS_DAYS = 14
 AVG_TEST_SETUP_SEC = 4 * 60
 AVG_TEST_TIME_MULTIPLIER = 3
 CONFIG_FILE = ".evergreen.yml"
@@ -492,41 +487,23 @@ def _generate_timeouts(repeat_config: RepeatConfig, test: str,
     return TimeoutInfo.default_timeout()
 
 
-def _get_task_runtime_history(evg_api: Optional[EvergreenApi], project: str, task: str,
-                              variant: str):
+def _get_task_runtime_history(project: str, task: str, variant: str):
     """
-    Fetch historical average runtime for all tests in a task from Evergreen API.
+    Fetch historical average runtime for all tests in a task from S3.
 
-    :param evg_api: Evergreen API.
     :param project: Project name.
     :param task: Task name.
     :param variant: Variant name.
     :return: Test historical runtimes, parsed into teststat objects.
     """
-    if not evg_api:
-        return []
-
-    try:
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=AVG_TEST_RUNTIME_ANALYSIS_DAYS)
-        data = evg_api.test_stats_by_project(project, after_date=start_date.strftime("%Y-%m-%d"),
-                                             before_date=end_date.strftime("%Y-%m-%d"),
-                                             tasks=[task], variants=[variant], group_by="test",
-                                             group_num_days=AVG_TEST_RUNTIME_ANALYSIS_DAYS)
-        test_runtimes = TestStats(data).get_tests_runtimes()
-        return test_runtimes
-    except requests.HTTPError as err:
-        if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
-            # Evergreen may return a 503 when the service is degraded.
-            # We fall back to returning no test history
-            return []
-        else:
-            raise
+    data = get_stats_from_s3(project, task, variant)
+    test_runtimes = TestStats(data).get_tests_runtimes()
+    return test_runtimes
 
 
 def create_generate_tasks_config(evg_config: Configuration, tests_by_task: Dict,
                                  generate_config: GenerateConfig, repeat_config: RepeatConfig,
-                                 evg_api: Optional[EvergreenApi], include_gen_task: bool = True,
+                                 include_gen_task: bool = True,
                                  task_prefix: str = "burn_in") -> Configuration:
     # pylint: disable=too-many-arguments,too-many-locals
     """
@@ -536,7 +513,6 @@ def create_generate_tasks_config(evg_config: Configuration, tests_by_task: Dict,
     :param tests_by_task: Dictionary of tests to generate tasks for.
     :param generate_config: Configuration of what to generate.
     :param repeat_config: Configuration of how to repeat tests.
-    :param evg_api: Evergreen API.
     :param include_gen_task: Should generating task be include in display task.
     :param task_prefix: Prefix all task names with this.
     :return: Shrub configuration with added tasks.
@@ -545,7 +521,7 @@ def create_generate_tasks_config(evg_config: Configuration, tests_by_task: Dict,
     resmoke_options = repeat_config.generate_resmoke_options()
     for task in sorted(tests_by_task):
         multiversion_path = tests_by_task[task].get("use_multiversion")
-        task_runtime_stats = _get_task_runtime_history(evg_api, generate_config.project, task,
+        task_runtime_stats = _get_task_runtime_history(generate_config.project, task,
                                                        generate_config.build_variant)
         resmoke_args = tests_by_task[task]["resmoke_args"]
         test_list = tests_by_task[task]["tests"]
@@ -622,23 +598,22 @@ def create_tests_by_task(build_variant: str, repo: Repo, evg_conf: EvergreenProj
 
 # pylint: disable=too-many-arguments
 def create_generate_tasks_file(tests_by_task: Dict, generate_config: GenerateConfig,
-                               repeat_config: RepeatConfig, evg_api: Optional[EvergreenApi],
-                               task_prefix: str = 'burn_in', include_gen_task: bool = True) -> Dict:
+                               repeat_config: RepeatConfig, task_prefix: str = 'burn_in',
+                               include_gen_task: bool = True) -> Dict:
     """
     Create an Evergreen generate.tasks file to run the given tasks and tests.
 
     :param tests_by_task: Dictionary of tests and tasks to run.
     :param generate_config: Information about how burn_in should generate tasks.
     :param repeat_config: Information about how burn_in should repeat tests.
-    :param evg_api: Evergreen api.
     :param task_prefix: Prefix to start generated task's name with.
     :param include_gen_task: Should the generating task be included in the display task.
     :returns: Configuration to pass to 'generate.tasks'.
     """
     evg_config = Configuration()
-    evg_config = create_generate_tasks_config(
-        evg_config, tests_by_task, generate_config, repeat_config, evg_api,
-        include_gen_task=include_gen_task, task_prefix=task_prefix)
+    evg_config = create_generate_tasks_config(evg_config, tests_by_task, generate_config,
+                                              repeat_config, include_gen_task=include_gen_task,
+                                              task_prefix=task_prefix)
 
     json_config = evg_config.to_map()
     tasks_to_create = len(json_config.get('tasks', []))
@@ -687,22 +662,8 @@ def _configure_logging(verbose: bool):
         logging.getLogger(log_name).setLevel(logging.WARNING)
 
 
-def _get_evg_api(evg_api_config: str, local_mode: bool) -> Optional[EvergreenApi]:
-    """
-    Get an instance of the Evergreen Api.
-
-    :param evg_api_config: Config file with evg auth information.
-    :param local_mode: If true, do not connect to Evergreen API.
-    :return: Evergreen Api instance.
-    """
-    if not local_mode:
-        return RetryingEvergreenApi.get_api(config_file=evg_api_config)
-    return None
-
-
 def burn_in(repeat_config: RepeatConfig, generate_config: GenerateConfig, resmoke_args: str,
-            generate_tasks_file: str, no_exec: bool, evg_conf: EvergreenProjectConfig, repo: Repo,
-            evg_api: EvergreenApi):
+            generate_tasks_file: str, no_exec: bool, evg_conf: EvergreenProjectConfig, repo: Repo):
     """
     Run burn_in_tests with the given configuration.
 
@@ -713,7 +674,6 @@ def burn_in(repeat_config: RepeatConfig, generate_config: GenerateConfig, resmok
     :param no_exec: Do not execute tests, just discover tests to run.
     :param evg_conf: Evergreen configuration.
     :param repo: Git repository.
-    :param evg_api: Evergreen API client.
     """
     # Populate the config values in order to use the helpers from resmokelib.suitesconfig.
     resmoke_cmd = _set_resmoke_cmd(repeat_config, list(resmoke_args))
@@ -722,8 +682,7 @@ def burn_in(repeat_config: RepeatConfig, generate_config: GenerateConfig, resmok
     LOGGER.debug("tests and tasks found", tests_by_task=tests_by_task)
 
     if generate_tasks_file:
-        json_config = create_generate_tasks_file(tests_by_task, generate_config, repeat_config,
-                                                 evg_api)
+        json_config = create_generate_tasks_file(tests_by_task, generate_config, repeat_config)
         _write_json_file(json_config, generate_tasks_file)
     elif not no_exec:
         run_tests(tests_by_task, resmoke_cmd)
@@ -752,16 +711,12 @@ def burn_in(repeat_config: RepeatConfig, generate_config: GenerateConfig, resmok
               help="The maximum number of times to repeat tests if time option is specified.")
 @click.option("--repeat-tests-secs", "repeat_tests_secs", default=None, type=int, metavar="SECONDS",
               help="Repeat tests for the given time (in secs).")
-@click.option("--evg-api-config", "evg_api_config", default=CONFIG_FILE, metavar="FILE",
-              help="Configuration file with connection info for Evergreen API.")
-@click.option("--local", "local_mode", default=False, is_flag=True,
-              help="Local mode. Do not call out to evergreen api.")
 @click.option("--verbose", "verbose", default=False, is_flag=True, help="Enable extra logging.")
 @click.argument("resmoke_args", nargs=-1, type=click.UNPROCESSED)
 # pylint: disable=too-many-arguments,too-many-locals
 def main(build_variant, run_build_variant, distro, project, generate_tasks_file, no_exec,
          repeat_tests_num, repeat_tests_min, repeat_tests_max, repeat_tests_secs, resmoke_args,
-         local_mode, evg_api_config, verbose):
+         verbose):
     """
     Run new or changed tests in repeated mode to validate their stability.
 
@@ -799,8 +754,6 @@ def main(build_variant, run_build_variant, distro, project, generate_tasks_file,
     :param repeat_tests_max: Once this number of repetitions has been reached, stop repeating.
     :param repeat_tests_secs: Continue repeating tests for this number of seconds.
     :param resmoke_args: Arguments to pass through to resmoke.
-    :param local_mode: Don't call out to the evergreen API (used for testing).
-    :param evg_api_config: Location of configuration file to connect to evergreen.
     :param verbose: Log extra debug information.
     """
     _configure_logging(verbose)
@@ -817,11 +770,10 @@ def main(build_variant, run_build_variant, distro, project, generate_tasks_file,
                                      project=project)  # yapf: disable
     generate_config.validate(evg_conf)
 
-    evg_api = _get_evg_api(evg_api_config, local_mode)
     repo = Repo(".")
 
     burn_in(repeat_config, generate_config, resmoke_args, generate_tasks_file, no_exec, evg_conf,
-            repo, evg_api)
+            repo)
 
 
 if __name__ == "__main__":
diff --git a/buildscripts/evergreen_generate_resmoke_tasks.py b/buildscripts/evergreen_generate_resmoke_tasks.py
index 230de932764..9e66986cbf5 100755
--- a/buildscripts/evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/evergreen_generate_resmoke_tasks.py
@@ -6,7 +6,6 @@ Analyze the evergreen history for tests run under the given task and create new
 to attempt to keep the task runtime under a specified amount.
 """
 
-import datetime
 from datetime import timedelta
 import logging
 import math
@@ -15,10 +14,9 @@ import re
 import sys
 from collections import defaultdict
 from distutils.util import strtobool  # pylint: disable=no-name-in-module
-from typing import Set
+from typing import Set, List
 
 import click
-import requests
 import structlog
 import yaml
 
@@ -30,7 +28,7 @@ from shrub.task import TaskDependency
 from shrub.variant import DisplayTaskDefinition
 from shrub.variant import TaskSpec
 
-from evergreen.api import EvergreenApi, RetryingEvergreenApi
+from evergreen.api import RetryingEvergreenApi
 
 # Get relative imports to work when the package is not installed on the PYTHONPATH.
 if __name__ == "__main__" and __package__ is None:
@@ -41,6 +39,7 @@ import buildscripts.util.read_config as read_config  # pylint: disable=wrong-imp
 import buildscripts.util.taskname as taskname  # pylint: disable=wrong-import-position
 import buildscripts.util.testname as testname  # pylint: disable=wrong-import-position
 from buildscripts.util.fileops import read_yaml_file  # pylint: disable=wrong-import-position
+from buildscripts.util.teststats import get_stats_from_s3, HistoricalTestInformation  # pylint: disable=wrong-import-position
 
 LOGGER = structlog.getLogger(__name__)
 
@@ -50,7 +49,6 @@ AVG_SETUP_TIME = int(timedelta(minutes=5).total_seconds())
 EVG_CONFIG_FILE = "./.evergreen.yml"
 GENERATE_CONFIG_FILE = "etc/generate_subtasks_config.yml"
 MIN_TIMEOUT_SECONDS = int(timedelta(minutes=5).total_seconds())
-LOOKBACK_DURATION_DAYS = 14
 GEN_SUFFIX = "_gen"
 
 HEADER_TEMPLATE = """# DO NOT EDIT THIS FILE. All manual edits will be lost.
@@ -584,7 +582,7 @@ def normalize_test_name(test_name):
 class TestStats(object):
     """Represent the test statistics for the task that is being analyzed."""
 
-    def __init__(self, evg_test_stats_results):
+    def __init__(self, evg_test_stats_results: List[HistoricalTestInformation]) -> None:
         """Initialize the TestStats with raw results from the Evergreen API."""
         # Mapping from test_file to {"num_run": X, "duration": Y} for tests
         self._runtime_by_test = defaultdict(dict)
@@ -595,9 +593,9 @@ class TestStats(object):
         for doc in evg_test_stats_results:
             self._add_stats(doc)
 
-    def _add_stats(self, test_stats):
+    def _add_stats(self, test_stats: HistoricalTestInformation) -> None:
         """Add the statistics found in a document returned by the Evergreen test_stats/ endpoint."""
-        test_file = testname.normalize_test_file(test_stats.test_file)
+        test_file = testname.normalize_test_file(test_stats.test_name)
         duration = test_stats.avg_duration_pass
         num_run = test_stats.num_pass
         is_hook = testname.is_resmoke_hook(test_file)
@@ -701,36 +699,17 @@ class GenerateSubSuites(object):
             self.generate_options = generate_config
         self.test_list = []
 
-    def calculate_suites(self, start_date, end_date):
+    def calculate_suites(self) -> List[Suite]:
         """Divide tests into suites based on statistics for the provided period."""
-        try:
-            evg_stats = self.get_evg_stats(self.config_options.project, start_date, end_date,
-                                           self.config_options.task, self.config_options.variant)
-            if not evg_stats:
-                # This is probably a new suite, since there is no test history, just use the
-                # fallback values.
-                return self.calculate_fallback_suites()
+        evg_stats = get_stats_from_s3(self.config_options.project, self.config_options.task,
+                                      self.config_options.variant)
+
+        if evg_stats:
             target_execution_time_secs = self.config_options.target_resmoke_time * 60
             return self.calculate_suites_from_evg_stats(evg_stats, target_execution_time_secs)
-        except requests.HTTPError as err:
-            if err.response.status_code == requests.codes.SERVICE_UNAVAILABLE:
-                # Evergreen may return a 503 when the service is degraded.
-                # We fall back to splitting the tests into a fixed number of suites.
-                LOGGER.warning("Received 503 from Evergreen, "
-                               "dividing the tests evenly among suites")
-                return self.calculate_fallback_suites()
-            else:
-                raise
-
-    def get_evg_stats(self, project, start_date, end_date, task, variant):
-        """Collect test execution statistics data from Evergreen."""
-        # pylint: disable=too-many-arguments
-
-        days = (end_date - start_date).days
-        return self.evergreen_api.test_stats_by_project(
-            project, after_date=start_date.strftime("%Y-%m-%d"),
-            before_date=end_date.strftime("%Y-%m-%d"), tasks=[task], variants=[variant],
-            group_by="test", group_num_days=days)
+
+        # Since there is no test history this is probably a new suite, just use the fallback values.
+        return self.calculate_fallback_suites()
 
     def calculate_suites_from_evg_stats(self, data, execution_time_secs):
         """Divide tests into suites that can be run in less than the specified execution time."""
@@ -778,12 +757,9 @@ class GenerateSubSuites(object):
             LOGGER.info("Not generating configuration due to previous successful generation.")
             return
 
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=LOOKBACK_DURATION_DAYS)
-
         prepare_directory_for_suite(CONFIG_DIR)
 
-        suites = self.calculate_suites(start_date, end_date)
+        suites = self.calculate_suites()
 
         LOGGER.debug("Creating suites", num_suites=len(suites), task=self.config_options.task)
 
diff --git a/buildscripts/tests/test_burn_in_tags.py b/buildscripts/tests/test_burn_in_tags.py
index 5d80aa65339..d3872e235be 100644
--- a/buildscripts/tests/test_burn_in_tags.py
+++ b/buildscripts/tests/test_burn_in_tags.py
@@ -9,6 +9,7 @@ from shrub.config import Configuration
 import buildscripts.burn_in_tags as under_test
 
 import buildscripts.ciconfig.evergreen as _evergreen
+from buildscripts.util.teststats import HistoricalTestInformation
 
 # pylint: disable=missing-docstring,invalid-name,unused-argument,no-self-use,protected-access
 
@@ -93,7 +94,9 @@ class TestGenerateEvgBuildVariants(unittest.TestCase):
 
 class TestGenerateEvgTasks(unittest.TestCase):
     @patch(ns("create_tests_by_task"))
-    def test_generate_evg_tasks_no_tests_changed(self, create_tests_by_task_mock):
+    @patch("buildscripts.burn_in_tests.get_stats_from_s3")
+    def test_generate_evg_tasks_no_tests_changed(self, get_stats_from_s3_mock,
+                                                 create_tests_by_task_mock):
         evg_conf_mock = get_evergreen_config()
         create_tests_by_task_mock.return_value = {}
         expansions_file_data = get_expansions_data()
@@ -103,15 +106,18 @@ class TestGenerateEvgTasks(unittest.TestCase):
                 "enterprise-rhel-62-64-bit-majority-read-concern-off-required",
         }  # yapf: disable
         shrub_config = Configuration()
-        evergreen_api = MagicMock()
+        get_stats_from_s3_mock.return_value = []
         repo = MagicMock()
-        under_test._generate_evg_tasks(evergreen_api, shrub_config, expansions_file_data,
-                                       buildvariant_map, repo, evg_conf_mock)
+
+        under_test._generate_evg_tasks(shrub_config, expansions_file_data, buildvariant_map, repo,
+                                       evg_conf_mock)
 
         self.assertEqual(shrub_config.to_map(), {})
 
     @patch(ns("create_tests_by_task"))
-    def test_generate_evg_tasks_one_test_changed(self, create_tests_by_task_mock):
+    @patch("buildscripts.burn_in_tests.get_stats_from_s3")
+    def test_generate_evg_tasks_one_test_changed(self, get_stats_from_s3_mock,
+                                                 create_tests_by_task_mock):
         evg_conf_mock = get_evergreen_config()
         create_tests_by_task_mock.return_value = {
             "aggregation_mongos_passthrough": {
@@ -128,13 +134,17 @@ class TestGenerateEvgTasks(unittest.TestCase):
                 "enterprise-rhel-62-64-bit-majority-read-concern-off-required",
         }  # yapf: disable
         shrub_config = Configuration()
-        evergreen_api = MagicMock()
         repo = MagicMock()
-        evergreen_api.test_stats_by_project.return_value = [
-            MagicMock(test_file="dir/test2.js", avg_duration_pass=10)
+        get_stats_from_s3_mock.return_value = [
+            HistoricalTestInformation(
+                test_name="dir/test2.js",
+                num_pass=1,
+                num_fail=0,
+                avg_duration_pass=10,
+            )
         ]
-        under_test._generate_evg_tasks(evergreen_api, shrub_config, expansions_file_data,
-                                       buildvariant_map, repo, evg_conf_mock)
+        under_test._generate_evg_tasks(shrub_config, expansions_file_data, buildvariant_map, repo,
+                                       evg_conf_mock)
 
         generated_config = shrub_config.to_map()
         self.assertEqual(len(generated_config["buildvariants"]), 2)
diff --git a/buildscripts/tests/test_burn_in_tests.py b/buildscripts/tests/test_burn_in_tests.py
index e4802d5d4e2..b788fe104f0 100644
--- a/buildscripts/tests/test_burn_in_tests.py
+++ b/buildscripts/tests/test_burn_in_tests.py
@@ -12,8 +12,6 @@ import unittest
 from math import ceil
 from mock import Mock, patch, MagicMock
 
-import requests
-
 from shrub.config import Configuration
 
 import buildscripts.burn_in_tests as under_test
@@ -85,7 +83,8 @@ def get_evergreen_config(config_file_path):
 
 class TestAcceptance(unittest.TestCase):
     @patch(ns("_write_json_file"))
-    def test_no_tests_run_if_none_changed(self, write_json_mock):
+    @patch(ns("get_stats_from_s3"))
+    def test_no_tests_run_if_none_changed(self, get_stats_from_s3_mock, write_json_mock):
         """
         Given a git repository with no changes,
         When burn_in_tests is run,
@@ -98,8 +97,9 @@ class TestAcceptance(unittest.TestCase):
             variant,
             "project",
         )  # yapf: disable
+        get_stats_from_s3_mock.return_value = []
 
-        under_test.burn_in(repeat_config, gen_config, "", "testfile.json", False, None, repo, None)
+        under_test.burn_in(repeat_config, gen_config, "", "testfile.json", False, None, repo)
 
         write_json_mock.assert_called_once()
         written_config = write_json_mock.call_args[0][0]
@@ -109,7 +109,8 @@ class TestAcceptance(unittest.TestCase):
 
     @unittest.skipIf(sys.platform.startswith("win"), "not supported on windows")
     @patch(ns("_write_json_file"))
-    def test_tests_generated_if_a_file_changed(self, write_json_mock):
+    @patch(ns("get_stats_from_s3"))
+    def test_tests_generated_if_a_file_changed(self, get_stats_from_s3_mock, write_json_mock):
         """
         Given a git repository with no changes,
         When burn_in_tests is run,
@@ -128,9 +129,9 @@ class TestAcceptance(unittest.TestCase):
             "project",
         )  # yapf: disable
         evg_config = get_evergreen_config("etc/evergreen.yml")
+        get_stats_from_s3_mock.return_value = []
 
-        under_test.burn_in(repeat_config, gen_config, "", "testfile.json", False, evg_config, repo,
-                           None)
+        under_test.burn_in(repeat_config, gen_config, "", "testfile.json", False, evg_config, repo)
 
         write_json_mock.assert_called_once()
         written_config = write_json_mock.call_args[0][0]
@@ -348,45 +349,26 @@ class TestGenerateTimeouts(unittest.TestCase):
 
 
 class TestGetTaskRuntimeHistory(unittest.TestCase):
-    def test_get_task_runtime_history_with_no_api(self):
-        self.assertListEqual([],
-                             under_test._get_task_runtime_history(None, "project", "task",
-                                                                  "variant"))
-
-    def test__get_task_runtime_history(self):
-        evergreen_api = Mock()
-        evergreen_api.test_stats_by_project.return_value = [
-            Mock(
-                test_file="dir/test2.js",
-                task_name="task1",
-                variant="variant1",
-                distro="distro1",
-                date=_DATE,
+    @patch(ns("get_stats_from_s3"))
+    def test__get_task_runtime_history(self, get_stats_from_s3_mock):
+        test_stats = [
+            teststats_utils.HistoricalTestInformation(
+                test_name="dir/test2.js",
                 num_pass=1,
                 num_fail=0,
                 avg_duration_pass=10.1,
             )
         ]
-        analysis_duration = under_test.AVG_TEST_RUNTIME_ANALYSIS_DAYS
-        end_date = datetime.datetime.utcnow().replace(microsecond=0)
-        start_date = end_date - datetime.timedelta(days=analysis_duration)
+        get_stats_from_s3_mock.return_value = test_stats
 
-        result = under_test._get_task_runtime_history(evergreen_api, "project1", "task1",
-                                                      "variant1")
+        result = under_test._get_task_runtime_history("project1", "task1", "variant1")
         self.assertEqual(result, [("dir/test2.js", 10.1)])
-        evergreen_api.test_stats_by_project.assert_called_with(
-            "project1", after_date=start_date.strftime("%Y-%m-%d"),
-            before_date=end_date.strftime("%Y-%m-%d"), group_by="test", group_num_days=14,
-            tasks=["task1"], variants=["variant1"])
-
-    def test__get_task_runtime_history_evg_degraded_mode_error(self):  # pylint: disable=invalid-name
-        response = Mock()
-        response.status_code = requests.codes.SERVICE_UNAVAILABLE
-        evergreen_api = Mock()
-        evergreen_api.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-
-        result = under_test._get_task_runtime_history(evergreen_api, "project1", "task1",
-                                                      "variant1")
+
+    @patch(ns("get_stats_from_s3"))
+    def test__get_task_runtime_history_when_s3_has_no_data(self, get_stats_from_s3_mock):  # pylint: disable=invalid-name
+        get_stats_from_s3_mock.return_value = []
+
+        result = under_test._get_task_runtime_history("project1", "task1", "variant1")
         self.assertEqual(result, [])
 
 
@@ -479,18 +461,21 @@ TESTS_BY_TASK = {
 
 
 class TestCreateGenerateTasksConfig(unittest.TestCase):
-    def test_no_tasks_given(self):
+    @patch(ns("get_stats_from_s3"))
+    def test_no_tasks_given(self, get_stats_from_s3_mock):
         evg_config = Configuration()
         gen_config = MagicMock(run_build_variant="variant")
         repeat_config = MagicMock()
+        get_stats_from_s3_mock.return_value = []
 
         evg_config = under_test.create_generate_tasks_config(evg_config, {}, gen_config,
-                                                             repeat_config, None)
+                                                             repeat_config)
 
         evg_config_dict = evg_config.to_map()
         self.assertNotIn("tasks", evg_config_dict)
 
-    def test_one_task_one_test(self):
+    @patch(ns("get_stats_from_s3"))
+    def test_one_task_one_test(self, get_stats_from_s3_mock):
         n_tasks = 1
         n_tests = 1
         resmoke_options = "options for resmoke"
@@ -499,9 +484,10 @@ class TestCreateGenerateTasksConfig(unittest.TestCase):
         repeat_config = MagicMock()
         repeat_config.generate_resmoke_options.return_value = resmoke_options
         tests_by_task = create_tests_by_task_mock(n_tasks, n_tests)
+        get_stats_from_s3_mock.return_value = []
 
         evg_config = under_test.create_generate_tasks_config(evg_config, tests_by_task, gen_config,
-                                                             repeat_config, None)
+                                                             repeat_config)
 
         evg_config_dict = evg_config.to_map()
         tasks = evg_config_dict["tasks"]
@@ -511,21 +497,24 @@ class TestCreateGenerateTasksConfig(unittest.TestCase):
         self.assertIn("--suites=suite_0", cmd[1]["vars"]["resmoke_args"])
         self.assertIn("tests_0", cmd[1]["vars"]["resmoke_args"])
 
-    def test_n_task_m_test(self):
+    @patch(ns("get_stats_from_s3"))
+    def test_n_task_m_test(self, get_stats_from_s3_mock):
         n_tasks = 3
         n_tests = 5
         evg_config = Configuration()
         gen_config = MagicMock(run_build_variant="variant", distro=None)
         repeat_config = MagicMock()
         tests_by_task = create_tests_by_task_mock(n_tasks, n_tests)
+        get_stats_from_s3_mock.return_value = []
 
         evg_config = under_test.create_generate_tasks_config(evg_config, tests_by_task, gen_config,
-                                                             repeat_config, None)
+                                                             repeat_config)
 
         evg_config_dict = evg_config.to_map()
         self.assertEqual(n_tasks * n_tests, len(evg_config_dict["tasks"]))
 
-    def test_multiversion_path_is_used(self):
+    @patch(ns("get_stats_from_s3"))
+    def test_multiversion_path_is_used(self, get_stats_from_s3_mock):
         n_tasks = 1
         n_tests = 1
         evg_config = Configuration()
@@ -535,9 +524,10 @@ class TestCreateGenerateTasksConfig(unittest.TestCase):
         first_task = "task_0"
         multiversion_path = "multiversion_path"
         tests_by_task[first_task]["use_multiversion"] = multiversion_path
+        get_stats_from_s3_mock.return_value = []
 
         evg_config = under_test.create_generate_tasks_config(evg_config, tests_by_task, gen_config,
-                                                             repeat_config, None)
+                                                             repeat_config)
 
         evg_config_dict = evg_config.to_map()
         tasks = evg_config_dict["tasks"]
diff --git a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
index 94d48e34665..cddcf971b7f 100644
--- a/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
+++ b/buildscripts/tests/test_evergreen_generate_resmoke_tasks.py
@@ -96,16 +96,12 @@ class TestTestStats(unittest.TestCase):
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
     @staticmethod
-    def _make_evg_result(test_file="dir/test1.js", num_pass=0, duration=0):
-        return Mock(
-            test_file=test_file,
-            task_name="task1",
-            variant="variant1",
-            distro="distro1",
-            date=_DATE,
+    def _make_evg_result(test_name="dir/test1.js", num_pass=0, duration=0):
+        return under_test.HistoricalTestInformation(
+            test_name=test_name,
+            avg_duration_pass=duration,
             num_pass=num_pass,
             num_fail=0,
-            avg_duration_pass=duration,
         )
 
 
@@ -685,95 +681,93 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
     def test_calculate_suites(self):
         evg = Mock()
-        evg.test_stats_by_project.return_value = [
-            Mock(test_file="test{}.js".format(i), avg_duration_pass=60, num_pass=1)
-            for i in range(100)
+        test_stats = [
+            under_test.HistoricalTestInformation(
+                test_name="test{}.js".format(i),
+                avg_duration_pass=60,
+                num_pass=1,
+                num_fail=0,
+            ) for i in range(100)
         ]
         config_options = self.get_mock_options()
         config_options.max_sub_suites = 1000
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
 
-        with patch("os.path.exists") as exists_mock, patch(ns("suitesconfig")) as suitesconfig_mock:
+        with patch("os.path.exists") as exists_mock, patch(
+                ns("suitesconfig")) as suitesconfig_mock, patch(
+                    ns("get_stats_from_s3")) as get_stats_from_s3_mock:
             exists_mock.return_value = True
             suitesconfig_mock.get_suite.return_value.tests = \
-                [stat.test_file for stat in evg.test_stats_by_project.return_value]
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+                [stat.test_name for stat in test_stats]
+            get_stats_from_s3_mock.return_value = test_stats
+            suites = gen_sub_suites.calculate_suites()
 
             # There are 100 tests taking 1 minute, with a target of 10 min we expect 10 suites.
             self.assertEqual(10, len(suites))
             for suite in suites:
                 self.assertEqual(10, len(suite.tests))
 
-    def test_calculate_suites_fallback(self):
-        n_tests = 100
-        response = Mock()
-        response.status_code = requests.codes.SERVICE_UNAVAILABLE
-        evg = Mock()
-        evg.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-        config_options = self.get_mock_options()
-
-        gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
-        gen_sub_suites.list_tests = Mock(return_value=self.get_test_list(n_tests))
-
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
-
-        self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
-        for suite in suites:
-            self.assertEqual(50, len(suite.tests))
-
-        self.assertEqual(n_tests, len(gen_sub_suites.test_list))
-
     def test_calculate_suites_fallback_with_fewer_tests_than_max(self):
         n_tests = 2
-        response = Mock()
-        response.status_code = requests.codes.SERVICE_UNAVAILABLE
         evg = Mock()
-        evg.test_stats_by_project.side_effect = requests.HTTPError(response=response)
+        test_stats = []
         config_options = self.get_mock_options()
         config_options.fallback_num_sub_suites = 5
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = MagicMock(return_value=self.get_test_list(n_tests))
 
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+        with patch(ns("get_stats_from_s3")) as get_stats_from_s3_mock:
+            get_stats_from_s3_mock.return_value = test_stats
+            suites = gen_sub_suites.calculate_suites()
 
-        self.assertEqual(n_tests, len(suites))
-        for suite in suites:
-            self.assertEqual(1, len(suite.tests))
+            self.assertEqual(n_tests, len(suites))
+            for suite in suites:
+                self.assertEqual(1, len(suite.tests))
 
-        self.assertEqual(n_tests, len(gen_sub_suites.test_list))
+            self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
     def test_calculate_suites_uses_fallback_for_no_results(self):
         n_tests = 100
         evg = Mock()
-        evg.test_stats_by_project.return_value = []
+        test_stats = []
         config_options = self.get_mock_options()
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = Mock(return_value=self.get_test_list(n_tests))
-        suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
 
-        self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
-        for suite in suites:
-            self.assertEqual(50, len(suite.tests))
+        with patch(ns("get_stats_from_s3")) as get_stats_from_s3_mock:
+            get_stats_from_s3_mock.return_value = test_stats
+            suites = gen_sub_suites.calculate_suites()
+
+            self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
+            for suite in suites:
+                self.assertEqual(50, len(suite.tests))
 
-        self.assertEqual(n_tests, len(gen_sub_suites.test_list))
+            self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
     def test_calculate_suites_uses_fallback_if_only_results_are_filtered(self):
         n_tests = 100
         evg = Mock()
-        evg.test_stats_by_project.return_value = [
-            Mock(test_file="test{}.js".format(i), avg_duration_pass=60, num_pass=1)
-            for i in range(100)
+        test_stats = [
+            under_test.HistoricalTestInformation(
+                test_name="test{}.js".format(i),
+                avg_duration_pass=60,
+                num_pass=1,
+                num_fail=0,
+            ) for i in range(100)
         ]
         config_options = self.get_mock_options()
 
         gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
         gen_sub_suites.list_tests = Mock(return_value=self.get_test_list(n_tests))
-        with patch("os.path.exists") as exists_mock:
+
+        with patch("os.path.exists") as exists_mock, patch(
+                ns("get_stats_from_s3")) as get_stats_from_s3_mock:
             exists_mock.return_value = False
-            suites = gen_sub_suites.calculate_suites(_DATE, _DATE)
+            get_stats_from_s3_mock.return_value = test_stats
+            suites = gen_sub_suites.calculate_suites()
 
             self.assertEqual(gen_sub_suites.config_options.fallback_num_sub_suites, len(suites))
             for suite in suites:
@@ -781,19 +775,6 @@ class GenerateSubSuitesTest(unittest.TestCase):
 
             self.assertEqual(n_tests, len(gen_sub_suites.test_list))
 
-    def test_calculate_suites_error(self):
-        response = Mock()
-        response.status_code = requests.codes.INTERNAL_SERVER_ERROR
-        evg = Mock()
-        evg.test_stats_by_project.side_effect = requests.HTTPError(response=response)
-        config_options = self.get_mock_options()
-
-        gen_sub_suites = under_test.GenerateSubSuites(evg, config_options)
-        gen_sub_suites.list_tests = Mock(return_value=self.get_test_list(100))
-
-        with self.assertRaises(requests.HTTPError):
-            gen_sub_suites.calculate_suites(_DATE, _DATE)
-
     def test_filter_missing_files(self):
         tests_runtimes = [
             ("dir1/file1.js", 20.32),
diff --git a/buildscripts/tests/util/test_teststats.py b/buildscripts/tests/util/test_teststats.py
index 4da8d6942d8..8a9c273d7e2 100644
--- a/buildscripts/tests/util/test_teststats.py
+++ b/buildscripts/tests/util/test_teststats.py
@@ -3,8 +3,6 @@
 import datetime
 import unittest
 
-from mock import Mock
-
 import buildscripts.util.teststats as teststats_utils
 
 # pylint: disable=missing-docstring
@@ -82,14 +80,10 @@ class TestTestStats(unittest.TestCase):
         self.assertEqual(expected_runtimes, test_stats.get_tests_runtimes())
 
     @staticmethod
-    def _make_evg_result(test_file="dir/test1.js", num_pass=0, duration=0):
-        return Mock(
-            test_file=test_file,
-            task_name="task1",
-            variant="variant1",
-            distro="distro1",
-            date=_DATE,
+    def _make_evg_result(test_name="dir/test1.js", num_pass=0, duration=0):
+        return teststats_utils.HistoricalTestInformation(
+            test_name=test_name,
+            avg_duration_pass=duration,
             num_pass=num_pass,
             num_fail=0,
-            avg_duration_pass=duration,
         )
diff --git a/buildscripts/util/teststats.py b/buildscripts/util/teststats.py
index 796428d5a03..985fa50a30f 100644
--- a/buildscripts/util/teststats.py
+++ b/buildscripts/util/teststats.py
@@ -2,8 +2,32 @@
 
 from collections import defaultdict
 from collections import namedtuple
+
+from typing import NamedTuple, List
+import requests
+from requests.adapters import HTTPAdapter, Retry
+
 import buildscripts.util.testname as testname  # pylint: disable=wrong-import-position
 
+TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"
+
+
+class HistoricalTestInformation(NamedTuple):
+    """
+    Container for information about the historical runtime of a test.
+
+    test_name: Name of test.
+    num_pass: Number of times the test has passed.
+    num_fail: Number of times the test has failed.
+    avg_duration_pass: Average runtime of the test across the runs that passed.
+    """
+
+    test_name: str
+    num_pass: int
+    num_fail: int
+    avg_duration_pass: float
+
+
 TestRuntime = namedtuple('TestRuntime', ['test_name', 'runtime'])
 
 
@@ -15,20 +39,20 @@ def normalize_test_name(test_name):
 class TestStats(object):
     """Represent the test statistics for the task that is being analyzed."""
 
-    def __init__(self, evg_test_stats_results):
+    def __init__(self, evg_test_stats_results: List[HistoricalTestInformation]) -> None:
         """Initialize the TestStats with raw results from the Evergreen API."""
         # Mapping from test_file to {"num_run": X, "duration": Y} for tests
         self._runtime_by_test = defaultdict(dict)
         # Mapping from 'test_name:hook_name' to
-        #       {'test_name': {'hook_name': {"num_run": X, "duration": Y}}}
+        #       {'test_name': {'hook_name': {"num_run": X, "duration": Y}}}
         self._hook_runtime_by_test = defaultdict(lambda: defaultdict(dict))
 
         for doc in evg_test_stats_results:
             self._add_stats(doc)
 
-    def _add_stats(self, test_stats):
+    def _add_stats(self, test_stats: HistoricalTestInformation) -> None:
         """Add the statistics found in a document returned by the Evergreen test_stats/ endpoint."""
-        test_file = testname.normalize_test_file(test_stats.test_file)
+        test_file = testname.normalize_test_file(test_stats.test_name)
         duration = test_stats.avg_duration_pass
         num_run = test_stats.num_pass
         is_hook = testname.is_resmoke_hook(test_file)
@@ -78,3 +102,22 @@ class TestStats(object):
             test = TestRuntime(test_name=normalize_test_name(test_file), runtime=duration)
             tests.append(test)
         return sorted(tests, key=lambda x: x.runtime, reverse=True)
+
+
+def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
+    """
+    Retrieve test stats from s3 for a given task.
+
+    :param project: Project to query.
+    :param task: Task to query.
+    :param variant: Build variant to query.
+    :return: A list of the Test stats for the specified task.
+    """
+    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
+    # Use the session as a context manager so its pooled connections are always released.
+    with requests.Session() as session:
+        session.mount('https://', HTTPAdapter(max_retries=retries))
+        response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")
+        data = response.json()
+
+    return [HistoricalTestInformation(**item) for item in data]
author	Mikhail Shchatko <mikhail.shchatko@mongodb.com>	2022-11-23 08:39:36 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-11-23 09:41:47 +0000
commit	c651e9267af95ae9f8addd1783f1c3600acfe6c3 (patch)
tree	768f4e3a4a4921adaac0e919c31159e0d00b33f6
parent	40553a02ad7a218009fd01989894e6f44a38fe8c (diff)
download	mongo-c651e9267af95ae9f8addd1783f1c3600acfe6c3.tar.gz