summaryrefslogtreecommitdiff
path: root/buildscripts/evergreen_gen_build_variant.py
diff options
context:
space:
mode:
author: David Bradford <david.bradford@mongodb.com> 2021-06-11 19:43:08 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-07-07 19:38:59 +0000
commit: f7af7fe8600b5245bdfefd03081948a7525d39f4 (patch)
tree: f294aaa7c46f5ce157ef1e45f9e5ad8d1b588afe /buildscripts/evergreen_gen_build_variant.py
parent: 25e6b31298e882b06b4afb6c72ada5d22e9aaeec (diff)
download: mongo-f7af7fe8600b5245bdfefd03081948a7525d39f4.tar.gz
SERVER-57003: Generate resmoke tasks at build variant granularity
Diffstat (limited to 'buildscripts/evergreen_gen_build_variant.py')
-rw-r--r--buildscripts/evergreen_gen_build_variant.py457
1 files changed, 457 insertions, 0 deletions
diff --git a/buildscripts/evergreen_gen_build_variant.py b/buildscripts/evergreen_gen_build_variant.py
new file mode 100644
index 00000000000..488b98e65fb
--- /dev/null
+++ b/buildscripts/evergreen_gen_build_variant.py
@@ -0,0 +1,457 @@
+#!/usr/bin/env python3
+"""Generate configuration for a build variant."""
+import os
+import re
+from concurrent.futures import ThreadPoolExecutor as Executor
+from datetime import datetime, timedelta
+from time import perf_counter
+from typing import Optional, Any, List, Set
+
+import click
+import inject
+import structlog
+from pydantic import BaseModel
+from evergreen import EvergreenApi, RetryingEvergreenApi
+from evergreen import Task as EvgTask
+
+from buildscripts.ciconfig.evergreen import EvergreenProjectConfig, parse_evergreen_file, Task, \
+ Variant
+from buildscripts.task_generation.evg_config_builder import EvgConfigBuilder
+from buildscripts.task_generation.gen_config import GenerationConfiguration
+from buildscripts.task_generation.gen_task_validation import GenTaskValidationService
+from buildscripts.task_generation.multiversion_util import MultiversionUtilService, \
+ SHARDED_MIXED_VERSION_CONFIGS, REPL_MIXED_VERSION_CONFIGS
+from buildscripts.task_generation.resmoke_proxy import ResmokeProxyConfig
+from buildscripts.task_generation.suite_split import SuiteSplitConfig, SuiteSplitParameters
+from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy, \
+ greedy_division, round_robin_fallback
+from buildscripts.task_generation.task_types.fuzzer_tasks import FuzzerGenTaskParams
+from buildscripts.task_generation.task_types.gentask_options import GenTaskOptions
+from buildscripts.task_generation.task_types.multiversion_tasks import MultiversionGenTaskParams
+from buildscripts.task_generation.task_types.resmoke_tasks import ResmokeGenTaskParams
+from buildscripts.util.cmdutils import enable_logging
+from buildscripts.util.fileops import read_yaml_file
+from buildscripts.util.taskname import remove_gen_suffix
+
# Structured logger for this module, named after the module itself.
LOGGER = structlog.get_logger(__name__)
+
# Directory, relative to the repository root, that is handed to the resmoke proxy as its suite
# directory.
DEFAULT_TEST_SUITE_DIR = os.path.join("buildscripts", "resmokeconfig", "suites")
# Maximum number of worker threads used by the thread pools in this module.
MAX_WORKERS = 16
# How many days before "now" the historic stats lookup window starts.
LOOKBACK_DURATION_DAYS = 14
# Upper bound applied when bumping the priority of a task.
MAX_TASK_PRIORITY = 99
# Directory the generated configuration is written to.
GENERATED_CONFIG_DIR = "generated_resmoke_config"
# Name of the display task that the "_gen" tasks are grouped under.
GEN_PARENT_TASK = "generator_tasks"
# Matches an evergreen expansion such as "${name}" or "${name|default}".
#   id:      the expansion name.
#   default: the optional text after "|"; None when no default is given.
# The default stops at the first "}" so that a string containing more than one expansion does not
# swallow everything up to the last closing brace.
EXPANSION_RE = re.compile(r"\$\{(?P<id>[a-zA-Z0-9_]+)(\|(?P<default>[^}]*))?\}")
+
+
class EvgExpansions(BaseModel):
    """
    Evergreen expansions needed to generate tasks.

    build_id: Build ID being run under.
    build_variant: Build variant being generated.
    is_patch: Whether generation is part of a patch build.
    project: Evergreen project being run under.
    max_tests_per_suite: Maximum amount of tests to include in a suite.
    max_sub_suites: Maximum number of sub-suites to generate per task.
    revision: Git revision being run against.
    task_name: Name of task running.
    target_resmoke_time: Target time of generated sub-suites.
    task_id: ID of task being run under.
    """

    build_id: str
    build_variant: str
    # Explicit default: without it, whether an Optional field may be omitted depends on the
    # pydantic version in use.
    is_patch: Optional[bool] = None
    project: str
    max_tests_per_suite: Optional[int] = 100
    max_sub_suites: Optional[int] = 5
    revision: str
    task_name: str
    target_resmoke_time: Optional[int] = None
    task_id: str

    @classmethod
    def from_yaml_file(cls, path: str) -> "EvgExpansions":
        """
        Read the evergreen expansions from the given YAML file.

        :param path: Path to expansions YAML file.
        :return: Expansions read from file.
        """
        return cls(**read_yaml_file(path))

    def build_suite_split_config(self, start_date: datetime,
                                 end_date: datetime) -> SuiteSplitConfig:
        """
        Get the configuration for splitting suites based on Evergreen expansions.

        :param start_date: Start date for historic stats lookup.
        :param end_date: End date for historic stats lookup.
        :return: Configuration to use for splitting suites.
        """
        return SuiteSplitConfig(
            evg_project=self.project,
            # Falls back to 60 when the expansion is unset (or 0).
            target_resmoke_time=self.target_resmoke_time or 60,
            max_sub_suites=self.max_sub_suites,
            max_tests_per_suite=self.max_tests_per_suite,
            start_date=start_date,
            end_date=end_date,
        )

    def build_evg_config_gen_options(self) -> GenTaskOptions:
        """
        Get the configuration for generating tasks from Evergreen expansions.

        :return: Options to use when generating tasks.
        """
        return GenTaskOptions(
            create_misc_suite=True,
            is_patch=self.is_patch,
            generated_config_dir=GENERATED_CONFIG_DIR,
            use_default_timeouts=False,
        )

    def config_location(self) -> str:
        """
        Location where generated configuration is stored.

        The location is built from the build variant, revision, task name (with any "_gen"
        suffix normalized) and build ID.

        :return: Path of the generated configuration archive.
        """
        task = remove_gen_suffix(self.task_name)
        return f"{self.build_variant}/{self.revision}/generate_tasks/{task}_gen-{self.build_id}.tgz"
+
+
def translate_run_var(run_var: str, build_variant: Variant) -> Any:
    """
    Translate the given "run_var" into an actual value.

    Run_vars can contain evergreen expansions, in which case, the expansion (and possible default
    value) need to be translated into a value we can use. When an expansion is found, the value
    returned is the value of that expansion (or its default), not the run_var with the expansion
    substituted into it.

    :param run_var: Run var to translate. Values that are not strings are returned unchanged.
    :param build_variant: Build variant configuration.
    :return: Value of run_var.
    """
    # Run vars come from parsed YAML, so they may already be ints, bools, etc. Those cannot hold
    # an expansion and would make the regex search raise a TypeError.
    if not isinstance(run_var, str):
        return run_var

    match = EXPANSION_RE.search(run_var)
    if match is None:
        return run_var

    value = build_variant.expansion(match.group("id"))
    if value is None:
        # The build variant does not define the expansion; use the inline default (may be None).
        value = match.group("default")
    return value
+
+
def get_version_configs(is_sharded: bool) -> List[str]:
    """
    Get the version configurations to use.

    :param is_sharded: Whether the sharded configurations should be selected.
    :return: Sharded mixed version configurations if is_sharded, else the replica set ones.
    """
    return SHARDED_MIXED_VERSION_CONFIGS if is_sharded else REPL_MIXED_VERSION_CONFIGS
+
+
class GenerateBuildVariantOrchestrator:
    """Orchestrator for generating tasks in a build variant."""

    # pylint: disable=too-many-arguments
    @inject.autoparams()
    def __init__(
            self,
            gen_task_validation: GenTaskValidationService,
            gen_task_options: GenTaskOptions,
            evg_project_config: EvergreenProjectConfig,
            evg_expansions: EvgExpansions,
            multiversion_util: MultiversionUtilService,
            evg_api: EvergreenApi,
    ) -> None:
        """
        Initialize the orchestrator.

        :param gen_task_validation: Service to validate task generation.
        :param gen_task_options: Options for how tasks should be generated.
        :param evg_project_config: Configuration for Evergreen Project.
        :param evg_expansions: Evergreen expansions for running task.
        :param multiversion_util: Multiversion utility service.
        :param evg_api: Evergreen API client.
        """
        self.gen_task_validation = gen_task_validation
        self.gen_task_options = gen_task_options
        self.evg_project_config = evg_project_config
        self.evg_expansions = evg_expansions
        self.multiversion_util = multiversion_util
        self.evg_api = evg_api

    def get_build_variant_expansion(self, build_variant_name: str, expansion: str) -> Any:
        """
        Get the value of the given expansion for the specified build variant.

        :param build_variant_name: Build Variant to query.
        :param expansion: Expansion to query.
        :return: Value of given expansion.
        """
        build_variant = self.evg_project_config.get_variant(build_variant_name)
        return build_variant.expansion(expansion)

    @staticmethod
    def task_def_to_split_params(task_def: Task, build_variant: str) -> SuiteSplitParameters:
        """
        Build parameters for how a task should be split based on its task definition.

        :param task_def: Task definition in evergreen project config.
        :param build_variant: Name of Build Variant being generated.
        :return: Parameters for how task should be split.
        """
        task = remove_gen_suffix(task_def.name)
        run_func = task_def.generate_resmoke_tasks_command
        if run_func is None:
            # Not a plain resmoke generate task; use the implicit multiversion command instead.
            run_func = task_def.generate_implicit_multiversion_command()
        run_vars = run_func.get("vars", {})

        # The suite defaults to the task name when the task does not name one explicitly.
        suite = run_vars.get("suite", task)
        return SuiteSplitParameters(
            build_variant=build_variant,
            task_name=task,
            suite_name=suite,
            filename=suite,
        )

    def task_def_to_gen_params(self, task_def: Task, build_variant: str) -> ResmokeGenTaskParams:
        """
        Build parameters for how a task should be generated based on its task definition.

        :param task_def: Task definition in evergreen project config.
        :param build_variant: Name of Build Variant being generated.
        :return: Parameters for how task should be generated.
        """
        run_func = task_def.generate_resmoke_tasks_command
        run_vars = run_func["vars"]

        return ResmokeGenTaskParams(
            use_large_distro=run_vars.get("use_large_distro"),
            require_multiversion=run_vars.get("require_multiversion"),
            repeat_suites=1,
            resmoke_args=run_vars.get("resmoke_args"),
            resmoke_jobs_max=run_vars.get("resmoke_jobs_max"),
            large_distro_name=self.get_build_variant_expansion(build_variant, "large_distro_name"),
            config_location=self.evg_expansions.config_location(),
        )

    def task_def_to_mv_gen_params(self, task_def: Task, build_variant: str, is_sharded: bool,
                                  version_config: List[str]) -> MultiversionGenTaskParams:
        """
        Build parameters for how a multiversion task should be generated.

        :param task_def: Task definition in evergreen project config.
        :param build_variant: Name of Build Variant being generated.
        :param is_sharded: Whether the suite being generated is sharded.
        :param version_config: Mixed version configurations to generate.
        :return: Parameters for how task should be generated.
        """
        run_func = task_def.generate_implicit_multiversion_command()
        run_vars = run_func["vars"]

        task = remove_gen_suffix(task_def.name)

        return MultiversionGenTaskParams(
            mixed_version_configs=version_config,
            is_sharded=is_sharded,
            resmoke_args=run_vars.get("resmoke_args"),
            parent_task_name=task,
            origin_suite=run_vars.get("suite", task),
            use_large_distro=run_vars.get("use_large_distro"),
            large_distro_name=self.get_build_variant_expansion(build_variant, "large_distro_name"),
            config_location=self.evg_expansions.config_location(),
        )

    def task_def_to_fuzzer_params(
            self, task_def: Task, build_variant: str, is_sharded: Optional[bool] = None,
            version_config: Optional[List[str]] = None) -> FuzzerGenTaskParams:
        """
        Build parameters for how a fuzzer task should be generated based on its task definition.

        :param task_def: Task definition in evergreen project config.
        :param build_variant: Name of Build Variant being generated.
        :param is_sharded: Whether the suite is sharded; only given for multiversion fuzzers.
        :param version_config: Mixed version configurations; only given for multiversion fuzzers.
        :return: Parameters for how a fuzzer task should be generated.
        """
        variant = self.evg_project_config.get_variant(build_variant)
        # Supplying either multiversion argument marks this as an implicit multiversion fuzzer,
        # whose vars live on a different command than those of a regular fuzzer.
        if is_sharded is not None or version_config is not None:
            run_func = task_def.generate_implicit_multiversion_command()
        else:
            run_func = task_def.generate_fuzzer_tasks_command()
        run_vars = run_func["vars"]
        # Resolve any evergreen expansions in the vars against the build variant.
        run_vars = {k: translate_run_var(v, variant) for k, v in run_vars.items()}

        return FuzzerGenTaskParams(
            task_name=run_vars.get("name"),
            variant=build_variant,
            suite=run_vars.get("suite"),
            num_files=int(run_vars.get("num_files")),
            num_tasks=int(run_vars.get("num_tasks")),
            resmoke_args=run_vars.get("resmoke_args"),
            npm_command=run_vars.get("npm_command", "jstestfuzz"),
            jstestfuzz_vars=run_vars.get("jstestfuzz_vars", ""),
            continue_on_failure=run_vars.get("continue_on_failure"),
            resmoke_jobs_max=run_vars.get("resmoke_jobs_max"),
            should_shuffle=run_vars.get("should_shuffle"),
            timeout_secs=run_vars.get("timeout_secs"),
            require_multiversion=run_vars.get("require_multiversion"),
            use_large_distro=run_vars.get("use_large_distro", False),
            large_distro_name=self.get_build_variant_expansion(build_variant, "large_distro_name"),
            config_location=self.evg_expansions.config_location(),
            is_sharded=is_sharded,
            version_config=version_config,
        )

    def generate(self, task_id: str, build_variant_name: str, output_file: str) -> None:
        """
        Write task configuration for a build variant to disk.

        Nothing is written when the validation service reports that the task should not be
        generated.

        :param task_id: ID of running task.
        :param build_variant_name: Name of build variant to generate.
        :param output_file: Filename to write generated configuration to.
        """
        if not self.gen_task_validation.should_task_be_generated(task_id):
            LOGGER.info("Not generating configuration due to previous successful generation.")
            return

        builder = EvgConfigBuilder()  # pylint: disable=no-value-for-parameter
        builder = self.generate_build_variant(builder, build_variant_name)

        generated_config = builder.build(output_file)
        generated_config.write_all_to_dir(self.gen_task_options.generated_config_dir)

    # pylint: disable=too-many-locals
    def generate_build_variant(self, builder: EvgConfigBuilder,
                               build_variant_name: str) -> EvgConfigBuilder:
        """
        Generate task configuration for a build variant.

        Every generate-style task of the build variant (resmoke, fuzzer, implicit multiversion)
        is submitted to a thread pool for generation. The original "_gen" tasks are then grouped
        under a display task and have their priority bumped.

        :param builder: Evergreen configuration builder to use.
        :param build_variant_name: Name of build variant to generate.
        :return: Evergreen configuration builder with build variant configuration.
        """
        LOGGER.info("Generating config", build_variant=build_variant_name)
        start_time = perf_counter()
        task_list = self.evg_project_config.get_variant(build_variant_name).task_names
        tasks_to_hide = set()
        with Executor(max_workers=MAX_WORKERS) as exe:
            jobs = []
            for task_name in task_list:
                task_def = self.evg_project_config.get_task(task_name)
                if task_def.is_generate_resmoke_task:
                    tasks_to_hide.add(task_name)
                    split_params = self.task_def_to_split_params(task_def, build_variant_name)
                    gen_params = self.task_def_to_gen_params(task_def, build_variant_name)
                    jobs.append(exe.submit(builder.generate_suite, split_params, gen_params))
                elif task_def.is_generate_fuzzer_task():
                    tasks_to_hide.add(task_name)
                    fuzzer_params = self.task_def_to_fuzzer_params(task_def, build_variant_name)
                    jobs.append(exe.submit(builder.generate_fuzzer, fuzzer_params))
                elif task_def.is_generate_implicit_multiversion_task():
                    tasks_to_hide.add(task_name)
                    run_func = task_def.generate_implicit_multiversion_command()
                    run_vars = run_func["vars"]
                    is_jstestfuzz = run_vars.get("is_jstestfuzz", False)
                    # NOTE(review): "suite" is required here, while the param builders fall back
                    # to the task name when it is missing -- confirm which is intended.
                    suite = run_vars["suite"]
                    is_sharded = self.multiversion_util.is_suite_sharded(suite)
                    version_list = get_version_configs(is_sharded)
                    if is_jstestfuzz:
                        fuzzer_params = self.task_def_to_fuzzer_params(
                            task_def, build_variant_name, is_sharded, version_list)
                        jobs.append(exe.submit(builder.generate_fuzzer, fuzzer_params))
                    else:
                        split_params = self.task_def_to_split_params(task_def, build_variant_name)
                        gen_params = self.task_def_to_mv_gen_params(task_def, build_variant_name,
                                                                    is_sharded, version_list)
                        jobs.append(
                            exe.submit(builder.add_multiversion_suite, split_params, gen_params))

            # Wait for every job; result() re-raises any exception raised in the worker thread.
            for job in jobs:
                job.result()

        end_time = perf_counter()
        duration = end_time - start_time

        LOGGER.info("Finished BV", build_variant=build_variant_name, duration=duration,
                    task_count=len(tasks_to_hide))

        builder.add_display_task(GEN_PARENT_TASK, tasks_to_hide, build_variant_name)
        self.adjust_gen_tasks_priority(tasks_to_hide)
        return builder

    def adjust_task_priority(self, task: EvgTask) -> None:
        """
        Increase the priority of the given task by 1.

        The resulting priority is capped at MAX_TASK_PRIORITY.

        :param task: Task to increase priority of.
        """
        priority = min(task.priority + 1, MAX_TASK_PRIORITY)
        LOGGER.info("Configure task", task_id=task.task_id, priority=priority)
        self.evg_api.configure_task(task.task_id, priority=priority)

    def adjust_gen_tasks_priority(self, gen_tasks: Set[str]) -> None:
        """
        Increase the priority of any "_gen" tasks.

        We want to minimize the time it takes for the "_gen" tasks to activate the generated
        sub-tasks. We will do that by increasing the priority of the "_gen" tasks.

        :param gen_tasks: Set of "_gen" tasks that were found.
        """
        if not gen_tasks:
            # Nothing to adjust, so skip looking up the build and its tasks entirely.
            return

        build = self.evg_api.build_by_id(self.evg_expansions.build_id)
        task_list = build.get_tasks()

        with Executor(max_workers=MAX_WORKERS) as exe:
            jobs = [
                exe.submit(self.adjust_task_priority, task) for task in task_list
                if task.display_name in gen_tasks
            ]

            # Wait for every job; result() re-raises any exception raised in the worker thread.
            for job in jobs:
                job.result()
+
+
@click.command()
@click.option("--expansion-file", type=str, required=True,
              help="Location of expansions file generated by evergreen.")
@click.option("--evg-api-config", type=str, required=True,
              help="Location of evergreen api configuration.")
@click.option("--evg-project-config", type=str, default="etc/evergreen.yml",
              help="Location of Evergreen project configuration.")
@click.option("--output-file", type=str, help="Name of output file to write.")
@click.option("--verbose", is_flag=True, default=False, help="Enable verbose logging.")
def main(expansion_file: str, evg_api_config: str, evg_project_config: str, output_file: str,
         verbose: bool) -> None:
    """
    Generate task configuration for a build variant.
    \f
    :param expansion_file: Location of evergreen expansions for task.
    :param evg_api_config: Location of file containing evergreen API authentication information.
    :param evg_project_config: Location of file containing evergreen project configuration.
    :param output_file: Location to write generated configuration to.
    :param verbose: Should verbose logging be used.
    """
    enable_logging(verbose)

    # Historic stats window: the last LOOKBACK_DURATION_DAYS days, ending now (whole seconds).
    lookback = timedelta(days=LOOKBACK_DURATION_DAYS)
    window_end = datetime.utcnow().replace(microsecond=0)
    window_start = window_end - lookback

    expansions = EvgExpansions.from_yaml_file(expansion_file)

    # pylint: disable=no-value-for-parameter
    def bind_dependencies(binder: inject.Binder) -> None:
        """Register everything the orchestrator and its collaborators get injected."""
        binder.bind(EvgExpansions, expansions)
        binder.bind(SuiteSplitConfig, expansions.build_suite_split_config(window_start, window_end))
        binder.bind(SplitStrategy, greedy_division)
        binder.bind(FallbackStrategy, round_robin_fallback)
        binder.bind(GenTaskOptions, expansions.build_evg_config_gen_options())
        binder.bind(EvergreenApi, RetryingEvergreenApi.get_api(config_file=evg_api_config))
        binder.bind(EvergreenProjectConfig, parse_evergreen_file(evg_project_config))
        binder.bind(GenerationConfiguration, GenerationConfiguration.from_yaml_file())
        binder.bind(ResmokeProxyConfig,
                    ResmokeProxyConfig(resmoke_suite_dir=DEFAULT_TEST_SUITE_DIR))

    inject.configure(bind_dependencies)

    # Constructor arguments are supplied by the injector configured above.
    orchestrator = GenerateBuildVariantOrchestrator()  # pylint: disable=no-value-for-parameter
    started = perf_counter()
    orchestrator.generate(expansions.task_id, expansions.build_variant, output_file)
    elapsed = perf_counter() - started

    LOGGER.info("Total runtime", duration=elapsed)
+
+
# Run the CLI only when executed as a script; click fills in main()'s arguments from the command
# line, hence the pylint suppression.
if __name__ == '__main__':
    main()  # pylint: disable=no-value-for-parameter