summaryrefslogtreecommitdiff
path: root/buildscripts/selected_tests.py
blob: 068f9e1b939ddf2f22754d1c62a936a42b4409fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
#!/usr/bin/env python3
"""Command line utility for determining what jstests should run for the given changed files."""
import os
import re
import sys
from datetime import datetime, timedelta
from functools import partial
from typing import Any, Dict, List, Set, Optional

import click
import inject
import structlog
from pydantic import BaseModel
from structlog.stdlib import LoggerFactory
from git import Repo
from evergreen.api import EvergreenApi, RetryingEvergreenApi

# When this file is executed directly as a script (no package context), append the parent of
# this file's directory to sys.path so the absolute `buildscripts.*` imports below resolve.
if __name__ == "__main__" and __package__ is None:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# pylint: disable=wrong-import-position
# Get relative imports to work when the package is not installed on the PYTHONPATH.
from buildscripts.patch_builds.change_data import find_changed_files_in_repos
from buildscripts.patch_builds.evg_change_data import generate_revision_map_from_manifest
from buildscripts.patch_builds.selected_tests.selected_tests_client import SelectedTestsClient
from buildscripts.task_generation.evg_config_builder import EvgConfigBuilder
from buildscripts.task_generation.gen_config import GenerationConfiguration
from buildscripts.task_generation.generated_config import GeneratedConfiguration
from buildscripts.task_generation.suite_split import SuiteSplitParameters, SuiteSplitConfig
from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy, \
    greedy_division, round_robin_fallback
from buildscripts.task_generation.task_types.gentask_options import GenTaskOptions
from buildscripts.task_generation.task_types.resmoke_tasks import ResmokeGenTaskParams
from buildscripts.util.cmdutils import enable_logging
from buildscripts.util.fileops import read_yaml_file
from buildscripts.burn_in_tests import DEFAULT_REPO_LOCATIONS, create_task_list_for_tests, \
    TaskInfo
from buildscripts.ciconfig.evergreen import (
    EvergreenProjectConfig,
    Task,
    parse_evergreen_file,
    Variant,
)
from buildscripts.patch_builds.selected_tests.selected_tests_service import SelectedTestsService

# Use structlog's standard-library logger factory for loggers created by this module.
structlog.configure(logger_factory=LoggerFactory())
LOGGER = structlog.getLogger(__name__)

# NOTE(review): TASK_ID_EXPANSION is not referenced in this file's visible code; presumably the
# name of the evergreen expansion holding the task id -- confirm before removing.
TASK_ID_EXPANSION = "task_id"
# Evergreen project configuration parsed in main().
EVERGREEN_FILE = "etc/evergreen.yml"
# Default value of the --evg-api-config command line option.
EVG_CONFIG_FILE = ".evergreen.yml"
# Directory used both as the generated config dir option and as the output directory.
SELECTED_TESTS_CONFIG_DIR = "generated_resmoke_config"
# NOTE(review): RELATION_THRESHOLD is not referenced in this file's visible code -- confirm usage.
RELATION_THRESHOLD = 0
# Number of days before "now" used as the start of the historic results window in main().
LOOKBACK_DURATION_DAYS = 14

# Task name patterns checked by _exclude_task(). They are applied with `match`, so a pattern
# without a leading ".*" only matches at the start of the task name.
COMPILE_TASK_PATTERN = re.compile(".*compile.*")
CONCURRENCY_TASK_PATTERN = re.compile("concurrency.*")
INTEGRATION_TASK_PATTERN = re.compile("integration.*")
FUZZER_TASK_PATTERN = re.compile(".*fuzz.*")
GENERATE_TASK_PATTERN = re.compile("burn_in.*")
MULTIVERSION_TASK_PATTERN = re.compile(".*multiversion.*")
LINT_TASK_PATTERN = re.compile("lint.*")
STITCH_TASK_PATTERN = re.compile("stitch.*")
EXCLUDE_TASK_PATTERNS = [
    COMPILE_TASK_PATTERN, CONCURRENCY_TASK_PATTERN, INTEGRATION_TASK_PATTERN, FUZZER_TASK_PATTERN,
    GENERATE_TASK_PATTERN, LINT_TASK_PATTERN, MULTIVERSION_TASK_PATTERN, STITCH_TASK_PATTERN
]

# Exact task names checked by _exclude_task(), grouped by the kind of task they name.
CPP_TASK_NAMES = [
    "dbtest",
    "idl_tests",
    "run_unittests",
]
PUBLISH_TASK_NAMES = [
    "package",
    "publish_packages",
    "push",
]
PYTHON_TESTS = ["buildscripts_test"]
EXCLUDE_TASK_LIST = [
    *CPP_TASK_NAMES,
    *PYTHON_TESTS,
    *PUBLISH_TASK_NAMES,
]
# Function names looked up, in this order, on a task to find the command whose vars are used
# as the task's configuration; the first one found wins.
POSSIBLE_RUN_TASK_FUNCS = [
    "generate resmoke tasks",
    "run tests",
]


class EvgExpansions(BaseModel):
    """
    Evergreen expansions consumed by the selected tests generator.

    task_id: ID of the task being run.
    task_name: Name of the task being run.
    build_variant: Name of the build variant being run on.
    build_id: ID of the build being run.
    is_patch: Whether this task is part of a patch build (may be unset).
    project: Evergreen project being run.
    revision: git revision being run against.
    version_id: ID of the version being run.
    """

    task_id: str
    task_name: str
    build_variant: str
    build_id: str
    is_patch: Optional[bool] = None
    project: str
    revision: str
    version_id: str

    @classmethod
    def from_yaml_file(cls, path: str) -> "EvgExpansions":
        """
        Load the evergreen expansions from a YAML file.

        :param path: Location of the expansions file.
        :return: Expansions populated from the contents of the file.
        """
        expansions = read_yaml_file(path)
        return cls(**expansions)

    def build_gen_task_options(self) -> GenTaskOptions:
        """
        Build the options needed to generate tasks.

        :return: Task generation options for selected tests.
        """
        # An unset `is_patch` is treated the same as "not a patch build".
        running_in_patch = bool(self.is_patch)
        return GenTaskOptions(
            create_misc_suite=False,
            generated_config_dir=SELECTED_TESTS_CONFIG_DIR,
            is_patch=running_in_patch,
            use_default_timeouts=False,
        )

    def build_suite_split_config(self, start_date: datetime,
                                 end_date: datetime) -> SuiteSplitConfig:
        """
        Build the options needed to split a suite into sub-suites.

        :param start_date: Start of the window of historic results to consider.
        :param end_date: End of the window of historic results to consider.
        :return: Options for splitting suites.
        """
        split_options = {
            "evg_project": self.project,
            "target_resmoke_time": 60,
            "max_sub_suites": 5,
            "max_tests_per_suite": 100,
            "start_date": start_date,
            "end_date": end_date,
            "include_build_variant_in_name": True,
        }
        return SuiteSplitConfig(**split_options)

    def get_config_location(self) -> str:
        """
        Get the location the generated configuration will be stored.

        :return: Path built from the build variant, revision, task name and build id.
        """
        directory = f"{self.build_variant}/{self.revision}/generate_tasks"
        archive_name = f"{self.task_name}-{self.build_id}.tgz"
        return f"{directory}/{archive_name}"


class TaskConfigService:
    """Service for generating selected tests task configuration."""

    @staticmethod
    def get_evg_task_config(task: Task, build_variant_config: Variant) -> Dict[str, Any]:
        """
        Look up task config of the task to be generated.

        The vars of the first function command from POSSIBLE_RUN_TASK_FUNCS found on the task
        form the base of the configuration. They are copied before the suite name is added so
        that the task definition passed in is not modified by this call.

        :param task: Task to get info for.
        :param build_variant_config: Config of build variant to collect task info from.
        :return: Task configuration values.
        """
        LOGGER.info("Calculating evg_task_config values for task", task=task.name)
        task_vars: Dict[str, Any] = {}
        for run_task_func in POSSIBLE_RUN_TASK_FUNCS:
            task_def = task.find_func_command(run_task_func)
            if task_def:
                # Shallow copy: adding "suite" below must not leak into the task definition.
                task_vars = dict(task_def["vars"])
                break

        task_vars["suite"] = task.get_suite_name()

        # Strip the "_gen" suffix so the reported name is that of the underlying task.
        task_name = task.name[:-4] if task.name.endswith("_gen") else task.name
        return {
            "task_name": task_name,
            "build_variant": build_variant_config.name,
            **task_vars,
            "large_distro_name": build_variant_config.expansion("large_distro_name"),
        }

    def get_task_configs_for_test_mappings(self, tests_by_task: Dict[str, TaskInfo],
                                           build_variant_config: Variant) -> Dict[str, dict]:
        """
        For test mappings, generate a dict containing task names and their config settings.

        Tasks that cannot be found on the build variant, or that are excluded by
        `_exclude_task`, are skipped. Each returned config additionally carries the set of
        tests to run under the "selected_tests_to_run" key.

        :param tests_by_task: Dictionary of tests and tasks to run.
        :param build_variant_config: Config of build variant to collect task info from.
        :return: Dict of task names and their config settings.
        """
        evg_task_configs = {}
        for task_name, test_list_info in tests_by_task.items():
            task = _find_task(build_variant_config, task_name)
            if task and not _exclude_task(task):
                evg_task_config = self.get_evg_task_config(task, build_variant_config)
                evg_task_config["selected_tests_to_run"] = set(test_list_info.tests)
                evg_task_configs[task.name] = evg_task_config

        return evg_task_configs

    def get_task_configs_for_task_mappings(self, related_tasks: List[str],
                                           build_variant_config: Variant) -> Dict[str, dict]:
        """
        For task mappings, generate a dict containing task names and their config settings.

        Tasks that cannot be found on the build variant, or that are excluded by
        `_exclude_task`, are skipped.

        :param related_tasks: List of tasks to run.
        :param build_variant_config: Config of build variant to collect task info from.
        :return: Dict of task names and their config settings.
        """
        evg_task_configs = {}
        for task_name in related_tasks:
            task = _find_task(build_variant_config, task_name)
            if task and not _exclude_task(task):
                evg_task_configs[task.name] = self.get_evg_task_config(
                    task, build_variant_config)

        return evg_task_configs


def _exclude_task(task: Task) -> bool:
    """
    Decide whether the given task should be left out of the analysis.

    A task is left out when its name appears in EXCLUDE_TASK_LIST or when one of the
    EXCLUDE_TASK_PATTERNS matches its name.

    :param task: Task to check.
    :return: True if this task should be excluded.
    """
    name = task.name
    if name not in EXCLUDE_TASK_LIST:
        if not any(pattern.match(name) for pattern in EXCLUDE_TASK_PATTERNS):
            return False

    LOGGER.debug("Excluding task from analysis because it is not a jstest", task=name)
    return True


def _find_task(build_variant_config: Variant, task_name: str) -> Task:
    """
    Find a task on the build variant, trying its "_gen" name as a fallback.

    :param build_variant_config: Config of build variant to collect task info from.
    :param task_name: Name of task to get info for.
    :return: Result of the lookup by `task_name` when it is truthy, otherwise the result of the
        lookup by `task_name` with "_gen" appended.
    """
    return (build_variant_config.get_task(task_name)
            or build_variant_config.get_task(f"{task_name}_gen"))


def _remove_repo_path_prefix(file_path: str) -> str:
    """
    Remove the repo path prefix from the filepath.

    :param file_path: Path of the changed file.
    :return: Path of the changed file without prefix.
    """
    for repo_path in DEFAULT_REPO_LOCATIONS:
        if repo_path != ".":
            if repo_path.startswith("./"):
                repo_path = repo_path[2:]
                file_path = re.sub(repo_path + "/", '', file_path)
    return file_path


def filter_set(item: str, input_set: Set[str]) -> bool:
    """
    Report whether the given item is a member of the given set.

    :param item: Item to look for.
    :param input_set: Set to look in.
    :return: True if the item is contained in the set.
    """
    is_member = item in input_set
    return is_member


class SelectedTestsOrchestrator:
    """Orchestrator for generating selected test builds."""

    # pylint: disable=too-many-arguments
    @inject.autoparams()
    def __init__(self, evg_api: EvergreenApi, evg_conf: EvergreenProjectConfig,
                 selected_tests_service: SelectedTestsService,
                 task_config_service: TaskConfigService, evg_expansions: EvgExpansions) -> None:
        """
        Store the collaborators used to generate selected tests.

        :param evg_api: Evergreen API client.
        :param evg_conf: Evergreen Project configuration.
        :param selected_tests_service: Selected tests service.
        :param task_config_service: Task Config service.
        :param evg_expansions: Evergreen expansions.
        """
        self.evg_api = evg_api
        self.evg_conf = evg_conf
        self.evg_expansions = evg_expansions
        self.selected_tests_service = selected_tests_service
        self.task_config_service = task_config_service

    def find_changed_files(self, repos: List[Repo], task_id: str) -> Set[str]:
        """
        Collect the files changed in the given repos.

        Repo path prefixes are stripped from each path and anything under "src/third_party"
        is dropped.

        :param repos: List of git repos to query.
        :param task_id: ID of task being run.
        :return: Set of files that contain changes.
        """
        revision_map = generate_revision_map_from_manifest(repos, task_id, self.evg_api)
        changed_files = set()
        for raw_path in find_changed_files_in_repos(repos, revision_map):
            stripped_path = _remove_repo_path_prefix(raw_path)
            if not stripped_path.startswith("src/third_party"):
                changed_files.add(stripped_path)

        LOGGER.info("Found changed files", files=changed_files)
        return changed_files

    def get_task_config(self, build_variant_config: Variant,
                        changed_files: Set[str]) -> Dict[str, Dict]:
        """
        Build the configurations of the tasks to generate for a build variant.

        Tasks that already exist in the build for this version are left out.

        :param build_variant_config: Config of build variant to collect task info from.
        :param changed_files: Set of changed_files.
        :return: Task configurations.
        """
        variant_name = build_variant_config.name
        existing_tasks = self.get_existing_tasks(self.evg_expansions.version_id, variant_name)
        task_configs = {}

        related_test_files = self.selected_tests_service.find_selected_test_files(changed_files)
        LOGGER.info("related test files found", related_test_files=related_test_files,
                    variant=build_variant_config.name)

        if related_test_files:
            all_tests_by_task = create_task_list_for_tests(related_test_files,
                                                           build_variant_config.name,
                                                           self.evg_conf)
            LOGGER.info("tests and tasks found", tests_by_task=all_tests_by_task)

            new_tests_by_task = {}
            for task, tests in all_tests_by_task.items():
                if task not in existing_tasks:
                    new_tests_by_task[task] = tests

            task_configs.update(
                self.task_config_service.get_task_configs_for_test_mappings(
                    new_tests_by_task, build_variant_config))

        related_tasks = self.selected_tests_service.find_selected_tasks(changed_files)
        LOGGER.info("related tasks found", related_tasks=related_tasks,
                    variant=build_variant_config.name)
        new_related_tasks = {task for task in related_tasks if task not in existing_tasks}
        if new_related_tasks:
            # Configs from task mappings replace configs from test mappings for the same task:
            # a task mapping runs every test of the task rather than a subset, and we should
            # err on the side of running all tests.
            task_configs.update(
                self.task_config_service.get_task_configs_for_task_mappings(
                    list(new_related_tasks), build_variant_config))

        return task_configs

    def get_existing_tasks(self, version_id: str, build_variant: str) -> Set[str]:
        """
        Look up the names of the tasks already present in a build.

        :param version_id: ID of version to query.
        :param build_variant: Name of build variant to query.
        :return: Set of task names that already exist in the specified build; empty when the
            version has no build for the variant.
        """
        version = self.evg_api.version_by_id(version_id)

        try:
            build = version.build_by_variant(build_variant)
        except KeyError:
            LOGGER.debug("No build exists on this build variant for this version yet",
                         variant=build_variant)
            return set()

        if not build:
            return set()

        return {task.display_name for task in build.get_tasks()}

    def generate_build_variant(self, build_variant_config: Variant, changed_files: Set[str],
                               builder: EvgConfigBuilder) -> None:
        """
        Add the selected tasks for one build variant to the builder.

        :param build_variant_config: Configuration of build variant to generate.
        :param changed_files: List of file changes to determine what to run.
        :param builder: Builder to create new configuration.
        """
        build_variant_name = build_variant_config.name
        LOGGER.info("Generating build variant", build_variant=build_variant_name)

        task_configs = self.get_task_config(build_variant_config, changed_files)
        for task_config in task_configs.values():
            task_def = Task(task_config)

            # Restrict the suite to the selected tests only when a selection was recorded.
            test_filter = (partial(filter_set, input_set=task_config["selected_tests_to_run"])
                           if "selected_tests_to_run" in task_config else None)

            task_name = task_config["task_name"]
            suite = task_config.get("suite", task_name)
            split_params = SuiteSplitParameters(
                build_variant=build_variant_name,
                task_name=task_name,
                suite_name=suite,
                filename=suite,
                test_file_filter=test_filter,
                is_asan=build_variant_config.is_asan_build(),
            )
            gen_params = ResmokeGenTaskParams(
                use_large_distro=task_config.get("use_large_distro", False),
                large_distro_name=task_config.get("large_distro_name"),
                require_multiversion_setup=task_def.require_multiversion_setup(),
                repeat_suites=task_config.get("repeat_suites", 1),
                resmoke_args=task_config["resmoke_args"],
                resmoke_jobs_max=task_config.get("resmoke_jobs_max"),
                config_location=self.evg_expansions.get_config_location(),
            )
            builder.generate_suite(split_params, gen_params)

    def generate(self, repos: List[Repo], task_id: str) -> None:
        """
        Generate the selected tests configuration and write it to disk.

        :param repos: List of git repos containing changes to check.
        :param task_id: ID of task being run.
        """
        changed_files = self.find_changed_files(repos, task_id)
        self.generate_version(changed_files).write_all_to_dir(SELECTED_TESTS_CONFIG_DIR)

    def generate_version(self, changed_files: Set[str]) -> GeneratedConfiguration:
        """
        Generate selected tests configuration for the given file changes.

        Every required build variant of the evergreen configuration is processed.

        :param changed_files: Set of files that contain changes.
        :return: Configuration to generate selected-tests tasks.
        """
        builder = EvgConfigBuilder()  # pylint: disable=no-value-for-parameter
        for variant_config in self.evg_conf.get_required_variants():
            self.generate_build_variant(variant_config, changed_files, builder)

        return builder.build("selected_tests_config.json")


@click.command()
@click.option("--verbose", "verbose", default=False, is_flag=True, help="Enable extra logging.")
@click.option(
    "--expansion-file",
    "expansion_file",
    type=str,
    required=True,
    help="Location of expansions file generated by evergreen.",
)
@click.option(
    "--evg-api-config",
    "evg_api_config",
    default=EVG_CONFIG_FILE,
    metavar="FILE",
    help="Configuration file with connection info for Evergreen API.",
)
@click.option(
    "--selected-tests-config",
    "selected_tests_config",
    required=True,
    metavar="FILE",
    help="Configuration file with connection info for selected tests service.",
)
def main(
        verbose: bool,
        expansion_file: str,
        evg_api_config: str,
        selected_tests_config: str,
):
    """
    Select tasks to be run based on changed files in a patch build.

    :param verbose: Log extra debug information.
    :param expansion_file: Configuration file.
    :param evg_api_config: Location of configuration file to connect to evergreen.
    :param selected_tests_config: Location of config file to connect to elected-tests service.
    """
    enable_logging(verbose)

    # Window of historic results: the LOOKBACK_DURATION_DAYS days ending at the current UTC
    # time, truncated to whole seconds.
    end_date = datetime.utcnow().replace(microsecond=0)
    start_date = end_date - timedelta(days=LOOKBACK_DURATION_DAYS)

    evg_expansions = EvgExpansions.from_yaml_file(expansion_file)

    # Bindings handed to `inject`; SelectedTestsOrchestrator's constructor is decorated with
    # inject.autoparams(), which is why it is created below without explicit arguments.
    def dependencies(binder: inject.Binder) -> None:
        binder.bind(EvgExpansions, evg_expansions)
        binder.bind(EvergreenApi, RetryingEvergreenApi.get_api(config_file=evg_api_config))
        binder.bind(EvergreenProjectConfig, parse_evergreen_file(EVERGREEN_FILE))
        binder.bind(SelectedTestsClient, SelectedTestsClient.from_file(selected_tests_config))
        binder.bind(SuiteSplitConfig, evg_expansions.build_suite_split_config(start_date, end_date))
        binder.bind(SplitStrategy, greedy_division)
        binder.bind(FallbackStrategy, round_robin_fallback)
        binder.bind(GenTaskOptions, evg_expansions.build_gen_task_options())
        binder.bind(GenerationConfiguration, GenerationConfiguration.from_yaml_file())

    inject.configure(dependencies)

    # Only repo locations that exist as directories on disk are opened.
    repos = [Repo(x) for x in DEFAULT_REPO_LOCATIONS if os.path.isdir(x)]
    selected_tests = SelectedTestsOrchestrator()  # pylint: disable=no-value-for-parameter
    selected_tests.generate(repos, evg_expansions.task_id)


# Script entry point: main() is a click command, so it is called without arguments here and
# click supplies them from the command line.
if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter