diff options
Diffstat (limited to 'buildscripts/update_test_lifecycle.py')
-rwxr-xr-x | buildscripts/update_test_lifecycle.py | 1209 |
1 files changed, 0 insertions, 1209 deletions
diff --git a/buildscripts/update_test_lifecycle.py b/buildscripts/update_test_lifecycle.py deleted file mode 100755 index 1fceb98dc4f..00000000000 --- a/buildscripts/update_test_lifecycle.py +++ /dev/null @@ -1,1209 +0,0 @@ -#!/usr/bin/env python - -"""Test Failures - -Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates. -""" - -from __future__ import absolute_import -from __future__ import division - -import collections -import datetime -import logging -import multiprocessing.dummy -import operator -import optparse -import os.path -import posixpath -import subprocess -import sys -import textwrap -import warnings - -import yaml - -# Get relative imports to work when the package is not installed on the PYTHONPATH. -if __name__ == "__main__" and __package__ is None: - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from buildscripts import git -from buildscripts import jiraclient -from buildscripts import resmokelib -from buildscripts.resmokelib import utils -from buildscripts.resmokelib.utils import globstar -from buildscripts import test_failures as tf -from buildscripts.ciconfig import evergreen as ci_evergreen -from buildscripts.ciconfig import tags as ci_tags - - -LOGGER = logging.getLogger(__name__) - -if sys.version_info[0] == 2: - _NUMBER_TYPES = (int, long, float) -else: - _NUMBER_TYPES = (int, float) - - -Rates = collections.namedtuple("Rates", ["acceptable", "unacceptable"]) - - -Config = collections.namedtuple("Config", [ - "test_fail_rates", - "task_fail_rates", - "variant_fail_rates", - "distro_fail_rates", - "reliable_min_runs", - "reliable_time_period", - "unreliable_min_runs", - "unreliable_time_period", -]) - - -DEFAULT_CONFIG = Config( - test_fail_rates=Rates(acceptable=0.1, unacceptable=0.3), - task_fail_rates=Rates(acceptable=0.1, unacceptable=0.3), - variant_fail_rates=Rates(acceptable=0.2, unacceptable=0.4), - distro_fail_rates=Rates(acceptable=0.2, unacceptable=0.4), - reliable_min_runs=5, - reliable_time_period=datetime.timedelta(weeks=1), - unreliable_min_runs=20, - unreliable_time_period=datetime.timedelta(weeks=4)) - - -DEFAULT_PROJECT = "mongodb-mongo-master" - - -DEFAULT_NUM_THREADS = 12 - - -def get_suite_tasks_membership(evg_conf): - """Return a dictionary with keys of all suites and list of associated tasks.""" - suite_membership = collections.defaultdict(list) - for task in evg_conf.tasks: - suite = task.resmoke_suite - if suite: - suite_membership[suite].append(task.name) - return suite_membership - - -def get_test_tasks_membership(evg_conf): - """Return a dictionary with keys of all tests and list of associated tasks.""" - test_suites_membership = resmokelib.parser.create_test_membership_map(test_kind="js_test") - suite_tasks_membership = get_suite_tasks_membership(evg_conf) - test_tasks_membership = collections.defaultdict(list) - for test in test_suites_membership.keys(): - for suite in test_suites_membership[test]: - test_tasks_membership[test].extend(suite_tasks_membership[suite]) - return test_tasks_membership - - -def get_tests_from_tasks(tasks, test_tasks_membership): - """Return a list of tests from list of specified tasks.""" - tests = [] - tasks_set = set(tasks) - for test in test_tasks_membership.keys(): - if not tasks_set.isdisjoint(test_tasks_membership[test]): - tests.append(test) - return tests - - -def create_test_groups(tests): - """Return groups of tests by their directory, i.e., jstests/core.""" - test_groups = collections.defaultdict(list) - for test in tests: - test_split = test.split("/") - # If the test does not have a directory, then ignore it. - if len(test_split) <= 1: - continue - test_dir = test_split[1] - test_groups[test_dir].append(test) - return test_groups - - -def create_batch_groups(test_groups, batch_size): - """Return batch groups list of test_groups.""" - batch_groups = [] - for test_group_name in test_groups: - test_group = test_groups[test_group_name] - while test_group: - batch_groups.append(test_group[:batch_size]) - test_group = test_group[batch_size:] - return batch_groups - - -class TestHistorySource(object): - - """A class used to parallelize requests to buildscripts.test_failures.TestHistory.""" - def __init__(self, project, variants, distros, start_revision, end_revision, - thread_pool_size=DEFAULT_NUM_THREADS): - """ - Initializes the TestHistorySource. - - Args: - project: the Evergreen project name. - variants: a list of variant names. - distros: a list of distro names. - start_revision: the revision delimiting the begining of the history we want to retrieve. - end_revision: the revision delimiting the end of the history we want to retrieve. - thread_pool_size: the size of the thread pool used to make parallel requests. - """ - self._project = project - self._variants = variants - self._distros = distros - self._start_revision = start_revision - self._end_revision = end_revision - self._thread_pool = multiprocessing.dummy.Pool(thread_pool_size) - - def get_history_data(self, tests, tasks): - """Retrieves the history data for the given tests and tasks. - - The requests for each task will be parallelized using the internal thread pool. - """ - history_data = [] - jobs = [self._thread_pool.apply_async(self._get_task_history_data, (tests, task)) - for task in tasks] - for job in jobs: - history_data.extend(job.get()) - return history_data - - def _get_task_history_data(self, tests, task): - test_history = tf.TestHistory(project=self._project, - tests=tests, - tasks=[task], - variants=self._variants, - distros=self._distros) - return test_history.get_history_by_revision(start_revision=self._start_revision, - end_revision=self._end_revision) - - -def callo(args): - """Call a program, and capture its output.""" - return subprocess.check_output(args) - - -def git_commit_range_since(since): - """Returns first and last commit in 'since' period specified. - - Specify 'since' as any acceptable period for git log --since. - The period can be specified as '4.weeks' or '3.days'. - """ - git_command = "git log --since={} --pretty=format:%H".format(since) - commits = callo(git_command.split()).split("\n") - return commits[-1], commits[0] - - -def git_commit_prior(revision): - """Returns commit revision prior to one specified.""" - git_format = "git log -2 {revision} --pretty=format:%H" - git_command = git_format.format(revision=revision) - commits = callo(git_command.split()).split("\n") - return commits[-1] - - -def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run): - """Check for an unreliable test. - - A test should be added to the set of tests believed not to run reliably when it has more - than min_run executions with a failure percentage greater than unacceptable_fr. - """ - return test_runs >= min_run and test_fr >= unacceptable_fr - - -def reliable_test(test_fr, acceptable_fr, test_runs, min_run): - """Check for a reliable test. - - A test should then removed from the set of tests believed not to run reliably when it has - less than min_run executions or has a failure percentage less than acceptable_fr. - """ - return test_runs < min_run or test_fr <= acceptable_fr - - -def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr): - """Raise an error if the acceptable_fr > unacceptable_fr.""" - if acceptable_fr > unacceptable_fr: - raise ValueError("'{}' acceptable failure rate {} must be <= the unacceptable failure rate" - " {}".format(fr_name, acceptable_fr, unacceptable_fr)) - - -def check_days(name, days): - """Raise an error if days < 1.""" - if days < 1: - raise ValueError("'{}' days must be greater than 0.".format(name)) - - -def unreliable_tag(task, variant, distro): - """Returns the unreliable tag.""" - - for (component_name, component_value) in (("task", task), - ("variant", variant), - ("distro", distro)): - if isinstance(component_value, (tf.Wildcard, tf.Missing)): - if component_name == "task": - return "unreliable" - elif component_name == "variant": - return "unreliable|{}".format(task) - elif component_name == "distro": - return "unreliable|{}|{}".format(task, variant) - - return "unreliable|{}|{}|{}".format(task, variant, distro) - - -def update_lifecycle(lifecycle_tags_file, report, method_test, add_tags, fail_rate, min_run): - """Updates the lifecycle object based on the test_method. - - The test_method checks unreliable or reliable fail_rates. - """ - for summary in report: - if method_test(summary.fail_rate, - fail_rate, - summary.num_pass + summary.num_fail, - min_run): - update_tag = unreliable_tag(summary.task, summary.variant, summary.distro) - if add_tags: - lifecycle_tags_file.add_tag("js_test", summary.test, - update_tag, summary.fail_rate) - else: - lifecycle_tags_file.remove_tag("js_test", summary.test, - update_tag, summary.fail_rate) - - -def compare_tags(tag_a, tag_b): - """Compare two tags and return 1, -1 or 0 if 'tag_a' is superior, inferior or - equal to 'tag_b'. - """ - return cmp(tag_a.split("|"), tag_b.split("|")) - - -def validate_config(config): - """ - Raises a TypeError or ValueError exception if 'config' isn't a valid model. - """ - - for (name, fail_rates) in (("test", config.test_fail_rates), - ("task", config.task_fail_rates), - ("variant", config.variant_fail_rates), - ("distro", config.distro_fail_rates)): - if not isinstance(fail_rates.acceptable, _NUMBER_TYPES): - raise TypeError("The acceptable {} failure rate must be a number, but got {}".format( - name, fail_rates.acceptable)) - elif fail_rates.acceptable < 0 or fail_rates.acceptable > 1: - raise ValueError(("The acceptable {} failure rate must be between 0 and 1 (inclusive)," - " but got {}").format(name, fail_rates.acceptable)) - elif not isinstance(fail_rates.unacceptable, _NUMBER_TYPES): - raise TypeError("The unacceptable {} failure rate must be a number, but got {}".format( - name, fail_rates.unacceptable)) - elif fail_rates.unacceptable < 0 or fail_rates.unacceptable > 1: - raise ValueError(("The unacceptable {} failure rate must be between 0 and 1" - " (inclusive), but got {}").format(name, fail_rates.unacceptable)) - elif fail_rates.acceptable > fail_rates.unacceptable: - raise ValueError( - ("The acceptable {0} failure rate ({1}) must be no larger than unacceptable {0}" - " failure rate ({2})").format( - name, fail_rates.acceptable, fail_rates.unacceptable)) - - for (name, min_runs) in (("reliable", config.reliable_min_runs), - ("unreliable", config.unreliable_min_runs)): - if not isinstance(min_runs, _NUMBER_TYPES): - raise TypeError(("The minimum number of runs for considering a test {} must be a" - " number, but got {}").format(name, min_runs)) - elif min_runs <= 0: - raise ValueError(("The minimum number of runs for considering a test {} must be a" - " positive integer, but got {}").format(name, min_runs)) - elif isinstance(min_runs, float) and not min_runs.is_integer(): - raise ValueError(("The minimum number of runs for considering a test {} must be an" - " integer, but got {}").format(name, min_runs)) - - for (name, time_period) in (("reliable", config.reliable_time_period), - ("unreliable", config.unreliable_time_period)): - if not isinstance(time_period, datetime.timedelta): - raise TypeError( - "The {} time period must be a datetime.timedelta instance, but got {}".format( - name, time_period)) - elif time_period.days <= 0: - raise ValueError( - "The {} time period must be a positive number of days, but got {}".format( - name, time_period)) - elif time_period - datetime.timedelta(days=time_period.days) > datetime.timedelta(): - raise ValueError( - "The {} time period must be an integral number of days, but got {}".format( - name, time_period)) - - -def _test_combination_from_entry(entry, components): - """Creates a test combination tuple from a tf._ReportEntry and target components. - - Returns: - A tuple containing the entry fields specified in components. - """ - combination = [] - for component in components: - combination.append(operator.attrgetter(component)(entry)) - return tuple(combination) - - -def _test_combination_from_tag(test, tag): - """Creates a test combination tuple from a test name and a tag. - - Returns: - A tuple containing the test name and the components found in the tag. - """ - combination = [test] - for element in _split_tag(tag): - if element: - combination.append(element) - return tuple(combination) - - -def update_tags(lifecycle_tags, config, report, tests): - """ - Updates the tags in 'lifecycle_tags' based on the historical test failures of tests 'tests' - mentioned in 'report' according to the model described by 'config'. - """ - - # We initialize 'grouped_entries' to make PyLint not complain about 'grouped_entries' being used - # before assignment. - grouped_entries = None - for (i, (components, rates)) in enumerate( - ((tf.Report.TEST_TASK_VARIANT_DISTRO, config.distro_fail_rates), - (tf.Report.TEST_TASK_VARIANT, config.variant_fail_rates), - (tf.Report.TEST_TASK, config.task_fail_rates), - (tf.Report.TEST, config.test_fail_rates))): - if i > 0: - report = tf.Report(grouped_entries) - - # We reassign the value of 'grouped_entries' to take advantage of how data that is on - # (test, task, variant, distro) preserves enough information to be grouped on any subset of - # those components, etc. - grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY) - - # Create the reliable report. - # Filter out any test executions from prior to 'config.reliable_time_period'. - reliable_start_date = (report.end_date - config.reliable_time_period - + datetime.timedelta(days=1)) - reliable_entries = [entry for entry in grouped_entries - if entry.start_date >= reliable_start_date] - if reliable_entries: - reliable_report = tf.Report(reliable_entries) - reliable_combinations = {_test_combination_from_entry(entry, components) - for entry in reliable_entries} - reliable_summaries = reliable_report.summarize_by(components) - else: - reliable_combinations = set() - reliable_summaries = [] - - # Create the unreliable report. - # Filter out any test executions from prior to 'config.unreliable_time_period'. - # Also filter out any test that is not present in the reliable_report in order - # to avoid tagging as unreliable tests that are no longer running. - unreliable_start_date = (report.end_date - config.unreliable_time_period - + datetime.timedelta(days=1)) - unreliable_entries = [ - entry for entry in grouped_entries - if (entry.start_date >= unreliable_start_date and - _test_combination_from_entry(entry, components) in reliable_combinations) - ] - if unreliable_entries: - unreliable_report = tf.Report(unreliable_entries) - unreliable_summaries = unreliable_report.summarize_by(components) - else: - unreliable_summaries = [] - - # Update the tags using the unreliable report. - update_lifecycle(lifecycle_tags, - unreliable_summaries, - unreliable_test, - True, - rates.unacceptable, - config.unreliable_min_runs) - - # Update the tags using the reliable report. - update_lifecycle(lifecycle_tags, - reliable_summaries, - reliable_test, - False, - rates.acceptable, - config.reliable_min_runs) - - def should_be_removed(test, tag): - combination = _test_combination_from_tag(test, tag) - if len(combination) != len(components): - # The tag is not for these components. - return False - return combination not in reliable_combinations - - # Remove the tags that correspond to tests that have not run during the reliable period. - for test in tests: - tags = lifecycle_tags.lifecycle.get_tags("js_test", test) - for tag in tags[:]: - if should_be_removed(test, tag): - LOGGER.info("Removing tag '%s' of test '%s' because the combination did not run" - " during the reliable period", tag, test) - lifecycle_tags.remove_tag("js_test", test, tag, failure_rate=0) - - -def _split_tag(tag): - """Split a tag into its components. - - Return a tuple containing task, variant, distro. The values are None if absent from the tag. - If the tag is invalid, the return value is (None, None, None). - """ - elements = tag.split("|") - length = len(elements) - if elements[0] != "unreliable" or length < 2 or length > 4: - return None, None, None - # fillout the array - elements.extend([None] * (4 - length)) - # return as a tuple - return tuple(elements[1:]) - - -def _is_tag_still_relevant(evg_conf, tag): - """Indicate if a tag still corresponds to a valid task/variant/distro combination.""" - if tag == "unreliable": - return True - task, variant, distro = _split_tag(tag) - if not task or task not in evg_conf.task_names: - return False - if variant: - variant_conf = evg_conf.get_variant(variant) - if not variant_conf or task not in variant_conf.task_names: - return False - if distro and distro not in variant_conf.distros: - return False - return True - - -def clean_up_tags(lifecycle_tags, evg_conf): - """Remove the tags that do not correspond to a valid test/task/variant/distro combination.""" - lifecycle = lifecycle_tags.lifecycle - for test_kind in lifecycle.get_test_kinds(): - for test_pattern in lifecycle.get_test_patterns(test_kind): - if not globstar.glob(test_pattern): - # The pattern does not match any file in the repository. - lifecycle_tags.clean_up_test(test_kind, test_pattern) - continue - for tag in lifecycle.get_tags(test_kind, test_pattern): - if not _is_tag_still_relevant(evg_conf, tag): - lifecycle_tags.clean_up_tag(test_kind, test_pattern, tag) - - -def _config_as_options(config): - return ("--reliableTestMinRuns {} " - "--reliableDays {} " - "--unreliableTestMinRuns {} " - "--unreliableDays {} " - "--testFailRates {} {} " - "--taskFailRates {} {} " - "--variantFailRates {} {} " - "--distroFailRates {} {}").format( - config.reliable_min_runs, - config.reliable_time_period.days, - config.unreliable_min_runs, - config.unreliable_time_period.days, - config.test_fail_rates.acceptable, - config.test_fail_rates.unacceptable, - config.task_fail_rates.acceptable, - config.task_fail_rates.unacceptable, - config.variant_fail_rates.acceptable, - config.variant_fail_rates.unacceptable, - config.distro_fail_rates.acceptable, - config.distro_fail_rates.unacceptable) - - -class TagsConfigWithChangelog(object): - """A wrapper around TagsConfig that can perform updates on a tags file and record the - modifications made. - """ - - def __init__(self, lifecycle): - """Initialize the TagsConfigWithChangelog with the lifecycle TagsConfig.""" - self.lifecycle = lifecycle - self.added = {} - self.removed = {} - self.cleaned_up = {} - - @staticmethod - def _cancel_tag_log(log_dict, test_kind, test, tag): - """Remove a tag from a changelog dictionary. - - Used to remove a tag from the 'added' or 'removed' attribute. - """ - kind_dict = log_dict[test_kind] - test_dict = kind_dict[test] - del test_dict[tag] - if not test_dict: - del kind_dict[test] - if not kind_dict: - del log_dict[test_kind] - - def add_tag(self, test_kind, test, tag, failure_rate): - """Add a tag.""" - if self.lifecycle.add_tag(test_kind, test, tag): - if tag in self.removed.get(test_kind, {}).get(test, {}): - # The tag has just been removed. - self._cancel_tag_log(self.removed, test_kind, test, tag) - else: - self.added.setdefault(test_kind, {}).setdefault(test, {})[tag] = failure_rate - - def remove_tag(self, test_kind, test, tag, failure_rate): - """Remove a tag.""" - if self.lifecycle.remove_tag(test_kind, test, tag): - if tag in self.added.get(test_kind, {}).get(test, {}): - # The tag has just been added. - self._cancel_tag_log(self.added, test_kind, test, tag) - else: - self.removed.setdefault(test_kind, {}).setdefault(test, {})[tag] = failure_rate - - def clean_up_tag(self, test_kind, test, tag): - """Clean up an invalid tag.""" - self.lifecycle.remove_tag(test_kind, test, tag) - self.cleaned_up.setdefault(test_kind, {}).setdefault(test, []).append(tag) - - def clean_up_test(self, test_kind, test): - """Clean up an invalid test.""" - self.lifecycle.remove_test_pattern(test_kind, test) - self.cleaned_up.setdefault(test_kind, {})[test] = [] - - -class JiraIssueCreator(object): - _LABEL = "test-lifecycle" - _PROJECT = "TIGBOT" - _MAX_DESCRIPTION_SIZE = 32767 - - def __init__(self, - server=None, - username=None, - password=None, - access_token=None, - access_token_secret=None, - consumer_key=None, - key_cert=None): - self._client = jiraclient.JiraClient( - server=server, - username=username, - password=password, - access_token=access_token, - access_token_secret=access_token_secret, - consumer_key=consumer_key, - key_cert=key_cert) - - def create_issue(self, evg_project, mongo_revision, model_config, added, removed, cleaned_up): - """Create a JIRA issue for the test lifecycle tag update.""" - summary = self._get_jira_summary(evg_project) - description = self._get_jira_description(evg_project, mongo_revision, model_config, - added, removed, cleaned_up) - issue_key = self._client.create_issue(self._PROJECT, summary, description, [self._LABEL]) - return issue_key - - def close_fix_issue(self, issue_key): - """Close the issue with the "Fixed" resolution.""" - LOGGER.info("Closing issue '%s' as FIXED.", issue_key) - self._client.close_issue(issue_key, self._client.FIXED_RESOLUTION_NAME) - - def close_wontfix_issue(self, issue_key): - """Close the issue the with "Won't Fix" resolution.""" - LOGGER.info("Closing issue '%s' as WON'T FIX.", issue_key) - self._client.close_issue(issue_key, self._client.WONT_FIX_RESOLUTION_NAME) - - @staticmethod - def _get_jira_summary(project): - return "Update of test lifecycle tags for {}".format(project) - - @staticmethod - def _monospace(text): - """Transform a text into a monospace JIRA text.""" - return "{{" + text + "}}" - - @staticmethod - def _truncate_description(desc): - max_size = JiraIssueCreator._MAX_DESCRIPTION_SIZE - if len(desc) > max_size: - warning = ("\nDescription truncated: " - "exceeded max size of {} characters.").format(max_size) - truncated_length = max_size - len(warning) - desc = desc[:truncated_length] + warning - return desc - - @staticmethod - def _get_jira_description(project, mongo_revision, model_config, added, removed, cleaned_up): - mono = JiraIssueCreator._monospace - config_desc = _config_as_options(model_config) - added_desc = JiraIssueCreator._make_updated_tags_description(added) - removed_desc = JiraIssueCreator._make_updated_tags_description(removed) - cleaned_up_desc = JiraIssueCreator._make_tags_cleaned_up_description(cleaned_up) - project_link = "[{0}|https://evergreen.mongodb.com/waterfall/{1}]".format( - mono(project), project) - revision_link = "[{0}|https://github.com/mongodb/mongo/commit/{1}]".format( - mono(mongo_revision), mongo_revision) - full_desc = ("h3. Automatic update of the test lifecycle tags\n" - "Evergreen Project: {0}\n" - "Revision: {1}\n\n" - "{{{{update_test_lifecycle.py}}}} options:\n{2}\n\n" - "h5. Tags added\n{3}\n\n" - "h5. Tags removed\n{4}\n\n" - "h5. Tags cleaned up (no longer relevant)\n{5}\n").format( - project_link, revision_link, mono(config_desc), - added_desc, removed_desc, cleaned_up_desc) - - return JiraIssueCreator._truncate_description(full_desc) - - @staticmethod - def _make_updated_tags_description(data): - mono = JiraIssueCreator._monospace - tags_lines = [] - for test_kind in sorted(data.keys()): - tests = data[test_kind] - tags_lines.append("- *{0}*".format(test_kind)) - for test in sorted(tests.keys()): - tags = tests[test] - tags_lines.append("-- {0}".format(mono(test))) - for tag in sorted(tags.keys()): - coefficient = tags[tag] - tags_lines.append("--- {0} ({1:.2f})".format(mono(tag), coefficient)) - if tags_lines: - return "\n".join(tags_lines) - else: - return "_None_" - - @staticmethod - def _make_tags_cleaned_up_description(cleaned_up): - mono = JiraIssueCreator._monospace - tags_cleaned_up_lines = [] - for test_kind in sorted(cleaned_up.keys()): - test_tags = cleaned_up[test_kind] - tags_cleaned_up_lines.append("- *{0}*".format(test_kind)) - for test in sorted(test_tags.keys()): - tags = test_tags[test] - tags_cleaned_up_lines.append("-- {0}".format(mono(test))) - if not tags: - tags_cleaned_up_lines.append("--- ALL (test file removed or renamed as part of" - " an earlier commit)") - else: - for tag in sorted(tags): - tags_cleaned_up_lines.append("--- {0}".format(mono(tag))) - if tags_cleaned_up_lines: - return "\n".join(tags_cleaned_up_lines) - else: - return "_None_" - - -class LifecycleTagsFile(object): - """Represent a test lifecycle tags file that can be written and committed.""" - - def __init__(self, project, lifecycle_file, metadata_repo_url=None, references_file=None, - jira_issue_creator=None, git_info=None, model_config=None): - """Initalize the LifecycleTagsFile. - - Arguments: - project: The Evergreen project name, e.g. "mongodb-mongo-master". - lifecycle_file: The path to the lifecycle tags file. If 'metadata_repo_url' is - specified, this path must be relative to the root of the metadata repository. - metadata_repo_url: The URL of the metadat repository that contains the test lifecycle - tags file. - references_file: The path to the references file in the metadata repository. - jira_issue_creator: A JiraIssueCreator instance. - git_info: A tuple containing the git user's name and email to set before committing. - model_config: The model configuration as a Config instance. - """ - self.project = project - self.mongo_repo = git.Repository(os.getcwd()) - self.mongo_revision = self.mongo_repo.get_current_revision() - # The branch name is the same on both repositories. - self.mongo_branch = self.mongo_repo.get_branch_name() - self.metadata_branch = project - - if metadata_repo_url: - # The file can be found in another repository. We clone it. - self.metadata_repo = self._clone_repository(metadata_repo_url, self.project) - self.relative_lifecycle_file = lifecycle_file - self.lifecycle_file = os.path.join(self.metadata_repo.directory, lifecycle_file) - self.relative_references_file = references_file - self.references_file = os.path.join(self.metadata_repo.directory, references_file) - if git_info: - self.metadata_repo.configure("user.name", git_info[0]) - self.metadata_repo.configure("user.email", git_info[1]) - else: - self.metadata_repo = None - self.relative_lifecycle_file = lifecycle_file - self.lifecycle_file = lifecycle_file - self.relative_references_file = None - self.references_file = None - self.metadata_repo_url = metadata_repo_url - self.lifecycle = ci_tags.TagsConfig.from_file(self.lifecycle_file, cmp_func=compare_tags) - self.jira_issue_creator = jira_issue_creator - self.model_config = model_config - self.changelog_lifecycle = TagsConfigWithChangelog(self.lifecycle) - - @staticmethod - def _clone_repository(metadata_repo_url, branch): - directory_name = posixpath.splitext(posixpath.basename(metadata_repo_url))[0] - LOGGER.info("Cloning the repository %s into the directory %s", - metadata_repo_url, directory_name) - return git.Repository.clone(metadata_repo_url, directory_name, branch) - - def is_modified(self): - """Indicate if the tags have been modified.""" - return self.lifecycle.is_modified() - - def _create_issue(self): - LOGGER.info("Creating a JIRA issue") - issue_key = self.jira_issue_creator.create_issue( - self.project, self.mongo_revision, self.model_config, - self.changelog_lifecycle.added, self.changelog_lifecycle.removed, - self.changelog_lifecycle.cleaned_up) - LOGGER.info("JIRA issue created: %s", issue_key) - return issue_key - - def write(self): - """Write the test lifecycle tag file.""" - LOGGER.info("Writing the tag file to '%s'", self.lifecycle_file) - comment = ("This file was generated by {} and shouldn't be edited by hand. It was" - " generated against commit {} with the following options: {}.").format( - sys.argv[0], self.mongo_repo.get_current_revision(), - _config_as_options(self.model_config)) - self.lifecycle.write_file(self.lifecycle_file, comment) - - def _ready_for_commit(self, ref_branch, references): - # Check that the test lifecycle tags file has changed. - diff = self.metadata_repo.git_diff(["--name-only", ref_branch, - self.relative_lifecycle_file]) - if not diff: - LOGGER.info("The local lifecycle file is identical to the the one on branch '%s'", - ref_branch) - return False - # Check that the lifecycle file has not been updated after the current mongo revision. - update_revision = references.get("test-lifecycle", {}).get(self.project) - if update_revision and not self.mongo_repo.is_ancestor(update_revision, - self.mongo_revision): - LOGGER.warning(("The existing lifecycle file is based on revision '%s' which is not a" - " parent revision of the current revision '%s'"), - update_revision, self.mongo_revision) - return False - return True - - def _read_references(self, metadata_branch=None): - branch = metadata_branch if metadata_branch is not None else "" - references_content = self.metadata_repo.git_cat_file( - ["blob", "{0}:{1}".format(branch, self.relative_references_file)]) - return yaml.safe_load(references_content) - - def _update_and_write_references(self, references): - LOGGER.info("Writing the references file to '%s'", self.references_file) - references.setdefault("test-lifecycle", {})[self.project] = self.mongo_revision - with open(self.references_file, "w") as fstream: - yaml.safe_dump(references, fstream, default_flow_style=False) - - def _commit_locally(self, issue_key): - self.metadata_repo.git_add([self.relative_lifecycle_file]) - self.metadata_repo.git_add([self.relative_references_file]) - commit_message = "{} Update {}".format(issue_key, self.relative_lifecycle_file) - self.metadata_repo.commit_with_message(commit_message) - LOGGER.info("Change committed with message: %s", commit_message) - - def commit(self, nb_retries=10): - """Commit the test lifecycle tag file. - - Args: - nb_retries: the number of times the script will reset, fetch, recommit and retry when - the push fails. - """ - references = self._read_references() - # Verify we are ready to commit. - if not self._ready_for_commit(self.metadata_branch, references): - return True - - # Write the references file. - self._update_and_write_references(references) - - # Create the issue. - issue_key = self._create_issue() - - # Commit the change. - self._commit_locally(issue_key) - - # Push the change. - tries = 0 - pushed = False - upstream = "origin/{0}".format(self.metadata_branch) - while tries < nb_retries: - try: - self.metadata_repo.push_to_remote_branch("origin", self.metadata_branch) - pushed = True - break - except git.GitException: - LOGGER.warning("git push command failed, fetching and retrying.") - # Fetch upstream branch. - LOGGER.info("Fetching branch %s of %s", self.metadata_branch, - self.metadata_repo_url) - self.metadata_repo.fetch_remote_branch("origin", self.metadata_branch) - # Resetting the current branch to the origin branch - LOGGER.info("Resetting branch %s to %s", self.metadata_branch, upstream) - self.metadata_repo.git_reset(["--hard", upstream]) - # Rewrite the test lifecycle tags file - self.write() - # Rewrite the references file - references = self._read_references() - self._update_and_write_references(references) - # Checking if we can still commit - if not self._ready_for_commit(upstream, references): - LOGGER.warning("Aborting.") - break - # Committing - self._commit_locally(issue_key) - tries += 1 - if pushed: - self.jira_issue_creator.close_fix_issue(issue_key) - return True - else: - self.jira_issue_creator.close_wontfix_issue(issue_key) - return False - - -def make_lifecycle_tags_file(options, model_config): - """Create a LifecycleTagsFile based on the script options.""" - if options.commit: - if not options.jira_config: - LOGGER.error("JIRA configuration file is required when specifying --commit.") - return None - if not (options.git_user_name or options.git_user_email): - LOGGER.error("Git configuration parameters are required when specifying --commit.") - return None - jira_issue_creator = JiraIssueCreator(**utils.load_yaml_file(options.jira_config)) - git_config = (options.git_user_name, options.git_user_email) - else: - jira_issue_creator = None - git_config = None - - lifecycle_tags_file = LifecycleTagsFile( - options.project, - options.tag_file, - options.metadata_repo_url, - options.references_file, - jira_issue_creator, - git_config, - model_config) - - return lifecycle_tags_file - - -def main(): - """ - Utility for updating a resmoke.py tag file based on computing test failure rates from the - Evergreen API. - """ - - parser = optparse.OptionParser(description=textwrap.dedent(main.__doc__), - usage="Usage: %prog [options] [test1 test2 ...]") - - data_options = optparse.OptionGroup( - parser, - title="Data options", - description=("Options used to configure what historical test failure data to retrieve from" - " Evergreen.")) - parser.add_option_group(data_options) - - data_options.add_option( - "--project", dest="project", - metavar="<project-name>", - default=tf.TestHistory.DEFAULT_PROJECT, - help="The Evergreen project to analyze. Defaults to '%default'.") - - data_options.add_option( - "--tasks", dest="tasks", - metavar="<task1,task2,...>", - help=("The Evergreen tasks to analyze for tagging unreliable tests. If specified in" - " additional to having test positional arguments, then only tests that run under the" - " specified Evergreen tasks will be analyzed. If omitted, then the list of tasks" - " defaults to the non-excluded list of tasks from the specified" - " --evergreenProjectConfig file.")) - - data_options.add_option( - "--variants", dest="variants", - metavar="<variant1,variant2,...>", - default="", - help="The Evergreen build variants to analyze for tagging unreliable tests.") - - data_options.add_option( - "--distros", dest="distros", - metavar="<distro1,distro2,...>", - default="", - help="The Evergreen distros to analyze for tagging unreliable tests.") - - data_options.add_option( - "--evergreenProjectConfig", dest="evergreen_project_config", - metavar="<project-config-file>", - default="etc/evergreen.yml", - help=("The Evergreen project configuration file used to get the list of tasks if --tasks is" - " omitted. Defaults to '%default'.")) - - model_options = optparse.OptionGroup( - parser, - title="Model options", - description=("Options used to configure whether (test,), (test, task)," - " (test, task, variant), and (test, task, variant, distro) combinations are" - " considered unreliable.")) - parser.add_option_group(model_options) - - model_options.add_option( - "--reliableTestMinRuns", type="int", dest="reliable_test_min_runs", - metavar="<reliable-min-runs>", - default=DEFAULT_CONFIG.reliable_min_runs, - help=("The minimum number of test executions required for a test's failure rate to" - " determine whether the test is considered reliable. If a test has fewer than" - " <reliable-min-runs> executions, then it cannot be considered unreliable.")) - - model_options.add_option( - "--unreliableTestMinRuns", type="int", dest="unreliable_test_min_runs", - metavar="<unreliable-min-runs>", - default=DEFAULT_CONFIG.unreliable_min_runs, - help=("The minimum number of test executions required for a test's failure rate to" - " determine whether the test is considered unreliable. If a test has fewer than" - " <unreliable-min-runs> executions, then it cannot be considered unreliable.")) - - model_options.add_option( - "--testFailRates", type="float", nargs=2, dest="test_fail_rates", - metavar="<test-acceptable-fail-rate> <test-unacceptable-fail-rate>", - default=DEFAULT_CONFIG.test_fail_rates, - help=("Controls how readily a test is considered unreliable. Each failure rate must be a" - " number between 0 and 1 (inclusive) with" - " <test-unacceptable-fail-rate> >= <test-acceptable-fail-rate>. If a test fails no" - " more than <test-acceptable-fail-rate> in <reliable-days> time, then it is" - " considered reliable. Otherwise, if a test fails at least as much as" - " <test-unacceptable-fail-rate> in <test-unreliable-days> time, then it is considered" - " unreliable. Defaults to %default.")) - - model_options.add_option( - "--taskFailRates", type="float", nargs=2, dest="task_fail_rates", - metavar="<task-acceptable-fail-rate> <task-unacceptable-fail-rate>", - default=DEFAULT_CONFIG.task_fail_rates, - help=("Controls how readily a (test, task) combination is considered unreliable. Each" - " failure rate must be a number between 0 and 1 (inclusive) with" - " <task-unacceptable-fail-rate> >= <task-acceptable-fail-rate>. If a (test, task)" - " combination fails no more than <task-acceptable-fail-rate> in <reliable-days> time," - " then it is considered reliable. Otherwise, if a test fails at least as much as" - " <task-unacceptable-fail-rate> in <unreliable-days> time, then it is considered" - " unreliable. Defaults to %default.")) - - model_options.add_option( - "--variantFailRates", type="float", nargs=2, dest="variant_fail_rates", - metavar="<variant-acceptable-fail-rate> <variant-unacceptable-fail-rate>", - default=DEFAULT_CONFIG.variant_fail_rates, - help=("Controls how readily a (test, task, variant) combination is considered unreliable." - " Each failure rate must be a number between 0 and 1 (inclusive) with" - " <variant-unacceptable-fail-rate> >= <variant-acceptable-fail-rate>. If a" - " (test, task, variant) combination fails no more than <variant-acceptable-fail-rate>" - " in <reliable-days> time, then it is considered reliable. Otherwise, if a test fails" - " at least as much as <variant-unacceptable-fail-rate> in <unreliable-days> time," - " then it is considered unreliable. Defaults to %default.")) - - model_options.add_option( - "--distroFailRates", type="float", nargs=2, dest="distro_fail_rates", - metavar="<distro-acceptable-fail-rate> <distro-unacceptable-fail-rate>", - default=DEFAULT_CONFIG.distro_fail_rates, - help=("Controls how readily a (test, task, variant, distro) combination is considered" - " unreliable. Each failure rate must be a number between 0 and 1 (inclusive) with" - " <distro-unacceptable-fail-rate> >= <distro-acceptable-fail-rate>. If a" - " (test, task, variant, distro) combination fails no more than" - " <distro-acceptable-fail-rate> in <reliable-days> time, then it is considered" - " reliable. Otherwise, if a test fails at least as much as" - " <distro-unacceptable-fail-rate> in <unreliable-days> time, then it is considered" - " unreliable. Defaults to %default.")) - - model_options.add_option( - "--reliableDays", type="int", dest="reliable_days", - metavar="<ndays>", - default=DEFAULT_CONFIG.reliable_time_period.days, - help=("The time period to analyze when determining if a test has become reliable. Defaults" - " to %default day(s).")) - - model_options.add_option( - "--unreliableDays", type="int", dest="unreliable_days", - metavar="<ndays>", - default=DEFAULT_CONFIG.unreliable_time_period.days, - help=("The time period to analyze when determining if a test has become unreliable." - " Defaults to %default day(s).")) - - parser.add_option("--resmokeTagFile", dest="tag_file", - metavar="<tagfile>", - default="etc/test_lifecycle.yml", - help=("The resmoke.py tag file to update. If --metadataRepo is specified, it" - " is the relative path in the metadata repository, otherwise it can be" - " an absolute path or a relative path from the current directory." - " Defaults to '%default'.")) - - parser.add_option("--metadataRepo", dest="metadata_repo_url", - metavar="<metadata-repo-url>", - default="git@github.com:mongodb/mongo-test-metadata.git", - help=("The repository that contains the lifecycle file. " - "It will be cloned in the current working directory. " - "Defaults to '%default'.")) - - parser.add_option("--referencesFile", dest="references_file", - metavar="<references-file>", - default="references.yml", - help=("The YAML file in the metadata repository that contains the revision " - "mappings. Defaults to '%default'.")) - - parser.add_option("--requestBatchSize", type="int", dest="batch_size", - metavar="<batch-size>", - default=100, - help=("The maximum number of tests to query the Evergreen API for in a single" - " request. A higher value for this option will reduce the number of" - " roundtrips between this client and Evergreen. Defaults to %default.")) - - parser.add_option("--requestThreads", type="int", dest="num_request_threads", - metavar="<num-request-threads>", - default=DEFAULT_NUM_THREADS, - help=("The maximum number of threads to use when querying the Evergreen API." - " Batches are processed sequentially but the test history is queried in" - " parallel for each task. Defaults to %default.")) - - commit_options = optparse.OptionGroup( - parser, - title="Commit options", - description=("Options used to configure whether and how to commit the updated test" - " lifecycle tags.")) - parser.add_option_group(commit_options) - - commit_options.add_option( - "--commit", action="store_true", dest="commit", - default=False, - help="Indicates that the updated tag file should be committed.") - - commit_options.add_option( - "--jiraConfig", dest="jira_config", - metavar="<jira-config>", - default=None, - help=("The YAML file containing the JIRA access configuration ('user', 'password'," - "'server').")) - - commit_options.add_option( - "--gitUserName", dest="git_user_name", - metavar="<git-user-name>", - default="Test Lifecycle", - help=("The git user name that will be set before committing to the metadata repository." - " Defaults to '%default'.")) - - commit_options.add_option( - "--gitUserEmail", dest="git_user_email", - metavar="<git-user-email>", - default="buil+testlifecycle@mongodb.com", - help=("The git user email address that will be set before committing to the metadata" - " repository. Defaults to '%default'.")) - - logging_options = optparse.OptionGroup( - parser, - title="Logging options", - description="Options used to configure the logging output of the script.") - parser.add_option_group(logging_options) - - logging_options.add_option( - "--logLevel", dest="log_level", - metavar="<log-level>", - choices=["DEBUG", "INFO", "WARNING", "ERROR"], - default="INFO", - help=("The log level. Accepted values are: DEBUG, INFO, WARNING and ERROR." - " Defaults to '%default'.")) - - logging_options.add_option( - "--logFile", dest="log_file", - metavar="<log-file>", - default=None, - help="The destination file for the logs output. Defaults to the standard output.") - - (options, tests) = parser.parse_args() - - if options.distros: - warnings.warn( - ("Until https://jira.mongodb.org/browse/EVG-1665 is implemented, distro information" - " isn't returned by the Evergreen API. This option will therefore be ignored."), - RuntimeWarning) - - logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", - level=options.log_level, filename=options.log_file) - evg_conf = ci_evergreen.EvergreenProjectConfig(options.evergreen_project_config) - use_test_tasks_membership = False - - tasks = options.tasks.split(",") if options.tasks else [] - if not tasks: - # If no tasks are specified, then the list of tasks is all. - tasks = evg_conf.lifecycle_task_names - use_test_tasks_membership = True - - variants = options.variants.split(",") if options.variants else [] - - distros = options.distros.split(",") if options.distros else [] - - config = Config( - test_fail_rates=Rates(*options.test_fail_rates), - task_fail_rates=Rates(*options.task_fail_rates), - variant_fail_rates=Rates(*options.variant_fail_rates), - distro_fail_rates=Rates(*options.distro_fail_rates), - reliable_min_runs=options.reliable_test_min_runs, - reliable_time_period=datetime.timedelta(days=options.reliable_days), - unreliable_min_runs=options.unreliable_test_min_runs, - unreliable_time_period=datetime.timedelta(days=options.unreliable_days)) - validate_config(config) - - lifecycle_tags_file = make_lifecycle_tags_file(options, config) - if not lifecycle_tags_file: - sys.exit(1) - - test_tasks_membership = get_test_tasks_membership(evg_conf) - # If no tests are specified then the list of tests is generated from the list of tasks. - if not tests: - tests = get_tests_from_tasks(tasks, test_tasks_membership) - if not options.tasks: - use_test_tasks_membership = True - - commit_first, commit_last = git_commit_range_since("{}.days".format(options.unreliable_days)) - commit_prior = git_commit_prior(commit_first) - - # For efficiency purposes, group the tests and process in batches of batch_size. - test_groups = create_batch_groups(create_test_groups(tests), options.batch_size) - - test_history_source = TestHistorySource(options.project, variants, distros, - commit_prior, commit_last, - options.num_request_threads) - - LOGGER.info("Updating the tags") - nb_groups = len(test_groups) - count = 0 - for tests in test_groups: - LOGGER.info("Progress: %s %%", 100 * count / nb_groups) - count += 1 - # Find all associated tasks for the test_group if tasks or tests were not specified. - if use_test_tasks_membership: - tasks_set = set() - for test in tests: - tasks_set = tasks_set.union(test_tasks_membership[test]) - tasks = list(tasks_set) - if not tasks: - LOGGER.warning("No tasks found for tests %s, skipping this group.", tests) - continue - history_data = test_history_source.get_history_data(tests, tasks) - if not history_data: - continue - report = tf.Report(history_data) - update_tags(lifecycle_tags_file.changelog_lifecycle, config, report, tests) - - # Remove tags that are no longer relevant - clean_up_tags(lifecycle_tags_file.changelog_lifecycle, evg_conf) - - # We write the 'lifecycle' tag configuration to the 'options.lifecycle_file' file only if there - # have been changes to the tags. In particular, we avoid modifying the file when only the header - # comment for the YAML file would change. - if lifecycle_tags_file.is_modified(): - lifecycle_tags_file.write() - - if options.commit: - commit_ok = lifecycle_tags_file.commit() - if not commit_ok: - sys.exit(1) - else: - LOGGER.info("The tags have not been modified.") - - -if __name__ == "__main__": - main() |