author     Max Hirschhorn <max.hirschhorn@mongodb.com>    2017-07-17 11:09:34 -0400
committer  Max Hirschhorn <max.hirschhorn@mongodb.com>    2017-07-17 11:09:34 -0400
commit     58a3909a3f678dec7bd94bfb38f96756c970113e (patch)
tree       96675ca63ab47a93aca816909805264e6667ea7b /buildscripts
parent     27cf9fd7b31f043af913da135385367126f5691b (diff)
download   mongo-58a3909a3f678dec7bd94bfb38f96756c970113e.tar.gz
SERVER-29642 SERVER-29643 Add Python tests for test lifecycle scripts.
For test_failures.py:
* Replaces HistoryReport with a TestHistory class that has
get_history_by_revision() and get_history_by_date() methods. They
both return a list of ReportEntry tuples that can be used to
construct a Report instance.
* Adds Python unit test cases for the Report and ReportEntry classes.
* Creates a Wildcard class as a separate concept from the Missing class.
* Enables --sinceDate and --untilDate with a warning that the script
may not return a complete result set.
* Adds support for running the script with Python 3.
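As a rough illustration of the new API (a minimal sketch assembled from the signatures visible in the diff below; the revision hashes are placeholders):

    from buildscripts.test_failures import Report, TestHistory

    # Query the Evergreen /test_history endpoint for a range of revisions.
    # Both revision hashes below are placeholders.
    test_history = TestHistory(project="mongodb-mongo-master",
                               tests=["jstests/core/all.js"],
                               tasks=["jsCore_WT"])
    entries = test_history.get_history_by_revision(start_revision="<rev1>",
                                                   end_revision="<rev2>")

    # get_history_by_revision() returns ReportEntry tuples, which can be used
    # to construct a Report instance and summarize failure rates, e.g. weekly.
    report = Report(entries)
    for entry in report.summarize_by(Report.TEST_TASK_VARIANT,
                                     time_period=Report.WEEKLY):
        print("{0.test} {0.task} {0.variant} {0.fail_rate:0.2%}".format(entry))

From the command line, the equivalent query is roughly "python buildscripts/test_failures.py --tasks jsCore_WT --sinceRevision <rev1> --untilRevision <rev2> jstests/core/all.js", per the optparse options added in main().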
For update_test_lifecycle.py:
* Introduces Config namedtuple to represent the test lifecycle model.
* Adds Python unit test cases for the update_tags() function.
* Takes advantage of the partial grouping so that computing summaries
for (test, task, variant), (test, task), and (test,) combinations
does not require re-processing the entire result set.
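The partial grouping can be sketched with the Report constants from the diff: because summarize_by() returns ReportEntry tuples, a coarser summary can be built from a finer one instead of from the raw result set. update_test_lifecycle.py itself is not shown in this excerpt, so whether it chains Report instances exactly this way is an assumption:

    from buildscripts.test_failures import Report

    report = Report(entries)  # 'entries' as obtained in the earlier sketch

    # Each coarser summary is computed from the previous, already-grouped
    # level rather than by re-processing every raw test execution.
    by_variant = report.summarize_by(Report.TEST_TASK_VARIANT)
    by_task = Report(by_variant).summarize_by(Report.TEST_TASK)
    by_test = Report(by_task).summarize_by(Report.TEST)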
Diffstat (limited to 'buildscripts')
-rw-r--r--  buildscripts/ciconfig/tags.py                       39
-rw-r--r--  buildscripts/resmokelib/selector.py                  2
-rwxr-xr-x  buildscripts/test_failures.py                     1172
-rw-r--r--  buildscripts/tests/ciconfig/test_tags.py             6
-rw-r--r--  buildscripts/tests/test_test_failures.py           676
-rw-r--r--  buildscripts/tests/test_update_test_lifecycle.py   760
-rwxr-xr-x  buildscripts/update_test_lifecycle.py              525
7 files changed, 2432 insertions, 748 deletions
diff --git a/buildscripts/ciconfig/tags.py b/buildscripts/ciconfig/tags.py index dbd090bc9a8..dfab58832fa 100644 --- a/buildscripts/ciconfig/tags.py +++ b/buildscripts/ciconfig/tags.py @@ -1,4 +1,5 @@ """Module to access and modify tag configuration files used by resmoke.""" + from __future__ import absolute_import from __future__ import print_function @@ -11,7 +12,7 @@ import yaml # Setup to preserve order in yaml.dump, see https://stackoverflow.com/a/8661021 def _represent_dict_order(self, data): - return self.represent_mapping('tag:yaml.org,2002:map', data.items()) + return self.represent_mapping("tag:yaml.org,2002:map", data.items()) yaml.add_representer(collections.OrderedDict, _represent_dict_order) # End setup @@ -20,17 +21,38 @@ yaml.add_representer(collections.OrderedDict, _represent_dict_order) class TagsConfig(object): """Represent a test tag configuration file.""" - def __init__(self, filename, cmp_func=None): - """Initialize a TagsConfig from a file. + def __init__(self, raw, cmp_func=None): + """Initialize a TagsConfig from a dict representing the associations between tests and tags. 'cmp_func' can be used to specify a comparison function that will be used when sorting tags. """ - with open(filename, "r") as fstream: - self.raw = yaml.safe_load(fstream) + + self.raw = raw self._conf = self.raw["selector"] self._conf_copy = copy.deepcopy(self._conf) self._cmp_func = cmp_func + @classmethod + def from_file(cls, filename, **kwargs): + """Return a TagsConfig from a file containing the associations between tests and tags. + + See TagsConfig.__init__() for the keyword arguments that can be specified. + """ + + with open(filename, "r") as fstream: + raw = yaml.safe_load(fstream) + + return cls(raw, **kwargs) + + @classmethod + def from_dict(cls, raw, **kwargs): + """Return a TagsConfig from a dict representing the associations between tests and tags. + + See TagsConfig.__init__() for the keyword arguments that can be specified. + """ + + return cls(copy.deepcopy(raw), **kwargs) + def get_test_kinds(self): """List the test kinds.""" return self._conf.keys() @@ -75,9 +97,14 @@ class TagsConfig(object): """ with open(filename, "w") as fstream: if preamble: - print(textwrap.fill(preamble, width=100, initial_indent="# ", + print(textwrap.fill(preamble, + width=100, + initial_indent="# ", subsequent_indent="# "), file=fstream) + + # We use yaml.safe_dump() in order avoid having strings being written to the file as + # "!!python/unicode ..." and instead have them written as plain 'str' instances. yaml.safe_dump(self.raw, fstream, default_flow_style=False) diff --git a/buildscripts/resmokelib/selector.py b/buildscripts/resmokelib/selector.py index a3448116efb..e1cec945603 100644 --- a/buildscripts/resmokelib/selector.py +++ b/buildscripts/resmokelib/selector.py @@ -28,7 +28,7 @@ def _parse_tag_file(test_kind): a list of tags, i.e., {'file1.js': ['tag1', 'tag2'], 'file2.js': ['tag2', 'tag3']} """ if config.TAG_FILE: - tags_conf = _tags.TagsConfig(config.TAG_FILE) + tags_conf = _tags.TagsConfig.from_file(config.TAG_FILE) tagged_roots = tags_conf.get_test_patterns(test_kind) else: tagged_roots = [] diff --git a/buildscripts/test_failures.py b/buildscripts/test_failures.py index 72a9785911c..3a874c55fc4 100755 --- a/buildscripts/test_failures.py +++ b/buildscripts/test_failures.py @@ -1,9 +1,9 @@ #!/usr/bin/env python -"""Test Failures - -Compute Test failures rates from Evergreen API for specified tests, tasks, etc. """ +Utility for computing test failure rates from the Evergreen API. 
+""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -14,660 +14,710 @@ import itertools import operator import optparse import os -import urlparse +import sys +import warnings + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse import requests import yaml -_API_SERVER_DEFAULT = "http://evergreen-api.mongodb.com:8080" -_REST_PREFIX = "/rest/v1" -_PROJECT = "mongodb-mongo-master" +if sys.version_info[0] == 2: + _STRING_TYPES = (basestring,) +else: + _STRING_TYPES = (str,) + + +_ReportEntry = collections.namedtuple("_ReportEntry", [ + "test", + "task", + "variant", + "distro", + "start_date", + "end_date", + "num_pass", + "num_fail", +]) + + +class Wildcard(object): + """ + A class for representing there are multiple values associated with a particular component. + """ -class _Missing(object): - """Class to support missing fields from the report.""" def __init__(self, kind): self._kind = kind def __eq__(self, other): - if not isinstance(other, _Missing): + if not isinstance(other, Wildcard): return NotImplemented + return self._kind == other._kind def __ne__(self, other): return not self == other - def __str__(self): - return "<_Missing: {}>".format(self._kind) - -_ALL_TEST = _Missing("test") -_ALL_TASK = _Missing("task") -_ALL_VARIANT = _Missing("variant") -_ALL_DISTRO = _Missing("distro") -_ALL_DATE = _Missing("date") + def __hash__(self): + return hash(self._kind) + def __str__(self): + return "<multiple {}>".format(self._kind) -def read_evg_config(): - # Expand out evergreen config file possibilities - file_list = [ - "./.evergreen.yml", - os.path.expanduser("~/.evergreen.yml"), - os.path.expanduser("~/cli_bin/.evergreen.yml")] - for filename in file_list: - if os.path.isfile(filename): - with open(filename, "r") as fstream: - return yaml.load(fstream) - return None +class ReportEntry(_ReportEntry): + """ + Holds information about Evergreen test executions. + """ + _MULTIPLE_TESTS = Wildcard("tests") + _MULTIPLE_TASKS = Wildcard("tasks") + _MULTIPLE_VARIANTS = Wildcard("variants") + _MULTIPLE_DISTROS = Wildcard("distros") -def datestr_to_date(date_str): - """Returns datetime from a date string in the format of YYYY-MM-DD. - Note that any time in the date string is stripped off.""" - return datetime.datetime.strptime(date_str.split("T")[0], "%Y-%m-%d").date() + _MIN_DATE = datetime.date(datetime.MINYEAR, 1, 1) + _MAX_DATE = datetime.date(datetime.MAXYEAR, 12, 31) + @property + def fail_rate(self): + """ + Returns the fraction of test failures to total number of test executions. -def date_to_datestr(date_time): - """Returns date string in the format of YYYY-MM-DD from a datetime.""" - return date_time.strftime("%Y-%m-%d") + If a test hasn't been run at all, then we still say it has a failure rate of 0% for + convenience when applying thresholds. + """ + if self.num_pass == self.num_fail == 0: + return 0.0 + return self.num_fail / (self.num_pass + self.num_fail) -class ViewReport(object): - """"Class to support any views into the HistoryReport.""" + def period_start_date(self, start_date, period_size): + """ + Returns a datetime.date() instance corresponding to the beginning of the time period + containing 'self.start_date'. 
+ """ - DetailGroup = collections.namedtuple( - "DetailGroup", - "test task variant distro start_date end_date") + if not isinstance(start_date, datetime.date): + raise TypeError("'start_date' argument must be a date") - Summary = collections.namedtuple( - "Summary", - "test task variant distro start_date end_date fail_rate num_fail num_pass") + if not isinstance(period_size, datetime.timedelta): + raise TypeError("'period_size' argument must a datetime.timedelta instance") + elif period_size.days <= 0: + raise ValueError("'period_size' argument must be a positive number of days") + elif period_size - datetime.timedelta(days=period_size.days) > datetime.timedelta(): + raise ValueError("'period_size' argument must be an integral number of days") - SummaryGroup = collections.namedtuple( - "SummaryGroup", - "test task variant distro start_date end_date") + # 'start_day_offset' is the number of days 'self.start_date' is from the start of the time + # period. + start_day_offset = (self.start_date - start_date).days % period_size.days + return self.start_date - datetime.timedelta(days=start_day_offset) - _MIN_DATE = "{0:04}-01-01".format(datetime.MINYEAR) - _MAX_DATE = "{}-12-31".format(datetime.MAXYEAR) - _group_by = ["test", "task", "variant", "distro"] - _start_days = ["first_day", "sunday", "monday"] + def week_start_date(self, start_day_of_week): + """ + Returns a datetime.date() instance corresponding to the beginning of the week containing + 'self.start_date'. The first day of the week can be specified as the strings "Sunday" or + "Monday", as well as an arbitrary datetime.date() instance. + """ - def __init__(self, - history_report=[], - group_period=7, - start_day_of_week="first_day"): - self._report = history_report - - self.start_day_of_week = start_day_of_week - # Using 'first_day' means the a group report will start on the day of the - # week from the earliest date in the test history. - if self.start_day_of_week not in self._start_days: - raise ValueError( - "Invalid start_day_of_week specified '{}'".format(self.start_day_of_week)) - - # Set start and end dates of report and create the group_periods - self.group_period = group_period - if self._report: - start_dts = [r.start_dt for r in self._report] - self.start_dt = min(start_dts) - self.end_dt = max(start_dts) - self._group_periods = self._create_group_periods() - else: - self.start_dt = datestr_to_date(self._MIN_DATE) - self.end_dt = datestr_to_date(self._MAX_DATE) - self._group_periods = [] - - self._summary_report = {} - self._update_summary_report() - - # Create the lists of tests, tasks, variants & distros. - self._all_tests = list(set([r.test for r in self._report])) - self._all_tasks = list(set([r.task for r in self._report])) - self._all_variants = list(set([r.variant for r in self._report])) - self._all_distros = list(set([str(r.distro) for r in self._report])) - - def fail_rate(self, num_fail, num_pass): - """Computes fails rate, return 0 if no tests have run.""" - if num_pass == num_fail == 0: - return 0.0 - return num_fail / (num_pass + num_fail) - - def _group_dates(self, test_dt, from_end): - """Returns start_date and end_date for the group_period, which are are included - in the group_period.""" - # Computing the start and end dates for a period may have special cases for the - # first and last periods, only if the self.group_period is 7, which represents weekly. 
- # Since the first period may not start on the weekday for start_day_of_week - # (if it's 'sunday' or 'monday'), that period may be less than the - # period days. Similarly the last period will always end on end_dt. - # Example, if the start_date falls on a Wednesday, then all group starting - # dates are offset from that, if start_day_of_week is 'first_day'. - - # Use 'from_end=True' to produce group_dates for analyzing the report from the end. - - # The start date for a group_period is one of the following: - # - start_dt (the earliest date in the report) - # - The day specified in start_day_of_week - # - An offset from start_dt, if start_day_of_week is 'first_day' - # The ending date for a group_period is one of the following: - # - end_dt (the latest date in the report) - # - The mod of difference of weekday of test_dt and the start_weekday - - if test_dt < self.start_dt or test_dt > self.end_dt: - raise ValueError("The test_dt {} must be >= {} and <= {}".format( - test_dt, self.start_dt, self.end_dt)) - - if self.group_period == 1: - return (test_dt, test_dt) - - # Return group_dates relative to the end_dt. The start_day_of_week is not - # used in computing the dates. - if from_end: - group_end_dt = min( - self.end_dt, - test_dt + datetime.timedelta( - days=((self.end_dt - test_dt).days % self.group_period))) - group_st_dt = max( - self.start_dt, - group_end_dt - datetime.timedelta(days=self.group_period - 1)) - return (group_st_dt, group_end_dt) - - # When the self.group_period is 7, we support a start_day_of_week. - if self.group_period == 7: - if self.start_day_of_week == "sunday": + if isinstance(start_day_of_week, _STRING_TYPES): + start_day_of_week = start_day_of_week.lower() + if start_day_of_week == "sunday": start_weekday = 6 - elif self.start_day_of_week == "monday": + elif start_day_of_week == "monday": start_weekday = 0 - elif self.start_day_of_week == "first_day": - start_weekday = self.start_dt.weekday() - # 'start_day_offset' is the number of days 'test_dt' is from the start of the week. - start_day_offset = (test_dt.weekday() - start_weekday) % 7 - else: - start_day_offset = (test_dt - self.start_dt).days % self.group_period - - group_start_dt = test_dt - datetime.timedelta(days=start_day_offset) - group_end_dt = group_start_dt + datetime.timedelta(days=self.group_period - 1) - return (max(group_start_dt, self.start_dt), min(group_end_dt, self.end_dt)) - - def _select_attribute(self, value, attributes): - """Returns true if attribute value list is None or a value matches from the list of - attribute values.""" - return not attributes or value in attributes - - def _create_group_periods(self): - """Discover all group_periods.""" - group_periods = set() - test_dt = self.start_dt - end_dt = self.end_dt - while test_dt <= end_dt: - # We will summarize for time periods from start-to-end and end-to-start. 
- group_periods.add(self._group_dates(test_dt, False)) - group_periods.add(self._group_dates(test_dt, True)) - test_dt += datetime.timedelta(days=1) - return group_periods - - def _update_summary_record(self, report_key, status_key): - """Increments the self._summary_report report_key's status_key & fail_rate.""" - summary = self._summary_report.setdefault( - report_key, - {"num_fail": 0, "num_pass": 0, "fail_rate": 0.0}) - summary[status_key] += 1 - summary["fail_rate"] = self.fail_rate(summary["num_fail"], summary["num_pass"]) - - def _update_summary_report(self): - """Process self._report and updates the self._summary_report.""" - - for record in self._report: - if record.test_status == "pass": - status_key = "num_pass" - else: - status_key = "num_fail" - # Update each combination summary: - # _total_, test, test/task, test/task/variant, test/task/variant/distro - for combo in ["_total_", "test", "task", "variant", "distro"]: - test = record.test if combo != "_total_" else _ALL_TEST - task = record.task if combo in ["task", "variant", "distro"] else _ALL_TASK - variant = record.variant if combo in ["variant", "distro"] else _ALL_VARIANT - distro = record.distro if combo == "distro" else _ALL_DISTRO - # Update the summary for matching group periods. - for (group_start_dt, group_end_dt) in self._group_periods: - if record.start_dt >= group_start_dt and record.start_dt <= group_end_dt: - report_key = self.SummaryGroup( - test=test, - task=task, - variant=variant, - distro=distro, - start_date=date_to_datestr(group_start_dt), - end_date=date_to_datestr(group_end_dt)) - self._update_summary_record(report_key, status_key) - # Update the summary for the entire date period. - report_key = self.SummaryGroup( - test=test, - task=task, - variant=variant, - distro=distro, - start_date=_ALL_DATE, - end_date=_ALL_DATE) - self._update_summary_record(report_key, status_key) - - def _filter_reports(self, - start_date=_MIN_DATE, - end_date=_MAX_DATE, - tests=None, - tasks=None, - variants=None, - distros=None): - """Returns filter of self._report.""" - return [r for r in self._report - if r.start_dt >= datestr_to_date(start_date) and - r.start_dt <= datestr_to_date(end_date) and - self._select_attribute(r.test, tests) and - self._select_attribute(r.task, tasks) and - self._select_attribute(r.variant, variants) and - (r.distro is None or self._select_attribute(r.distro, distros))] - - def _detail_report(self, report): - """Returns the detailed report, which is a dictionary in the form of key tuples, - '(test, task, variant, distro, start_date, end_date)', with a value of - {num_pass, num_fail}.""" - detail_report = {} - for record in report: - group_start_dt, group_end_dt = self._group_dates(record.start_dt, False) - detail_group = self.DetailGroup( - test=record.test, - task=record.task, - variant=record.variant, - distro=record.distro, - start_date=group_start_dt, - end_date=group_end_dt) - detail_report.setdefault(detail_group, {"num_pass": 0, "num_fail": 0}) - if record.test_status == "pass": - status_key = "num_pass" else: - status_key = "num_fail" - detail_report[detail_group][status_key] += 1 - return detail_report - - def last_period(self): - """Returns start_date and end_date for the last period in the report.""" - start_dt = max(self.start_dt, - self.end_dt - datetime.timedelta(days=self.group_period - 1)) - return date_to_datestr(start_dt), date_to_datestr(self.end_dt) - - def view_detail(self, tests=None, tasks=None, variants=None, distros=None): - """Provides a detailed view of specified 
parameters. - The parameters are used as a filter, so an unspecified parameter provides - more results. - Returns the view as a list of namedtuples: - (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass) + raise ValueError( + "'start_day_of_week' can only be the string \"sunday\" or \"monday\"") + elif isinstance(start_day_of_week, datetime.date): + start_weekday = start_day_of_week.weekday() + else: + raise TypeError("'start_day_of_week' argument must be a string or a date") + + # 'start_day_offset' is the number of days 'self.start_date' is from the start of the week. + start_day_offset = (self.start_date.weekday() - start_weekday) % 7 + return self.start_date - datetime.timedelta(days=start_day_offset) + + @classmethod + def sum(cls, entries): + """ + Returns a single ReportEntry() instance corresponding to all test executions represented by + 'entries'. + """ + + test = set() + task = set() + variant = set() + distro = set() + start_date = cls._MAX_DATE + end_date = cls._MIN_DATE + num_pass = 0 + num_fail = 0 + + for entry in entries: + test.add(entry.test) + task.add(entry.task) + variant.add(entry.variant) + distro.add(entry.distro) + start_date = min(entry.start_date, start_date) + end_date = max(entry.end_date, end_date) + num_pass += entry.num_pass + num_fail += entry.num_fail + + test = next(iter(test)) if len(test) == 1 else ReportEntry._MULTIPLE_TESTS + task = next(iter(task)) if len(task) == 1 else ReportEntry._MULTIPLE_TASKS + variant = next(iter(variant)) if len(variant) == 1 else ReportEntry._MULTIPLE_VARIANTS + distro = next(iter(distro)) if len(distro) == 1 else ReportEntry._MULTIPLE_DISTROS + + return ReportEntry(test=test, + task=task, + variant=variant, + distro=distro, + start_date=start_date, + end_date=end_date, + num_pass=num_pass, + num_fail=num_fail) + + +class Report(object): + """ + A class for generating summarizations about Evergreen test executions. + """ + + TEST = ("test",) + TEST_TASK = ("test", "task") + TEST_TASK_VARIANT = ("test", "task", "variant") + TEST_TASK_VARIANT_DISTRO = ("test", "task", "variant", "distro") + + DAILY = "daily" + WEEKLY = "weekly" + + SUNDAY = "sunday" + MONDAY = "monday" + FIRST_DAY = "first-day" + + def __init__(self, entries): + """ + Initializes the Report instance. """ - filter_results = self._filter_reports( - tests=tests, tasks=tasks, variants=variants, distros=distros) - - view_report = [] - detail_report = self._detail_report(filter_results) - for detail_group in detail_report: - view_report.append(self.Summary( - test=detail_group.test, - task=detail_group.task, - variant=detail_group.variant, - distro=detail_group.distro, - start_date=detail_group.start_date, - end_date=detail_group.end_date, - fail_rate=self.fail_rate( - detail_report[detail_group]["num_fail"], - detail_report[detail_group]["num_pass"]), - num_fail=detail_report[detail_group]["num_fail"], - num_pass=detail_report[detail_group]["num_pass"])) - return view_report - - def view_summary(self, - group_on=None, - start_date=_ALL_DATE, - end_date=_ALL_DATE): - """Provides a summary view report, based on the group_on list. If group_on is empty, then - a total summary report is provided. The start_date and end_date must match the - group periods for a result to be returned. 
- Returns the view as a list of namedtuples: - (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass) + if not isinstance(entries, list): + # It is possible that 'entries' is a generator function, so we convert it to a list in + # order to be able to iterate it multiple times. + entries = list(entries) + + self.start_date = min(entry.start_date for entry in entries) + self.end_date = max(entry.end_date for entry in entries) + + self._entries = entries + + @property + def raw_data(self): + """ + Returns a copy of the list of ReportEntry instances underlying the report. + """ + + return self._entries[:] + + def summarize_by(self, components, time_period=None, start_day_of_week=FIRST_DAY): + """ + Returns a list of ReportEntry instances grouped by + + 'components' if 'time_period' is None, + + 'components' followed by Entry.start_date if 'time_period' is "daily", + + 'components' followed by Entry.week_start_date(start_day_of_week) if 'time_period' is + "weekly". See Entry.week_start_date() for more details on the possible values for + 'start_day_of_week'. + + 'components' followed by Entry.period_start_date(self.start_date, time_period) if + 'time_period' is a datetime.timedelta instance. """ - group_on = group_on if group_on is not None else [] - - for group_name in group_on: - if group_name not in self._group_by: - raise ValueError("Invalid group '{}' specified, the supported groups are {}" - .format(group_name, self._group_by)) - - tests = self._all_tests if "test" in group_on else [_ALL_TEST] - tasks = self._all_tasks if "task" in group_on else [_ALL_TASK] - variants = self._all_variants if "variant" in group_on else [_ALL_VARIANT] - distros = self._all_distros if "distro" in group_on else [_ALL_DISTRO] - - group_lists = [tests, tasks, variants, distros] - group_combos = list(itertools.product(*group_lists)) - view_report = [] - for group in group_combos: - test_filter = group[0] if group[0] else _ALL_TEST - task_filter = group[1] if group[1] else _ALL_TASK - variant_filter = group[2] if group[2] else _ALL_VARIANT - distro_filter = group[3] if group[3] else _ALL_DISTRO - report_key = self.SummaryGroup( - test=test_filter, - task=task_filter, - variant=variant_filter, - distro=distro_filter, - start_date=start_date, - end_date=end_date) - if report_key in self._summary_report: - view_report.append(self.Summary( - test=test_filter if test_filter != _ALL_TEST else None, - task=task_filter if task_filter != _ALL_TASK else None, - variant=variant_filter if variant_filter != _ALL_VARIANT else None, - distro=distro_filter if distro_filter != _ALL_DISTRO else None, - start_date=start_date if start_date != _ALL_DATE else None, - end_date=end_date if end_date != _ALL_DATE else None, - fail_rate=self._summary_report[report_key]["fail_rate"], - num_fail=self._summary_report[report_key]["num_fail"], - num_pass=self._summary_report[report_key]["num_pass"])) - return view_report - - -class HistoryReport(object): - """The HistoryReport class interacts with the Evergreen REST API to generate a history_report. - The history_report is meant to be viewed from the ViewReport class methods.""" - - HistoryReportTuple = collections.namedtuple( - "Report", "test task variant distro start_dt test_status") - - # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate options are exposed. 
- # period_types = ["date", "revision"] - period_types = ["revision"] + if not isinstance(components, (list, tuple)): + raise TypeError("'components' argument must be a list or tuple") + + for component in components: + if not isinstance(component, _STRING_TYPES): + raise TypeError("Each element of 'components' argument must be a string") + elif component not in ReportEntry._fields: + raise ValueError( + "Each element of 'components' argument must be one of {}".format( + ReportEntry._fields)) + + group_by = [operator.attrgetter(component) for component in components] + + if start_day_of_week == self.FIRST_DAY: + start_day_of_week = self.start_date + + period_size = None + if isinstance(time_period, _STRING_TYPES): + if time_period == self.DAILY: + group_by.append(operator.attrgetter("start_date")) + period_size = datetime.timedelta(days=1) + elif time_period == self.WEEKLY: + group_by.append(lambda entry: entry.week_start_date(start_day_of_week)) + period_size = datetime.timedelta(days=7) + else: + raise ValueError( + "'time_period' argument can only be the string \"{}\" or \"{}\"".format( + self.DAILY, self.WEEKLY)) + elif isinstance(time_period, datetime.timedelta): + group_by.append(lambda entry: entry.period_start_date(self.start_date, time_period)) + period_size = time_period + elif time_period is not None: + raise TypeError(("'time_period' argument must be a string or a datetime.timedelta" + " instance")) + + def key_func(entry): + """ + Assigns a key for sorting and grouping ReportEntry instances based on the combination of + options summarize_by() was called with. + """ + + return [func(entry) for func in group_by] + + sorted_entries = sorted(self._entries, key=key_func) + grouped_entries = itertools.groupby(sorted_entries, key=key_func) + summed_entries = [ReportEntry.sum(group) for (_key, group) in grouped_entries] + + if period_size is not None and period_size.days > 1: + # Overwrite the 'start_date' and 'end_date' attributes so that they correspond to the + # beginning and end of the period, respectively. If the beginning or end of the week + # falls outside the range [self.start_date, self.end_date], then the new 'start_date' + # and 'end_date' attributes are clamped to that range. + for (i, summed_entry) in enumerate(summed_entries): + if time_period == self.WEEKLY: + period_start_date = summed_entry.week_start_date(start_day_of_week) + else: + period_start_date = summed_entry.period_start_date(self.start_date, period_size) + + period_end_date = period_start_date + period_size - datetime.timedelta(days=1) + start_date = max(period_start_date, self.start_date) + end_date = min(period_end_date, self.end_date) + summed_entries[i] = summed_entry._replace(start_date=start_date, end_date=end_date) + + return summed_entries + + +class Missing(object): + """ + A class for representing the value associated with a particular component is unknown. + """ + + def __init__(self, kind): + self._kind = kind + + def __eq__(self, other): + if not isinstance(other, Missing): + return NotImplemented + + return self._kind == other._kind + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._kind) + + def __str__(self): + return "<unknown {}>".format(self._kind) + + +class TestHistory(object): + """ + A class for interacting with the /test_history Evergreen API endpoint. 
+ """ + + DEFAULT_API_SERVER = "http://evergreen-api.mongodb.com:8080" + DEFAULT_PROJECT = "mongodb-mongo-master" + + DEFAULT_TEST_STATUSES = ("pass", "fail", "silentfail") + DEFAULT_TASK_STATUSES = ("success", "failed", "timeout", "sysfail") + + # The Evergreen API requires specifying the "limit" parameter when not specifying a range of + # revisions. + DEFAULT_LIMIT = 20 + + _MISSING_DISTRO = Missing("distro") def __init__(self, - period_type, - start, - end, - start_day_of_week="first_day", - group_period=7, - project=_PROJECT, + api_server=DEFAULT_API_SERVER, + project=DEFAULT_PROJECT, tests=None, tasks=None, variants=None, - distros=None, - evg_cfg=None): - # Initialize the report and object variables. - self._report_tuples = [] - self._report = {"tests": {}} - self.period_type = period_type.lower() - if self.period_type not in self.period_types: - raise ValueError( - "Invalid time period type '{}' specified." - " supported types are {}.".format(self.period_type, self.period_types)) - self.group_period = group_period - self.start_day_of_week = start_day_of_week.lower() - - self.start = start - self.end = end - - self.project = project - - if not tests and not tasks: - raise ValueError("Must specify either tests or tasks.") - self.tests = tests if tests is not None else [] - self.tasks = tasks if tasks is not None else [] - self.variants = variants if variants is not None else [] - self.distros = distros if distros is not None else [] - - if evg_cfg is not None and "api_server_host" in evg_cfg: - api_server = "{url.scheme}://{url.netloc}".format( - url=urlparse.urlparse(evg_cfg["api_server_host"])) - else: - api_server = _API_SERVER_DEFAULT - self.api_prefix = api_server + _REST_PREFIX - - def _all_tests(self): - """Returns a list of all test file name types from self.tests. - Since the test file names can be specifed as either Windows or Linux style, - we will ensure that both are specified for each test. - Add Windows style naming, backslashes and possibly .exe extension. - Add Linux style naming, forward slashes and removes .exe extension.""" - tests_set = set(self.tests) - for test in self.tests: - if "/" in test: - windows_test = test.replace("/", "\\") - if not os.path.splitext(test)[1]: - windows_test += ".exe" - tests_set.add(windows_test) - if "\\" in test: - linux_test = test.replace("\\", "/") - linux_test = linux_test.replace(".exe", "") - tests_set.add(linux_test) - return list(tests_set) - - def _history_request_params(self, test_statuses): - """Returns a dictionary of params used in requests.get.""" - return { - "distros": ",".join(self.distros), - "sort": "latest", - "tasks": ",".join(self.tasks), - "tests": ",".join(self.tests), - "taskStatuses": "failed,timeout,success,sysfail", - "testStatuses": ",".join(test_statuses), - "variants": ",".join(self.variants), - } - - def _get_history_by_revision(self, test_statuses): - """ Returns a list of history data for specified options.""" - after_revision = self.start - before_revision = self.end - params = self._history_request_params(test_statuses) - params["beforeRevision"] = before_revision - url = "{prefix}/projects/{project}/test_history".format( - prefix=self.api_prefix, - project=self.project) - - # Since the API limits the results, with each invocation being distinct, we can - # simulate pagination, by requesting results using afterRevision. + distros=None): + """ + Initializes the TestHistory instance with the list of tests, tasks, variants, and distros + specified. 
+ + The list of tests specified are augmented to ensure that failures on both POSIX and Windows + platforms are returned by the Evergreen API. + """ + + tests = tests if tests is not None else [] + tests = [test for test_file in tests for test in self._denormalize_test_file(test_file)] + + self._tests = tests + self._tasks = tasks if tasks is not None else [] + self._variants = variants if variants is not None else [] + self._distros = distros if distros is not None else [] + + self._test_history_url = "{api_server}/rest/v1/projects/{project}/test_history".format( + api_server=api_server, + project=project, + ) + + def get_history_by_revision(self, + start_revision, + end_revision, + test_statuses=DEFAULT_TEST_STATUSES, + task_statuses=DEFAULT_TASK_STATUSES): + """ + Returns a list of ReportEntry instances corresponding to each individual test execution + between 'start_revision' and 'end_revision'. + + Only tests with status 'test_statuses' are included in the result. Similarly, only tests + with status 'task_statuses' are included in the result. By default, both passing and failing + test executions are returned. + """ + + params = self._history_request_params(test_statuses, task_statuses) + params["beforeRevision"] = end_revision + history_data = [] - while after_revision != before_revision: - params["afterRevision"] = after_revision - response = requests.get(url=url, params=params) + + # Since the API limits the results, with each invocation being distinct, we can simulate + # pagination by making subsequent requests using "afterRevision". + while start_revision != end_revision: + params["afterRevision"] = start_revision + response = requests.get(url=self._test_history_url, params=params) response.raise_for_status() - if not response.json(): + + test_results = response.json() + if not test_results: break - # The first test will have the latest revision for this result set. - after_revision = response.json()[0]["revision"] - history_data.extend(response.json()) + for test_result in test_results: + history_data.append(self._process_test_result(test_result)) + + # The first test will have the latest revision for this result set because + # TestHistory._history_request_params() sorts by "latest". + start_revision = test_results[0]["revision"] return history_data - def _get_history_by_date(self, test_statuses): - """ Returns a list of history data for specified options.""" - # Note this functionality requires EVG-1653 - start_date = self.start - end_date = self.end - params = self._history_request_params(test_statuses) - params["beforeDate"] = end_date + "T23:59:59Z" - url = "{prefix}/projects/{project}/test_history".format( - prefix=self.api_prefix, - project=self.project) - - # Since the API limits the results, with each invocation being distinct, we can - # simulate pagination, by requesting results using afterDate, being careful to - # filter out possible duplicate entries. - start_time = start_date + "T00:00:00Z" - history_data = [] - history_data_set = set() - last_sorted_tests = [] + def get_history_by_date(self, + start_date, + end_date, + test_statuses=DEFAULT_TEST_STATUSES, + task_statuses=DEFAULT_TASK_STATUSES): + """ + Returns a list of ReportEntry instances corresponding to each individual test execution + between 'start_date' and 'end_date'. + + Only tests with status 'test_statuses' are included in the result. Similarly, only tests + with status 'task_statuses' are included in the result. By default, both passing and failing + test executions are returned. 
+ """ + + warnings.warn( + "Until https://jira.mongodb.org/browse/EVG-1653 is implemented, pagination using dates" + " isn't guaranteed to returned a complete result set. It is possible for the results" + " from an Evergreen task that started between the supplied start date and the" + " response's latest test start time to be omitted.", RuntimeWarning) + + params = self._history_request_params(test_statuses, task_statuses) + params["beforeDate"] = "{:%Y-%m-%d}T23:59:59Z".format(end_date) + params["limit"] = self.DEFAULT_LIMIT + + start_time = "{:%Y-%m-%d}T00:00:00Z".format(start_date) + history_data = set() + + # Since the API limits the results, with each invocation being distinct, we can simulate + # pagination by making subsequent requests using "afterDate" and being careful to filter out + # duplicate test results. while True: params["afterDate"] = start_time - response = requests.get(url=url, params=params) + response = requests.get(url=self._test_history_url, params=params) response.raise_for_status() - if not response.json(): - return history_data - sorted_tests = sorted(response.json(), key=operator.itemgetter("start_time")) + test_results = response.json() + if not test_results: + break + + original_size = len(history_data) + for test_result in test_results: + start_time = max(test_result["start_time"], start_time) + history_data.add(self._process_test_result(test_result)) - # To prevent an infinite loop, we need to bail out if the result set is the same - # as the previous one. - if sorted_tests == last_sorted_tests: + # To prevent an infinite loop, we need to bail out if test results returned by the + # request were identical to the ones we got back in an earlier request. + if original_size == len(history_data): break - last_sorted_tests = sorted_tests + return list(history_data) - for test in sorted_tests: - start_time = test["start_time"] - # Create a unique hash for the test entry and check if it's been processed. - test_hash = hash(str(sorted(test.items()))) - if test_hash not in history_data_set: - history_data_set.add(test_hash) - history_data.append(test) + def _process_test_result(self, test_result): + """ + Returns a ReportEntry() tuple representing the 'test_result' dictionary. + """ - return history_data + def parse_date(date_str): + """ + Returns a datetime.date() instance representing the specified yyyy-mm-dd date string. + + Note that any time component of 'date_str', including the timezone, is ignored. + """ + + return datetime.datetime.strptime(date_str.split("T")[0], "%Y-%m-%d").date() + + # For individual test executions, we intentionally use the "start_time" of the test as both + # its 'start_date' and 'end_date' to avoid complicating how the test history is potentially + # summarized by time. By the time the test has started, the Evergreen task has already been + # assigned to a particular machine and is using a specific set of binaries, so there's + # unlikely to be a significance to when the test actually finishes. 
+ start_date = end_date = parse_date(test_result["start_time"]) + + return ReportEntry( + test=self._normalize_test_file(test_result["test_file"]), + task=test_result["task_name"], + variant=test_result["variant"], + distro=test_result.get("distro", self._MISSING_DISTRO), + start_date=start_date, + end_date=end_date, + num_pass=(1 if test_result["test_status"] == "pass" else 0), + num_fail=(1 if test_result["test_status"] not in ("pass", "skip") else 0)) @staticmethod - def normalize_test_file(test_file): - """Normalizes the test_file name: - - Changes single backslash (\\) to forward slash (/) - - Removes .exe extension - Returns normalized string.""" - return test_file.replace("\\", "/").replace(".exe", "") - - def generate_report(self): - """Creates detail for self._report from specified test history options. - Returns a ViewReport object of self._report.""" - - if self.period_type == "date": - report_method = self._get_history_by_date - else: - report_method = self._get_history_by_revision + def _normalize_test_file(test_file): + """ + If 'test_file' represents a Windows-style path, then it is converted to a POSIX-style path + with - self.tests = self._all_tests() + - backslashes (\\) as the path separator replaced with forward slashes (/) and + - the ".exe" extension, if present, removed. - rest_api_report = report_method(test_statuses=["fail", "pass"]) + If 'test_file' already represents a POSIX-style path, then it is returned unmodified. + """ - for record in rest_api_report: - # Save API record as namedtuple - self._report_tuples.append( - self.HistoryReportTuple( - test=str(HistoryReport.normalize_test_file(record["test_file"])), - task=str(record["task_name"]), - variant=str(record["variant"]), - distro=record.get("distro", _ALL_DISTRO), - start_dt=datestr_to_date(record["start_time"]), - test_status=record["test_status"])) + if "\\" in test_file: + posix_test_file = test_file.replace("\\", "/") + (test_file_root, test_file_ext) = os.path.splitext(posix_test_file) + if test_file_ext == ".exe": + return test_file_root + return posix_test_file - return ViewReport(history_report=self._report_tuples, - group_period=self.group_period, - start_day_of_week=self.start_day_of_week) + return test_file + def _denormalize_test_file(self, test_file): + """ + Returns a list containing 'test_file' as both a POSIX-style path and a Windows-style path. -def main(): + The conversion process may involving replacing forward slashes (/) as the path separator + with backslashes (\\), as well as adding a ".exe" extension if 'test_file' has no file + extension. + """ - parser = optparse.OptionParser(description=__doc__, - usage="Usage: %prog [options] test1 test2 ...") + test_file = self._normalize_test_file(test_file) - parser.add_option("--project", dest="project", - default=_PROJECT, - help="Evergreen project to analyze, defaults to '%default'.") + if "/" in test_file: + windows_test_file = test_file.replace("/", "\\") + if not os.path.splitext(test_file)[1]: + windows_test_file += ".exe" + return [test_file, windows_test_file] - # TODO EVG-1653: Expose the --sinceDate and --untilDate command line arguments after pagination - # is made possible using the /test_history Evergreen API endpoint. 
- # parser.add_option("--sinceDate", dest="start_date", - # metavar="YYYY-MM-DD", - # default="{:%Y-%m-%d}".format(today - datetime.timedelta(days=6)), - # help="History from this date, defaults to 1 week ago (%default).") + return [test_file] - # parser.add_option("--untilDate", dest="end_date", - # metavar="YYYY-MM-DD", - # default="{:%Y-%m-%d}".format(today), - # help="History up to, and including, this date, defaults to today (%default).") + def _history_request_params(self, test_statuses, task_statuses): + """ + Returns the query parameters for /test_history GET request as a dictionary. + """ + + return { + "distros": ",".join(self._distros), + "sort": "latest", + "tasks": ",".join(self._tasks), + "tests": ",".join(self._tests), + "taskStatuses": ",".join(task_statuses), + "testStatuses": ",".join(test_statuses), + "variants": ",".join(self._variants), + } + + +def main(): + """ + Utility computing test failure rates from the Evergreen API. + """ + + parser = optparse.OptionParser(description=main.__doc__, + usage="Usage: %prog [options] [test1 test2 ...]") + + parser.add_option("--project", dest="project", + metavar="<project-name>", + default=TestHistory.DEFAULT_PROJECT, + help="The Evergreen project to analyze. Defaults to '%default'.") + + today = datetime.datetime.utcnow().replace(microsecond=0, tzinfo=None) + parser.add_option("--sinceDate", dest="since_date", + metavar="<yyyy-mm-dd>", + default="{:%Y-%m-%d}".format(today - datetime.timedelta(days=6)), + help=("The starting period as a date in UTC to analyze the test history for," + " including the specified date. Defaults to 1 week ago (%default).")) + + parser.add_option("--untilDate", dest="until_date", + metavar="<yyyy-mm-dd>", + default="{:%Y-%m-%d}".format(today), + help=("The ending period as a date in UTC to analyze the test history for," + " including the specified date. Defaults to today (%default).")) parser.add_option("--sinceRevision", dest="since_revision", + metavar="<gitrevision>", default=None, - help="History after this revision." - # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate - # options are exposed. - # "History after this revision, overrides --sinceDate & --untilDate." - " Must be specified with --untilRevision") + help=("The starting period as a git revision to analyze the test history for," + " excluding the specified commit. This option must be specified in" + " conjuction with --untilRevision and takes precedence over --sinceDate" + " and --untilDate.")) parser.add_option("--untilRevision", dest="until_revision", + metavar="<gitrevision>", default=None, - help="History up to, and including, this revision." - # TODO EVG-1653: Uncomment this line once the --sinceDate and - # --untilDate options are exposed. - # "History up to, and including, this revision, overrides" - # " --sinceDate & --untilDate." - " Must be specified with --sinceRevision") + help=("The ending period as a git revision to analyze the test history for," + " including the specified commit. This option must be specified in" + " conjuction with --sinceRevision and takes precedence over --sinceDate" + " and --untilDate.")) parser.add_option("--groupPeriod", dest="group_period", - type="int", - default=7, - help="Set group period days, defaults to '%default'.") + metavar="[{}]".format("|".join([Report.DAILY, Report.WEEKLY, "<ndays>"])), + default=Report.WEEKLY, + help=("The time period over which to group test executions. 
Defaults to" + " '%default'.")) parser.add_option("--weekStartDay", dest="start_day_of_week", - choices=["sunday", "monday", "first_day"], - default="first_day", - help="The group starting day of week, when --groupPeriod is not 1. " - " Set to 'sunday', 'monday' or 'first_day'." - " If 'first_day', the group will start on the first day of the" - " starting date from the history result, defaults to '%default'.") + choices=(Report.SUNDAY, Report.MONDAY, Report.FIRST_DAY), + metavar="[{}]".format( + "|".join([Report.SUNDAY, Report.MONDAY, Report.FIRST_DAY])), + default=Report.FIRST_DAY, + help=("The day to use as the beginning of the week when grouping over time." + " This option is only relevant in conjuction with --groupPeriod={}. If" + " '{}' is specified, then the day of week of the earliest date is used" + " as the beginning of the week. Defaults to '%default'.".format( + Report.WEEKLY, Report.FIRST_DAY))) parser.add_option("--tasks", dest="tasks", + metavar="<task1,task2,...>", default="", - help="Comma separated list of task display names to analyze.") + help="Comma-separated list of Evergreen task names to analyze.") parser.add_option("--variants", dest="variants", + metavar="<variant1,variant2,...>", default="", - help="Comma separated list of build variants to analyze.") + help="Comma-separated list of Evergreen build variants to analyze.") parser.add_option("--distros", dest="distros", + metavar="<distro1,distro2,...>", default="", - help="Comma separated list of build distros to analyze.") + help="Comma-separated list of Evergreen build distros to analyze.") (options, tests) = parser.parse_args() - # TODO EVG-1653: Uncomment these lines once the --sinceDate and --untilDate options are - # exposed. - # period_type = "date" - # start = options.start_date - # end = options.end_date - - if options.since_revision and options.until_revision: - period_type = "revision" - start = options.since_revision - end = options.until_revision - elif options.since_revision or options.until_revision: - parser.print_help() - parser.error("Must specify both --sinceRevision & --untilRevision") - # TODO EVG-1653: Remove this else clause once the --sinceDate and --untilDate options are - # exposed. 
- else: - parser.print_help() - parser.error("Must specify both --sinceRevision & --untilRevision") + for (option_name, option_dest) in (("--sinceDate", "since_date"), + ("--untilDate", "until_date")): + option_value = getattr(options, option_dest) + try: + setattr(options, + option_dest, + datetime.datetime.strptime(option_value, "%Y-%m-%d").date()) + except ValueError: + parser.print_help(file=sys.stderr) + print(file=sys.stderr) + parser.error("{} must be specified in yyyy-mm-dd format, but got {}".format( + option_name, option_value)) + + if options.since_revision and not options.until_revision: + parser.print_help(file=sys.stderr) + print(file=sys.stderr) + parser.error("Must specify --untilRevision in conjuction with --sinceRevision") + elif options.until_revision and not options.since_revision: + parser.print_help(file=sys.stderr) + print(file=sys.stderr) + parser.error("Must specify --sinceRevision in conjuction with --untilRevision") + + if options.group_period not in (Report.DAILY, Report.WEEKLY): + try: + options.group_period = datetime.timedelta(days=int(options.group_period)) + except ValueError: + parser.print_help(file=sys.stderr) + print(file=sys.stderr) + parser.error("--groupPeriod must be an integral number, but got {}".format( + options.group_period)) if not options.tasks and not tests: - parser.print_help() + parser.print_help(file=sys.stderr) + print(file=sys.stderr) parser.error("Must specify either --tasks or at least one test") - report = HistoryReport(period_type=period_type, - start=start, - end=end, - group_period=options.group_period, - start_day_of_week=options.start_day_of_week, - project=options.project, - tests=tests, - tasks=options.tasks.split(","), - variants=options.variants.split(","), - distros=options.distros.split(","), - evg_cfg=read_evg_config()) - view_report = report.generate_report() - summ_report = view_report.view_summary(group_on=["test", "task", "variant"]) - for s in sorted(summ_report): - print(s) + def read_evg_config(): + """ + Attempts to parse the user's or system's Evergreen configuration from its known locations. + + Returns None if the configuration file wasn't found anywhere. 
+ """ + + known_locations = [ + "./.evergreen.yml", + os.path.expanduser("~/.evergreen.yml"), + os.path.expanduser("~/cli_bin/.evergreen.yml"), + ] + + for filename in known_locations: + if os.path.isfile(filename): + with open(filename, "r") as fstream: + return yaml.safe_load(fstream) + + return None + + evg_config = read_evg_config() + evg_config = evg_config if evg_config is not None else {} + api_server = "{url.scheme}://{url.netloc}".format( + url=urlparse(evg_config.get("api_server_host", TestHistory.DEFAULT_API_SERVER))) + + test_history = TestHistory(api_server=api_server, + project=options.project, + tests=tests, + tasks=options.tasks.split(","), + variants=options.variants.split(","), + distros=options.distros.split(",")) + + if options.since_revision: + history_data = test_history.get_history_by_revision( + start_revision=options.since_revision, + end_revision=options.until_revision) + elif options.since_date: + history_data = test_history.get_history_by_date( + start_date=options.since_date, + end_date=options.until_date) + + report = Report(history_data) + summary = report.summarize_by(Report.TEST_TASK_VARIANT_DISTRO, + time_period=options.group_period, + start_day_of_week=options.start_day_of_week) + + for entry in summary: + print("(test={e.test}," + " task={e.task}," + " variant={e.variant}," + " distro={e.distro}," + " start_date={e.start_date:%Y-%m-%d}," + " end_date={e.end_date:%Y-%m-%d}," + " num_pass={e.num_pass}," + " num_fail={e.num_fail}," + " fail_rate={e.fail_rate:0.2%})".format(e=entry)) + if __name__ == "__main__": main() diff --git a/buildscripts/tests/ciconfig/test_tags.py b/buildscripts/tests/ciconfig/test_tags.py index 4678bcc1f34..8f9b3932f4a 100644 --- a/buildscripts/tests/ciconfig/test_tags.py +++ b/buildscripts/tests/ciconfig/test_tags.py @@ -15,12 +15,12 @@ class TestTagsConfig(unittest.TestCase): """Unit tests for the TagsConfig class.""" def setUp(self): - self.conf = _tags.TagsConfig(TEST_FILE_PATH) + self.conf = _tags.TagsConfig.from_file(TEST_FILE_PATH) def test_invalid_path(self): invalid_path = "non_existing_file" with self.assertRaises(IOError): - _tags.TagsConfig(invalid_path) + _tags.TagsConfig.from_file(invalid_path) def test_list_test_kinds(self): test_kinds = self.conf.get_test_kinds() @@ -151,7 +151,7 @@ class TestTagsConfig(unittest.TestCase): def custom_cmp(tag_a, tag_b): return cmp(tag_a.split("|"), tag_b.split("|")) - conf = _tags.TagsConfig(TEST_FILE_PATH, cmp_func=custom_cmp) + conf = _tags.TagsConfig.from_file(TEST_FILE_PATH, cmp_func=custom_cmp) tags = conf.get_tags(test_kind, test_pattern) self.assertEqual(["tag1", "tag2", "tag3"], tags) diff --git a/buildscripts/tests/test_test_failures.py b/buildscripts/tests/test_test_failures.py new file mode 100644 index 00000000000..0a2c570897b --- /dev/null +++ b/buildscripts/tests/test_test_failures.py @@ -0,0 +1,676 @@ +""" +Tests for buildscripts/test_failures.py. +""" + +from __future__ import absolute_import + +import datetime +import unittest + +from buildscripts import test_failures + + +class TestReportEntry(unittest.TestCase): + """ + Tests for the test_failures.ReportEntry class. + """ + + ENTRY = test_failures.ReportEntry(test="jstests/core/all.js", + task="jsCore_WT", + variant="linux-64", + distro="rhel62", + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=0, + num_fail=0) + + def test_fail_rate(self): + """ + Tests for the test_failures.ReportEntry.fail_rate property. 
+ """ + + entry = self.ENTRY._replace(num_pass=0, num_fail=1) + self.assertEqual(1, entry.fail_rate) + + entry = self.ENTRY._replace(num_pass=9, num_fail=1) + self.assertAlmostEqual(0.1, entry.fail_rate) + + # Verify that we don't attempt to divide by zero. + entry = self.ENTRY._replace(num_pass=0, num_fail=0) + self.assertEqual(0, entry.fail_rate) + + def test_week_start_date_with_sunday(self): + """ + Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week + specified as different forms of the string "Sunday". + """ + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3)) + self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("sunday")) + self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("Sunday")) + self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("SUNDAY")) + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4)) + self.assertEqual(datetime.date(2017, 6, 4), entry.week_start_date("sunday")) + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 5)) + self.assertEqual(datetime.date(2017, 6, 4), entry.week_start_date("sunday")) + + def test_week_start_date_with_monday(self): + """ + Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week + specified as different forms of the string "Monday". + """ + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3)) + self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("monday")) + self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("Monday")) + self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("MONDAY")) + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4)) + self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("monday")) + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 5)) + self.assertEqual(datetime.date(2017, 6, 5), entry.week_start_date("monday")) + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 6)) + self.assertEqual(datetime.date(2017, 6, 5), entry.week_start_date("monday")) + + def test_week_start_date_with_date(self): + """ + Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week + specified as a datetime.date() value. + """ + + entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3)) + + date = datetime.date(2017, 5, 21) + self.assertEqual(6, date.weekday(), "2017 May 21 is a Sunday") + self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date(date)) + + date = datetime.date(2017, 5, 22) + self.assertEqual(0, date.weekday(), "2017 May 22 is a Monday") + self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date(date)) + + date = datetime.date(2017, 6, 6) + self.assertEqual(1, date.weekday(), "2017 Jun 06 is a Tuesday") + self.assertEqual(datetime.date(2017, 5, 30), entry.week_start_date(date)) + + date = datetime.date(2017, 6, 9) + self.assertEqual(4, date.weekday(), "2017 Jun 09 is a Friday") + self.assertEqual(datetime.date(2017, 6, 2), entry.week_start_date(date)) + + date = datetime.date(2017, 6, 3) + self.assertEqual(5, date.weekday(), "2017 Jun 03 is a Saturday") + self.assertEqual(datetime.date(2017, 6, 3), entry.week_start_date(date)) + + def test_sum_combines_test_results(self): + """ + Tests for test_failures.ReportEntry.sum() that verify the start_date, end_date, num_pass, + and num_fail attributes are accumulated correctly. 
+ """ + + entry1 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 1), + end_date=datetime.date(2017, 6, 1), + num_pass=1, + num_fail=0) + + entry2 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 2), + end_date=datetime.date(2017, 6, 2), + num_pass=0, + num_fail=3) + + entry3 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=0, + num_fail=0) + + entry4 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4), + end_date=datetime.date(2017, 6, 4), + num_pass=2, + num_fail=2) + + entry_1234 = test_failures.ReportEntry.sum([entry1, entry2, entry3, entry4]) + entry_1432 = test_failures.ReportEntry.sum([entry1, entry4, entry3, entry2]) + entry_124 = test_failures.ReportEntry.sum([entry1, entry2, entry4]) + entry_13 = test_failures.ReportEntry.sum([entry1, entry3]) + entry_42 = test_failures.ReportEntry.sum([entry4, entry2]) + + self.assertEqual(datetime.date(2017, 6, 1), entry_1234.start_date) + self.assertEqual(datetime.date(2017, 6, 4), entry_1234.end_date) + self.assertEqual(3, entry_1234.num_pass) + self.assertEqual(5, entry_1234.num_fail) + + self.assertEqual(entry_1234, entry_1432, "order of arguments shouldn't matter") + self.assertEqual(entry_1234, entry_124, "entry3 didn't have any test executions") + + self.assertEqual(datetime.date(2017, 6, 1), entry_13.start_date) + self.assertEqual(datetime.date(2017, 6, 3), entry_13.end_date) + self.assertEqual(1, entry_13.num_pass) + self.assertEqual(0, entry_13.num_fail) + + self.assertEqual(datetime.date(2017, 6, 2), entry_42.start_date) + self.assertEqual(datetime.date(2017, 6, 4), entry_42.end_date) + self.assertEqual(2, entry_42.num_pass) + self.assertEqual(5, entry_42.num_fail) + + def test_sum_combines_test_info(self): + """ + Tests for test_failures.ReportEntry.sum() that verify the test, task, variant, and distro + attributes are accumulated correctly. 
+ """ + + entry1 = self.ENTRY._replace(test="jstests/core/all.js", + task="jsCore_WT", + variant="linux-64", + distro="rhel62") + + entry2 = self.ENTRY._replace(test="jstests/core/all.js", + task="jsCore_WT", + variant="linux-64", + distro="rhel55") + + entry3 = self.ENTRY._replace(test="jstests/core/all2.js", + task="jsCore_WT", + variant="linux-64-debug", + distro="rhel62") + + entry4 = self.ENTRY._replace(test="jstests/core/all.js", + task="jsCore", + variant="linux-64-debug", + distro="rhel62") + + entry_12 = test_failures.ReportEntry.sum([entry1, entry2]) + self.assertEqual("jstests/core/all.js", entry_12.test) + self.assertEqual("jsCore_WT", entry_12.task) + self.assertEqual("linux-64", entry_12.variant) + self.assertIsInstance(entry_12.distro, test_failures.Wildcard) + + entry_123 = test_failures.ReportEntry.sum([entry1, entry2, entry3]) + self.assertIsInstance(entry_123.test, test_failures.Wildcard) + self.assertEqual("jsCore_WT", entry_123.task) + self.assertIsInstance(entry_123.variant, test_failures.Wildcard) + self.assertIsInstance(entry_123.distro, test_failures.Wildcard) + + entry_1234 = test_failures.ReportEntry.sum([entry1, entry2, entry3, entry4]) + self.assertIsInstance(entry_1234.test, test_failures.Wildcard) + self.assertIsInstance(entry_1234.task, test_failures.Wildcard) + self.assertIsInstance(entry_1234.variant, test_failures.Wildcard) + self.assertIsInstance(entry_1234.distro, test_failures.Wildcard) + + entry_34 = test_failures.ReportEntry.sum([entry3, entry4]) + self.assertIsInstance(entry_34.test, test_failures.Wildcard) + self.assertIsInstance(entry_34.task, test_failures.Wildcard) + self.assertEqual("linux-64-debug", entry_34.variant) + self.assertEqual("rhel62", entry_34.distro) + + +class TestReportSummarization(unittest.TestCase): + """ + Tests for test_failures.Report.summarize_by(). + """ + + ENTRY = test_failures.ReportEntry(test="jstests/core/all.js", + task="jsCore_WT", + variant="linux-64", + distro="rhel62", + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=0, + num_fail=0) + + ENTRIES = [ + ENTRY._replace(start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=1, + num_fail=0), + ENTRY._replace(task="jsCore", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 5), + num_pass=0, + num_fail=1), + ENTRY._replace(start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0), + # The following entry is intentionally not in timestamp order to verify that the + # 'time_period' parameter becomes part of the sort in summarize_by(). + ENTRY._replace(start_date=datetime.date(2017, 6, 9), + end_date=datetime.date(2017, 6, 9), + num_pass=1, + num_fail=0), + ENTRY._replace(distro="rhel55", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=0, + num_fail=1), + ENTRY._replace(test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0), + ENTRY._replace(variant="linux-64-debug", + start_date=datetime.date(2017, 6, 17), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1), + ] + + def test_group_all_by_test_task_variant_distro(self): + """ + Tests that summarize_by() correctly accumulates all unique combinations of + (test, task, variant, distro). 
+ """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT_DISTRO) + self.assertEqual(5, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task="jsCore", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 5), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + distro="rhel55", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 10), + num_pass=3, + num_fail=0, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 17), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[4], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_all_by_test_task_variant(self): + """ + Tests that summarize_by() correctly accumulates all unique combinations of + (test, task, variant). + """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT) + self.assertEqual(4, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task="jsCore", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 5), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 10), + num_pass=3, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 17), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_all_by_test_task(self): + """ + Tests that summarize_by() correctly accumulates all unique combinations of (test, task). + """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST_TASK) + self.assertEqual(3, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task="jsCore", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 5), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + variant=test_failures.Wildcard("variants"), + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 17), + num_pass=3, + num_fail=2, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_all_by_test(self): + """ + Tests that summarize_by() correctly accumulates all unique combinations of (test,). 
+ """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST) + self.assertEqual(2, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task=test_failures.Wildcard("tasks"), + variant=test_failures.Wildcard("variants"), + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 17), + num_pass=3, + num_fail=3, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_all_by_variant_task(self): + """ + Tests that summarize_by() correctly accumulates all unique combinations of (variant, task). + """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(["variant", "task"]) + self.assertEqual(3, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task="jsCore", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 5), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + test=test_failures.Wildcard("tests"), + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 10), + num_pass=4, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 17), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + + def test_group_weekly_by_test_starting_on_sunday(self): + """ + Tests that summarize_by() correctly accumulates by week when the beginning of the week is + specified as the string "sunday". + """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST, + time_period=test_failures.Report.WEEKLY, + start_day_of_week=test_failures.Report.SUNDAY) + + self.assertEqual(4, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=1, + num_fail=0, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + task=test_failures.Wildcard("tasks"), + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 4), + end_date=datetime.date(2017, 6, 10), + num_pass=2, + num_fail=2, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 11), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 4), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_weekly_by_test_starting_on_monday(self): + """ + Tests that summarize_by() correctly accumulates by week when the beginning of the week is + specified as the string "monday". 
+ """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST, + time_period=test_failures.Report.WEEKLY, + start_day_of_week=test_failures.Report.MONDAY) + + self.assertEqual(4, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 4), + num_pass=1, + num_fail=0, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + task=test_failures.Wildcard("tasks"), + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 11), + num_pass=2, + num_fail=2, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 12), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 11), + num_pass=1, + num_fail=0, + )) + + def test_group_weekly_by_test_starting_on_date(self): + """ + Tests that summarize_by() correctly accumulates by week when the beginning of the week is + specified as a datetime.date() value. + """ + + date = datetime.date(2017, 6, 7) + self.assertEqual(2, date.weekday(), "2017 Jun 07 is a Wednesday") + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST, + time_period=test_failures.Report.WEEKLY, + start_day_of_week=date) + + self.assertEqual(4, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task=test_failures.Wildcard("tasks"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 6), + num_pass=1, + num_fail=1, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 7), + end_date=datetime.date(2017, 6, 13), + num_pass=2, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 14), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 7), + end_date=datetime.date(2017, 6, 13), + num_pass=1, + num_fail=0, + )) + + def test_group_daily_by_test(self): + """ + Tests that summarize_by() correctly accumulates by day. 
+ """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST, + time_period=test_failures.Report.DAILY) + + self.assertEqual(6, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=1, + num_fail=0, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + task="jsCore", + start_date=datetime.date(2017, 6, 5), + end_date=datetime.date(2017, 6, 5), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + start_date=datetime.date(2017, 6, 9), + end_date=datetime.date(2017, 6, 9), + num_pass=1, + num_fail=0, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=1, + )) + self.assertEqual(summed_entries[4], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 17), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[5], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 10), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_4days_by_test(self): + """ + Tests that summarize_by() correctly accumulates by multiple days. + """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST, + time_period=datetime.timedelta(days=4)) + + self.assertEqual(4, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task=test_failures.Wildcard("tasks"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 6), + num_pass=1, + num_fail=1, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 7), + end_date=datetime.date(2017, 6, 10), + num_pass=2, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 15), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[3], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 7), + end_date=datetime.date(2017, 6, 10), + num_pass=1, + num_fail=0, + )) + + def test_group_9days_by_test(self): + """ + Tests that summarize_by() correctly accumulates by multiple days, including time periods + greater than 1 week. 
+ """ + + report = test_failures.Report(self.ENTRIES) + summed_entries = report.summarize_by(test_failures.Report.TEST, + time_period=datetime.timedelta(days=9)) + + self.assertEqual(3, len(summed_entries)) + self.assertEqual(summed_entries[0], self.ENTRY._replace( + task=test_failures.Wildcard("tasks"), + distro=test_failures.Wildcard("distros"), + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 11), + num_pass=3, + num_fail=2, + )) + self.assertEqual(summed_entries[1], self.ENTRY._replace( + variant="linux-64-debug", + start_date=datetime.date(2017, 6, 12), + end_date=datetime.date(2017, 6, 17), + num_pass=0, + num_fail=1, + )) + self.assertEqual(summed_entries[2], self.ENTRY._replace( + test="jstests/core/all2.js", + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 11), + num_pass=1, + num_fail=0, + )) diff --git a/buildscripts/tests/test_update_test_lifecycle.py b/buildscripts/tests/test_update_test_lifecycle.py new file mode 100644 index 00000000000..145065c2c6c --- /dev/null +++ b/buildscripts/tests/test_update_test_lifecycle.py @@ -0,0 +1,760 @@ +""" +Tests for buildscripts/update_test_lifecycle.py. +""" + +from __future__ import absolute_import + +import collections +import copy +import datetime +import unittest + +from buildscripts import test_failures +from buildscripts import update_test_lifecycle +from buildscripts.ciconfig import tags as ci_tags + + +class TestValidateConfig(unittest.TestCase): + """ + Tests for the validate_config() function. + """ + + CONFIG = update_test_lifecycle.Config( + test_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + task_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + variant_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + distro_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + reliable_min_runs=2, + reliable_time_period=datetime.timedelta(days=1), + unreliable_min_runs=2, + unreliable_time_period=datetime.timedelta(days=1)) + + def test_acceptable_test_fail_rate(self): + """ + Tests the validation of the 'test_fail_rates.acceptable' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_unacceptable_test_fail_rate(self): + """ + Tests the validation of the 'test_fail_rates.unacceptable' attribute. 
+ """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_test_fail_rates(self): + """ + Tests the validation of the 'test_fail_rates' attribute. + """ + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9, + unacceptable=0.1)) + update_test_lifecycle.validate_config(config) + + def test_acceptable_task_fail_rate(self): + """ + Tests the validation of the 'test_fail_rates.acceptable' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_unacceptable_task_fail_rate(self): + """ + Tests the validation of the 'task_fail_rates.unacceptable' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_task_fail_rates(self): + """ + Tests the validation of the 'task_fail_rates' attribute. + """ + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9, + unacceptable=0.1)) + update_test_lifecycle.validate_config(config) + + def test_acceptable_variant_fail_rate(self): + """ + Tests the validation of the 'variant_fail_rates.acceptable' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace( + acceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_unacceptable_variant_fail_rate(self): + """ + Tests the validation of the 'variant_fail_rates.unacceptable' attribute. 
+ """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace( + unacceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_variant_fail_rates(self): + """ + Tests the validation of the 'variant_fail_rates' attribute. + """ + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9, + unacceptable=0.1)) + update_test_lifecycle.validate_config(config) + + def test_acceptable_distro_fail_rate(self): + """ + Tests the validation of the 'distro_fail_rates.acceptable' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_unacceptable_distro_fail_rate(self): + """ + Tests the validation of the 'distro_fail_rates.unacceptable' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace( + unacceptable="not a number")) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=2)) + update_test_lifecycle.validate_config(config) + + def test_distro_fail_rates(self): + """ + Tests the validation of the 'distro_fail_rates' attribute. + """ + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9, + unacceptable=0.1)) + update_test_lifecycle.validate_config(config) + + def test_reliable_min_runs(self): + """ + Tests the validation of the 'reliable_min_runs' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace(reliable_min_runs="not a number") + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(reliable_min_runs=-1) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(reliable_min_runs=0) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(reliable_min_runs=1.5) + update_test_lifecycle.validate_config(config) + + def test_reliable_time_period(self): + """ + Tests the validation of the 'reliable_time_period' attribute. 
+ """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace(reliable_time_period="not a datetime.timedelta") + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=0)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=1, hours=1)) + update_test_lifecycle.validate_config(config) + + def test_unreliable_min_runs(self): + """ + Tests the validation of the 'unreliable_min_runs' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace(unreliable_min_runs="not a number") + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(unreliable_min_runs=-1) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(unreliable_min_runs=0) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(unreliable_min_runs=1.5) + update_test_lifecycle.validate_config(config) + + def test_unreliable_time_period(self): + """ + Tests the validation of the 'unreliable_time_period' attribute. + """ + + with self.assertRaises(TypeError): + config = self.CONFIG._replace(unreliable_time_period="not a datetime.timedelta") + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=-1)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=0)) + update_test_lifecycle.validate_config(config) + + with self.assertRaises(ValueError): + config = self.CONFIG._replace( + unreliable_time_period=datetime.timedelta(days=1, hours=1)) + update_test_lifecycle.validate_config(config) + + +class TestUpdateTags(unittest.TestCase): + """ + Tests for the update_tags() function. + """ + + CONFIG = update_test_lifecycle.Config( + test_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + task_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + variant_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + distro_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1), + reliable_min_runs=2, + reliable_time_period=datetime.timedelta(days=1), + unreliable_min_runs=2, + unreliable_time_period=datetime.timedelta(days=1)) + + ENTRY = test_failures.ReportEntry(test="jstests/core/all.js", + task="jsCore_WT", + variant="linux-64", + distro="rhel62", + start_date=datetime.date(2017, 6, 3), + end_date=datetime.date(2017, 6, 3), + num_pass=0, + num_fail=0) + + def assert_has_only_js_tests(self, lifecycle): + """ + Raises an AssertionError exception if 'lifecycle' is not of the following form: + + selector: + js_test: + ... 
+ """ + + self.assertIn("selector", lifecycle.raw) + self.assertEqual(1, len(lifecycle.raw), msg=str(lifecycle.raw)) + self.assertIn("js_test", lifecycle.raw["selector"]) + self.assertEqual(1, len(lifecycle.raw["selector"]), msg=str(lifecycle.raw)) + + return lifecycle.raw["selector"]["js_test"] + + def transition_from_reliable_to_unreliable(self, config, expected_tags): + """ + Tests that update_tags() tags a formerly reliable combination as being unreliable. + """ + + initial_tags = collections.OrderedDict() + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(collections.OrderedDict(), self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"), + self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, expected_tags) + + def test_transition_test_from_reliable_to_unreliable(self): + """ + Tests that update_tags() tags a formerly reliable (test,) combination as being unreliable. + """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1)) + + self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable"]), + ])) + + def test_transition_task_from_reliable_to_unreliable(self): + """ + Tests that update_tags() tags a formerly reliable (test, task) combination as being + unreliable. + """ + + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1)) + + self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable|jsCore_WT"]), + ])) + + def test_transition_variant_from_reliable_to_unreliable(self): + """ + Tests that update_tags() tags a formerly reliable (test, task, variant) combination as being + unreliable. + """ + + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1)) + + self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64"]), + ])) + + def test_transition_distro_from_reliable_to_unreliable(self): + """ + Tests that update_tags() tags a formerly reliable (test, task, variant, distro) combination + as being unreliable. + """ + + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1)) + + self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64|rhel62"]), + ])) + + def test_transition_from_reliable_to_unreliable(self): + """ + Tests that update_tags() tags multiple formerly reliable combination as being unreliable. 
+ """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1), + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1)) + + self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([ + ("jstests/core/all.js", [ + "unreliable", + "unreliable|jsCore_WT", + "unreliable|jsCore_WT|linux-64", + "unreliable|jsCore_WT|linux-64|rhel62", + ]), + ])) + + def transition_from_unreliable_to_reliable(self, config, initial_tags): + """ + Tests that update_tags() untags a formerly unreliable combination after it has become + reliable again. + """ + + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"), + self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, collections.OrderedDict()) + + def test_transition_test_from_unreliable_to_reliable(self): + """ + Tests that update_tags() untags a formerly unreliable (test,) combination after it has + become reliable again. + """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9)) + + self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable"]), + ])) + + def test_transition_task_from_unreliable_to_reliable(self): + """ + Tests that update_tags() untags a formerly unreliable (test, task) combination after it has + become reliable again. + """ + + config = self.CONFIG._replace( + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9)) + + self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable|jsCore_WT"]), + ])) + + def test_transition_variant_from_unreliable_to_reliable(self): + """ + Tests that update_tags() untags a formerly unreliable (test, task, variant) combination + after it has become reliable again. + """ + + config = self.CONFIG._replace( + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9)) + + self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64"]), + ])) + + def test_transition_distro_from_unreliable_to_reliable(self): + """ + Tests that update_tags() untags a formerly unreliable (test, task, variant, distro) + combination after it has become reliable again. + """ + + config = self.CONFIG._replace( + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9)) + + self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([ + ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64|rhel62"]), + ])) + + def test_transition_from_unreliable_to_reliable(self): + """ + Tests that update_tags() untags multiple formerly unreliable combination after it has become + reliable again. 
+ """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9), + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9)) + + self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([ + ("jstests/core/all.js", [ + "unreliable", + "unreliable|jsCore_WT", + "unreliable|jsCore_WT|linux-64", + "unreliable|jsCore_WT|linux-64|rhel62", + ]), + ])) + + def test_remain_reliable(self): + """ + Tests that update_tags() preserves the absence of tags for reliable combinations. + """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9), + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9)) + + initial_tags = collections.OrderedDict() + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"), + self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, initial_tags) + + def test_remain_unreliable(self): + """ + Tests that update_tags() preserves the tags for unreliable combinations. + """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1), + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1)) + + initial_tags = collections.OrderedDict([ + ("jstests/core/all.js", [ + "unreliable", + "unreliable|jsCore_WT", + "unreliable|jsCore_WT|linux-64", + "unreliable|jsCore_WT|linux-64|rhel62", + ]), + ]) + + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"), + self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, initial_tags) + + def test_obeys_reliable_min_runs(self): + """ + Tests that update_tags() considers a test reliable if it has fewer than 'reliable_min_runs'. 
+ """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9), + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9), + reliable_min_runs=100) + + self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([ + ("jstests/core/all.js", [ + "unreliable", + "unreliable|jsCore_WT", + "unreliable|jsCore_WT|linux-64", + "unreliable|jsCore_WT|linux-64|rhel62", + ]), + ])) + + def test_obeys_reliable_time_period(self): + """ + Tests that update_tags() ignores passes from before 'reliable_time_period'. + """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9), + task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9)) + + initial_tags = collections.OrderedDict() + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)), + end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)), + num_pass=1, + num_fail=0), + self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=2)), + end_date=(self.ENTRY.end_date - datetime.timedelta(days=2)), + num_pass=1, + num_fail=0), + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"), + self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, collections.OrderedDict([ + ("jstests/core/all.js", [ + "unreliable", + "unreliable|jsCore_WT", + "unreliable|jsCore_WT|linux-64", + "unreliable|jsCore_WT|linux-64|rhel62", + ]), + ])) + + def test_obeys_unreliable_min_runs(self): + """ + Tests that update_tags() only considers a test unreliable if it has more than + 'unreliable_min_runs'. 
+ """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1), + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1), + unreliable_min_runs=100) + + initial_tags = collections.OrderedDict() + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(num_pass=0, num_fail=1), + self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"), + self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, initial_tags) + + def test_obeys_unreliable_time_period(self): + """ + Tests that update_tags() ignores failures from before 'unreliable_time_period'. + """ + + config = self.CONFIG._replace( + test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1), + task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1), + variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1), + distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1)) + + initial_tags = collections.OrderedDict([ + ("jstests/core/all.js", [ + "unreliable", + "unreliable|jsCore_WT", + "unreliable|jsCore_WT|linux-64", + "unreliable|jsCore_WT|linux-64|rhel62", + ]), + ]) + + lifecycle = ci_tags.TagsConfig.from_dict( + dict(selector=dict(js_test=copy.deepcopy(initial_tags)))) + self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle)) + + report = test_failures.Report([ + self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)), + end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)), + num_pass=0, + num_fail=1), + self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=2)), + end_date=(self.ENTRY.end_date - datetime.timedelta(days=2)), + num_pass=0, + num_fail=1), + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=1, num_fail=0), + self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"), + self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"), + self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"), + ]) + + update_test_lifecycle.validate_config(config) + update_test_lifecycle.update_tags(lifecycle, config, report) + updated_tags = self.assert_has_only_js_tests(lifecycle) + self.assertEqual(updated_tags, collections.OrderedDict()) diff --git a/buildscripts/update_test_lifecycle.py b/buildscripts/update_test_lifecycle.py index 4b4325b255a..9699a5418d8 100755 --- a/buildscripts/update_test_lifecycle.py +++ b/buildscripts/update_test_lifecycle.py @@ -4,30 +4,73 @@ Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates. 
""" + +from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections -import copy +import datetime import optparse -import os +import os.path import subprocess import sys +import textwrap +import warnings # Get relative imports to work when the package is not installed on the PYTHONPATH. if __name__ == "__main__" and __package__ is None: sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from buildscripts import resmokelib -from buildscripts.ciconfig import evergreen -from buildscripts.ciconfig import tags from buildscripts import test_failures as tf +from buildscripts.ciconfig import evergreen as ci_evergreen +from buildscripts.ciconfig import tags as ci_tags + + +if sys.version_info[0] == 2: + _NUMBER_TYPES = (int, long, float) +else: + _NUMBER_TYPES = (int, float) + + +Rates = collections.namedtuple("Rates", ["acceptable", "unacceptable"]) + + +Config = collections.namedtuple("Config", [ + "test_fail_rates", + "task_fail_rates", + "variant_fail_rates", + "distro_fail_rates", + "reliable_min_runs", + "reliable_time_period", + "unreliable_min_runs", + "unreliable_time_period", +]) + + +DEFAULT_CONFIG = Config( + test_fail_rates=Rates(acceptable=0.1, unacceptable=0.3), + task_fail_rates=Rates(acceptable=0.1, unacceptable=0.3), + variant_fail_rates=Rates(acceptable=0.2, unacceptable=0.4), + distro_fail_rates=Rates(acceptable=0.2, unacceptable=0.4), + reliable_min_runs=5, + reliable_time_period=datetime.timedelta(weeks=1), + unreliable_min_runs=20, + unreliable_time_period=datetime.timedelta(weeks=4)) + + +DEFAULT_PROJECT = "mongodb-mongo-master" def write_yaml_file(yaml_file, lifecycle): """Writes the lifecycle object to yaml_file.""" - comment = ("This file was generated by {} and shouldn't be edited by hand. It was" - " generated against commit {} with the following invocation: {}.").format( - sys.argv[0], callo(["git", "rev-parse", "HEAD"]), " ".join(sys.argv)) + + comment = ( + "This file was generated by {} and shouldn't be edited by hand. It was generated against" + " commit {} with the following invocation: {}." + ).format(sys.argv[0], callo(["git", "rev-parse", "HEAD"]).rstrip(), " ".join(sys.argv)) + lifecycle.write_file(yaml_file, comment) @@ -116,7 +159,7 @@ def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run): A test should be added to the set of tests believed not to run reliably when it has more than min_run executions with a failure percentage greater than unacceptable_fr. """ - return test_runs >= min_run and test_fr > unacceptable_fr + return test_runs >= min_run and test_fr >= unacceptable_fr def reliable_test(test_fr, acceptable_fr, test_runs, min_run): @@ -125,7 +168,7 @@ def reliable_test(test_fr, acceptable_fr, test_runs, min_run): A test should then removed from the set of tests believed not to run reliably when it has less than min_run executions or has a failure percentage less than acceptable_fr. 
""" - return test_runs < min_run or test_fr < acceptable_fr + return test_runs < min_run or test_fr <= acceptable_fr def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr): @@ -141,16 +184,21 @@ def check_days(name, days): raise ValueError("'{}' days must be greater than 0.".format(name)) -def unreliable_tag(test, task, variant, distro): +def unreliable_tag(task, variant, distro): """Returns the unreliable tag.""" - if distro and variant and task and test: - return "unreliable|{}|{}|{}".format(task, variant, distro) - elif variant and task and test: - return "unreliable|{}|{}".format(task, variant) - elif task and test: - return "unreliable|{}".format(task) - elif test: - return "unreliable" + + for (component_name, component_value) in (("task", task), + ("variant", variant), + ("distro", distro)): + if isinstance(component_value, (tf.Wildcard, tf.Missing)): + if component_name == "task": + return "unreliable" + elif component_name == "variant": + return "unreliable|{}".format(task) + elif component_name == "distro": + return "unreliable|{}|{}".format(task, variant) + + return "unreliable|{}|{}|{}".format(task, variant, distro) def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_run): @@ -163,8 +211,7 @@ def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_ru fail_rate, summary.num_pass + summary.num_fail, min_run): - update_tag = unreliable_tag( - summary.test, summary.task, summary.variant, summary.distro) + update_tag = unreliable_tag(summary.task, summary.variant, summary.distro) if add_tags: lifecycle.add_tag("js_test", summary.test, update_tag) else: @@ -175,101 +222,266 @@ def compare_tags(tag_a, tag_b): return cmp(tag_a.split("|"), tag_b.split("|")) +def validate_config(config): + """ + Raises a TypeError or ValueError exception if 'config' isn't a valid model. 
+ """ + + for (name, fail_rates) in (("test", config.test_fail_rates), + ("task", config.task_fail_rates), + ("variant", config.variant_fail_rates), + ("distro", config.distro_fail_rates)): + if not isinstance(fail_rates.acceptable, _NUMBER_TYPES): + raise TypeError("The acceptable {} failure rate must be a number, but got {}".format( + name, fail_rates.acceptable)) + elif fail_rates.acceptable < 0 or fail_rates.acceptable > 1: + raise ValueError(("The acceptable {} failure rate must be between 0 and 1 (inclusive)," + " but got {}").format(name, fail_rates.acceptable)) + elif not isinstance(fail_rates.unacceptable, _NUMBER_TYPES): + raise TypeError("The unacceptable {} failure rate must be a number, but got {}".format( + name, fail_rates.unacceptable)) + elif fail_rates.unacceptable < 0 or fail_rates.unacceptable > 1: + raise ValueError(("The unacceptable {} failure rate must be between 0 and 1" + " (inclusive), but got {}").format(name, fail_rates.unacceptable)) + elif fail_rates.acceptable > fail_rates.unacceptable: + raise ValueError( + ("The acceptable {0} failure rate ({1}) must be no larger than unacceptable {0}" + " failure rate ({2})").format( + name, fail_rates.acceptable, fail_rates.unacceptable)) + + for (name, min_runs) in (("reliable", config.reliable_min_runs), + ("unreliable", config.unreliable_min_runs)): + if not isinstance(min_runs, _NUMBER_TYPES): + raise TypeError(("The minimum number of runs for considering a test {} must be a" + " number, but got {}").format(name, min_runs)) + elif min_runs <= 0: + raise ValueError(("The minimum number of runs for considering a test {} must be a" + " positive integer, but got {}").format(name, min_runs)) + elif isinstance(min_runs, float) and not min_runs.is_integer(): + raise ValueError(("The minimum number of runs for considering a test {} must be an" + " integer, but got {}").format(name, min_runs)) + + for (name, time_period) in (("reliable", config.reliable_time_period), + ("unreliable", config.unreliable_time_period)): + if not isinstance(time_period, datetime.timedelta): + raise TypeError( + "The {} time period must be a datetime.timedelta instance, but got {}".format( + name, time_period)) + elif time_period.days <= 0: + raise ValueError( + "The {} time period must be a positive number of days, but got {}".format( + name, time_period)) + elif time_period - datetime.timedelta(days=time_period.days) > datetime.timedelta(): + raise ValueError( + "The {} time period must be an integral number of days, but got {}".format( + name, time_period)) + + +def update_tags(lifecycle, config, report): + """ + Updates the tags in 'lifecycle' based on the historical test failures mentioned in 'report' + according to the model described by 'config'. + """ + + # We initialize 'grouped_entries' to make PyLint not complain about 'grouped_entries' being used + # before assignment. + grouped_entries = None + for (i, (components, rates)) in enumerate( + ((tf.Report.TEST_TASK_VARIANT_DISTRO, config.distro_fail_rates), + (tf.Report.TEST_TASK_VARIANT, config.variant_fail_rates), + (tf.Report.TEST_TASK, config.task_fail_rates), + (tf.Report.TEST, config.test_fail_rates))): + if i > 0: + report = tf.Report(grouped_entries) + + # We reassign the value of 'grouped_entries' to take advantage of how data that is on + # (test, task, variant, distro) preserves enough information to be grouped on any subset of + # those components, etc. 
+ grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY) + + # Filter out any test executions from prior to 'config.unreliable_time_period'. + unreliable_start_date = (report.end_date - config.unreliable_time_period + + datetime.timedelta(days=1)) + unreliable_report = tf.Report(entry for entry in grouped_entries + if entry.start_date >= unreliable_start_date) + update_lifecycle(lifecycle, + unreliable_report.summarize_by(components), + unreliable_test, + True, + rates.unacceptable, + config.unreliable_min_runs) + + # Filter out any test executions from prior to 'config.reliable_time_period'. + reliable_start_date = (report.end_date - config.reliable_time_period + + datetime.timedelta(days=1)) + reliable_report = tf.Report(entry for entry in grouped_entries + if entry.start_date >= reliable_start_date) + update_lifecycle(lifecycle, + reliable_report.summarize_by(components), + reliable_test, + False, + rates.acceptable, + config.reliable_min_runs) + + def main(): + """ + Utility for updating a resmoke.py tag file based on computing test failure rates from the + Evergreen API. + """ - required_options = ["project", - "reliable_test_min_run", - "unreliable_test_min_run", - "test_fail_rates", - ] - parser = optparse.OptionParser(description=__doc__, - usage="Usage: %prog [options] test1 test2 ...") - parser.add_option("--project", dest="project", - default=None, - help="Evergreen project to analyze [REQUIRED].") - parser.add_option("--reliableTestMinimumRun", dest="reliable_test_min_run", - default=None, - type="int", - help="Minimum number of tests runs for test to be considered as reliable" - " [REQUIRED].") - parser.add_option("--unreliableTestMinimumRun", dest="unreliable_test_min_run", - default=None, - type="int", - help="Minimum number of tests runs for test to be considered as unreliable" - " [REQUIRED].") - parser.add_option("--testFailRates", dest="test_fail_rates", - metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE", - default=None, - type="float", - nargs=2, - help="Test fail rates: acceptable fail rate and unacceptable fail rate" - " Specify floating numbers between 0.0 and 1.0 [REQUIRED].") - parser.add_option("--taskFailRates", dest="task_fail_rates", - metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE", - default=None, - type="float", - nargs=2, - help="Task fail rates: acceptable fail rate and unacceptable fail rate." - " Specify floating numbers between 0.0 and 1.0." - " Uses --test-fail-rates if unspecified.") - parser.add_option("--variantFailRates", dest="variant_fail_rates", - metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE", - default=None, - type="float", - nargs=2, - help="Variant fail rates: acceptable fail rate and unacceptable fail rate." - " Specify floating numbers between 0.0 and 1.0." - " Uses --task-fail-rates if unspecified.") - parser.add_option("--distroFailRates", dest="distro_fail_rates", - metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE", - default=None, - type="float", - nargs=2, - help="Distro fail rates: acceptable fail rate and unacceptable fail rate." - " Specify floating numbers between 0.0 and 1.0." - " Uses --variant-fail-rates if unspecified.") - parser.add_option("--tasks", dest="tasks", - default=None, - help="Names of tasks to analyze for tagging unreliable tests." - " If specified and no tests are specified, then only tests" - " associated with the tasks will be analyzed." 
- " If unspecified and no tests are specified, the list of tasks will be" - " the non-excluded list of tasks from the file specified by" - " '--evergreenYML'.") - parser.add_option("--variants", dest="variants", - default="", - help="Names of variants to analyze for tagging unreliable tests.") - parser.add_option("--distros", dest="distros", - default="", - help="Names of distros to analyze for tagging unreliable tests [UNUSED].") - parser.add_option("--evergreenYML", dest="evergreen_yml", - default="etc/evergreen.yml", - help="Evergreen YML file used to get the list of tasks," - " defaults to '%default'.") - parser.add_option("--lifecycleFile", dest="lifecycle_file", + parser = optparse.OptionParser(description=textwrap.dedent(main.__doc__), + usage="Usage: %prog [options] [test1 test2 ...]") + + data_options = optparse.OptionGroup( + parser, + title="Data options", + description=("Options used to configure what historical test failure data to retrieve from" + " Evergreen.")) + parser.add_option_group(data_options) + + data_options.add_option( + "--project", dest="project", + metavar="<project-name>", + default=tf.TestHistory.DEFAULT_PROJECT, + help="The Evergreen project to analyze. Defaults to '%default'.") + + data_options.add_option( + "--tasks", dest="tasks", + metavar="<task1,task2,...>", + help=("The Evergreen tasks to analyze for tagging unreliable tests. If specified in" + " additional to having test positional arguments, then only tests that run under the" + " specified Evergreen tasks will be analyzed. If omitted, then the list of tasks" + " defaults to the non-excluded list of tasks from the specified" + " --evergreenProjectConfig file.")) + + data_options.add_option( + "--variants", dest="variants", + metavar="<variant1,variant2,...>", + default="", + help="The Evergreen build variants to analyze for tagging unreliable tests.") + + data_options.add_option( + "--distros", dest="distros", + metavar="<distro1,distro2,...>", + default="", + help="The Evergreen distros to analyze for tagging unreliable tests.") + + data_options.add_option( + "--evergreenProjectConfig", dest="evergreen_project_config", + metavar="<project-config-file>", + default="etc/evergreen.yml", + help=("The Evergreen project configuration file used to get the list of tasks if --tasks is" + " omitted. Defaults to '%default'.")) + + model_options = optparse.OptionGroup( + parser, + title="Model options", + description=("Options used to configure whether (test,), (test, task)," + " (test, task, variant), and (test, task, variant, distro) combinations are" + " considered unreliable.")) + parser.add_option_group(model_options) + + model_options.add_option( + "--reliableTestMinRuns", type="int", dest="reliable_test_min_runs", + metavar="<reliable-min-runs>", + default=DEFAULT_CONFIG.reliable_min_runs, + help=("The minimum number of test executions required for a test's failure rate to" + " determine whether the test is considered reliable. If a test has fewer than" + " <reliable-min-runs> executions, then it cannot be considered unreliable.")) + + model_options.add_option( + "--unreliableTestMinRuns", type="int", dest="unreliable_test_min_runs", + metavar="<unreliable-min-runs>", + default=DEFAULT_CONFIG.unreliable_min_runs, + help=("The minimum number of test executions required for a test's failure rate to" + " determine whether the test is considered unreliable. 
If a test has fewer than" + " <unreliable-min-runs> executions, then it cannot be considered unreliable.")) + + model_options.add_option( + "--testFailRates", type="float", nargs=2, dest="test_fail_rates", + metavar="<test-acceptable-fail-rate> <test-unacceptable-fail-rate>", + default=DEFAULT_CONFIG.test_fail_rates, + help=("Controls how readily a test is considered unreliable. Each failure rate must be a" + " number between 0 and 1 (inclusive) with" + " <test-unacceptable-fail-rate> >= <test-acceptable-fail-rate>. If a test fails no" + " more than <test-acceptable-fail-rate> in <reliable-days> time, then it is" + " considered reliable. Otherwise, if a test fails at least as much as" + " <test-unacceptable-fail-rate> in <test-unreliable-days> time, then it is considered" + " unreliable. Defaults to %default.")) + + model_options.add_option( + "--taskFailRates", type="float", nargs=2, dest="task_fail_rates", + metavar="<task-acceptable-fail-rate> <task-unacceptable-fail-rate>", + default=DEFAULT_CONFIG.task_fail_rates, + help=("Controls how readily a (test, task) combination is considered unreliable. Each" + " failure rate must be a number between 0 and 1 (inclusive) with" + " <task-unacceptable-fail-rate> >= <task-acceptable-fail-rate>. If a (test, task)" + " combination fails no more than <task-acceptable-fail-rate> in <reliable-days> time," + " then it is considered reliable. Otherwise, if a test fails at least as much as" + " <task-unacceptable-fail-rate> in <unreliable-days> time, then it is considered" + " unreliable. Defaults to %default.")) + + model_options.add_option( + "--variantFailRates", type="float", nargs=2, dest="variant_fail_rates", + metavar="<variant-acceptable-fail-rate> <variant-unacceptable-fail-rate>", + default=DEFAULT_CONFIG.variant_fail_rates, + help=("Controls how readily a (test, task, variant) combination is considered unreliable." + " Each failure rate must be a number between 0 and 1 (inclusive) with" + " <variant-unacceptable-fail-rate> >= <variant-acceptable-fail-rate>. If a" + " (test, task, variant) combination fails no more than <variant-acceptable-fail-rate>" + " in <reliable-days> time, then it is considered reliable. Otherwise, if a test fails" + " at least as much as <variant-unacceptable-fail-rate> in <unreliable-days> time," + " then it is considered unreliable. Defaults to %default.")) + + model_options.add_option( + "--distroFailRates", type="float", nargs=2, dest="distro_fail_rates", + metavar="<distro-acceptable-fail-rate> <distro-unacceptable-fail-rate>", + default=DEFAULT_CONFIG.distro_fail_rates, + help=("Controls how readily a (test, task, variant, distro) combination is considered" + " unreliable. Each failure rate must be a number between 0 and 1 (inclusive) with" + " <distro-unacceptable-fail-rate> >= <distro-acceptable-fail-rate>. If a" + " (test, task, variant, distro) combination fails no more than" + " <distro-acceptable-fail-rate> in <reliable-days> time, then it is considered" + " reliable. Otherwise, if a test fails at least as much as" + " <distro-unacceptable-fail-rate> in <unreliable-days> time, then it is considered" + " unreliable. Defaults to %default.")) + + model_options.add_option( + "--reliableDays", type="int", dest="reliable_days", + metavar="<ndays>", + default=DEFAULT_CONFIG.reliable_time_period.days, + help=("The time period to analyze when determining if a test has become reliable. 
Defaults" + " to %default day(s).")) + + model_options.add_option( + "--unreliableDays", type="int", dest="unreliable_days", + metavar="<ndays>", + default=DEFAULT_CONFIG.unreliable_time_period.days, + help=("The time period to analyze when determining if a test has become unreliable." + " Defaults to %default day(s).")) + + parser.add_option("--resmokeTagFile", dest="tag_file", + metavar="<tagfile>", default="etc/test_lifecycle.yml", - help="Evergreen lifecycle file to update, defaults to '%default'.") - parser.add_option("--reliableDays", dest="reliable_days", - default=7, - type="int", - help="Number of days to check for reliable tests, defaults to '%default'.") - parser.add_option("--unreliableDays", dest="unreliable_days", - default=28, - type="int", - help="Number of days to check for unreliable tests, defaults to '%default'.") - parser.add_option("--batchGroupSize", dest="batch_size", + help="The resmoke.py tag file to update. Defaults to '%default'.") + + parser.add_option("--requestBatchSize", type="int", dest="batch_size", + metavar="<batch-size>", default=100, - type="int", - help="Size of test batch group, defaults to '%default'.") + help=("The maximum number of tests to query the Evergreen API for in a single" + " request. A higher value for this option will reduce the number of" + " roundtrips between this client and Evergreen. Defaults to %default.")) (options, tests) = parser.parse_args() - for option in required_options: - if not getattr(options, option): - parser.print_help() - parser.error("Missing required option") + if options.distros: + warnings.warn( + ("Until https://jira.mongodb.org/browse/EVG-1665 is implemented, distro information" + " isn't returned by the Evergreen API. This option will therefore be ignored."), + RuntimeWarning) - evg_conf = evergreen.EvergreenProjectConfig(options.evergreen_yml) + evg_conf = ci_evergreen.EvergreenProjectConfig(options.evergreen_project_config) use_test_tasks_membership = False tasks = options.tasks.split(",") if options.tasks else [] @@ -282,25 +494,18 @@ def main(): distros = options.distros.split(",") if options.distros else [] - check_fail_rates("Test", options.test_fail_rates[0], options.test_fail_rates[1]) - # The less specific failures rates are optional and default to a lower level value. 
- if not options.task_fail_rates: - options.task_fail_rates = options.test_fail_rates - else: - check_fail_rates("Task", options.task_fail_rates[0], options.task_fail_rates[1]) - if not options.variant_fail_rates: - options.variant_fail_rates = options.task_fail_rates - else: - check_fail_rates("Variant", options.variant_fail_rates[0], options.variant_fail_rates[1]) - if not options.distro_fail_rates: - options.distro_fail_rates = options.variant_fail_rates - else: - check_fail_rates("Distro", options.distro_fail_rates[0], options.distro_fail_rates[1]) - - check_days("Reliable days", options.reliable_days) - check_days("Unreliable days", options.unreliable_days) - - lifecycle = tags.TagsConfig(options.lifecycle_file, cmp_func=compare_tags) + config = Config( + test_fail_rates=Rates(*options.test_fail_rates), + task_fail_rates=Rates(*options.task_fail_rates), + variant_fail_rates=Rates(*options.variant_fail_rates), + distro_fail_rates=Rates(*options.distro_fail_rates), + reliable_min_runs=options.reliable_test_min_runs, + reliable_time_period=datetime.timedelta(days=options.reliable_days), + unreliable_min_runs=options.unreliable_test_min_runs, + unreliable_time_period=datetime.timedelta(days=options.unreliable_days)) + validate_config(config) + + lifecycle = ci_tags.TagsConfig.from_file(options.tag_file, cmp_func=compare_tags) test_tasks_membership = get_test_tasks_membership(evg_conf) # If no tests are specified then the list of tests is generated from the list of tasks. @@ -325,58 +530,24 @@ def main(): if not tasks: print("Warning - No tasks found for tests {}, skipping this group.".format(tests)) continue - report = tf.HistoryReport(period_type="revision", - start=commit_prior, - end=commit_last, - group_period=options.reliable_days, - project=options.project, - tests=tests, - tasks=tasks, - variants=variants, - distros=distros) - view_report = report.generate_report() - - # We build up report_combo to check for more specific test failures rates. - report_combo = [] - # TODO EVG-1665: Uncomment this line once this has been supported. - # for combo in ["test", "task", "variant", "distro"]: - for combo in ["test", "task", "variant"]: - report_combo.append(combo) - if combo == "distro": - acceptable_fail_rate = options.distro_fail_rates[0] - unacceptable_fail_rate = options.distro_fail_rates[1] - elif combo == "variant": - acceptable_fail_rate = options.variant_fail_rates[0] - unacceptable_fail_rate = options.variant_fail_rates[1] - elif combo == "task": - acceptable_fail_rate = options.task_fail_rates[0] - unacceptable_fail_rate = options.task_fail_rates[1] - else: - acceptable_fail_rate = options.test_fail_rates[0] - unacceptable_fail_rate = options.test_fail_rates[1] - - # Unreliable tests are analyzed from the entire period. - update_lifecycle(lifecycle, - view_report.view_summary(group_on=report_combo), - unreliable_test, - True, - unacceptable_fail_rate, - options.unreliable_test_min_run) - - # Reliable tests are analyzed from the last period, i.e., last 14 days. - (reliable_start_date, reliable_end_date) = view_report.last_period() - update_lifecycle(lifecycle, - view_report.view_summary(group_on=report_combo, - start_date=reliable_start_date, - end_date=reliable_end_date), - reliable_test, - False, - acceptable_fail_rate, - options.reliable_test_min_run) - - # Update the lifecycle_file only if there have been changes. 
+
+ test_history = tf.TestHistory(project=options.project,
+ tests=tests,
+ tasks=tasks,
+ variants=variants,
+ distros=distros)
+
+ history_data = test_history.get_history_by_revision(start_revision=commit_prior,
+ end_revision=commit_last)
+
+ report = tf.Report(history_data)
+ update_tags(lifecycle, config, report)
+
+ # We write the 'lifecycle' tag configuration to the 'options.tag_file' file only if there
+ # have been changes to the tags. In particular, we avoid modifying the file when only the
+ # header comment for the YAML file would change.
 if lifecycle.is_modified():
- write_yaml_file(options.lifecycle_file, lifecycle)
+ write_yaml_file(options.tag_file, lifecycle)


if __name__ == "__main__":
    main()
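Editor's note: the hunks above reference the Rates and Config namedtuples, DEFAULT_CONFIG, and validate_config() without showing their definitions. The following is a minimal sketch of what those definitions could look like, inferred only from how main() and update_tags() use them; the field order, the concrete rate and minimum-run defaults, and the validation messages are assumptions, not the committed code (the 7- and 28-day periods follow the old --reliableDays/--unreliableDays defaults).

import collections
import datetime

# Sketch only: field names inferred from the keyword arguments used in main().
Rates = collections.namedtuple("Rates", ["acceptable", "unacceptable"])

Config = collections.namedtuple("Config", [
    "test_fail_rates", "task_fail_rates", "variant_fail_rates", "distro_fail_rates",
    "reliable_min_runs", "reliable_time_period",
    "unreliable_min_runs", "unreliable_time_period",
])

DEFAULT_CONFIG = Config(
    test_fail_rates=Rates(acceptable=0.1, unacceptable=0.3),     # illustrative rates
    task_fail_rates=Rates(acceptable=0.1, unacceptable=0.3),     # illustrative rates
    variant_fail_rates=Rates(acceptable=0.2, unacceptable=0.4),  # illustrative rates
    distro_fail_rates=Rates(acceptable=0.2, unacceptable=0.4),   # illustrative rates
    reliable_min_runs=5,                                         # illustrative count
    reliable_time_period=datetime.timedelta(days=7),             # old --reliableDays default
    unreliable_min_runs=10,                                      # illustrative count
    unreliable_time_period=datetime.timedelta(days=28),          # old --unreliableDays default
)


def validate_config(config):
    """Reject models that the option help text above rules out."""
    for name in ("test", "task", "variant", "distro"):
        rates = getattr(config, name + "_fail_rates")
        if not 0.0 <= rates.acceptable <= rates.unacceptable <= 1.0:
            raise ValueError("{} fail rates must satisfy"
                             " 0 <= acceptable <= unacceptable <= 1".format(name))
    if config.reliable_time_period.days < 1 or config.unreliable_time_period.days < 1:
        raise ValueError("time periods must be at least one day")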
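The rewritten update_tags() groups the full result set by day once and then derives both the unreliable and reliable windows from that single grouping, which is the partial-grouping optimization called out in the commit message. The helper below restates that pattern from the first hunk in isolation; summarize_window() is a hypothetical name, and it assumes buildscripts.test_failures is importable as tf, as it is in the script.

import datetime

import buildscripts.test_failures as tf


def summarize_window(report, grouped_entries, components, time_period):
    """Summarize only the entries inside the trailing 'time_period' window.

    'grouped_entries' should be the result of a single
    report.summarize_by(components, time_period=tf.Report.DAILY) call. Because
    those entries are already collapsed to one per (components, day) group,
    restricting the window is a filter over daily entries rather than a
    re-aggregation of the raw result set.
    """
    start_date = report.end_date - time_period + datetime.timedelta(days=1)
    windowed = tf.Report(entry for entry in grouped_entries
                         if entry.start_date >= start_date)
    return windowed.summarize_by(components)

update_tags() would then call this once with config.unreliable_time_period and once with config.reliable_time_period, reusing the same grouped_entries list for both windows.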
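Putting the pieces together, the per-group work in the rewritten main() reduces to a fetch/report/update/write pipeline. The sketch below condenses the hunks above rather than adding behavior; refresh_tags() is a hypothetical name, and compare_tags, update_tags, and write_yaml_file are helpers defined elsewhere in update_test_lifecycle.py and assumed here.

import buildscripts.ciconfig.tags as ci_tags
import buildscripts.test_failures as tf


def refresh_tags(options, config, tests, tasks, variants, distros,
                 commit_prior, commit_last):
    """Condensed restatement of main()'s per-group tagging flow (sketch only)."""
    # compare_tags keeps the tag lists in the file deterministically ordered.
    lifecycle = ci_tags.TagsConfig.from_file(options.tag_file, cmp_func=compare_tags)

    # Pull the failure history for this group of tests from the Evergreen API.
    test_history = tf.TestHistory(project=options.project, tests=tests, tasks=tasks,
                                  variants=variants, distros=distros)
    history_data = test_history.get_history_by_revision(start_revision=commit_prior,
                                                        end_revision=commit_last)

    # Fold the history into a Report and apply the lifecycle model to the tags.
    update_tags(lifecycle, config, tf.Report(history_data))

    # Only rewrite the YAML file when a tag actually changed, so a run with no
    # findings leaves the file (and its header comment) untouched.
    if lifecycle.is_modified():
        write_yaml_file(options.tag_file, lifecycle)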