author     Max Hirschhorn <max.hirschhorn@mongodb.com>  2017-07-17 11:09:34 -0400
committer  Max Hirschhorn <max.hirschhorn@mongodb.com>  2017-07-17 11:09:34 -0400
commit     58a3909a3f678dec7bd94bfb38f96756c970113e (patch)
tree       96675ca63ab47a93aca816909805264e6667ea7b /buildscripts
parent     27cf9fd7b31f043af913da135385367126f5691b (diff)
download   mongo-58a3909a3f678dec7bd94bfb38f96756c970113e.tar.gz
SERVER-29642 SERVER-29643 Add Python tests for test lifecycle scripts.
For test_failures.py:

* Replaces HistoryReport with a TestHistory class that has
  get_history_by_revision() and get_history_by_date() methods. They both
  return a list of ReportEntry tuples that can be used to construct a Report
  instance.
* Adds Python unit test cases for the Report and ReportEntry classes.
* Creates the Wildcard class as a separate concept from the Missing class.
* Enables --sinceDate and --untilDate with a warning that the script may not
  return a complete result set.
* Adds support for running the script with Python 3.

For update_test_lifecycle.py:

* Introduces a Config namedtuple to represent the test lifecycle model.
* Adds Python unit test cases for the update_tags() function.
* Takes advantage of the partial grouping so that computing summaries for the
  (test, task, variant), (test, task), and (test,) combinations does not
  require re-processing the entire result set.
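The workflow described above can be illustrated with a minimal, hedged sketch (not part of the commit): it assumes the module is importable as buildscripts.test_failures, and the revision hashes and task name below are placeholders.

    from buildscripts.test_failures import Report, TestHistory

    # Placeholder revision range and task name; a real invocation would use values
    # from the Evergreen project being analyzed.
    test_history = TestHistory(tasks=["jsCore_WT"])
    entries = test_history.get_history_by_revision(start_revision="<older-revision>",
                                                   end_revision="<newer-revision>")

    # The ReportEntry tuples returned by the TestHistory methods are used to
    # construct a Report, which can then summarize them by various groupings.
    report = Report(entries)
    for entry in report.summarize_by(Report.TEST_TASK, time_period=Report.WEEKLY):
        print(entry.test, entry.task, entry.num_pass, entry.num_fail, entry.fail_rate)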
Diffstat (limited to 'buildscripts')
-rw-r--r--  buildscripts/ciconfig/tags.py                      39
-rw-r--r--  buildscripts/resmokelib/selector.py                 2
-rwxr-xr-x  buildscripts/test_failures.py                    1172
-rw-r--r--  buildscripts/tests/ciconfig/test_tags.py            6
-rw-r--r--  buildscripts/tests/test_test_failures.py          676
-rw-r--r--  buildscripts/tests/test_update_test_lifecycle.py  760
-rwxr-xr-x  buildscripts/update_test_lifecycle.py             525
7 files changed, 2432 insertions, 748 deletions
diff --git a/buildscripts/ciconfig/tags.py b/buildscripts/ciconfig/tags.py
index dbd090bc9a8..dfab58832fa 100644
--- a/buildscripts/ciconfig/tags.py
+++ b/buildscripts/ciconfig/tags.py
@@ -1,4 +1,5 @@
"""Module to access and modify tag configuration files used by resmoke."""
+
from __future__ import absolute_import
from __future__ import print_function
@@ -11,7 +12,7 @@ import yaml
# Setup to preserve order in yaml.dump, see https://stackoverflow.com/a/8661021
def _represent_dict_order(self, data):
- return self.represent_mapping('tag:yaml.org,2002:map', data.items())
+ return self.represent_mapping("tag:yaml.org,2002:map", data.items())
yaml.add_representer(collections.OrderedDict, _represent_dict_order)
# End setup
@@ -20,17 +21,38 @@ yaml.add_representer(collections.OrderedDict, _represent_dict_order)
class TagsConfig(object):
"""Represent a test tag configuration file."""
- def __init__(self, filename, cmp_func=None):
- """Initialize a TagsConfig from a file.
+ def __init__(self, raw, cmp_func=None):
+ """Initialize a TagsConfig from a dict representing the associations between tests and tags.
'cmp_func' can be used to specify a comparison function that will be used when sorting tags.
"""
- with open(filename, "r") as fstream:
- self.raw = yaml.safe_load(fstream)
+
+ self.raw = raw
self._conf = self.raw["selector"]
self._conf_copy = copy.deepcopy(self._conf)
self._cmp_func = cmp_func
+ @classmethod
+ def from_file(cls, filename, **kwargs):
+ """Return a TagsConfig from a file containing the associations between tests and tags.
+
+ See TagsConfig.__init__() for the keyword arguments that can be specified.
+ """
+
+ with open(filename, "r") as fstream:
+ raw = yaml.safe_load(fstream)
+
+ return cls(raw, **kwargs)
+
+ @classmethod
+ def from_dict(cls, raw, **kwargs):
+ """Return a TagsConfig from a dict representing the associations between tests and tags.
+
+ See TagsConfig.__init__() for the keyword arguments that can be specified.
+ """
+
+ return cls(copy.deepcopy(raw), **kwargs)
+
def get_test_kinds(self):
"""List the test kinds."""
return self._conf.keys()
@@ -75,9 +97,14 @@ class TagsConfig(object):
"""
with open(filename, "w") as fstream:
if preamble:
- print(textwrap.fill(preamble, width=100, initial_indent="# ",
+ print(textwrap.fill(preamble,
+ width=100,
+ initial_indent="# ",
subsequent_indent="# "),
file=fstream)
+
+ # We use yaml.safe_dump() in order to avoid having strings being written to the file as
+ # "!!python/unicode ..." and instead have them written as plain 'str' instances.
yaml.safe_dump(self.raw, fstream, default_flow_style=False)
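As a hedged usage sketch of the two new constructors (not part of the commit): the file path, test kind, test pattern, and tag name below are placeholders, and the dict nesting is inferred from the accessor methods shown in this diff.

    from buildscripts.ciconfig import tags as _tags

    # Construct a TagsConfig from an on-disk tag file (placeholder path).
    conf = _tags.TagsConfig.from_file("etc/test_lifecycle.yml")

    # Construct a TagsConfig from an in-memory dict; from_dict() deep-copies its
    # argument so the caller's dict is left untouched.
    raw = {"selector": {"js_test": {"jstests/core/example.js": ["unreliable"]}}}
    conf = _tags.TagsConfig.from_dict(raw)
    print(conf.get_test_kinds())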
diff --git a/buildscripts/resmokelib/selector.py b/buildscripts/resmokelib/selector.py
index a3448116efb..e1cec945603 100644
--- a/buildscripts/resmokelib/selector.py
+++ b/buildscripts/resmokelib/selector.py
@@ -28,7 +28,7 @@ def _parse_tag_file(test_kind):
a list of tags, i.e., {'file1.js': ['tag1', 'tag2'], 'file2.js': ['tag2', 'tag3']}
"""
if config.TAG_FILE:
- tags_conf = _tags.TagsConfig(config.TAG_FILE)
+ tags_conf = _tags.TagsConfig.from_file(config.TAG_FILE)
tagged_roots = tags_conf.get_test_patterns(test_kind)
else:
tagged_roots = []
diff --git a/buildscripts/test_failures.py b/buildscripts/test_failures.py
index 72a9785911c..3a874c55fc4 100755
--- a/buildscripts/test_failures.py
+++ b/buildscripts/test_failures.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python
-"""Test Failures
-
-Compute Test failures rates from Evergreen API for specified tests, tasks, etc.
"""
+Utility for computing test failure rates from the Evergreen API.
+"""
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -14,660 +14,710 @@ import itertools
import operator
import optparse
import os
-import urlparse
+import sys
+import warnings
+
+try:
+ from urlparse import urlparse
+except ImportError:
+ from urllib.parse import urlparse
import requests
import yaml
-_API_SERVER_DEFAULT = "http://evergreen-api.mongodb.com:8080"
-_REST_PREFIX = "/rest/v1"
-_PROJECT = "mongodb-mongo-master"
+if sys.version_info[0] == 2:
+ _STRING_TYPES = (basestring,)
+else:
+ _STRING_TYPES = (str,)
+
+
+_ReportEntry = collections.namedtuple("_ReportEntry", [
+ "test",
+ "task",
+ "variant",
+ "distro",
+ "start_date",
+ "end_date",
+ "num_pass",
+ "num_fail",
+])
+
+
+class Wildcard(object):
+ """
+ A class for representing that there are multiple values associated with a particular component.
+ """
-class _Missing(object):
- """Class to support missing fields from the report."""
def __init__(self, kind):
self._kind = kind
def __eq__(self, other):
- if not isinstance(other, _Missing):
+ if not isinstance(other, Wildcard):
return NotImplemented
+
return self._kind == other._kind
def __ne__(self, other):
return not self == other
- def __str__(self):
- return "<_Missing: {}>".format(self._kind)
-
-_ALL_TEST = _Missing("test")
-_ALL_TASK = _Missing("task")
-_ALL_VARIANT = _Missing("variant")
-_ALL_DISTRO = _Missing("distro")
-_ALL_DATE = _Missing("date")
+ def __hash__(self):
+ return hash(self._kind)
+ def __str__(self):
+ return "<multiple {}>".format(self._kind)
-def read_evg_config():
- # Expand out evergreen config file possibilities
- file_list = [
- "./.evergreen.yml",
- os.path.expanduser("~/.evergreen.yml"),
- os.path.expanduser("~/cli_bin/.evergreen.yml")]
- for filename in file_list:
- if os.path.isfile(filename):
- with open(filename, "r") as fstream:
- return yaml.load(fstream)
- return None
+class ReportEntry(_ReportEntry):
+ """
+ Holds information about Evergreen test executions.
+ """
+ _MULTIPLE_TESTS = Wildcard("tests")
+ _MULTIPLE_TASKS = Wildcard("tasks")
+ _MULTIPLE_VARIANTS = Wildcard("variants")
+ _MULTIPLE_DISTROS = Wildcard("distros")
-def datestr_to_date(date_str):
- """Returns datetime from a date string in the format of YYYY-MM-DD.
- Note that any time in the date string is stripped off."""
- return datetime.datetime.strptime(date_str.split("T")[0], "%Y-%m-%d").date()
+ _MIN_DATE = datetime.date(datetime.MINYEAR, 1, 1)
+ _MAX_DATE = datetime.date(datetime.MAXYEAR, 12, 31)
+ @property
+ def fail_rate(self):
+ """
+ Returns the ratio of test failures to the total number of test executions.
-def date_to_datestr(date_time):
- """Returns date string in the format of YYYY-MM-DD from a datetime."""
- return date_time.strftime("%Y-%m-%d")
+ If a test hasn't been run at all, then we still say it has a failure rate of 0% for
+ convenience when applying thresholds.
+ """
+ if self.num_pass == self.num_fail == 0:
+ return 0.0
+ return self.num_fail / (self.num_pass + self.num_fail)
-class ViewReport(object):
- """"Class to support any views into the HistoryReport."""
+ def period_start_date(self, start_date, period_size):
+ """
+ Returns a datetime.date() instance corresponding to the beginning of the time period
+ containing 'self.start_date'.
+ """
- DetailGroup = collections.namedtuple(
- "DetailGroup",
- "test task variant distro start_date end_date")
+ if not isinstance(start_date, datetime.date):
+ raise TypeError("'start_date' argument must be a date")
- Summary = collections.namedtuple(
- "Summary",
- "test task variant distro start_date end_date fail_rate num_fail num_pass")
+ if not isinstance(period_size, datetime.timedelta):
+ raise TypeError("'period_size' argument must a datetime.timedelta instance")
+ elif period_size.days <= 0:
+ raise ValueError("'period_size' argument must be a positive number of days")
+ elif period_size - datetime.timedelta(days=period_size.days) > datetime.timedelta():
+ raise ValueError("'period_size' argument must be an integral number of days")
- SummaryGroup = collections.namedtuple(
- "SummaryGroup",
- "test task variant distro start_date end_date")
+ # 'start_day_offset' is the number of days 'self.start_date' is from the start of the time
+ # period.
+ start_day_offset = (self.start_date - start_date).days % period_size.days
+ return self.start_date - datetime.timedelta(days=start_day_offset)
- _MIN_DATE = "{0:04}-01-01".format(datetime.MINYEAR)
- _MAX_DATE = "{}-12-31".format(datetime.MAXYEAR)
- _group_by = ["test", "task", "variant", "distro"]
- _start_days = ["first_day", "sunday", "monday"]
+ def week_start_date(self, start_day_of_week):
+ """
+ Returns a datetime.date() instance corresponding to the beginning of the week containing
+ 'self.start_date'. The first day of the week can be specified as the strings "Sunday" or
+ "Monday", as well as an arbitrary datetime.date() instance.
+ """
- def __init__(self,
- history_report=[],
- group_period=7,
- start_day_of_week="first_day"):
- self._report = history_report
-
- self.start_day_of_week = start_day_of_week
- # Using 'first_day' means the a group report will start on the day of the
- # week from the earliest date in the test history.
- if self.start_day_of_week not in self._start_days:
- raise ValueError(
- "Invalid start_day_of_week specified '{}'".format(self.start_day_of_week))
-
- # Set start and end dates of report and create the group_periods
- self.group_period = group_period
- if self._report:
- start_dts = [r.start_dt for r in self._report]
- self.start_dt = min(start_dts)
- self.end_dt = max(start_dts)
- self._group_periods = self._create_group_periods()
- else:
- self.start_dt = datestr_to_date(self._MIN_DATE)
- self.end_dt = datestr_to_date(self._MAX_DATE)
- self._group_periods = []
-
- self._summary_report = {}
- self._update_summary_report()
-
- # Create the lists of tests, tasks, variants & distros.
- self._all_tests = list(set([r.test for r in self._report]))
- self._all_tasks = list(set([r.task for r in self._report]))
- self._all_variants = list(set([r.variant for r in self._report]))
- self._all_distros = list(set([str(r.distro) for r in self._report]))
-
- def fail_rate(self, num_fail, num_pass):
- """Computes fails rate, return 0 if no tests have run."""
- if num_pass == num_fail == 0:
- return 0.0
- return num_fail / (num_pass + num_fail)
-
- def _group_dates(self, test_dt, from_end):
- """Returns start_date and end_date for the group_period, which are are included
- in the group_period."""
- # Computing the start and end dates for a period may have special cases for the
- # first and last periods, only if the self.group_period is 7, which represents weekly.
- # Since the first period may not start on the weekday for start_day_of_week
- # (if it's 'sunday' or 'monday'), that period may be less than the
- # period days. Similarly the last period will always end on end_dt.
- # Example, if the start_date falls on a Wednesday, then all group starting
- # dates are offset from that, if start_day_of_week is 'first_day'.
-
- # Use 'from_end=True' to produce group_dates for analyzing the report from the end.
-
- # The start date for a group_period is one of the following:
- # - start_dt (the earliest date in the report)
- # - The day specified in start_day_of_week
- # - An offset from start_dt, if start_day_of_week is 'first_day'
- # The ending date for a group_period is one of the following:
- # - end_dt (the latest date in the report)
- # - The mod of difference of weekday of test_dt and the start_weekday
-
- if test_dt < self.start_dt or test_dt > self.end_dt:
- raise ValueError("The test_dt {} must be >= {} and <= {}".format(
- test_dt, self.start_dt, self.end_dt))
-
- if self.group_period == 1:
- return (test_dt, test_dt)
-
- # Return group_dates relative to the end_dt. The start_day_of_week is not
- # used in computing the dates.
- if from_end:
- group_end_dt = min(
- self.end_dt,
- test_dt + datetime.timedelta(
- days=((self.end_dt - test_dt).days % self.group_period)))
- group_st_dt = max(
- self.start_dt,
- group_end_dt - datetime.timedelta(days=self.group_period - 1))
- return (group_st_dt, group_end_dt)
-
- # When the self.group_period is 7, we support a start_day_of_week.
- if self.group_period == 7:
- if self.start_day_of_week == "sunday":
+ if isinstance(start_day_of_week, _STRING_TYPES):
+ start_day_of_week = start_day_of_week.lower()
+ if start_day_of_week == "sunday":
start_weekday = 6
- elif self.start_day_of_week == "monday":
+ elif start_day_of_week == "monday":
start_weekday = 0
- elif self.start_day_of_week == "first_day":
- start_weekday = self.start_dt.weekday()
- # 'start_day_offset' is the number of days 'test_dt' is from the start of the week.
- start_day_offset = (test_dt.weekday() - start_weekday) % 7
- else:
- start_day_offset = (test_dt - self.start_dt).days % self.group_period
-
- group_start_dt = test_dt - datetime.timedelta(days=start_day_offset)
- group_end_dt = group_start_dt + datetime.timedelta(days=self.group_period - 1)
- return (max(group_start_dt, self.start_dt), min(group_end_dt, self.end_dt))
-
- def _select_attribute(self, value, attributes):
- """Returns true if attribute value list is None or a value matches from the list of
- attribute values."""
- return not attributes or value in attributes
-
- def _create_group_periods(self):
- """Discover all group_periods."""
- group_periods = set()
- test_dt = self.start_dt
- end_dt = self.end_dt
- while test_dt <= end_dt:
- # We will summarize for time periods from start-to-end and end-to-start.
- group_periods.add(self._group_dates(test_dt, False))
- group_periods.add(self._group_dates(test_dt, True))
- test_dt += datetime.timedelta(days=1)
- return group_periods
-
- def _update_summary_record(self, report_key, status_key):
- """Increments the self._summary_report report_key's status_key & fail_rate."""
- summary = self._summary_report.setdefault(
- report_key,
- {"num_fail": 0, "num_pass": 0, "fail_rate": 0.0})
- summary[status_key] += 1
- summary["fail_rate"] = self.fail_rate(summary["num_fail"], summary["num_pass"])
-
- def _update_summary_report(self):
- """Process self._report and updates the self._summary_report."""
-
- for record in self._report:
- if record.test_status == "pass":
- status_key = "num_pass"
- else:
- status_key = "num_fail"
- # Update each combination summary:
- # _total_, test, test/task, test/task/variant, test/task/variant/distro
- for combo in ["_total_", "test", "task", "variant", "distro"]:
- test = record.test if combo != "_total_" else _ALL_TEST
- task = record.task if combo in ["task", "variant", "distro"] else _ALL_TASK
- variant = record.variant if combo in ["variant", "distro"] else _ALL_VARIANT
- distro = record.distro if combo == "distro" else _ALL_DISTRO
- # Update the summary for matching group periods.
- for (group_start_dt, group_end_dt) in self._group_periods:
- if record.start_dt >= group_start_dt and record.start_dt <= group_end_dt:
- report_key = self.SummaryGroup(
- test=test,
- task=task,
- variant=variant,
- distro=distro,
- start_date=date_to_datestr(group_start_dt),
- end_date=date_to_datestr(group_end_dt))
- self._update_summary_record(report_key, status_key)
- # Update the summary for the entire date period.
- report_key = self.SummaryGroup(
- test=test,
- task=task,
- variant=variant,
- distro=distro,
- start_date=_ALL_DATE,
- end_date=_ALL_DATE)
- self._update_summary_record(report_key, status_key)
-
- def _filter_reports(self,
- start_date=_MIN_DATE,
- end_date=_MAX_DATE,
- tests=None,
- tasks=None,
- variants=None,
- distros=None):
- """Returns filter of self._report."""
- return [r for r in self._report
- if r.start_dt >= datestr_to_date(start_date) and
- r.start_dt <= datestr_to_date(end_date) and
- self._select_attribute(r.test, tests) and
- self._select_attribute(r.task, tasks) and
- self._select_attribute(r.variant, variants) and
- (r.distro is None or self._select_attribute(r.distro, distros))]
-
- def _detail_report(self, report):
- """Returns the detailed report, which is a dictionary in the form of key tuples,
- '(test, task, variant, distro, start_date, end_date)', with a value of
- {num_pass, num_fail}."""
- detail_report = {}
- for record in report:
- group_start_dt, group_end_dt = self._group_dates(record.start_dt, False)
- detail_group = self.DetailGroup(
- test=record.test,
- task=record.task,
- variant=record.variant,
- distro=record.distro,
- start_date=group_start_dt,
- end_date=group_end_dt)
- detail_report.setdefault(detail_group, {"num_pass": 0, "num_fail": 0})
- if record.test_status == "pass":
- status_key = "num_pass"
else:
- status_key = "num_fail"
- detail_report[detail_group][status_key] += 1
- return detail_report
-
- def last_period(self):
- """Returns start_date and end_date for the last period in the report."""
- start_dt = max(self.start_dt,
- self.end_dt - datetime.timedelta(days=self.group_period - 1))
- return date_to_datestr(start_dt), date_to_datestr(self.end_dt)
-
- def view_detail(self, tests=None, tasks=None, variants=None, distros=None):
- """Provides a detailed view of specified parameters.
- The parameters are used as a filter, so an unspecified parameter provides
- more results.
- Returns the view as a list of namedtuples:
- (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass)
+ raise ValueError(
+ "'start_day_of_week' can only be the string \"sunday\" or \"monday\"")
+ elif isinstance(start_day_of_week, datetime.date):
+ start_weekday = start_day_of_week.weekday()
+ else:
+ raise TypeError("'start_day_of_week' argument must be a string or a date")
+
+ # 'start_day_offset' is the number of days 'self.start_date' is from the start of the week.
+ start_day_offset = (self.start_date.weekday() - start_weekday) % 7
+ return self.start_date - datetime.timedelta(days=start_day_offset)
+
+ @classmethod
+ def sum(cls, entries):
+ """
+ Returns a single ReportEntry() instance corresponding to all test executions represented by
+ 'entries'.
+ """
+
+ test = set()
+ task = set()
+ variant = set()
+ distro = set()
+ start_date = cls._MAX_DATE
+ end_date = cls._MIN_DATE
+ num_pass = 0
+ num_fail = 0
+
+ for entry in entries:
+ test.add(entry.test)
+ task.add(entry.task)
+ variant.add(entry.variant)
+ distro.add(entry.distro)
+ start_date = min(entry.start_date, start_date)
+ end_date = max(entry.end_date, end_date)
+ num_pass += entry.num_pass
+ num_fail += entry.num_fail
+
+ test = next(iter(test)) if len(test) == 1 else ReportEntry._MULTIPLE_TESTS
+ task = next(iter(task)) if len(task) == 1 else ReportEntry._MULTIPLE_TASKS
+ variant = next(iter(variant)) if len(variant) == 1 else ReportEntry._MULTIPLE_VARIANTS
+ distro = next(iter(distro)) if len(distro) == 1 else ReportEntry._MULTIPLE_DISTROS
+
+ return ReportEntry(test=test,
+ task=task,
+ variant=variant,
+ distro=distro,
+ start_date=start_date,
+ end_date=end_date,
+ num_pass=num_pass,
+ num_fail=num_fail)
+
+
+class Report(object):
+ """
+ A class for generating summarizations about Evergreen test executions.
+ """
+
+ TEST = ("test",)
+ TEST_TASK = ("test", "task")
+ TEST_TASK_VARIANT = ("test", "task", "variant")
+ TEST_TASK_VARIANT_DISTRO = ("test", "task", "variant", "distro")
+
+ DAILY = "daily"
+ WEEKLY = "weekly"
+
+ SUNDAY = "sunday"
+ MONDAY = "monday"
+ FIRST_DAY = "first-day"
+
+ def __init__(self, entries):
+ """
+ Initializes the Report instance.
"""
- filter_results = self._filter_reports(
- tests=tests, tasks=tasks, variants=variants, distros=distros)
-
- view_report = []
- detail_report = self._detail_report(filter_results)
- for detail_group in detail_report:
- view_report.append(self.Summary(
- test=detail_group.test,
- task=detail_group.task,
- variant=detail_group.variant,
- distro=detail_group.distro,
- start_date=detail_group.start_date,
- end_date=detail_group.end_date,
- fail_rate=self.fail_rate(
- detail_report[detail_group]["num_fail"],
- detail_report[detail_group]["num_pass"]),
- num_fail=detail_report[detail_group]["num_fail"],
- num_pass=detail_report[detail_group]["num_pass"]))
- return view_report
-
- def view_summary(self,
- group_on=None,
- start_date=_ALL_DATE,
- end_date=_ALL_DATE):
- """Provides a summary view report, based on the group_on list. If group_on is empty, then
- a total summary report is provided. The start_date and end_date must match the
- group periods for a result to be returned.
- Returns the view as a list of namedtuples:
- (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass)
+ if not isinstance(entries, list):
+ # It is possible that 'entries' is a generator function, so we convert it to a list in
+ # order to be able to iterate it multiple times.
+ entries = list(entries)
+
+ self.start_date = min(entry.start_date for entry in entries)
+ self.end_date = max(entry.end_date for entry in entries)
+
+ self._entries = entries
+
+ @property
+ def raw_data(self):
+ """
+ Returns a copy of the list of ReportEntry instances underlying the report.
+ """
+
+ return self._entries[:]
+
+ def summarize_by(self, components, time_period=None, start_day_of_week=FIRST_DAY):
+ """
+ Returns a list of ReportEntry instances grouped by
+
+ 'components' if 'time_period' is None,
+
+ 'components' followed by Entry.start_date if 'time_period' is "daily",
+
+ 'components' followed by Entry.week_start_date(start_day_of_week) if 'time_period' is
+ "weekly". See Entry.week_start_date() for more details on the possible values for
+ 'start_day_of_week'.
+
+ 'components' followed by Entry.period_start_date(self.start_date, time_period) if
+ 'time_period' is a datetime.timedelta instance.
"""
- group_on = group_on if group_on is not None else []
-
- for group_name in group_on:
- if group_name not in self._group_by:
- raise ValueError("Invalid group '{}' specified, the supported groups are {}"
- .format(group_name, self._group_by))
-
- tests = self._all_tests if "test" in group_on else [_ALL_TEST]
- tasks = self._all_tasks if "task" in group_on else [_ALL_TASK]
- variants = self._all_variants if "variant" in group_on else [_ALL_VARIANT]
- distros = self._all_distros if "distro" in group_on else [_ALL_DISTRO]
-
- group_lists = [tests, tasks, variants, distros]
- group_combos = list(itertools.product(*group_lists))
- view_report = []
- for group in group_combos:
- test_filter = group[0] if group[0] else _ALL_TEST
- task_filter = group[1] if group[1] else _ALL_TASK
- variant_filter = group[2] if group[2] else _ALL_VARIANT
- distro_filter = group[3] if group[3] else _ALL_DISTRO
- report_key = self.SummaryGroup(
- test=test_filter,
- task=task_filter,
- variant=variant_filter,
- distro=distro_filter,
- start_date=start_date,
- end_date=end_date)
- if report_key in self._summary_report:
- view_report.append(self.Summary(
- test=test_filter if test_filter != _ALL_TEST else None,
- task=task_filter if task_filter != _ALL_TASK else None,
- variant=variant_filter if variant_filter != _ALL_VARIANT else None,
- distro=distro_filter if distro_filter != _ALL_DISTRO else None,
- start_date=start_date if start_date != _ALL_DATE else None,
- end_date=end_date if end_date != _ALL_DATE else None,
- fail_rate=self._summary_report[report_key]["fail_rate"],
- num_fail=self._summary_report[report_key]["num_fail"],
- num_pass=self._summary_report[report_key]["num_pass"]))
- return view_report
-
-
-class HistoryReport(object):
- """The HistoryReport class interacts with the Evergreen REST API to generate a history_report.
- The history_report is meant to be viewed from the ViewReport class methods."""
-
- HistoryReportTuple = collections.namedtuple(
- "Report", "test task variant distro start_dt test_status")
-
- # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate options are exposed.
- # period_types = ["date", "revision"]
- period_types = ["revision"]
+ if not isinstance(components, (list, tuple)):
+ raise TypeError("'components' argument must be a list or tuple")
+
+ for component in components:
+ if not isinstance(component, _STRING_TYPES):
+ raise TypeError("Each element of 'components' argument must be a string")
+ elif component not in ReportEntry._fields:
+ raise ValueError(
+ "Each element of 'components' argument must be one of {}".format(
+ ReportEntry._fields))
+
+ group_by = [operator.attrgetter(component) for component in components]
+
+ if start_day_of_week == self.FIRST_DAY:
+ start_day_of_week = self.start_date
+
+ period_size = None
+ if isinstance(time_period, _STRING_TYPES):
+ if time_period == self.DAILY:
+ group_by.append(operator.attrgetter("start_date"))
+ period_size = datetime.timedelta(days=1)
+ elif time_period == self.WEEKLY:
+ group_by.append(lambda entry: entry.week_start_date(start_day_of_week))
+ period_size = datetime.timedelta(days=7)
+ else:
+ raise ValueError(
+ "'time_period' argument can only be the string \"{}\" or \"{}\"".format(
+ self.DAILY, self.WEEKLY))
+ elif isinstance(time_period, datetime.timedelta):
+ group_by.append(lambda entry: entry.period_start_date(self.start_date, time_period))
+ period_size = time_period
+ elif time_period is not None:
+ raise TypeError(("'time_period' argument must be a string or a datetime.timedelta"
+ " instance"))
+
+ def key_func(entry):
+ """
+ Assigns a key for sorting and grouping ReportEntry instances based on the combination of
+ options summarize_by() was called with.
+ """
+
+ return [func(entry) for func in group_by]
+
+ sorted_entries = sorted(self._entries, key=key_func)
+ grouped_entries = itertools.groupby(sorted_entries, key=key_func)
+ summed_entries = [ReportEntry.sum(group) for (_key, group) in grouped_entries]
+
+ if period_size is not None and period_size.days > 1:
+ # Overwrite the 'start_date' and 'end_date' attributes so that they correspond to the
+ # beginning and end of the period, respectively. If the beginning or end of the week
+ # falls outside the range [self.start_date, self.end_date], then the new 'start_date'
+ # and 'end_date' attributes are clamped to that range.
+ for (i, summed_entry) in enumerate(summed_entries):
+ if time_period == self.WEEKLY:
+ period_start_date = summed_entry.week_start_date(start_day_of_week)
+ else:
+ period_start_date = summed_entry.period_start_date(self.start_date, period_size)
+
+ period_end_date = period_start_date + period_size - datetime.timedelta(days=1)
+ start_date = max(period_start_date, self.start_date)
+ end_date = min(period_end_date, self.end_date)
+ summed_entries[i] = summed_entry._replace(start_date=start_date, end_date=end_date)
+
+ return summed_entries
+
+
+class Missing(object):
+ """
+ A class for representing that the value associated with a particular component is unknown.
+ """
+
+ def __init__(self, kind):
+ self._kind = kind
+
+ def __eq__(self, other):
+ if not isinstance(other, Missing):
+ return NotImplemented
+
+ return self._kind == other._kind
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(self._kind)
+
+ def __str__(self):
+ return "<unknown {}>".format(self._kind)
+
+
+class TestHistory(object):
+ """
+ A class for interacting with the /test_history Evergreen API endpoint.
+ """
+
+ DEFAULT_API_SERVER = "http://evergreen-api.mongodb.com:8080"
+ DEFAULT_PROJECT = "mongodb-mongo-master"
+
+ DEFAULT_TEST_STATUSES = ("pass", "fail", "silentfail")
+ DEFAULT_TASK_STATUSES = ("success", "failed", "timeout", "sysfail")
+
+ # The Evergreen API requires specifying the "limit" parameter when not specifying a range of
+ # revisions.
+ DEFAULT_LIMIT = 20
+
+ _MISSING_DISTRO = Missing("distro")
def __init__(self,
- period_type,
- start,
- end,
- start_day_of_week="first_day",
- group_period=7,
- project=_PROJECT,
+ api_server=DEFAULT_API_SERVER,
+ project=DEFAULT_PROJECT,
tests=None,
tasks=None,
variants=None,
- distros=None,
- evg_cfg=None):
- # Initialize the report and object variables.
- self._report_tuples = []
- self._report = {"tests": {}}
- self.period_type = period_type.lower()
- if self.period_type not in self.period_types:
- raise ValueError(
- "Invalid time period type '{}' specified."
- " supported types are {}.".format(self.period_type, self.period_types))
- self.group_period = group_period
- self.start_day_of_week = start_day_of_week.lower()
-
- self.start = start
- self.end = end
-
- self.project = project
-
- if not tests and not tasks:
- raise ValueError("Must specify either tests or tasks.")
- self.tests = tests if tests is not None else []
- self.tasks = tasks if tasks is not None else []
- self.variants = variants if variants is not None else []
- self.distros = distros if distros is not None else []
-
- if evg_cfg is not None and "api_server_host" in evg_cfg:
- api_server = "{url.scheme}://{url.netloc}".format(
- url=urlparse.urlparse(evg_cfg["api_server_host"]))
- else:
- api_server = _API_SERVER_DEFAULT
- self.api_prefix = api_server + _REST_PREFIX
-
- def _all_tests(self):
- """Returns a list of all test file name types from self.tests.
- Since the test file names can be specifed as either Windows or Linux style,
- we will ensure that both are specified for each test.
- Add Windows style naming, backslashes and possibly .exe extension.
- Add Linux style naming, forward slashes and removes .exe extension."""
- tests_set = set(self.tests)
- for test in self.tests:
- if "/" in test:
- windows_test = test.replace("/", "\\")
- if not os.path.splitext(test)[1]:
- windows_test += ".exe"
- tests_set.add(windows_test)
- if "\\" in test:
- linux_test = test.replace("\\", "/")
- linux_test = linux_test.replace(".exe", "")
- tests_set.add(linux_test)
- return list(tests_set)
-
- def _history_request_params(self, test_statuses):
- """Returns a dictionary of params used in requests.get."""
- return {
- "distros": ",".join(self.distros),
- "sort": "latest",
- "tasks": ",".join(self.tasks),
- "tests": ",".join(self.tests),
- "taskStatuses": "failed,timeout,success,sysfail",
- "testStatuses": ",".join(test_statuses),
- "variants": ",".join(self.variants),
- }
-
- def _get_history_by_revision(self, test_statuses):
- """ Returns a list of history data for specified options."""
- after_revision = self.start
- before_revision = self.end
- params = self._history_request_params(test_statuses)
- params["beforeRevision"] = before_revision
- url = "{prefix}/projects/{project}/test_history".format(
- prefix=self.api_prefix,
- project=self.project)
-
- # Since the API limits the results, with each invocation being distinct, we can
- # simulate pagination, by requesting results using afterRevision.
+ distros=None):
+ """
+ Initializes the TestHistory instance with the list of tests, tasks, variants, and distros
+ specified.
+
+ The list of tests specified is augmented to ensure that failures on both POSIX and Windows
+ platforms are returned by the Evergreen API.
+ """
+
+ tests = tests if tests is not None else []
+ tests = [test for test_file in tests for test in self._denormalize_test_file(test_file)]
+
+ self._tests = tests
+ self._tasks = tasks if tasks is not None else []
+ self._variants = variants if variants is not None else []
+ self._distros = distros if distros is not None else []
+
+ self._test_history_url = "{api_server}/rest/v1/projects/{project}/test_history".format(
+ api_server=api_server,
+ project=project,
+ )
+
+ def get_history_by_revision(self,
+ start_revision,
+ end_revision,
+ test_statuses=DEFAULT_TEST_STATUSES,
+ task_statuses=DEFAULT_TASK_STATUSES):
+ """
+ Returns a list of ReportEntry instances corresponding to each individual test execution
+ between 'start_revision' and 'end_revision'.
+
+ Only tests with status 'test_statuses' are included in the result. Similarly, only tasks
+ with status 'task_statuses' are included in the result. By default, both passing and failing
+ test executions are returned.
+ """
+
+ params = self._history_request_params(test_statuses, task_statuses)
+ params["beforeRevision"] = end_revision
+
history_data = []
- while after_revision != before_revision:
- params["afterRevision"] = after_revision
- response = requests.get(url=url, params=params)
+
+ # Since the API limits the results, with each invocation being distinct, we can simulate
+ # pagination by making subsequent requests using "afterRevision".
+ while start_revision != end_revision:
+ params["afterRevision"] = start_revision
+ response = requests.get(url=self._test_history_url, params=params)
response.raise_for_status()
- if not response.json():
+
+ test_results = response.json()
+ if not test_results:
break
- # The first test will have the latest revision for this result set.
- after_revision = response.json()[0]["revision"]
- history_data.extend(response.json())
+ for test_result in test_results:
+ history_data.append(self._process_test_result(test_result))
+
+ # The first test will have the latest revision for this result set because
+ # TestHistory._history_request_params() sorts by "latest".
+ start_revision = test_results[0]["revision"]
return history_data
- def _get_history_by_date(self, test_statuses):
- """ Returns a list of history data for specified options."""
- # Note this functionality requires EVG-1653
- start_date = self.start
- end_date = self.end
- params = self._history_request_params(test_statuses)
- params["beforeDate"] = end_date + "T23:59:59Z"
- url = "{prefix}/projects/{project}/test_history".format(
- prefix=self.api_prefix,
- project=self.project)
-
- # Since the API limits the results, with each invocation being distinct, we can
- # simulate pagination, by requesting results using afterDate, being careful to
- # filter out possible duplicate entries.
- start_time = start_date + "T00:00:00Z"
- history_data = []
- history_data_set = set()
- last_sorted_tests = []
+ def get_history_by_date(self,
+ start_date,
+ end_date,
+ test_statuses=DEFAULT_TEST_STATUSES,
+ task_statuses=DEFAULT_TASK_STATUSES):
+ """
+ Returns a list of ReportEntry instances corresponding to each individual test execution
+ between 'start_date' and 'end_date'.
+
+ Only tests with status 'test_statuses' are included in the result. Similarly, only tasks
+ with status 'task_statuses' are included in the result. By default, both passing and failing
+ test executions are returned.
+ """
+
+ warnings.warn(
+ "Until https://jira.mongodb.org/browse/EVG-1653 is implemented, pagination using dates"
+ " isn't guaranteed to returned a complete result set. It is possible for the results"
+ " from an Evergreen task that started between the supplied start date and the"
+ " response's latest test start time to be omitted.", RuntimeWarning)
+
+ params = self._history_request_params(test_statuses, task_statuses)
+ params["beforeDate"] = "{:%Y-%m-%d}T23:59:59Z".format(end_date)
+ params["limit"] = self.DEFAULT_LIMIT
+
+ start_time = "{:%Y-%m-%d}T00:00:00Z".format(start_date)
+ history_data = set()
+
+ # Since the API limits the results, with each invocation being distinct, we can simulate
+ # pagination by making subsequent requests using "afterDate" and being careful to filter out
+ # duplicate test results.
while True:
params["afterDate"] = start_time
- response = requests.get(url=url, params=params)
+ response = requests.get(url=self._test_history_url, params=params)
response.raise_for_status()
- if not response.json():
- return history_data
- sorted_tests = sorted(response.json(), key=operator.itemgetter("start_time"))
+ test_results = response.json()
+ if not test_results:
+ break
+
+ original_size = len(history_data)
+ for test_result in test_results:
+ start_time = max(test_result["start_time"], start_time)
+ history_data.add(self._process_test_result(test_result))
- # To prevent an infinite loop, we need to bail out if the result set is the same
- # as the previous one.
- if sorted_tests == last_sorted_tests:
+ # To prevent an infinite loop, we need to bail out if test results returned by the
+ # request were identical to the ones we got back in an earlier request.
+ if original_size == len(history_data):
break
- last_sorted_tests = sorted_tests
+ return list(history_data)
- for test in sorted_tests:
- start_time = test["start_time"]
- # Create a unique hash for the test entry and check if it's been processed.
- test_hash = hash(str(sorted(test.items())))
- if test_hash not in history_data_set:
- history_data_set.add(test_hash)
- history_data.append(test)
+ def _process_test_result(self, test_result):
+ """
+ Returns a ReportEntry() tuple representing the 'test_result' dictionary.
+ """
- return history_data
+ def parse_date(date_str):
+ """
+ Returns a datetime.date() instance representing the specified yyyy-mm-dd date string.
+
+ Note that any time component of 'date_str', including the timezone, is ignored.
+ """
+
+ return datetime.datetime.strptime(date_str.split("T")[0], "%Y-%m-%d").date()
+
+ # For individual test executions, we intentionally use the "start_time" of the test as both
+ # its 'start_date' and 'end_date' to avoid complicating how the test history is potentially
+ # summarized by time. By the time the test has started, the Evergreen task has already been
+ # assigned to a particular machine and is using a specific set of binaries, so there's
+ # unlikely to be any significance to when the test actually finishes.
+ start_date = end_date = parse_date(test_result["start_time"])
+
+ return ReportEntry(
+ test=self._normalize_test_file(test_result["test_file"]),
+ task=test_result["task_name"],
+ variant=test_result["variant"],
+ distro=test_result.get("distro", self._MISSING_DISTRO),
+ start_date=start_date,
+ end_date=end_date,
+ num_pass=(1 if test_result["test_status"] == "pass" else 0),
+ num_fail=(1 if test_result["test_status"] not in ("pass", "skip") else 0))
@staticmethod
- def normalize_test_file(test_file):
- """Normalizes the test_file name:
- - Changes single backslash (\\) to forward slash (/)
- - Removes .exe extension
- Returns normalized string."""
- return test_file.replace("\\", "/").replace(".exe", "")
-
- def generate_report(self):
- """Creates detail for self._report from specified test history options.
- Returns a ViewReport object of self._report."""
-
- if self.period_type == "date":
- report_method = self._get_history_by_date
- else:
- report_method = self._get_history_by_revision
+ def _normalize_test_file(test_file):
+ """
+ If 'test_file' represents a Windows-style path, then it is converted to a POSIX-style path
+ with
- self.tests = self._all_tests()
+ - backslashes (\\) as the path separator replaced with forward slashes (/) and
+ - the ".exe" extension, if present, removed.
- rest_api_report = report_method(test_statuses=["fail", "pass"])
+ If 'test_file' already represents a POSIX-style path, then it is returned unmodified.
+ """
- for record in rest_api_report:
- # Save API record as namedtuple
- self._report_tuples.append(
- self.HistoryReportTuple(
- test=str(HistoryReport.normalize_test_file(record["test_file"])),
- task=str(record["task_name"]),
- variant=str(record["variant"]),
- distro=record.get("distro", _ALL_DISTRO),
- start_dt=datestr_to_date(record["start_time"]),
- test_status=record["test_status"]))
+ if "\\" in test_file:
+ posix_test_file = test_file.replace("\\", "/")
+ (test_file_root, test_file_ext) = os.path.splitext(posix_test_file)
+ if test_file_ext == ".exe":
+ return test_file_root
+ return posix_test_file
- return ViewReport(history_report=self._report_tuples,
- group_period=self.group_period,
- start_day_of_week=self.start_day_of_week)
+ return test_file
+ def _denormalize_test_file(self, test_file):
+ """
+ Returns a list containing 'test_file' as both a POSIX-style path and a Windows-style path.
-def main():
+ The conversion process may involve replacing forward slashes (/) as the path separator
+ with backslashes (\\), as well as adding a ".exe" extension if 'test_file' has no file
+ extension.
+ """
- parser = optparse.OptionParser(description=__doc__,
- usage="Usage: %prog [options] test1 test2 ...")
+ test_file = self._normalize_test_file(test_file)
- parser.add_option("--project", dest="project",
- default=_PROJECT,
- help="Evergreen project to analyze, defaults to '%default'.")
+ if "/" in test_file:
+ windows_test_file = test_file.replace("/", "\\")
+ if not os.path.splitext(test_file)[1]:
+ windows_test_file += ".exe"
+ return [test_file, windows_test_file]
- # TODO EVG-1653: Expose the --sinceDate and --untilDate command line arguments after pagination
- # is made possible using the /test_history Evergreen API endpoint.
- # parser.add_option("--sinceDate", dest="start_date",
- # metavar="YYYY-MM-DD",
- # default="{:%Y-%m-%d}".format(today - datetime.timedelta(days=6)),
- # help="History from this date, defaults to 1 week ago (%default).")
+ return [test_file]
- # parser.add_option("--untilDate", dest="end_date",
- # metavar="YYYY-MM-DD",
- # default="{:%Y-%m-%d}".format(today),
- # help="History up to, and including, this date, defaults to today (%default).")
+ def _history_request_params(self, test_statuses, task_statuses):
+ """
+ Returns the query parameters for /test_history GET request as a dictionary.
+ """
+
+ return {
+ "distros": ",".join(self._distros),
+ "sort": "latest",
+ "tasks": ",".join(self._tasks),
+ "tests": ",".join(self._tests),
+ "taskStatuses": ",".join(task_statuses),
+ "testStatuses": ",".join(test_statuses),
+ "variants": ",".join(self._variants),
+ }
+
+
+def main():
+ """
+ Utility computing test failure rates from the Evergreen API.
+ """
+
+ parser = optparse.OptionParser(description=main.__doc__,
+ usage="Usage: %prog [options] [test1 test2 ...]")
+
+ parser.add_option("--project", dest="project",
+ metavar="<project-name>",
+ default=TestHistory.DEFAULT_PROJECT,
+ help="The Evergreen project to analyze. Defaults to '%default'.")
+
+ today = datetime.datetime.utcnow().replace(microsecond=0, tzinfo=None)
+ parser.add_option("--sinceDate", dest="since_date",
+ metavar="<yyyy-mm-dd>",
+ default="{:%Y-%m-%d}".format(today - datetime.timedelta(days=6)),
+ help=("The starting period as a date in UTC to analyze the test history for,"
+ " including the specified date. Defaults to 1 week ago (%default)."))
+
+ parser.add_option("--untilDate", dest="until_date",
+ metavar="<yyyy-mm-dd>",
+ default="{:%Y-%m-%d}".format(today),
+ help=("The ending period as a date in UTC to analyze the test history for,"
+ " including the specified date. Defaults to today (%default)."))
parser.add_option("--sinceRevision", dest="since_revision",
+ metavar="<gitrevision>",
default=None,
- help="History after this revision."
- # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate
- # options are exposed.
- # "History after this revision, overrides --sinceDate & --untilDate."
- " Must be specified with --untilRevision")
+ help=("The starting period as a git revision to analyze the test history for,"
+ " excluding the specified commit. This option must be specified in"
+ " conjuction with --untilRevision and takes precedence over --sinceDate"
+ " and --untilDate."))
parser.add_option("--untilRevision", dest="until_revision",
+ metavar="<gitrevision>",
default=None,
- help="History up to, and including, this revision."
- # TODO EVG-1653: Uncomment this line once the --sinceDate and
- # --untilDate options are exposed.
- # "History up to, and including, this revision, overrides"
- # " --sinceDate & --untilDate."
- " Must be specified with --sinceRevision")
+ help=("The ending period as a git revision to analyze the test history for,"
+ " including the specified commit. This option must be specified in"
+ " conjuction with --sinceRevision and takes precedence over --sinceDate"
+ " and --untilDate."))
parser.add_option("--groupPeriod", dest="group_period",
- type="int",
- default=7,
- help="Set group period days, defaults to '%default'.")
+ metavar="[{}]".format("|".join([Report.DAILY, Report.WEEKLY, "<ndays>"])),
+ default=Report.WEEKLY,
+ help=("The time period over which to group test executions. Defaults to"
+ " '%default'."))
parser.add_option("--weekStartDay", dest="start_day_of_week",
- choices=["sunday", "monday", "first_day"],
- default="first_day",
- help="The group starting day of week, when --groupPeriod is not 1. "
- " Set to 'sunday', 'monday' or 'first_day'."
- " If 'first_day', the group will start on the first day of the"
- " starting date from the history result, defaults to '%default'.")
+ choices=(Report.SUNDAY, Report.MONDAY, Report.FIRST_DAY),
+ metavar="[{}]".format(
+ "|".join([Report.SUNDAY, Report.MONDAY, Report.FIRST_DAY])),
+ default=Report.FIRST_DAY,
+ help=("The day to use as the beginning of the week when grouping over time."
+ " This option is only relevant in conjuction with --groupPeriod={}. If"
+ " '{}' is specified, then the day of week of the earliest date is used"
+ " as the beginning of the week. Defaults to '%default'.".format(
+ Report.WEEKLY, Report.FIRST_DAY)))
parser.add_option("--tasks", dest="tasks",
+ metavar="<task1,task2,...>",
default="",
- help="Comma separated list of task display names to analyze.")
+ help="Comma-separated list of Evergreen task names to analyze.")
parser.add_option("--variants", dest="variants",
+ metavar="<variant1,variant2,...>",
default="",
- help="Comma separated list of build variants to analyze.")
+ help="Comma-separated list of Evergreen build variants to analyze.")
parser.add_option("--distros", dest="distros",
+ metavar="<distro1,distro2,...>",
default="",
- help="Comma separated list of build distros to analyze.")
+ help="Comma-separated list of Evergreen build distros to analyze.")
(options, tests) = parser.parse_args()
- # TODO EVG-1653: Uncomment these lines once the --sinceDate and --untilDate options are
- # exposed.
- # period_type = "date"
- # start = options.start_date
- # end = options.end_date
-
- if options.since_revision and options.until_revision:
- period_type = "revision"
- start = options.since_revision
- end = options.until_revision
- elif options.since_revision or options.until_revision:
- parser.print_help()
- parser.error("Must specify both --sinceRevision & --untilRevision")
- # TODO EVG-1653: Remove this else clause once the --sinceDate and --untilDate options are
- # exposed.
- else:
- parser.print_help()
- parser.error("Must specify both --sinceRevision & --untilRevision")
+ for (option_name, option_dest) in (("--sinceDate", "since_date"),
+ ("--untilDate", "until_date")):
+ option_value = getattr(options, option_dest)
+ try:
+ setattr(options,
+ option_dest,
+ datetime.datetime.strptime(option_value, "%Y-%m-%d").date())
+ except ValueError:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("{} must be specified in yyyy-mm-dd format, but got {}".format(
+ option_name, option_value))
+
+ if options.since_revision and not options.until_revision:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("Must specify --untilRevision in conjuction with --sinceRevision")
+ elif options.until_revision and not options.since_revision:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("Must specify --sinceRevision in conjuction with --untilRevision")
+
+ if options.group_period not in (Report.DAILY, Report.WEEKLY):
+ try:
+ options.group_period = datetime.timedelta(days=int(options.group_period))
+ except ValueError:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("--groupPeriod must be an integral number, but got {}".format(
+ options.group_period))
if not options.tasks and not tests:
- parser.print_help()
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
parser.error("Must specify either --tasks or at least one test")
- report = HistoryReport(period_type=period_type,
- start=start,
- end=end,
- group_period=options.group_period,
- start_day_of_week=options.start_day_of_week,
- project=options.project,
- tests=tests,
- tasks=options.tasks.split(","),
- variants=options.variants.split(","),
- distros=options.distros.split(","),
- evg_cfg=read_evg_config())
- view_report = report.generate_report()
- summ_report = view_report.view_summary(group_on=["test", "task", "variant"])
- for s in sorted(summ_report):
- print(s)
+ def read_evg_config():
+ """
+ Attempts to parse the user's or system's Evergreen configuration from its known locations.
+
+ Returns None if the configuration file wasn't found anywhere.
+ """
+
+ known_locations = [
+ "./.evergreen.yml",
+ os.path.expanduser("~/.evergreen.yml"),
+ os.path.expanduser("~/cli_bin/.evergreen.yml"),
+ ]
+
+ for filename in known_locations:
+ if os.path.isfile(filename):
+ with open(filename, "r") as fstream:
+ return yaml.safe_load(fstream)
+
+ return None
+
+ evg_config = read_evg_config()
+ evg_config = evg_config if evg_config is not None else {}
+ api_server = "{url.scheme}://{url.netloc}".format(
+ url=urlparse(evg_config.get("api_server_host", TestHistory.DEFAULT_API_SERVER)))
+
+ test_history = TestHistory(api_server=api_server,
+ project=options.project,
+ tests=tests,
+ tasks=options.tasks.split(","),
+ variants=options.variants.split(","),
+ distros=options.distros.split(","))
+
+ if options.since_revision:
+ history_data = test_history.get_history_by_revision(
+ start_revision=options.since_revision,
+ end_revision=options.until_revision)
+ elif options.since_date:
+ history_data = test_history.get_history_by_date(
+ start_date=options.since_date,
+ end_date=options.until_date)
+
+ report = Report(history_data)
+ summary = report.summarize_by(Report.TEST_TASK_VARIANT_DISTRO,
+ time_period=options.group_period,
+ start_day_of_week=options.start_day_of_week)
+
+ for entry in summary:
+ print("(test={e.test},"
+ " task={e.task},"
+ " variant={e.variant},"
+ " distro={e.distro},"
+ " start_date={e.start_date:%Y-%m-%d},"
+ " end_date={e.end_date:%Y-%m-%d},"
+ " num_pass={e.num_pass},"
+ " num_fail={e.num_fail},"
+ " fail_rate={e.fail_rate:0.2%})".format(e=entry))
+
if __name__ == "__main__":
main()
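The Wildcard handling in ReportEntry.sum() can be seen in a short, hedged sketch (not part of the commit; the test names, dates, and counts are made up). The new unit tests in buildscripts/tests/test_test_failures.py further below exercise the same behavior in detail.

    import datetime

    from buildscripts.test_failures import ReportEntry, Wildcard

    entry1 = ReportEntry(test="jstests/core/all.js", task="jsCore_WT", variant="linux-64",
                         distro="rhel62", start_date=datetime.date(2017, 6, 1),
                         end_date=datetime.date(2017, 6, 1), num_pass=9, num_fail=1)
    entry2 = entry1._replace(distro="rhel55", num_pass=10, num_fail=0)

    # The two entries only differ in their distro, so the combined entry keeps the
    # common test/task/variant values and uses a Wildcard for the distro.
    combined = ReportEntry.sum([entry1, entry2])
    assert combined.distro == Wildcard("distros")
    assert combined.num_pass == 19 and combined.num_fail == 1
    assert abs(combined.fail_rate - 0.05) < 1e-9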
diff --git a/buildscripts/tests/ciconfig/test_tags.py b/buildscripts/tests/ciconfig/test_tags.py
index 4678bcc1f34..8f9b3932f4a 100644
--- a/buildscripts/tests/ciconfig/test_tags.py
+++ b/buildscripts/tests/ciconfig/test_tags.py
@@ -15,12 +15,12 @@ class TestTagsConfig(unittest.TestCase):
"""Unit tests for the TagsConfig class."""
def setUp(self):
- self.conf = _tags.TagsConfig(TEST_FILE_PATH)
+ self.conf = _tags.TagsConfig.from_file(TEST_FILE_PATH)
def test_invalid_path(self):
invalid_path = "non_existing_file"
with self.assertRaises(IOError):
- _tags.TagsConfig(invalid_path)
+ _tags.TagsConfig.from_file(invalid_path)
def test_list_test_kinds(self):
test_kinds = self.conf.get_test_kinds()
@@ -151,7 +151,7 @@ class TestTagsConfig(unittest.TestCase):
def custom_cmp(tag_a, tag_b):
return cmp(tag_a.split("|"), tag_b.split("|"))
- conf = _tags.TagsConfig(TEST_FILE_PATH, cmp_func=custom_cmp)
+ conf = _tags.TagsConfig.from_file(TEST_FILE_PATH, cmp_func=custom_cmp)
tags = conf.get_tags(test_kind, test_pattern)
self.assertEqual(["tag1", "tag2", "tag3"], tags)
diff --git a/buildscripts/tests/test_test_failures.py b/buildscripts/tests/test_test_failures.py
new file mode 100644
index 00000000000..0a2c570897b
--- /dev/null
+++ b/buildscripts/tests/test_test_failures.py
@@ -0,0 +1,676 @@
+"""
+Tests for buildscripts/test_failures.py.
+"""
+
+from __future__ import absolute_import
+
+import datetime
+import unittest
+
+from buildscripts import test_failures
+
+
+class TestReportEntry(unittest.TestCase):
+ """
+ Tests for the test_failures.ReportEntry class.
+ """
+
+ ENTRY = test_failures.ReportEntry(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
+ def test_fail_rate(self):
+ """
+ Tests for the test_failures.ReportEntry.fail_rate property.
+ """
+
+ entry = self.ENTRY._replace(num_pass=0, num_fail=1)
+ self.assertEqual(1, entry.fail_rate)
+
+ entry = self.ENTRY._replace(num_pass=9, num_fail=1)
+ self.assertAlmostEqual(0.1, entry.fail_rate)
+
+ # Verify that we don't attempt to divide by zero.
+ entry = self.ENTRY._replace(num_pass=0, num_fail=0)
+ self.assertEqual(0, entry.fail_rate)
+
+ def test_week_start_date_with_sunday(self):
+ """
+ Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week
+ specified as different forms of the string "Sunday".
+ """
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3))
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("sunday"))
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("Sunday"))
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("SUNDAY"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4))
+ self.assertEqual(datetime.date(2017, 6, 4), entry.week_start_date("sunday"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 5))
+ self.assertEqual(datetime.date(2017, 6, 4), entry.week_start_date("sunday"))
+
+ def test_week_start_date_with_monday(self):
+ """
+ Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week
+ specified as different forms of the string "Monday".
+ """
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("monday"))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("Monday"))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("MONDAY"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("monday"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 5))
+ self.assertEqual(datetime.date(2017, 6, 5), entry.week_start_date("monday"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 6))
+ self.assertEqual(datetime.date(2017, 6, 5), entry.week_start_date("monday"))
+
+ def test_week_start_date_with_date(self):
+ """
+ Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week
+ specified as a datetime.date() value.
+ """
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3))
+
+ date = datetime.date(2017, 5, 21)
+ self.assertEqual(6, date.weekday(), "2017 May 21 is a Sunday")
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date(date))
+
+ date = datetime.date(2017, 5, 22)
+ self.assertEqual(0, date.weekday(), "2017 May 22 is a Monday")
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date(date))
+
+ date = datetime.date(2017, 6, 6)
+ self.assertEqual(1, date.weekday(), "2017 Jun 06 is a Tuesday")
+ self.assertEqual(datetime.date(2017, 5, 30), entry.week_start_date(date))
+
+ date = datetime.date(2017, 6, 9)
+ self.assertEqual(4, date.weekday(), "2017 Jun 09 is a Friday")
+ self.assertEqual(datetime.date(2017, 6, 2), entry.week_start_date(date))
+
+ date = datetime.date(2017, 6, 3)
+ self.assertEqual(5, date.weekday(), "2017 Jun 03 is a Saturday")
+ self.assertEqual(datetime.date(2017, 6, 3), entry.week_start_date(date))
+
+ def test_sum_combines_test_results(self):
+ """
+ Tests for test_failures.ReportEntry.sum() that verify the start_date, end_date, num_pass,
+ and num_fail attributes are accumulated correctly.
+ """
+
+ entry1 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 1),
+ end_date=datetime.date(2017, 6, 1),
+ num_pass=1,
+ num_fail=0)
+
+ entry2 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 2),
+ end_date=datetime.date(2017, 6, 2),
+ num_pass=0,
+ num_fail=3)
+
+ entry3 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
+ entry4 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4),
+ end_date=datetime.date(2017, 6, 4),
+ num_pass=2,
+ num_fail=2)
+
+ entry_1234 = test_failures.ReportEntry.sum([entry1, entry2, entry3, entry4])
+ entry_1432 = test_failures.ReportEntry.sum([entry1, entry4, entry3, entry2])
+ entry_124 = test_failures.ReportEntry.sum([entry1, entry2, entry4])
+ entry_13 = test_failures.ReportEntry.sum([entry1, entry3])
+ entry_42 = test_failures.ReportEntry.sum([entry4, entry2])
+
+ self.assertEqual(datetime.date(2017, 6, 1), entry_1234.start_date)
+ self.assertEqual(datetime.date(2017, 6, 4), entry_1234.end_date)
+ self.assertEqual(3, entry_1234.num_pass)
+ self.assertEqual(5, entry_1234.num_fail)
+
+ self.assertEqual(entry_1234, entry_1432, "order of arguments shouldn't matter")
+ self.assertEqual(entry_1234, entry_124, "entry3 didn't have any test executions")
+
+ self.assertEqual(datetime.date(2017, 6, 1), entry_13.start_date)
+ self.assertEqual(datetime.date(2017, 6, 3), entry_13.end_date)
+ self.assertEqual(1, entry_13.num_pass)
+ self.assertEqual(0, entry_13.num_fail)
+
+ self.assertEqual(datetime.date(2017, 6, 2), entry_42.start_date)
+ self.assertEqual(datetime.date(2017, 6, 4), entry_42.end_date)
+ self.assertEqual(2, entry_42.num_pass)
+ self.assertEqual(5, entry_42.num_fail)
+
+ def test_sum_combines_test_info(self):
+ """
+ Tests for test_failures.ReportEntry.sum() that verify the test, task, variant, and distro
+ attributes are accumulated correctly.
+ """
+
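+        # Fields on which the summed entries agree are kept as-is; fields on which they differ
+        # are collapsed into a test_failures.Wildcard placeholder.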
+ entry1 = self.ENTRY._replace(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62")
+
+ entry2 = self.ENTRY._replace(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel55")
+
+ entry3 = self.ENTRY._replace(test="jstests/core/all2.js",
+ task="jsCore_WT",
+ variant="linux-64-debug",
+ distro="rhel62")
+
+ entry4 = self.ENTRY._replace(test="jstests/core/all.js",
+ task="jsCore",
+ variant="linux-64-debug",
+ distro="rhel62")
+
+ entry_12 = test_failures.ReportEntry.sum([entry1, entry2])
+ self.assertEqual("jstests/core/all.js", entry_12.test)
+ self.assertEqual("jsCore_WT", entry_12.task)
+ self.assertEqual("linux-64", entry_12.variant)
+ self.assertIsInstance(entry_12.distro, test_failures.Wildcard)
+
+ entry_123 = test_failures.ReportEntry.sum([entry1, entry2, entry3])
+ self.assertIsInstance(entry_123.test, test_failures.Wildcard)
+ self.assertEqual("jsCore_WT", entry_123.task)
+ self.assertIsInstance(entry_123.variant, test_failures.Wildcard)
+ self.assertIsInstance(entry_123.distro, test_failures.Wildcard)
+
+ entry_1234 = test_failures.ReportEntry.sum([entry1, entry2, entry3, entry4])
+ self.assertIsInstance(entry_1234.test, test_failures.Wildcard)
+ self.assertIsInstance(entry_1234.task, test_failures.Wildcard)
+ self.assertIsInstance(entry_1234.variant, test_failures.Wildcard)
+ self.assertIsInstance(entry_1234.distro, test_failures.Wildcard)
+
+ entry_34 = test_failures.ReportEntry.sum([entry3, entry4])
+ self.assertIsInstance(entry_34.test, test_failures.Wildcard)
+ self.assertIsInstance(entry_34.task, test_failures.Wildcard)
+ self.assertEqual("linux-64-debug", entry_34.variant)
+ self.assertEqual("rhel62", entry_34.distro)
+
+
+class TestReportSummarization(unittest.TestCase):
+ """
+ Tests for test_failures.Report.summarize_by().
+ """
+
+ ENTRY = test_failures.ReportEntry(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
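+    # Daily results spanning 2017-06-03 through 2017-06-17 that vary the test, task, variant,
+    # and distro fields, so different groupings collapse different fields into Wildcard values.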
+ ENTRIES = [
+ ENTRY._replace(start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=1,
+ num_fail=0),
+ ENTRY._replace(task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1),
+ ENTRY._replace(start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0),
+ # The following entry is intentionally not in timestamp order to verify that the
+ # 'time_period' parameter becomes part of the sort in summarize_by().
+ ENTRY._replace(start_date=datetime.date(2017, 6, 9),
+ end_date=datetime.date(2017, 6, 9),
+ num_pass=1,
+ num_fail=0),
+ ENTRY._replace(distro="rhel55",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=0,
+ num_fail=1),
+ ENTRY._replace(test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0),
+ ENTRY._replace(variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1),
+ ]
+
+ def test_group_all_by_test_task_variant_distro(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of
+ (test, task, variant, distro).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT_DISTRO)
+ self.assertEqual(5, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro="rhel55",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=3,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[4], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_test_task_variant(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of
+ (test, task, variant).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT)
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=3,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_test_task(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of (test, task).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST_TASK)
+ self.assertEqual(3, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ variant=test_failures.Wildcard("variants"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=3,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of (test,).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST)
+ self.assertEqual(2, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ variant=test_failures.Wildcard("variants"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=3,
+ num_fail=3,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_variant_task(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of (variant, task).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(["variant", "task"])
+ self.assertEqual(3, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ test=test_failures.Wildcard("tests"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=4,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+
+ def test_group_weekly_by_test_starting_on_sunday(self):
+ """
+ Tests that summarize_by() correctly accumulates by week when the beginning of the week is
+ specified as the string "sunday".
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.WEEKLY,
+ start_day_of_week=test_failures.Report.SUNDAY)
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 4),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=2,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 11),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 4),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_weekly_by_test_starting_on_monday(self):
+ """
+ Tests that summarize_by() correctly accumulates by week when the beginning of the week is
+ specified as the string "monday".
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.WEEKLY,
+ start_day_of_week=test_failures.Report.MONDAY)
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 4),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=2,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 12),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_weekly_by_test_starting_on_date(self):
+ """
+ Tests that summarize_by() correctly accumulates by week when the beginning of the week is
+ specified as a datetime.date() value.
+ """
+
+ date = datetime.date(2017, 6, 7)
+ self.assertEqual(2, date.weekday(), "2017 Jun 07 is a Wednesday")
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.WEEKLY,
+ start_day_of_week=date)
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 6),
+ num_pass=1,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 13),
+ num_pass=2,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 14),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 13),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_daily_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates by day.
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.DAILY)
+
+ self.assertEqual(6, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 9),
+ end_date=datetime.date(2017, 6, 9),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[4], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[5], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_4days_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates by multiple days.
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=datetime.timedelta(days=4))
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 6),
+ num_pass=1,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=2,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 15),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_9days_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates by multiple days, including time periods
+ greater than 1 week.
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=datetime.timedelta(days=9))
+
+ self.assertEqual(3, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=3,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 12),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=1,
+ num_fail=0,
+ ))
diff --git a/buildscripts/tests/test_update_test_lifecycle.py b/buildscripts/tests/test_update_test_lifecycle.py
new file mode 100644
index 00000000000..145065c2c6c
--- /dev/null
+++ b/buildscripts/tests/test_update_test_lifecycle.py
@@ -0,0 +1,760 @@
+"""
+Tests for buildscripts/update_test_lifecycle.py.
+"""
+
+from __future__ import absolute_import
+
+import collections
+import copy
+import datetime
+import unittest
+
+from buildscripts import test_failures
+from buildscripts import update_test_lifecycle
+from buildscripts.ciconfig import tags as ci_tags
+
+
+class TestValidateConfig(unittest.TestCase):
+ """
+ Tests for the validate_config() function.
+ """
+
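+    # A minimal valid configuration; each test case below uses _replace() to perturb a single
+    # field and asserts that validate_config() rejects the result.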
+ CONFIG = update_test_lifecycle.Config(
+ test_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ task_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ variant_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ distro_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ reliable_min_runs=2,
+ reliable_time_period=datetime.timedelta(days=1),
+ unreliable_min_runs=2,
+ unreliable_time_period=datetime.timedelta(days=1))
+
+ def test_acceptable_test_fail_rate(self):
+ """
+ Tests the validation of the 'test_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_test_fail_rate(self):
+ """
+ Tests the validation of the 'test_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_test_fail_rates(self):
+ """
+ Tests the validation of the 'test_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_acceptable_task_fail_rate(self):
+ """
+        Tests the validation of the 'task_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_task_fail_rate(self):
+ """
+ Tests the validation of the 'task_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_task_fail_rates(self):
+ """
+ Tests the validation of the 'task_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_acceptable_variant_fail_rate(self):
+ """
+ Tests the validation of the 'variant_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(
+ acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_variant_fail_rate(self):
+ """
+ Tests the validation of the 'variant_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(
+ unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_variant_fail_rates(self):
+ """
+ Tests the validation of the 'variant_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_acceptable_distro_fail_rate(self):
+ """
+ Tests the validation of the 'distro_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_distro_fail_rate(self):
+ """
+ Tests the validation of the 'distro_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(
+ unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_distro_fail_rates(self):
+ """
+ Tests the validation of the 'distro_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_reliable_min_runs(self):
+ """
+ Tests the validation of the 'reliable_min_runs' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(reliable_min_runs="not a number")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_min_runs=-1)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_min_runs=0)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_min_runs=1.5)
+ update_test_lifecycle.validate_config(config)
+
+ def test_reliable_time_period(self):
+ """
+ Tests the validation of the 'reliable_time_period' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(reliable_time_period="not a datetime.timedelta")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=0))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=1, hours=1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unreliable_min_runs(self):
+ """
+ Tests the validation of the 'unreliable_min_runs' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(unreliable_min_runs="not a number")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_min_runs=-1)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_min_runs=0)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_min_runs=1.5)
+ update_test_lifecycle.validate_config(config)
+
+ def test_unreliable_time_period(self):
+ """
+ Tests the validation of the 'unreliable_time_period' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(unreliable_time_period="not a datetime.timedelta")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=0))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ unreliable_time_period=datetime.timedelta(days=1, hours=1))
+ update_test_lifecycle.validate_config(config)
+
+
+class TestUpdateTags(unittest.TestCase):
+ """
+ Tests for the update_tags() function.
+ """
+
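+    # Baseline model and report entry; the test cases below adjust thresholds and test results
+    # with _replace() to exercise the different tagging transitions.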
+ CONFIG = update_test_lifecycle.Config(
+ test_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ task_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ variant_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ distro_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ reliable_min_runs=2,
+ reliable_time_period=datetime.timedelta(days=1),
+ unreliable_min_runs=2,
+ unreliable_time_period=datetime.timedelta(days=1))
+
+ ENTRY = test_failures.ReportEntry(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
+ def assert_has_only_js_tests(self, lifecycle):
+ """
+ Raises an AssertionError exception if 'lifecycle' is not of the following form:
+
+ selector:
+ js_test:
+ ...
+ """
+
+ self.assertIn("selector", lifecycle.raw)
+ self.assertEqual(1, len(lifecycle.raw), msg=str(lifecycle.raw))
+ self.assertIn("js_test", lifecycle.raw["selector"])
+ self.assertEqual(1, len(lifecycle.raw["selector"]), msg=str(lifecycle.raw))
+
+ return lifecycle.raw["selector"]["js_test"]
+
+ def transition_from_reliable_to_unreliable(self, config, expected_tags):
+ """
+ Tests that update_tags() tags a formerly reliable combination as being unreliable.
+ """
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(collections.OrderedDict(), self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, expected_tags)
+
+ def test_transition_test_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test,) combination as being unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable"]),
+ ]))
+
+ def test_transition_task_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test, task) combination as being
+ unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT"]),
+ ]))
+
+ def test_transition_variant_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test, task, variant) combination as being
+ unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64"]),
+ ]))
+
+ def test_transition_distro_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test, task, variant, distro) combination
+ as being unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64|rhel62"]),
+ ]))
+
+ def test_transition_from_reliable_to_unreliable(self):
+ """
+        Tests that update_tags() tags multiple formerly reliable combinations as being unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def transition_from_unreliable_to_reliable(self, config, initial_tags):
+ """
+ Tests that update_tags() untags a formerly unreliable combination after it has become
+ reliable again.
+ """
+
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, collections.OrderedDict())
+
+ def test_transition_test_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test,) combination after it has
+ become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable"]),
+ ]))
+
+ def test_transition_task_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test, task) combination after it has
+ become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT"]),
+ ]))
+
+ def test_transition_variant_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test, task, variant) combination
+ after it has become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64"]),
+ ]))
+
+ def test_transition_distro_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test, task, variant, distro)
+ combination after it has become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64|rhel62"]),
+ ]))
+
+ def test_transition_from_unreliable_to_reliable(self):
+ """
+        Tests that update_tags() untags multiple formerly unreliable combinations after they have
+        become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def test_remain_reliable(self):
+ """
+ Tests that update_tags() preserves the absence of tags for reliable combinations.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, initial_tags)
+
+ def test_remain_unreliable(self):
+ """
+ Tests that update_tags() preserves the tags for unreliable combinations.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ initial_tags = collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ])
+
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, initial_tags)
+
+ def test_obeys_reliable_min_runs(self):
+ """
+        Tests that update_tags() considers a test reliable if it has fewer than
+        'reliable_min_runs' executions.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9),
+ reliable_min_runs=100)
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def test_obeys_reliable_time_period(self):
+ """
+ Tests that update_tags() ignores passes from before 'reliable_time_period'.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
+ num_pass=1,
+ num_fail=0),
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=2)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=2)),
+ num_pass=1,
+ num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def test_obeys_unreliable_min_runs(self):
+ """
+        Tests that update_tags() only considers a test unreliable if it has at least
+        'unreliable_min_runs' executions.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1),
+ unreliable_min_runs=100)
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, initial_tags)
+
+ def test_obeys_unreliable_time_period(self):
+ """
+ Tests that update_tags() ignores failures from before 'unreliable_time_period'.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ initial_tags = collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ])
+
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
+ num_pass=0,
+ num_fail=1),
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=2)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=2)),
+ num_pass=0,
+ num_fail=1),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, collections.OrderedDict())
diff --git a/buildscripts/update_test_lifecycle.py b/buildscripts/update_test_lifecycle.py
index 4b4325b255a..9699a5418d8 100755
--- a/buildscripts/update_test_lifecycle.py
+++ b/buildscripts/update_test_lifecycle.py
@@ -4,30 +4,73 @@
Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates.
"""
+
+from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
-import copy
+import datetime
import optparse
-import os
+import os.path
import subprocess
import sys
+import textwrap
+import warnings
# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
from buildscripts import resmokelib
-from buildscripts.ciconfig import evergreen
-from buildscripts.ciconfig import tags
from buildscripts import test_failures as tf
+from buildscripts.ciconfig import evergreen as ci_evergreen
+from buildscripts.ciconfig import tags as ci_tags
+
+
+if sys.version_info[0] == 2:
+ _NUMBER_TYPES = (int, long, float)
+else:
+ _NUMBER_TYPES = (int, float)
+
+
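+# A pair of failure-rate thresholds; one Rates value is used for each of the (test,),
+# (test, task), (test, task, variant), and (test, task, variant, distro) groupings.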
+Rates = collections.namedtuple("Rates", ["acceptable", "unacceptable"])
+
+
+Config = collections.namedtuple("Config", [
+ "test_fail_rates",
+ "task_fail_rates",
+ "variant_fail_rates",
+ "distro_fail_rates",
+ "reliable_min_runs",
+ "reliable_time_period",
+ "unreliable_min_runs",
+ "unreliable_time_period",
+])
+
+
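+# Default thresholds, minimum run counts, and time periods for the test lifecycle model.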
+DEFAULT_CONFIG = Config(
+ test_fail_rates=Rates(acceptable=0.1, unacceptable=0.3),
+ task_fail_rates=Rates(acceptable=0.1, unacceptable=0.3),
+ variant_fail_rates=Rates(acceptable=0.2, unacceptable=0.4),
+ distro_fail_rates=Rates(acceptable=0.2, unacceptable=0.4),
+ reliable_min_runs=5,
+ reliable_time_period=datetime.timedelta(weeks=1),
+ unreliable_min_runs=20,
+ unreliable_time_period=datetime.timedelta(weeks=4))
+
+
+DEFAULT_PROJECT = "mongodb-mongo-master"
def write_yaml_file(yaml_file, lifecycle):
"""Writes the lifecycle object to yaml_file."""
- comment = ("This file was generated by {} and shouldn't be edited by hand. It was"
- " generated against commit {} with the following invocation: {}.").format(
- sys.argv[0], callo(["git", "rev-parse", "HEAD"]), " ".join(sys.argv))
+
+ comment = (
+ "This file was generated by {} and shouldn't be edited by hand. It was generated against"
+ " commit {} with the following invocation: {}."
+ ).format(sys.argv[0], callo(["git", "rev-parse", "HEAD"]).rstrip(), " ".join(sys.argv))
+
lifecycle.write_file(yaml_file, comment)
@@ -116,7 +159,7 @@ def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run):
A test should be added to the set of tests believed not to run reliably when it has more
than min_run executions with a failure percentage greater than unacceptable_fr.
"""
- return test_runs >= min_run and test_fr > unacceptable_fr
+ return test_runs >= min_run and test_fr >= unacceptable_fr
def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
@@ -125,7 +168,7 @@ def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
     A test should then be removed from the set of tests believed not to run reliably when it has
less than min_run executions or has a failure percentage less than acceptable_fr.
"""
- return test_runs < min_run or test_fr < acceptable_fr
+ return test_runs < min_run or test_fr <= acceptable_fr
def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr):
@@ -141,16 +184,21 @@ def check_days(name, days):
raise ValueError("'{}' days must be greater than 0.".format(name))
-def unreliable_tag(test, task, variant, distro):
+def unreliable_tag(task, variant, distro):
"""Returns the unreliable tag."""
- if distro and variant and task and test:
- return "unreliable|{}|{}|{}".format(task, variant, distro)
- elif variant and task and test:
- return "unreliable|{}|{}".format(task, variant)
- elif task and test:
- return "unreliable|{}".format(task)
- elif test:
- return "unreliable"
+
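+    # The tag's specificity stops at the first component that is a Wildcard or Missing value,
+    # e.g. a Wildcard variant yields "unreliable|<task>".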
+ for (component_name, component_value) in (("task", task),
+ ("variant", variant),
+ ("distro", distro)):
+ if isinstance(component_value, (tf.Wildcard, tf.Missing)):
+ if component_name == "task":
+ return "unreliable"
+ elif component_name == "variant":
+ return "unreliable|{}".format(task)
+ elif component_name == "distro":
+ return "unreliable|{}|{}".format(task, variant)
+
+ return "unreliable|{}|{}|{}".format(task, variant, distro)
def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_run):
@@ -163,8 +211,7 @@ def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_ru
fail_rate,
summary.num_pass + summary.num_fail,
min_run):
- update_tag = unreliable_tag(
- summary.test, summary.task, summary.variant, summary.distro)
+ update_tag = unreliable_tag(summary.task, summary.variant, summary.distro)
if add_tags:
lifecycle.add_tag("js_test", summary.test, update_tag)
else:
@@ -175,101 +222,266 @@ def compare_tags(tag_a, tag_b):
return cmp(tag_a.split("|"), tag_b.split("|"))
+def validate_config(config):
+ """
+ Raises a TypeError or ValueError exception if 'config' isn't a valid model.
+ """
+
+ for (name, fail_rates) in (("test", config.test_fail_rates),
+ ("task", config.task_fail_rates),
+ ("variant", config.variant_fail_rates),
+ ("distro", config.distro_fail_rates)):
+ if not isinstance(fail_rates.acceptable, _NUMBER_TYPES):
+ raise TypeError("The acceptable {} failure rate must be a number, but got {}".format(
+ name, fail_rates.acceptable))
+ elif fail_rates.acceptable < 0 or fail_rates.acceptable > 1:
+ raise ValueError(("The acceptable {} failure rate must be between 0 and 1 (inclusive),"
+ " but got {}").format(name, fail_rates.acceptable))
+ elif not isinstance(fail_rates.unacceptable, _NUMBER_TYPES):
+ raise TypeError("The unacceptable {} failure rate must be a number, but got {}".format(
+ name, fail_rates.unacceptable))
+ elif fail_rates.unacceptable < 0 or fail_rates.unacceptable > 1:
+ raise ValueError(("The unacceptable {} failure rate must be between 0 and 1"
+ " (inclusive), but got {}").format(name, fail_rates.unacceptable))
+ elif fail_rates.acceptable > fail_rates.unacceptable:
+ raise ValueError(
+ ("The acceptable {0} failure rate ({1}) must be no larger than unacceptable {0}"
+ " failure rate ({2})").format(
+ name, fail_rates.acceptable, fail_rates.unacceptable))
+
+ for (name, min_runs) in (("reliable", config.reliable_min_runs),
+ ("unreliable", config.unreliable_min_runs)):
+ if not isinstance(min_runs, _NUMBER_TYPES):
+ raise TypeError(("The minimum number of runs for considering a test {} must be a"
+ " number, but got {}").format(name, min_runs))
+ elif min_runs <= 0:
+ raise ValueError(("The minimum number of runs for considering a test {} must be a"
+ " positive integer, but got {}").format(name, min_runs))
+ elif isinstance(min_runs, float) and not min_runs.is_integer():
+ raise ValueError(("The minimum number of runs for considering a test {} must be an"
+ " integer, but got {}").format(name, min_runs))
+
+ for (name, time_period) in (("reliable", config.reliable_time_period),
+ ("unreliable", config.unreliable_time_period)):
+ if not isinstance(time_period, datetime.timedelta):
+ raise TypeError(
+ "The {} time period must be a datetime.timedelta instance, but got {}".format(
+ name, time_period))
+ elif time_period.days <= 0:
+ raise ValueError(
+ "The {} time period must be a positive number of days, but got {}".format(
+ name, time_period))
+ elif time_period - datetime.timedelta(days=time_period.days) > datetime.timedelta():
+ raise ValueError(
+ "The {} time period must be an integral number of days, but got {}".format(
+ name, time_period))
+
+
+def update_tags(lifecycle, config, report):
+ """
+ Updates the tags in 'lifecycle' based on the historical test failures mentioned in 'report'
+ according to the model described by 'config'.
+ """
+
+    # We initialize 'grouped_entries' so that PyLint doesn't complain about it being used before
+    # assignment.
+ grouped_entries = None
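+    # The groupings are processed from most specific, (test, task, variant, distro), down to
+    # least specific, (test,), so each iteration can reuse the daily summaries computed by the
+    # previous one.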
+ for (i, (components, rates)) in enumerate(
+ ((tf.Report.TEST_TASK_VARIANT_DISTRO, config.distro_fail_rates),
+ (tf.Report.TEST_TASK_VARIANT, config.variant_fail_rates),
+ (tf.Report.TEST_TASK, config.task_fail_rates),
+ (tf.Report.TEST, config.test_fail_rates))):
+ if i > 0:
+ report = tf.Report(grouped_entries)
+
+        # We reassign 'grouped_entries' to take advantage of the fact that data grouped by
+        # (test, task, variant, distro) preserves enough information to be regrouped on any
+        # subset of those components.
+ grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY)
+
+ # Filter out any test executions from prior to 'config.unreliable_time_period'.
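+        # Adding one day makes the window inclusive of 'report.end_date' itself; a one-day time
+        # period keeps only the entries whose start_date equals the report's end date.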
+ unreliable_start_date = (report.end_date - config.unreliable_time_period
+ + datetime.timedelta(days=1))
+ unreliable_report = tf.Report(entry for entry in grouped_entries
+ if entry.start_date >= unreliable_start_date)
+ update_lifecycle(lifecycle,
+ unreliable_report.summarize_by(components),
+ unreliable_test,
+ True,
+ rates.unacceptable,
+ config.unreliable_min_runs)
+
+ # Filter out any test executions from prior to 'config.reliable_time_period'.
+ reliable_start_date = (report.end_date - config.reliable_time_period
+ + datetime.timedelta(days=1))
+ reliable_report = tf.Report(entry for entry in grouped_entries
+ if entry.start_date >= reliable_start_date)
+ update_lifecycle(lifecycle,
+ reliable_report.summarize_by(components),
+ reliable_test,
+ False,
+ rates.acceptable,
+ config.reliable_min_runs)
+
+
def main():
+ """
+ Utility for updating a resmoke.py tag file based on computing test failure rates from the
+ Evergreen API.
+ """
- required_options = ["project",
- "reliable_test_min_run",
- "unreliable_test_min_run",
- "test_fail_rates",
- ]
- parser = optparse.OptionParser(description=__doc__,
- usage="Usage: %prog [options] test1 test2 ...")
- parser.add_option("--project", dest="project",
- default=None,
- help="Evergreen project to analyze [REQUIRED].")
- parser.add_option("--reliableTestMinimumRun", dest="reliable_test_min_run",
- default=None,
- type="int",
- help="Minimum number of tests runs for test to be considered as reliable"
- " [REQUIRED].")
- parser.add_option("--unreliableTestMinimumRun", dest="unreliable_test_min_run",
- default=None,
- type="int",
- help="Minimum number of tests runs for test to be considered as unreliable"
- " [REQUIRED].")
- parser.add_option("--testFailRates", dest="test_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Test fail rates: acceptable fail rate and unacceptable fail rate"
- " Specify floating numbers between 0.0 and 1.0 [REQUIRED].")
- parser.add_option("--taskFailRates", dest="task_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Task fail rates: acceptable fail rate and unacceptable fail rate."
- " Specify floating numbers between 0.0 and 1.0."
- " Uses --test-fail-rates if unspecified.")
- parser.add_option("--variantFailRates", dest="variant_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Variant fail rates: acceptable fail rate and unacceptable fail rate."
- " Specify floating numbers between 0.0 and 1.0."
- " Uses --task-fail-rates if unspecified.")
- parser.add_option("--distroFailRates", dest="distro_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Distro fail rates: acceptable fail rate and unacceptable fail rate."
- " Specify floating numbers between 0.0 and 1.0."
- " Uses --variant-fail-rates if unspecified.")
- parser.add_option("--tasks", dest="tasks",
- default=None,
- help="Names of tasks to analyze for tagging unreliable tests."
- " If specified and no tests are specified, then only tests"
- " associated with the tasks will be analyzed."
- " If unspecified and no tests are specified, the list of tasks will be"
- " the non-excluded list of tasks from the file specified by"
- " '--evergreenYML'.")
- parser.add_option("--variants", dest="variants",
- default="",
- help="Names of variants to analyze for tagging unreliable tests.")
- parser.add_option("--distros", dest="distros",
- default="",
- help="Names of distros to analyze for tagging unreliable tests [UNUSED].")
- parser.add_option("--evergreenYML", dest="evergreen_yml",
- default="etc/evergreen.yml",
- help="Evergreen YML file used to get the list of tasks,"
- " defaults to '%default'.")
- parser.add_option("--lifecycleFile", dest="lifecycle_file",
+ parser = optparse.OptionParser(description=textwrap.dedent(main.__doc__),
+ usage="Usage: %prog [options] [test1 test2 ...]")
+
+ data_options = optparse.OptionGroup(
+ parser,
+ title="Data options",
+ description=("Options used to configure what historical test failure data to retrieve from"
+ " Evergreen."))
+ parser.add_option_group(data_options)
+
+ data_options.add_option(
+ "--project", dest="project",
+ metavar="<project-name>",
+ default=tf.TestHistory.DEFAULT_PROJECT,
+ help="The Evergreen project to analyze. Defaults to '%default'.")
+
+ data_options.add_option(
+ "--tasks", dest="tasks",
+ metavar="<task1,task2,...>",
+ help=("The Evergreen tasks to analyze for tagging unreliable tests. If specified in"
+              " addition to test positional arguments, then only tests that run under the"
+ " specified Evergreen tasks will be analyzed. If omitted, then the list of tasks"
+ " defaults to the non-excluded list of tasks from the specified"
+ " --evergreenProjectConfig file."))
+
+ data_options.add_option(
+ "--variants", dest="variants",
+ metavar="<variant1,variant2,...>",
+ default="",
+ help="The Evergreen build variants to analyze for tagging unreliable tests.")
+
+ data_options.add_option(
+ "--distros", dest="distros",
+ metavar="<distro1,distro2,...>",
+ default="",
+ help="The Evergreen distros to analyze for tagging unreliable tests.")
+
+ data_options.add_option(
+ "--evergreenProjectConfig", dest="evergreen_project_config",
+ metavar="<project-config-file>",
+ default="etc/evergreen.yml",
+ help=("The Evergreen project configuration file used to get the list of tasks if --tasks is"
+ " omitted. Defaults to '%default'."))
+
+ model_options = optparse.OptionGroup(
+ parser,
+ title="Model options",
+ description=("Options used to configure whether (test,), (test, task),"
+ " (test, task, variant), and (test, task, variant, distro) combinations are"
+ " considered unreliable."))
+ parser.add_option_group(model_options)
+
+ model_options.add_option(
+ "--reliableTestMinRuns", type="int", dest="reliable_test_min_runs",
+ metavar="<reliable-min-runs>",
+ default=DEFAULT_CONFIG.reliable_min_runs,
+        help=("The minimum number of test executions required before a test's failure rate is"
+              " used to determine whether the test is considered reliable. If a test has fewer"
+              " than <reliable-min-runs> executions, then it cannot be considered reliable."))
+
+ model_options.add_option(
+ "--unreliableTestMinRuns", type="int", dest="unreliable_test_min_runs",
+ metavar="<unreliable-min-runs>",
+ default=DEFAULT_CONFIG.unreliable_min_runs,
+        help=("The minimum number of test executions required before a test's failure rate is"
+              " used to determine whether the test is considered unreliable. If a test has fewer"
+              " than <unreliable-min-runs> executions, then it cannot be considered unreliable."))
+
+ model_options.add_option(
+ "--testFailRates", type="float", nargs=2, dest="test_fail_rates",
+ metavar="<test-acceptable-fail-rate> <test-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.test_fail_rates,
+ help=("Controls how readily a test is considered unreliable. Each failure rate must be a"
+ " number between 0 and 1 (inclusive) with"
+ " <test-unacceptable-fail-rate> >= <test-acceptable-fail-rate>. If a test fails no"
+ " more than <test-acceptable-fail-rate> in <reliable-days> time, then it is"
+ " considered reliable. Otherwise, if a test fails at least as much as"
+              " <test-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
+ " unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--taskFailRates", type="float", nargs=2, dest="task_fail_rates",
+ metavar="<task-acceptable-fail-rate> <task-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.task_fail_rates,
+ help=("Controls how readily a (test, task) combination is considered unreliable. Each"
+ " failure rate must be a number between 0 and 1 (inclusive) with"
+ " <task-unacceptable-fail-rate> >= <task-acceptable-fail-rate>. If a (test, task)"
+ " combination fails no more than <task-acceptable-fail-rate> in <reliable-days> time,"
+              " then it is considered reliable. Otherwise, if the combination fails at least"
+              " as much as <task-unacceptable-fail-rate> in <unreliable-days> time, then it is"
+              " considered unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--variantFailRates", type="float", nargs=2, dest="variant_fail_rates",
+ metavar="<variant-acceptable-fail-rate> <variant-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.variant_fail_rates,
+ help=("Controls how readily a (test, task, variant) combination is considered unreliable."
+ " Each failure rate must be a number between 0 and 1 (inclusive) with"
+ " <variant-unacceptable-fail-rate> >= <variant-acceptable-fail-rate>. If a"
+ " (test, task, variant) combination fails no more than <variant-acceptable-fail-rate>"
+              " in <reliable-days> time, then it is considered reliable. Otherwise, if the"
+              " combination fails at least as much as <variant-unacceptable-fail-rate> in"
+              " <unreliable-days> time, then it is considered unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--distroFailRates", type="float", nargs=2, dest="distro_fail_rates",
+ metavar="<distro-acceptable-fail-rate> <distro-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.distro_fail_rates,
+ help=("Controls how readily a (test, task, variant, distro) combination is considered"
+ " unreliable. Each failure rate must be a number between 0 and 1 (inclusive) with"
+ " <distro-unacceptable-fail-rate> >= <distro-acceptable-fail-rate>. If a"
+ " (test, task, variant, distro) combination fails no more than"
+ " <distro-acceptable-fail-rate> in <reliable-days> time, then it is considered"
+              " reliable. Otherwise, if the combination fails at least as much as"
+ " <distro-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
+ " unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--reliableDays", type="int", dest="reliable_days",
+ metavar="<ndays>",
+ default=DEFAULT_CONFIG.reliable_time_period.days,
+ help=("The time period to analyze when determining if a test has become reliable. Defaults"
+ " to %default day(s)."))
+
+ model_options.add_option(
+ "--unreliableDays", type="int", dest="unreliable_days",
+ metavar="<ndays>",
+ default=DEFAULT_CONFIG.unreliable_time_period.days,
+ help=("The time period to analyze when determining if a test has become unreliable."
+ " Defaults to %default day(s)."))
+
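Taken together, the model options describe a simple threshold rule: a combination is tagged as unreliable when, over the unreliable window, it has at least <unreliable-min-runs> executions and fails at a rate of at least the unacceptable fail rate; the tag is removed when, over the reliable window, it has at least <reliable-min-runs> executions and fails at no more than the acceptable fail rate. A rough sketch of that rule under assumed thresholds (0.1 acceptable, 0.3 unacceptable, 2 minimum runs; not necessarily the script's defaults, and the real decision is made by update_lifecycle() on the summaries computed in update_tags()):

    def is_unreliable(num_pass, num_fail, unacceptable_rate=0.3, min_runs=2):
        # Tag as unreliable only when there are enough executions in the unreliable window
        # and the observed failure rate is at least the unacceptable rate.
        num_runs = num_pass + num_fail
        return num_runs >= min_runs and float(num_fail) / num_runs >= unacceptable_rate

    def is_reliable(num_pass, num_fail, acceptable_rate=0.1, min_runs=2):
        # Remove the unreliable tag only when there are enough executions in the reliable window
        # and the observed failure rate is no more than the acceptable rate.
        num_runs = num_pass + num_fail
        return num_runs >= min_runs and float(num_fail) / num_runs <= acceptable_rate

    print(is_unreliable(num_pass=6, num_fail=4))   # True: 4/10 = 40% failures
    print(is_reliable(num_pass=19, num_fail=1))    # True: 1/20 = 5% failures
    print(is_unreliable(num_pass=0, num_fail=1))   # False: only 1 run, below min_runs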
+ parser.add_option("--resmokeTagFile", dest="tag_file",
+ metavar="<tagfile>",
default="etc/test_lifecycle.yml",
- help="Evergreen lifecycle file to update, defaults to '%default'.")
- parser.add_option("--reliableDays", dest="reliable_days",
- default=7,
- type="int",
- help="Number of days to check for reliable tests, defaults to '%default'.")
- parser.add_option("--unreliableDays", dest="unreliable_days",
- default=28,
- type="int",
- help="Number of days to check for unreliable tests, defaults to '%default'.")
- parser.add_option("--batchGroupSize", dest="batch_size",
+ help="The resmoke.py tag file to update. Defaults to '%default'.")
+
+ parser.add_option("--requestBatchSize", type="int", dest="batch_size",
+ metavar="<batch-size>",
default=100,
- type="int",
- help="Size of test batch group, defaults to '%default'.")
+ help=("The maximum number of tests to query the Evergreen API for in a single"
+ " request. A higher value for this option will reduce the number of"
+ " roundtrips between this client and Evergreen. Defaults to %default."))
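The --requestBatchSize option only caps how many tests go into a single Evergreen API request; the batching itself is not shown in this hunk. The idea amounts to plain chunking, sketched below with a hypothetical helper name:

    def create_batches(tests, batch_size):
        # Split the full list of tests into chunks of at most 'batch_size' elements so that each
        # chunk can be fetched from the Evergreen API in one request.
        return [tests[i:i + batch_size] for i in range(0, len(tests), batch_size)]

    batches = create_batches(["test%d.js" % i for i in range(250)], batch_size=100)
    print([len(batch) for batch in batches])  # [100, 100, 50]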
(options, tests) = parser.parse_args()
- for option in required_options:
- if not getattr(options, option):
- parser.print_help()
- parser.error("Missing required option")
+ if options.distros:
+ warnings.warn(
+ ("Until https://jira.mongodb.org/browse/EVG-1665 is implemented, distro information"
+ " isn't returned by the Evergreen API. This option will therefore be ignored."),
+ RuntimeWarning)
- evg_conf = evergreen.EvergreenProjectConfig(options.evergreen_yml)
+ evg_conf = ci_evergreen.EvergreenProjectConfig(options.evergreen_project_config)
use_test_tasks_membership = False
tasks = options.tasks.split(",") if options.tasks else []
@@ -282,25 +494,18 @@ def main():
distros = options.distros.split(",") if options.distros else []
- check_fail_rates("Test", options.test_fail_rates[0], options.test_fail_rates[1])
- # The less specific failures rates are optional and default to a lower level value.
- if not options.task_fail_rates:
- options.task_fail_rates = options.test_fail_rates
- else:
- check_fail_rates("Task", options.task_fail_rates[0], options.task_fail_rates[1])
- if not options.variant_fail_rates:
- options.variant_fail_rates = options.task_fail_rates
- else:
- check_fail_rates("Variant", options.variant_fail_rates[0], options.variant_fail_rates[1])
- if not options.distro_fail_rates:
- options.distro_fail_rates = options.variant_fail_rates
- else:
- check_fail_rates("Distro", options.distro_fail_rates[0], options.distro_fail_rates[1])
-
- check_days("Reliable days", options.reliable_days)
- check_days("Unreliable days", options.unreliable_days)
-
- lifecycle = tags.TagsConfig(options.lifecycle_file, cmp_func=compare_tags)
+ config = Config(
+ test_fail_rates=Rates(*options.test_fail_rates),
+ task_fail_rates=Rates(*options.task_fail_rates),
+ variant_fail_rates=Rates(*options.variant_fail_rates),
+ distro_fail_rates=Rates(*options.distro_fail_rates),
+ reliable_min_runs=options.reliable_test_min_runs,
+ reliable_time_period=datetime.timedelta(days=options.reliable_days),
+ unreliable_min_runs=options.unreliable_test_min_runs,
+ unreliable_time_period=datetime.timedelta(days=options.unreliable_days))
+ validate_config(config)
+
+ lifecycle = ci_tags.TagsConfig.from_file(options.tag_file, cmp_func=compare_tags)
test_tasks_membership = get_test_tasks_membership(evg_conf)
# If no tests are specified then the list of tests is generated from the list of tasks.
@@ -325,58 +530,24 @@ def main():
if not tasks:
print("Warning - No tasks found for tests {}, skipping this group.".format(tests))
continue
- report = tf.HistoryReport(period_type="revision",
- start=commit_prior,
- end=commit_last,
- group_period=options.reliable_days,
- project=options.project,
- tests=tests,
- tasks=tasks,
- variants=variants,
- distros=distros)
- view_report = report.generate_report()
-
- # We build up report_combo to check for more specific test failures rates.
- report_combo = []
- # TODO EVG-1665: Uncomment this line once this has been supported.
- # for combo in ["test", "task", "variant", "distro"]:
- for combo in ["test", "task", "variant"]:
- report_combo.append(combo)
- if combo == "distro":
- acceptable_fail_rate = options.distro_fail_rates[0]
- unacceptable_fail_rate = options.distro_fail_rates[1]
- elif combo == "variant":
- acceptable_fail_rate = options.variant_fail_rates[0]
- unacceptable_fail_rate = options.variant_fail_rates[1]
- elif combo == "task":
- acceptable_fail_rate = options.task_fail_rates[0]
- unacceptable_fail_rate = options.task_fail_rates[1]
- else:
- acceptable_fail_rate = options.test_fail_rates[0]
- unacceptable_fail_rate = options.test_fail_rates[1]
-
- # Unreliable tests are analyzed from the entire period.
- update_lifecycle(lifecycle,
- view_report.view_summary(group_on=report_combo),
- unreliable_test,
- True,
- unacceptable_fail_rate,
- options.unreliable_test_min_run)
-
- # Reliable tests are analyzed from the last period, i.e., last 14 days.
- (reliable_start_date, reliable_end_date) = view_report.last_period()
- update_lifecycle(lifecycle,
- view_report.view_summary(group_on=report_combo,
- start_date=reliable_start_date,
- end_date=reliable_end_date),
- reliable_test,
- False,
- acceptable_fail_rate,
- options.reliable_test_min_run)
-
- # Update the lifecycle_file only if there have been changes.
+
+ test_history = tf.TestHistory(project=options.project,
+ tests=tests,
+ tasks=tasks,
+ variants=variants,
+ distros=distros)
+
+ history_data = test_history.get_history_by_revision(start_revision=commit_prior,
+ end_revision=commit_last)
+
+ report = tf.Report(history_data)
+ update_tags(lifecycle, config, report)
+
+    # We write the 'lifecycle' tag configuration to the 'options.tag_file' file only if there
+    # have been changes to the tags. In particular, we avoid modifying the file when only the
+    # header comment for the YAML file would change.
if lifecycle.is_modified():
- write_yaml_file(options.lifecycle_file, lifecycle)
+ write_yaml_file(options.tag_file, lifecycle)
if __name__ == "__main__":
main()