diff options
author | Jonathan Abrahams <jonathan@mongodb.com> | 2017-06-15 09:45:01 -0400 |
---|---|---|
committer | Jonathan Abrahams <jonathan@mongodb.com> | 2017-06-15 09:45:01 -0400 |
commit | 75a15b5e7c195af9db19ae995867e63a8f89f861 (patch) | |
tree | 835ce51d108d9aaba2c4fb307462142c7ad3fe0d /buildscripts/test_failures.py | |
parent | caa31b2d9ecdc725fc5cf872a656a3bf31900e79 (diff) | |
download | mongo-75a15b5e7c195af9db19ae995867e63a8f89f861.tar.gz |
SERVER-28786 Add buildscripts/update_test_lifecycle.py script for updating etc/test_lifecycle.yml
Diffstat (limited to 'buildscripts/test_failures.py')
-rwxr-xr-x | buildscripts/test_failures.py | 463 |
1 files changed, 252 insertions, 211 deletions
diff --git a/buildscripts/test_failures.py b/buildscripts/test_failures.py index 44994884a36..72a9785911c 100755 --- a/buildscripts/test_failures.py +++ b/buildscripts/test_failures.py @@ -12,7 +12,7 @@ import collections import datetime import itertools import operator -from optparse import OptionParser +import optparse import os import urlparse @@ -22,11 +22,29 @@ import yaml _API_SERVER_DEFAULT = "http://evergreen-api.mongodb.com:8080" _REST_PREFIX = "/rest/v1" _PROJECT = "mongodb-mongo-master" -_MIN_DATE = "0001-01-01" -_MAX_DATE = "3000-12-31" -_HistoryReportTuple = collections.namedtuple( - "Report", "test task variant distro start_dt test_status") + +class _Missing(object): + """Class to support missing fields from the report.""" + def __init__(self, kind): + self._kind = kind + + def __eq__(self, other): + if not isinstance(other, _Missing): + return NotImplemented + return self._kind == other._kind + + def __ne__(self, other): + return not self == other + + def __str__(self): + return "<_Missing: {}>".format(self._kind) + +_ALL_TEST = _Missing("test") +_ALL_TASK = _Missing("task") +_ALL_VARIANT = _Missing("variant") +_ALL_DISTRO = _Missing("distro") +_ALL_DATE = _Missing("date") def read_evg_config(): @@ -54,113 +72,121 @@ def date_to_datestr(date_time): return date_time.strftime("%Y-%m-%d") -def list_or_none(lst): - """Returns a stringified list or 'None'.""" - return ",".join(map(str, lst)) if lst else "None" - - -def normalize_test_file(test_file): - """Normalizes the test_file name: - - Changes single backslash (\\) to forward slash (/) - - Removes .exe extension - Returns normalized string.""" - return test_file.replace("\\", "/").replace(".exe", "") - - -def fail_rate(num_fail, num_pass): - """Computes fails rate, return N/A if total is 0.""" - total = num_fail + num_pass - if total: - return "{:.3f}".format(round(num_fail / total, 3)) - return "N/A" - - -class Missing(object): - """Class to support missing fields from the history report.""" - def __init__(self, kind): - self.kind = kind - - def __str__(self): - return self.kind - - class ViewReport(object): """"Class to support any views into the HistoryReport.""" + DetailGroup = collections.namedtuple( + "DetailGroup", + "test task variant distro start_date end_date") + Summary = collections.namedtuple( "Summary", "test task variant distro start_date end_date fail_rate num_fail num_pass") - DetailGroup = collections.namedtuple( - "DetailGroup", + SummaryGroup = collections.namedtuple( + "SummaryGroup", "test task variant distro start_date end_date") - group_by = ["test", "task", "variant", "distro"] - group_period_values = ["daily", "weekly"] - start_days = ["first_day", "sunday", "monday"] + _MIN_DATE = "{0:04}-01-01".format(datetime.MINYEAR) + _MAX_DATE = "{}-12-31".format(datetime.MAXYEAR) + _group_by = ["test", "task", "variant", "distro"] + _start_days = ["first_day", "sunday", "monday"] - def __init__(self, history_report, group_period="weekly", start_day_of_week="first_day"): + def __init__(self, + history_report=[], + group_period=7, + start_day_of_week="first_day"): self._report = history_report - self.group_period = group_period.lower() - if self.group_period not in self.group_period_values: - raise ValueError( - "Invalid group_period specified '{}'".format(self.group_period)) - self.group_days = self._num_days_for_group() - self.start_day_of_week = start_day_of_week.lower() - # Using 'first_day' means the a weekly group report will start on the day of the + + self.start_day_of_week = start_day_of_week + # Using 'first_day' means the a group report will start on the day of the # week from the earliest date in the test history. - if self.start_day_of_week not in self.start_days: + if self.start_day_of_week not in self._start_days: raise ValueError( "Invalid start_day_of_week specified '{}'".format(self.start_day_of_week)) - if history_report: - start_dts = [r.start_dt for r in history_report] + + # Set start and end dates of report and create the group_periods + self.group_period = group_period + if self._report: + start_dts = [r.start_dt for r in self._report] self.start_dt = min(start_dts) self.end_dt = max(start_dts) + self._group_periods = self._create_group_periods() else: - self.start_dt = datestr_to_date(_MAX_DATE) - self.end_dt = datestr_to_date(_MIN_DATE) - - def _num_days_for_group(self): - """Returns the number of days defined in the self.group_period.""" - if self.group_period == "daily": - return 1 - return 7 - - def _group_dates(self, test_dt): + self.start_dt = datestr_to_date(self._MIN_DATE) + self.end_dt = datestr_to_date(self._MAX_DATE) + self._group_periods = [] + + self._summary_report = {} + self._update_summary_report() + + # Create the lists of tests, tasks, variants & distros. + self._all_tests = list(set([r.test for r in self._report])) + self._all_tasks = list(set([r.task for r in self._report])) + self._all_variants = list(set([r.variant for r in self._report])) + self._all_distros = list(set([str(r.distro) for r in self._report])) + + def fail_rate(self, num_fail, num_pass): + """Computes fails rate, return 0 if no tests have run.""" + if num_pass == num_fail == 0: + return 0.0 + return num_fail / (num_pass + num_fail) + + def _group_dates(self, test_dt, from_end): """Returns start_date and end_date for the group_period, which are are included in the group_period.""" - # Computing the start and end dates for a weekly period may have special cases for the - # first and last periods. Since the first period may not start on the weekday for - # self.start_day_of_week (if it's 'sunday' or 'monday'), that period may be less than 7 - # days. Similarly the last period will always end on self.end_dt. + # Computing the start and end dates for a period may have special cases for the + # first and last periods, only if the self.group_period is 7, which represents weekly. + # Since the first period may not start on the weekday for start_day_of_week + # (if it's 'sunday' or 'monday'), that period may be less than the + # period days. Similarly the last period will always end on end_dt. # Example, if the start_date falls on a Wednesday, then all group starting - # dates are offset from that, if self.start_day_of_week is 'first_day'. - - # The start date for a 'weekly' group_period is one of the following: - # - self.start_dt (the earliest date in the report) - # - The day specified in self.start_day_of_week - # - A weekly offset from self.start_dt, if self.start_day_of_week is 'first_day' - # The ending date for a 'weekly' group_period is one of the following: - # - self.end_dt (the latest date in the report) + # dates are offset from that, if start_day_of_week is 'first_day'. + + # Use 'from_end=True' to produce group_dates for analyzing the report from the end. + + # The start date for a group_period is one of the following: + # - start_dt (the earliest date in the report) + # - The day specified in start_day_of_week + # - An offset from start_dt, if start_day_of_week is 'first_day' + # The ending date for a group_period is one of the following: + # - end_dt (the latest date in the report) # - The mod of difference of weekday of test_dt and the start_weekday if test_dt < self.start_dt or test_dt > self.end_dt: raise ValueError("The test_dt {} must be >= {} and <= {}".format( test_dt, self.start_dt, self.end_dt)) - if self.group_period == "daily": + if self.group_period == 1: return (test_dt, test_dt) - if self.start_day_of_week == "sunday": - start_weekday = 6 - elif self.start_day_of_week == "monday": - start_weekday = 0 - elif self.start_day_of_week == "first_day": - start_weekday = self.start_dt.weekday() - # 'start_day_offset' is the number of days 'test_dt' is from the start of the week. - start_day_offset = (test_dt.weekday() - start_weekday) % 7 + # Return group_dates relative to the end_dt. The start_day_of_week is not + # used in computing the dates. + if from_end: + group_end_dt = min( + self.end_dt, + test_dt + datetime.timedelta( + days=((self.end_dt - test_dt).days % self.group_period))) + group_st_dt = max( + self.start_dt, + group_end_dt - datetime.timedelta(days=self.group_period - 1)) + return (group_st_dt, group_end_dt) + + # When the self.group_period is 7, we support a start_day_of_week. + if self.group_period == 7: + if self.start_day_of_week == "sunday": + start_weekday = 6 + elif self.start_day_of_week == "monday": + start_weekday = 0 + elif self.start_day_of_week == "first_day": + start_weekday = self.start_dt.weekday() + # 'start_day_offset' is the number of days 'test_dt' is from the start of the week. + start_day_offset = (test_dt.weekday() - start_weekday) % 7 + else: + start_day_offset = (test_dt - self.start_dt).days % self.group_period + group_start_dt = test_dt - datetime.timedelta(days=start_day_offset) - group_end_dt = group_start_dt + datetime.timedelta(days=6) + group_end_dt = group_start_dt + datetime.timedelta(days=self.group_period - 1) return (max(group_start_dt, self.start_dt), min(group_end_dt, self.end_dt)) def _select_attribute(self, value, attributes): @@ -168,6 +194,62 @@ class ViewReport(object): attribute values.""" return not attributes or value in attributes + def _create_group_periods(self): + """Discover all group_periods.""" + group_periods = set() + test_dt = self.start_dt + end_dt = self.end_dt + while test_dt <= end_dt: + # We will summarize for time periods from start-to-end and end-to-start. + group_periods.add(self._group_dates(test_dt, False)) + group_periods.add(self._group_dates(test_dt, True)) + test_dt += datetime.timedelta(days=1) + return group_periods + + def _update_summary_record(self, report_key, status_key): + """Increments the self._summary_report report_key's status_key & fail_rate.""" + summary = self._summary_report.setdefault( + report_key, + {"num_fail": 0, "num_pass": 0, "fail_rate": 0.0}) + summary[status_key] += 1 + summary["fail_rate"] = self.fail_rate(summary["num_fail"], summary["num_pass"]) + + def _update_summary_report(self): + """Process self._report and updates the self._summary_report.""" + + for record in self._report: + if record.test_status == "pass": + status_key = "num_pass" + else: + status_key = "num_fail" + # Update each combination summary: + # _total_, test, test/task, test/task/variant, test/task/variant/distro + for combo in ["_total_", "test", "task", "variant", "distro"]: + test = record.test if combo != "_total_" else _ALL_TEST + task = record.task if combo in ["task", "variant", "distro"] else _ALL_TASK + variant = record.variant if combo in ["variant", "distro"] else _ALL_VARIANT + distro = record.distro if combo == "distro" else _ALL_DISTRO + # Update the summary for matching group periods. + for (group_start_dt, group_end_dt) in self._group_periods: + if record.start_dt >= group_start_dt and record.start_dt <= group_end_dt: + report_key = self.SummaryGroup( + test=test, + task=task, + variant=variant, + distro=distro, + start_date=date_to_datestr(group_start_dt), + end_date=date_to_datestr(group_end_dt)) + self._update_summary_record(report_key, status_key) + # Update the summary for the entire date period. + report_key = self.SummaryGroup( + test=test, + task=task, + variant=variant, + distro=distro, + start_date=_ALL_DATE, + end_date=_ALL_DATE) + self._update_summary_record(report_key, status_key) + def _filter_reports(self, start_date=_MIN_DATE, end_date=_MAX_DATE, @@ -190,7 +272,7 @@ class ViewReport(object): {num_pass, num_fail}.""" detail_report = {} for record in report: - group_start_dt, group_end_dt = self._group_dates(record.start_dt) + group_start_dt, group_end_dt = self._group_dates(record.start_dt, False) detail_group = self.DetailGroup( test=record.test, task=record.task, @@ -206,25 +288,11 @@ class ViewReport(object): detail_report[detail_group][status_key] += 1 return detail_report - def _summary_report(self, report, tests=None, tasks=None, variants=None, distros=None): - """Returns the summary report for the specifed combinations of paramters. The format - is a nametuple, with {num_pass, num_fail} based on the _detailed_report.""" - summary_report = {} - if not report: - return summary_report - start_dt = min([r.start_dt for r in report]) - end_dt = max([r.start_dt for r in report]) - num_pass = sum([r.test_status == "pass" for r in report]) - num_fail = sum([r.test_status != "pass" for r in report]) - detail_group = self.DetailGroup( - test=list_or_none(tests), - task=list_or_none(tasks), - variant=list_or_none(variants), - distro=list_or_none(distros), - start_date=start_dt, - end_date=end_dt) - summary_report[detail_group] = {"num_pass": num_pass, "num_fail": num_fail} - return summary_report + def last_period(self): + """Returns start_date and end_date for the last period in the report.""" + start_dt = max(self.start_dt, + self.end_dt - datetime.timedelta(days=self.group_period - 1)) + return date_to_datestr(start_dt), date_to_datestr(self.end_dt) def view_detail(self, tests=None, tasks=None, variants=None, distros=None): """Provides a detailed view of specified parameters. @@ -240,104 +308,78 @@ class ViewReport(object): view_report = [] detail_report = self._detail_report(filter_results) for detail_group in detail_report: - view_report.append(self.Summary(test=detail_group.test, - task=detail_group.task, - variant=detail_group.variant, - distro=detail_group.distro, - start_date=detail_group.start_date, - end_date=detail_group.end_date, - fail_rate=fail_rate( - detail_report[detail_group]["num_fail"], - detail_report[detail_group]["num_pass"]), - num_fail=detail_report[detail_group]["num_fail"], - num_pass=detail_report[detail_group]["num_pass"])) - return sorted(view_report) - - def view_summary_groups(self, group_on=None): - """Provides a summary view report, based on the group_on list, for each self.group_period. - If group_on is empty, then a total summary report is provided. - Returns the view as a sorted list of namedtuples: - (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass) - """ - - group_on = group_on if group_on is not None else [] - - # Discover all group_period date ranges - group_periods = set() - dt = self.start_dt - while dt <= self.end_dt: - group_periods.add(self._group_dates(dt)) - dt += datetime.timedelta(days=1) - - view_report = [] - for (start_dt, end_dt) in group_periods: - view_report.extend(self.view_summary(group_on, - start_date=date_to_datestr(start_dt), - end_date=date_to_datestr(end_dt))) - return sorted(view_report) - - def view_summary(self, group_on=None, start_date=_MIN_DATE, end_date=_MAX_DATE): + view_report.append(self.Summary( + test=detail_group.test, + task=detail_group.task, + variant=detail_group.variant, + distro=detail_group.distro, + start_date=detail_group.start_date, + end_date=detail_group.end_date, + fail_rate=self.fail_rate( + detail_report[detail_group]["num_fail"], + detail_report[detail_group]["num_pass"]), + num_fail=detail_report[detail_group]["num_fail"], + num_pass=detail_report[detail_group]["num_pass"])) + return view_report + + def view_summary(self, + group_on=None, + start_date=_ALL_DATE, + end_date=_ALL_DATE): """Provides a summary view report, based on the group_on list. If group_on is empty, then - a total summary report is provided. - Returns the view as a sorted list of namedtuples: + a total summary report is provided. The start_date and end_date must match the + group periods for a result to be returned. + Returns the view as a list of namedtuples: (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass) """ group_on = group_on if group_on is not None else [] for group_name in group_on: - if group_name not in self.group_by: + if group_name not in self._group_by: raise ValueError("Invalid group '{}' specified, the supported groups are {}" - .format(group_name, self.group_by)) + .format(group_name, self._group_by)) - tests = list(set([r.test for r in self._report])) \ - if "test" in group_on else [Missing("__all_tests")] - tasks = list(set([r.task for r in self._report])) \ - if "task" in group_on else [Missing("__all_tasks")] - variants = list(set([r.variant for r in self._report])) \ - if "variant" in group_on else [Missing("__all_variants")] - distros = list(set([str(r.distro) for r in self._report])) \ - if "distro" in group_on else [Missing("__all_distros")] + tests = self._all_tests if "test" in group_on else [_ALL_TEST] + tasks = self._all_tasks if "task" in group_on else [_ALL_TASK] + variants = self._all_variants if "variant" in group_on else [_ALL_VARIANT] + distros = self._all_distros if "distro" in group_on else [_ALL_DISTRO] group_lists = [tests, tasks, variants, distros] group_combos = list(itertools.product(*group_lists)) view_report = [] for group in group_combos: - test_filter = [group[0]] if group[0] and not isinstance(group[0], Missing) else None - task_filter = [group[1]] if group[1] and not isinstance(group[1], Missing) else None - variant_filter = [group[2]] if group[2] and not isinstance(group[2], Missing) else None - distro_filter = [group[3]] if group[3] and not isinstance(group[3], Missing) else None - filter_results = self._filter_reports(start_date=start_date, - end_date=end_date, - tests=test_filter, - tasks=task_filter, - variants=variant_filter, - distros=distro_filter) - summary_report = self._summary_report(filter_results, - tests=test_filter, - tasks=task_filter, - variants=variant_filter, - distros=distro_filter) - for summary in summary_report: - view_report.append(self.Summary(test=summary.test, - task=summary.task, - variant=summary.variant, - distro=summary.distro, - start_date=summary.start_date, - end_date=summary.end_date, - fail_rate=fail_rate( - summary_report[summary]["num_fail"], - summary_report[summary]["num_pass"]), - num_fail=summary_report[summary]["num_fail"], - num_pass=summary_report[summary]["num_pass"])) - return sorted(view_report) + test_filter = group[0] if group[0] else _ALL_TEST + task_filter = group[1] if group[1] else _ALL_TASK + variant_filter = group[2] if group[2] else _ALL_VARIANT + distro_filter = group[3] if group[3] else _ALL_DISTRO + report_key = self.SummaryGroup( + test=test_filter, + task=task_filter, + variant=variant_filter, + distro=distro_filter, + start_date=start_date, + end_date=end_date) + if report_key in self._summary_report: + view_report.append(self.Summary( + test=test_filter if test_filter != _ALL_TEST else None, + task=task_filter if task_filter != _ALL_TASK else None, + variant=variant_filter if variant_filter != _ALL_VARIANT else None, + distro=distro_filter if distro_filter != _ALL_DISTRO else None, + start_date=start_date if start_date != _ALL_DATE else None, + end_date=end_date if end_date != _ALL_DATE else None, + fail_rate=self._summary_report[report_key]["fail_rate"], + num_fail=self._summary_report[report_key]["num_fail"], + num_pass=self._summary_report[report_key]["num_pass"])) + return view_report class HistoryReport(object): """The HistoryReport class interacts with the Evergreen REST API to generate a history_report. The history_report is meant to be viewed from the ViewReport class methods.""" - group_period_values = ["daily", "weekly"] + HistoryReportTuple = collections.namedtuple( + "Report", "test task variant distro start_dt test_status") # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate options are exposed. # period_types = ["date", "revision"] @@ -348,7 +390,7 @@ class HistoryReport(object): start, end, start_day_of_week="first_day", - group_period="weekly", + group_period=7, project=_PROJECT, tests=None, tasks=None, @@ -357,19 +399,13 @@ class HistoryReport(object): evg_cfg=None): # Initialize the report and object variables. self._report_tuples = [] - self._start_date = _MAX_DATE - self._end_date = _MIN_DATE self._report = {"tests": {}} self.period_type = period_type.lower() if self.period_type not in self.period_types: raise ValueError( "Invalid time period type '{}' specified." " supported types are {}.".format(self.period_type, self.period_types)) - self.group_period = group_period.lower() - if self.group_period not in self.group_period_values: - raise ValueError( - "Invalid group_period '{}' specified," - " supported periods are {}.".format(self.group_period, self.group_period_values)) + self.group_period = group_period self.start_day_of_week = start_day_of_week.lower() self.start = start @@ -492,6 +528,14 @@ class HistoryReport(object): return history_data + @staticmethod + def normalize_test_file(test_file): + """Normalizes the test_file name: + - Changes single backslash (\\) to forward slash (/) + - Removes .exe extension + Returns normalized string.""" + return test_file.replace("\\", "/").replace(".exe", "") + def generate_report(self): """Creates detail for self._report from specified test history options. Returns a ViewReport object of self._report.""" @@ -505,30 +549,26 @@ class HistoryReport(object): rest_api_report = report_method(test_statuses=["fail", "pass"]) - missing_distro = Missing("no_distro") for record in rest_api_report: - self._start_date = min(self._start_date, record["start_time"]) - self._end_date = max(self._end_date, record["start_time"]) # Save API record as namedtuple self._report_tuples.append( - _HistoryReportTuple( - test=normalize_test_file(record["test_file"]), - task=record["task_name"], - variant=record["variant"], - distro=record.get("distro", missing_distro), + self.HistoryReportTuple( + test=str(HistoryReport.normalize_test_file(record["test_file"])), + task=str(record["task_name"]), + variant=str(record["variant"]), + distro=record.get("distro", _ALL_DISTRO), start_dt=datestr_to_date(record["start_time"]), test_status=record["test_status"])) - sorted_report = sorted(self._report_tuples, key=operator.attrgetter("start_dt")) - - return ViewReport(sorted_report, + return ViewReport(history_report=self._report_tuples, group_period=self.group_period, start_day_of_week=self.start_day_of_week) def main(): - parser = OptionParser(description=__doc__, usage="Usage: %prog [options] test1 test2 ...") + parser = optparse.OptionParser(description=__doc__, + usage="Usage: %prog [options] test1 test2 ...") parser.add_option("--project", dest="project", default=_PROJECT, @@ -564,16 +604,16 @@ def main(): " Must be specified with --sinceRevision") parser.add_option("--groupPeriod", dest="group_period", - choices=["daily", "weekly"], - default="weekly", - help="Set to 'daily' or 'weekly', defaults to '%default'.") + type="int", + default=7, + help="Set group period days, defaults to '%default'.") parser.add_option("--weekStartDay", dest="start_day_of_week", choices=["sunday", "monday", "first_day"], default="first_day", - help="The group starting day of week, when --groupPeriod is set to'weekly'. " + help="The group starting day of week, when --groupPeriod is not 1. " " Set to 'sunday', 'monday' or 'first_day'." - " If 'first_day', the weekly group will start on the first day of the" + " If 'first_day', the group will start on the first day of the" " starting date from the history result, defaults to '%default'.") parser.add_option("--tasks", dest="tasks", @@ -588,7 +628,7 @@ def main(): default="", help="Comma separated list of build distros to analyze.") - (options, args) = parser.parse_args() + (options, tests) = parser.parse_args() # TODO EVG-1653: Uncomment these lines once the --sinceDate and --untilDate options are # exposed. @@ -609,7 +649,7 @@ def main(): parser.print_help() parser.error("Must specify both --sinceRevision & --untilRevision") - if not options.tasks and not args: + if not options.tasks and not tests: parser.print_help() parser.error("Must specify either --tasks or at least one test") @@ -619,14 +659,15 @@ def main(): group_period=options.group_period, start_day_of_week=options.start_day_of_week, project=options.project, - tests=args, + tests=tests, tasks=options.tasks.split(","), variants=options.variants.split(","), distros=options.distros.split(","), evg_cfg=read_evg_config()) view_report = report.generate_report() - for record in view_report.view_detail(): - print(record) + summ_report = view_report.view_summary(group_on=["test", "task", "variant"]) + for s in sorted(summ_report): + print(s) if __name__ == "__main__": main() |