author     Max Hirschhorn <max.hirschhorn@mongodb.com>  2017-07-17 11:09:34 -0400
committer  Max Hirschhorn <max.hirschhorn@mongodb.com>  2017-07-17 11:09:34 -0400
commit     58a3909a3f678dec7bd94bfb38f96756c970113e (patch)
tree       96675ca63ab47a93aca816909805264e6667ea7b /buildscripts
parent     27cf9fd7b31f043af913da135385367126f5691b (diff)
download   mongo-58a3909a3f678dec7bd94bfb38f96756c970113e.tar.gz
SERVER-29642 SERVER-29643 Add Python tests for test lifecycle scripts.
For test_failures.py:

* Replaces HistoryReport with a TestHistory class that has
  get_history_by_revision() and get_history_by_date() methods. They both
  return a list of ReportEntry tuples that can be used to construct a Report
  instance.
* Adds Python unit test cases for the Report and ReportEntry classes.
* Creates the Wildcard class as a separate concept from the Missing class.
* Enables --sinceDate and --untilDate with a warning that the script may not
  return a complete result set.
* Adds support for running the script with Python 3.

For update_test_lifecycle.py:

* Introduces a Config namedtuple to represent the test lifecycle model.
* Adds Python unit test cases for the update_tags() function.
* Takes advantage of the partial grouping so that computing summaries for the
  (test, task, variant), (test, task), and (test,) combinations does not
  require re-processing the entire result set.
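The workflow described above can be illustrated with a minimal, hedged sketch (not part of the commit): it assumes the module is importable as buildscripts.test_failures, and the revision hashes and task name below are placeholders.

    from buildscripts.test_failures import Report, TestHistory

    # Placeholder revision range and task name; a real invocation would use values
    # from the Evergreen project being analyzed.
    test_history = TestHistory(tasks=["jsCore_WT"])
    entries = test_history.get_history_by_revision(start_revision="<older-revision>",
                                                   end_revision="<newer-revision>")

    # The ReportEntry tuples returned by the TestHistory methods are used to
    # construct a Report, which can then summarize them by various groupings.
    report = Report(entries)
    for entry in report.summarize_by(Report.TEST_TASK, time_period=Report.WEEKLY):
        print(entry.test, entry.task, entry.num_pass, entry.num_fail, entry.fail_rate)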
Diffstat (limited to 'buildscripts')
-rw-r--r--  buildscripts/ciconfig/tags.py                      39
-rw-r--r--  buildscripts/resmokelib/selector.py                 2
-rwxr-xr-x  buildscripts/test_failures.py                    1172
-rw-r--r--  buildscripts/tests/ciconfig/test_tags.py            6
-rw-r--r--  buildscripts/tests/test_test_failures.py          676
-rw-r--r--  buildscripts/tests/test_update_test_lifecycle.py  760
-rwxr-xr-x  buildscripts/update_test_lifecycle.py             525
7 files changed, 2432 insertions, 748 deletions
diff --git a/buildscripts/ciconfig/tags.py b/buildscripts/ciconfig/tags.py
index dbd090bc9a8..dfab58832fa 100644
--- a/buildscripts/ciconfig/tags.py
+++ b/buildscripts/ciconfig/tags.py
@@ -1,4 +1,5 @@
"""Module to access and modify tag configuration files used by resmoke."""
+
from __future__ import absolute_import
from __future__ import print_function
@@ -11,7 +12,7 @@ import yaml
# Setup to preserve order in yaml.dump, see https://stackoverflow.com/a/8661021
def _represent_dict_order(self, data):
- return self.represent_mapping('tag:yaml.org,2002:map', data.items())
+ return self.represent_mapping("tag:yaml.org,2002:map", data.items())
yaml.add_representer(collections.OrderedDict, _represent_dict_order)
# End setup
@@ -20,17 +21,38 @@ yaml.add_representer(collections.OrderedDict, _represent_dict_order)
class TagsConfig(object):
"""Represent a test tag configuration file."""
- def __init__(self, filename, cmp_func=None):
- """Initialize a TagsConfig from a file.
+ def __init__(self, raw, cmp_func=None):
+ """Initialize a TagsConfig from a dict representing the associations between tests and tags.
'cmp_func' can be used to specify a comparison function that will be used when sorting tags.
"""
- with open(filename, "r") as fstream:
- self.raw = yaml.safe_load(fstream)
+
+ self.raw = raw
self._conf = self.raw["selector"]
self._conf_copy = copy.deepcopy(self._conf)
self._cmp_func = cmp_func
+ @classmethod
+ def from_file(cls, filename, **kwargs):
+ """Return a TagsConfig from a file containing the associations between tests and tags.
+
+ See TagsConfig.__init__() for the keyword arguments that can be specified.
+ """
+
+ with open(filename, "r") as fstream:
+ raw = yaml.safe_load(fstream)
+
+ return cls(raw, **kwargs)
+
+ @classmethod
+ def from_dict(cls, raw, **kwargs):
+ """Return a TagsConfig from a dict representing the associations between tests and tags.
+
+ See TagsConfig.__init__() for the keyword arguments that can be specified.
+ """
+
+ return cls(copy.deepcopy(raw), **kwargs)
+
def get_test_kinds(self):
"""List the test kinds."""
return self._conf.keys()
@@ -75,9 +97,14 @@ class TagsConfig(object):
"""
with open(filename, "w") as fstream:
if preamble:
- print(textwrap.fill(preamble, width=100, initial_indent="# ",
+ print(textwrap.fill(preamble,
+ width=100,
+ initial_indent="# ",
subsequent_indent="# "),
file=fstream)
+
+ # We use yaml.safe_dump() in order to avoid having strings being written to the file as
+ # "!!python/unicode ..." and instead have them written as plain 'str' instances.
yaml.safe_dump(self.raw, fstream, default_flow_style=False)
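As a hedged usage sketch of the two new constructors (not part of the commit): the file path, test kind, test pattern, and tag name below are placeholders, and the dict nesting is inferred from the accessor methods shown in this diff.

    from buildscripts.ciconfig import tags as _tags

    # Construct a TagsConfig from an on-disk tag file (placeholder path).
    conf = _tags.TagsConfig.from_file("etc/test_lifecycle.yml")

    # Construct a TagsConfig from an in-memory dict; from_dict() deep-copies its
    # argument so the caller's dict is left untouched.
    raw = {"selector": {"js_test": {"jstests/core/example.js": ["unreliable"]}}}
    conf = _tags.TagsConfig.from_dict(raw)
    print(conf.get_test_kinds())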
diff --git a/buildscripts/resmokelib/selector.py b/buildscripts/resmokelib/selector.py
index a3448116efb..e1cec945603 100644
--- a/buildscripts/resmokelib/selector.py
+++ b/buildscripts/resmokelib/selector.py
@@ -28,7 +28,7 @@ def _parse_tag_file(test_kind):
a list of tags, i.e., {'file1.js': ['tag1', 'tag2'], 'file2.js': ['tag2', 'tag3']}
"""
if config.TAG_FILE:
- tags_conf = _tags.TagsConfig(config.TAG_FILE)
+ tags_conf = _tags.TagsConfig.from_file(config.TAG_FILE)
tagged_roots = tags_conf.get_test_patterns(test_kind)
else:
tagged_roots = []
diff --git a/buildscripts/test_failures.py b/buildscripts/test_failures.py
index 72a9785911c..3a874c55fc4 100755
--- a/buildscripts/test_failures.py
+++ b/buildscripts/test_failures.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python
-"""Test Failures
-
-Compute Test failures rates from Evergreen API for specified tests, tasks, etc.
"""
+Utility for computing test failure rates from the Evergreen API.
+"""
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -14,660 +14,710 @@ import itertools
import operator
import optparse
import os
-import urlparse
+import sys
+import warnings
+
+try:
+ from urlparse import urlparse
+except ImportError:
+ from urllib.parse import urlparse
import requests
import yaml
-_API_SERVER_DEFAULT = "http://evergreen-api.mongodb.com:8080"
-_REST_PREFIX = "/rest/v1"
-_PROJECT = "mongodb-mongo-master"
+if sys.version_info[0] == 2:
+ _STRING_TYPES = (basestring,)
+else:
+ _STRING_TYPES = (str,)
+
+
+_ReportEntry = collections.namedtuple("_ReportEntry", [
+ "test",
+ "task",
+ "variant",
+ "distro",
+ "start_date",
+ "end_date",
+ "num_pass",
+ "num_fail",
+])
+
+
+class Wildcard(object):
+ """
+ A class for representing that there are multiple values associated with a particular component.
+ """
-class _Missing(object):
- """Class to support missing fields from the report."""
def __init__(self, kind):
self._kind = kind
def __eq__(self, other):
- if not isinstance(other, _Missing):
+ if not isinstance(other, Wildcard):
return NotImplemented
+
return self._kind == other._kind
def __ne__(self, other):
return not self == other
- def __str__(self):
- return "<_Missing: {}>".format(self._kind)
-
-_ALL_TEST = _Missing("test")
-_ALL_TASK = _Missing("task")
-_ALL_VARIANT = _Missing("variant")
-_ALL_DISTRO = _Missing("distro")
-_ALL_DATE = _Missing("date")
+ def __hash__(self):
+ return hash(self._kind)
+ def __str__(self):
+ return "<multiple {}>".format(self._kind)
-def read_evg_config():
- # Expand out evergreen config file possibilities
- file_list = [
- "./.evergreen.yml",
- os.path.expanduser("~/.evergreen.yml"),
- os.path.expanduser("~/cli_bin/.evergreen.yml")]
- for filename in file_list:
- if os.path.isfile(filename):
- with open(filename, "r") as fstream:
- return yaml.load(fstream)
- return None
+class ReportEntry(_ReportEntry):
+ """
+ Holds information about Evergreen test executions.
+ """
+ _MULTIPLE_TESTS = Wildcard("tests")
+ _MULTIPLE_TASKS = Wildcard("tasks")
+ _MULTIPLE_VARIANTS = Wildcard("variants")
+ _MULTIPLE_DISTROS = Wildcard("distros")
-def datestr_to_date(date_str):
- """Returns datetime from a date string in the format of YYYY-MM-DD.
- Note that any time in the date string is stripped off."""
- return datetime.datetime.strptime(date_str.split("T")[0], "%Y-%m-%d").date()
+ _MIN_DATE = datetime.date(datetime.MINYEAR, 1, 1)
+ _MAX_DATE = datetime.date(datetime.MAXYEAR, 12, 31)
+ @property
+ def fail_rate(self):
+ """
+ Returns the ratio of test failures to the total number of test executions.
-def date_to_datestr(date_time):
- """Returns date string in the format of YYYY-MM-DD from a datetime."""
- return date_time.strftime("%Y-%m-%d")
+ If a test hasn't been run at all, then we still say it has a failure rate of 0% for
+ convenience when applying thresholds.
+ """
+ if self.num_pass == self.num_fail == 0:
+ return 0.0
+ return self.num_fail / (self.num_pass + self.num_fail)
-class ViewReport(object):
- """"Class to support any views into the HistoryReport."""
+ def period_start_date(self, start_date, period_size):
+ """
+ Returns a datetime.date() instance corresponding to the beginning of the time period
+ containing 'self.start_date'.
+ """
- DetailGroup = collections.namedtuple(
- "DetailGroup",
- "test task variant distro start_date end_date")
+ if not isinstance(start_date, datetime.date):
+ raise TypeError("'start_date' argument must be a date")
- Summary = collections.namedtuple(
- "Summary",
- "test task variant distro start_date end_date fail_rate num_fail num_pass")
+ if not isinstance(period_size, datetime.timedelta):
+ raise TypeError("'period_size' argument must a datetime.timedelta instance")
+ elif period_size.days <= 0:
+ raise ValueError("'period_size' argument must be a positive number of days")
+ elif period_size - datetime.timedelta(days=period_size.days) > datetime.timedelta():
+ raise ValueError("'period_size' argument must be an integral number of days")
- SummaryGroup = collections.namedtuple(
- "SummaryGroup",
- "test task variant distro start_date end_date")
+ # 'start_day_offset' is the number of days 'self.start_date' is from the start of the time
+ # period.
+ start_day_offset = (self.start_date - start_date).days % period_size.days
+ return self.start_date - datetime.timedelta(days=start_day_offset)
- _MIN_DATE = "{0:04}-01-01".format(datetime.MINYEAR)
- _MAX_DATE = "{}-12-31".format(datetime.MAXYEAR)
- _group_by = ["test", "task", "variant", "distro"]
- _start_days = ["first_day", "sunday", "monday"]
+ def week_start_date(self, start_day_of_week):
+ """
+ Returns a datetime.date() instance corresponding to the beginning of the week containing
+ 'self.start_date'. The first day of the week can be specified as the strings "Sunday" or
+ "Monday", as well as an arbitrary datetime.date() instance.
+ """
- def __init__(self,
- history_report=[],
- group_period=7,
- start_day_of_week="first_day"):
- self._report = history_report
-
- self.start_day_of_week = start_day_of_week
- # Using 'first_day' means the a group report will start on the day of the
- # week from the earliest date in the test history.
- if self.start_day_of_week not in self._start_days:
- raise ValueError(
- "Invalid start_day_of_week specified '{}'".format(self.start_day_of_week))
-
- # Set start and end dates of report and create the group_periods
- self.group_period = group_period
- if self._report:
- start_dts = [r.start_dt for r in self._report]
- self.start_dt = min(start_dts)
- self.end_dt = max(start_dts)
- self._group_periods = self._create_group_periods()
- else:
- self.start_dt = datestr_to_date(self._MIN_DATE)
- self.end_dt = datestr_to_date(self._MAX_DATE)
- self._group_periods = []
-
- self._summary_report = {}
- self._update_summary_report()
-
- # Create the lists of tests, tasks, variants & distros.
- self._all_tests = list(set([r.test for r in self._report]))
- self._all_tasks = list(set([r.task for r in self._report]))
- self._all_variants = list(set([r.variant for r in self._report]))
- self._all_distros = list(set([str(r.distro) for r in self._report]))
-
- def fail_rate(self, num_fail, num_pass):
- """Computes fails rate, return 0 if no tests have run."""
- if num_pass == num_fail == 0:
- return 0.0
- return num_fail / (num_pass + num_fail)
-
- def _group_dates(self, test_dt, from_end):
- """Returns start_date and end_date for the group_period, which are are included
- in the group_period."""
- # Computing the start and end dates for a period may have special cases for the
- # first and last periods, only if the self.group_period is 7, which represents weekly.
- # Since the first period may not start on the weekday for start_day_of_week
- # (if it's 'sunday' or 'monday'), that period may be less than the
- # period days. Similarly the last period will always end on end_dt.
- # Example, if the start_date falls on a Wednesday, then all group starting
- # dates are offset from that, if start_day_of_week is 'first_day'.
-
- # Use 'from_end=True' to produce group_dates for analyzing the report from the end.
-
- # The start date for a group_period is one of the following:
- # - start_dt (the earliest date in the report)
- # - The day specified in start_day_of_week
- # - An offset from start_dt, if start_day_of_week is 'first_day'
- # The ending date for a group_period is one of the following:
- # - end_dt (the latest date in the report)
- # - The mod of difference of weekday of test_dt and the start_weekday
-
- if test_dt < self.start_dt or test_dt > self.end_dt:
- raise ValueError("The test_dt {} must be >= {} and <= {}".format(
- test_dt, self.start_dt, self.end_dt))
-
- if self.group_period == 1:
- return (test_dt, test_dt)
-
- # Return group_dates relative to the end_dt. The start_day_of_week is not
- # used in computing the dates.
- if from_end:
- group_end_dt = min(
- self.end_dt,
- test_dt + datetime.timedelta(
- days=((self.end_dt - test_dt).days % self.group_period)))
- group_st_dt = max(
- self.start_dt,
- group_end_dt - datetime.timedelta(days=self.group_period - 1))
- return (group_st_dt, group_end_dt)
-
- # When the self.group_period is 7, we support a start_day_of_week.
- if self.group_period == 7:
- if self.start_day_of_week == "sunday":
+ if isinstance(start_day_of_week, _STRING_TYPES):
+ start_day_of_week = start_day_of_week.lower()
+ if start_day_of_week == "sunday":
start_weekday = 6
- elif self.start_day_of_week == "monday":
+ elif start_day_of_week == "monday":
start_weekday = 0
- elif self.start_day_of_week == "first_day":
- start_weekday = self.start_dt.weekday()
- # 'start_day_offset' is the number of days 'test_dt' is from the start of the week.
- start_day_offset = (test_dt.weekday() - start_weekday) % 7
- else:
- start_day_offset = (test_dt - self.start_dt).days % self.group_period
-
- group_start_dt = test_dt - datetime.timedelta(days=start_day_offset)
- group_end_dt = group_start_dt + datetime.timedelta(days=self.group_period - 1)
- return (max(group_start_dt, self.start_dt), min(group_end_dt, self.end_dt))
-
- def _select_attribute(self, value, attributes):
- """Returns true if attribute value list is None or a value matches from the list of
- attribute values."""
- return not attributes or value in attributes
-
- def _create_group_periods(self):
- """Discover all group_periods."""
- group_periods = set()
- test_dt = self.start_dt
- end_dt = self.end_dt
- while test_dt <= end_dt:
- # We will summarize for time periods from start-to-end and end-to-start.
- group_periods.add(self._group_dates(test_dt, False))
- group_periods.add(self._group_dates(test_dt, True))
- test_dt += datetime.timedelta(days=1)
- return group_periods
-
- def _update_summary_record(self, report_key, status_key):
- """Increments the self._summary_report report_key's status_key & fail_rate."""
- summary = self._summary_report.setdefault(
- report_key,
- {"num_fail": 0, "num_pass": 0, "fail_rate": 0.0})
- summary[status_key] += 1
- summary["fail_rate"] = self.fail_rate(summary["num_fail"], summary["num_pass"])
-
- def _update_summary_report(self):
- """Process self._report and updates the self._summary_report."""
-
- for record in self._report:
- if record.test_status == "pass":
- status_key = "num_pass"
- else:
- status_key = "num_fail"
- # Update each combination summary:
- # _total_, test, test/task, test/task/variant, test/task/variant/distro
- for combo in ["_total_", "test", "task", "variant", "distro"]:
- test = record.test if combo != "_total_" else _ALL_TEST
- task = record.task if combo in ["task", "variant", "distro"] else _ALL_TASK
- variant = record.variant if combo in ["variant", "distro"] else _ALL_VARIANT
- distro = record.distro if combo == "distro" else _ALL_DISTRO
- # Update the summary for matching group periods.
- for (group_start_dt, group_end_dt) in self._group_periods:
- if record.start_dt >= group_start_dt and record.start_dt <= group_end_dt:
- report_key = self.SummaryGroup(
- test=test,
- task=task,
- variant=variant,
- distro=distro,
- start_date=date_to_datestr(group_start_dt),
- end_date=date_to_datestr(group_end_dt))
- self._update_summary_record(report_key, status_key)
- # Update the summary for the entire date period.
- report_key = self.SummaryGroup(
- test=test,
- task=task,
- variant=variant,
- distro=distro,
- start_date=_ALL_DATE,
- end_date=_ALL_DATE)
- self._update_summary_record(report_key, status_key)
-
- def _filter_reports(self,
- start_date=_MIN_DATE,
- end_date=_MAX_DATE,
- tests=None,
- tasks=None,
- variants=None,
- distros=None):
- """Returns filter of self._report."""
- return [r for r in self._report
- if r.start_dt >= datestr_to_date(start_date) and
- r.start_dt <= datestr_to_date(end_date) and
- self._select_attribute(r.test, tests) and
- self._select_attribute(r.task, tasks) and
- self._select_attribute(r.variant, variants) and
- (r.distro is None or self._select_attribute(r.distro, distros))]
-
- def _detail_report(self, report):
- """Returns the detailed report, which is a dictionary in the form of key tuples,
- '(test, task, variant, distro, start_date, end_date)', with a value of
- {num_pass, num_fail}."""
- detail_report = {}
- for record in report:
- group_start_dt, group_end_dt = self._group_dates(record.start_dt, False)
- detail_group = self.DetailGroup(
- test=record.test,
- task=record.task,
- variant=record.variant,
- distro=record.distro,
- start_date=group_start_dt,
- end_date=group_end_dt)
- detail_report.setdefault(detail_group, {"num_pass": 0, "num_fail": 0})
- if record.test_status == "pass":
- status_key = "num_pass"
else:
- status_key = "num_fail"
- detail_report[detail_group][status_key] += 1
- return detail_report
-
- def last_period(self):
- """Returns start_date and end_date for the last period in the report."""
- start_dt = max(self.start_dt,
- self.end_dt - datetime.timedelta(days=self.group_period - 1))
- return date_to_datestr(start_dt), date_to_datestr(self.end_dt)
-
- def view_detail(self, tests=None, tasks=None, variants=None, distros=None):
- """Provides a detailed view of specified parameters.
- The parameters are used as a filter, so an unspecified parameter provides
- more results.
- Returns the view as a list of namedtuples:
- (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass)
+ raise ValueError(
+ "'start_day_of_week' can only be the string \"sunday\" or \"monday\"")
+ elif isinstance(start_day_of_week, datetime.date):
+ start_weekday = start_day_of_week.weekday()
+ else:
+ raise TypeError("'start_day_of_week' argument must be a string or a date")
+
+ # 'start_day_offset' is the number of days 'self.start_date' is from the start of the week.
+ start_day_offset = (self.start_date.weekday() - start_weekday) % 7
+ return self.start_date - datetime.timedelta(days=start_day_offset)
+
+ @classmethod
+ def sum(cls, entries):
+ """
+ Returns a single ReportEntry() instance corresponding to all test executions represented by
+ 'entries'.
+ """
+
+ test = set()
+ task = set()
+ variant = set()
+ distro = set()
+ start_date = cls._MAX_DATE
+ end_date = cls._MIN_DATE
+ num_pass = 0
+ num_fail = 0
+
+ for entry in entries:
+ test.add(entry.test)
+ task.add(entry.task)
+ variant.add(entry.variant)
+ distro.add(entry.distro)
+ start_date = min(entry.start_date, start_date)
+ end_date = max(entry.end_date, end_date)
+ num_pass += entry.num_pass
+ num_fail += entry.num_fail
+
+ test = next(iter(test)) if len(test) == 1 else ReportEntry._MULTIPLE_TESTS
+ task = next(iter(task)) if len(task) == 1 else ReportEntry._MULTIPLE_TASKS
+ variant = next(iter(variant)) if len(variant) == 1 else ReportEntry._MULTIPLE_VARIANTS
+ distro = next(iter(distro)) if len(distro) == 1 else ReportEntry._MULTIPLE_DISTROS
+
+ return ReportEntry(test=test,
+ task=task,
+ variant=variant,
+ distro=distro,
+ start_date=start_date,
+ end_date=end_date,
+ num_pass=num_pass,
+ num_fail=num_fail)
+
+
+class Report(object):
+ """
+ A class for generating summarizations about Evergreen test executions.
+ """
+
+ TEST = ("test",)
+ TEST_TASK = ("test", "task")
+ TEST_TASK_VARIANT = ("test", "task", "variant")
+ TEST_TASK_VARIANT_DISTRO = ("test", "task", "variant", "distro")
+
+ DAILY = "daily"
+ WEEKLY = "weekly"
+
+ SUNDAY = "sunday"
+ MONDAY = "monday"
+ FIRST_DAY = "first-day"
+
+ def __init__(self, entries):
+ """
+ Initializes the Report instance.
"""
- filter_results = self._filter_reports(
- tests=tests, tasks=tasks, variants=variants, distros=distros)
-
- view_report = []
- detail_report = self._detail_report(filter_results)
- for detail_group in detail_report:
- view_report.append(self.Summary(
- test=detail_group.test,
- task=detail_group.task,
- variant=detail_group.variant,
- distro=detail_group.distro,
- start_date=detail_group.start_date,
- end_date=detail_group.end_date,
- fail_rate=self.fail_rate(
- detail_report[detail_group]["num_fail"],
- detail_report[detail_group]["num_pass"]),
- num_fail=detail_report[detail_group]["num_fail"],
- num_pass=detail_report[detail_group]["num_pass"]))
- return view_report
-
- def view_summary(self,
- group_on=None,
- start_date=_ALL_DATE,
- end_date=_ALL_DATE):
- """Provides a summary view report, based on the group_on list. If group_on is empty, then
- a total summary report is provided. The start_date and end_date must match the
- group periods for a result to be returned.
- Returns the view as a list of namedtuples:
- (test, task, variant, distro, start_date, end_date, fail_rate, num_fail, num_pass)
+ if not isinstance(entries, list):
+ # It is possible that 'entries' is a generator function, so we convert it to a list in
+ # order to be able to iterate it multiple times.
+ entries = list(entries)
+
+ self.start_date = min(entry.start_date for entry in entries)
+ self.end_date = max(entry.end_date for entry in entries)
+
+ self._entries = entries
+
+ @property
+ def raw_data(self):
+ """
+ Returns a copy of the list of ReportEntry instances underlying the report.
+ """
+
+ return self._entries[:]
+
+ def summarize_by(self, components, time_period=None, start_day_of_week=FIRST_DAY):
+ """
+ Returns a list of ReportEntry instances grouped by
+
+ 'components' if 'time_period' is None,
+
+ 'components' followed by Entry.start_date if 'time_period' is "daily",
+
+ 'components' followed by Entry.week_start_date(start_day_of_week) if 'time_period' is
+ "weekly". See Entry.week_start_date() for more details on the possible values for
+ 'start_day_of_week'.
+
+ 'components' followed by Entry.period_start_date(self.start_date, time_period) if
+ 'time_period' is a datetime.timedelta instance.
"""
- group_on = group_on if group_on is not None else []
-
- for group_name in group_on:
- if group_name not in self._group_by:
- raise ValueError("Invalid group '{}' specified, the supported groups are {}"
- .format(group_name, self._group_by))
-
- tests = self._all_tests if "test" in group_on else [_ALL_TEST]
- tasks = self._all_tasks if "task" in group_on else [_ALL_TASK]
- variants = self._all_variants if "variant" in group_on else [_ALL_VARIANT]
- distros = self._all_distros if "distro" in group_on else [_ALL_DISTRO]
-
- group_lists = [tests, tasks, variants, distros]
- group_combos = list(itertools.product(*group_lists))
- view_report = []
- for group in group_combos:
- test_filter = group[0] if group[0] else _ALL_TEST
- task_filter = group[1] if group[1] else _ALL_TASK
- variant_filter = group[2] if group[2] else _ALL_VARIANT
- distro_filter = group[3] if group[3] else _ALL_DISTRO
- report_key = self.SummaryGroup(
- test=test_filter,
- task=task_filter,
- variant=variant_filter,
- distro=distro_filter,
- start_date=start_date,
- end_date=end_date)
- if report_key in self._summary_report:
- view_report.append(self.Summary(
- test=test_filter if test_filter != _ALL_TEST else None,
- task=task_filter if task_filter != _ALL_TASK else None,
- variant=variant_filter if variant_filter != _ALL_VARIANT else None,
- distro=distro_filter if distro_filter != _ALL_DISTRO else None,
- start_date=start_date if start_date != _ALL_DATE else None,
- end_date=end_date if end_date != _ALL_DATE else None,
- fail_rate=self._summary_report[report_key]["fail_rate"],
- num_fail=self._summary_report[report_key]["num_fail"],
- num_pass=self._summary_report[report_key]["num_pass"]))
- return view_report
-
-
-class HistoryReport(object):
- """The HistoryReport class interacts with the Evergreen REST API to generate a history_report.
- The history_report is meant to be viewed from the ViewReport class methods."""
-
- HistoryReportTuple = collections.namedtuple(
- "Report", "test task variant distro start_dt test_status")
-
- # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate options are exposed.
- # period_types = ["date", "revision"]
- period_types = ["revision"]
+ if not isinstance(components, (list, tuple)):
+ raise TypeError("'components' argument must be a list or tuple")
+
+ for component in components:
+ if not isinstance(component, _STRING_TYPES):
+ raise TypeError("Each element of 'components' argument must be a string")
+ elif component not in ReportEntry._fields:
+ raise ValueError(
+ "Each element of 'components' argument must be one of {}".format(
+ ReportEntry._fields))
+
+ group_by = [operator.attrgetter(component) for component in components]
+
+ if start_day_of_week == self.FIRST_DAY:
+ start_day_of_week = self.start_date
+
+ period_size = None
+ if isinstance(time_period, _STRING_TYPES):
+ if time_period == self.DAILY:
+ group_by.append(operator.attrgetter("start_date"))
+ period_size = datetime.timedelta(days=1)
+ elif time_period == self.WEEKLY:
+ group_by.append(lambda entry: entry.week_start_date(start_day_of_week))
+ period_size = datetime.timedelta(days=7)
+ else:
+ raise ValueError(
+ "'time_period' argument can only be the string \"{}\" or \"{}\"".format(
+ self.DAILY, self.WEEKLY))
+ elif isinstance(time_period, datetime.timedelta):
+ group_by.append(lambda entry: entry.period_start_date(self.start_date, time_period))
+ period_size = time_period
+ elif time_period is not None:
+ raise TypeError(("'time_period' argument must be a string or a datetime.timedelta"
+ " instance"))
+
+ def key_func(entry):
+ """
+ Assigns a key for sorting and grouping ReportEntry instances based on the combination of
+ options summarize_by() was called with.
+ """
+
+ return [func(entry) for func in group_by]
+
+ sorted_entries = sorted(self._entries, key=key_func)
+ grouped_entries = itertools.groupby(sorted_entries, key=key_func)
+ summed_entries = [ReportEntry.sum(group) for (_key, group) in grouped_entries]
+
+ if period_size is not None and period_size.days > 1:
+ # Overwrite the 'start_date' and 'end_date' attributes so that they correspond to the
+ # beginning and end of the period, respectively. If the beginning or end of the week
+ # falls outside the range [self.start_date, self.end_date], then the new 'start_date'
+ # and 'end_date' attributes are clamped to that range.
+ for (i, summed_entry) in enumerate(summed_entries):
+ if time_period == self.WEEKLY:
+ period_start_date = summed_entry.week_start_date(start_day_of_week)
+ else:
+ period_start_date = summed_entry.period_start_date(self.start_date, period_size)
+
+ period_end_date = period_start_date + period_size - datetime.timedelta(days=1)
+ start_date = max(period_start_date, self.start_date)
+ end_date = min(period_end_date, self.end_date)
+ summed_entries[i] = summed_entry._replace(start_date=start_date, end_date=end_date)
+
+ return summed_entries
+
+
+class Missing(object):
+ """
+ A class for representing that the value associated with a particular component is unknown.
+ """
+
+ def __init__(self, kind):
+ self._kind = kind
+
+ def __eq__(self, other):
+ if not isinstance(other, Missing):
+ return NotImplemented
+
+ return self._kind == other._kind
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(self._kind)
+
+ def __str__(self):
+ return "<unknown {}>".format(self._kind)
+
+
+class TestHistory(object):
+ """
+ A class for interacting with the /test_history Evergreen API endpoint.
+ """
+
+ DEFAULT_API_SERVER = "http://evergreen-api.mongodb.com:8080"
+ DEFAULT_PROJECT = "mongodb-mongo-master"
+
+ DEFAULT_TEST_STATUSES = ("pass", "fail", "silentfail")
+ DEFAULT_TASK_STATUSES = ("success", "failed", "timeout", "sysfail")
+
+ # The Evergreen API requires specifying the "limit" parameter when not specifying a range of
+ # revisions.
+ DEFAULT_LIMIT = 20
+
+ _MISSING_DISTRO = Missing("distro")
def __init__(self,
- period_type,
- start,
- end,
- start_day_of_week="first_day",
- group_period=7,
- project=_PROJECT,
+ api_server=DEFAULT_API_SERVER,
+ project=DEFAULT_PROJECT,
tests=None,
tasks=None,
variants=None,
- distros=None,
- evg_cfg=None):
- # Initialize the report and object variables.
- self._report_tuples = []
- self._report = {"tests": {}}
- self.period_type = period_type.lower()
- if self.period_type not in self.period_types:
- raise ValueError(
- "Invalid time period type '{}' specified."
- " supported types are {}.".format(self.period_type, self.period_types))
- self.group_period = group_period
- self.start_day_of_week = start_day_of_week.lower()
-
- self.start = start
- self.end = end
-
- self.project = project
-
- if not tests and not tasks:
- raise ValueError("Must specify either tests or tasks.")
- self.tests = tests if tests is not None else []
- self.tasks = tasks if tasks is not None else []
- self.variants = variants if variants is not None else []
- self.distros = distros if distros is not None else []
-
- if evg_cfg is not None and "api_server_host" in evg_cfg:
- api_server = "{url.scheme}://{url.netloc}".format(
- url=urlparse.urlparse(evg_cfg["api_server_host"]))
- else:
- api_server = _API_SERVER_DEFAULT
- self.api_prefix = api_server + _REST_PREFIX
-
- def _all_tests(self):
- """Returns a list of all test file name types from self.tests.
- Since the test file names can be specifed as either Windows or Linux style,
- we will ensure that both are specified for each test.
- Add Windows style naming, backslashes and possibly .exe extension.
- Add Linux style naming, forward slashes and removes .exe extension."""
- tests_set = set(self.tests)
- for test in self.tests:
- if "/" in test:
- windows_test = test.replace("/", "\\")
- if not os.path.splitext(test)[1]:
- windows_test += ".exe"
- tests_set.add(windows_test)
- if "\\" in test:
- linux_test = test.replace("\\", "/")
- linux_test = linux_test.replace(".exe", "")
- tests_set.add(linux_test)
- return list(tests_set)
-
- def _history_request_params(self, test_statuses):
- """Returns a dictionary of params used in requests.get."""
- return {
- "distros": ",".join(self.distros),
- "sort": "latest",
- "tasks": ",".join(self.tasks),
- "tests": ",".join(self.tests),
- "taskStatuses": "failed,timeout,success,sysfail",
- "testStatuses": ",".join(test_statuses),
- "variants": ",".join(self.variants),
- }
-
- def _get_history_by_revision(self, test_statuses):
- """ Returns a list of history data for specified options."""
- after_revision = self.start
- before_revision = self.end
- params = self._history_request_params(test_statuses)
- params["beforeRevision"] = before_revision
- url = "{prefix}/projects/{project}/test_history".format(
- prefix=self.api_prefix,
- project=self.project)
-
- # Since the API limits the results, with each invocation being distinct, we can
- # simulate pagination, by requesting results using afterRevision.
+ distros=None):
+ """
+ Initializes the TestHistory instance with the list of tests, tasks, variants, and distros
+ specified.
+
+ The list of tests specified is augmented to ensure that failures on both POSIX and Windows
+ platforms are returned by the Evergreen API.
+ """
+
+ tests = tests if tests is not None else []
+ tests = [test for test_file in tests for test in self._denormalize_test_file(test_file)]
+
+ self._tests = tests
+ self._tasks = tasks if tasks is not None else []
+ self._variants = variants if variants is not None else []
+ self._distros = distros if distros is not None else []
+
+ self._test_history_url = "{api_server}/rest/v1/projects/{project}/test_history".format(
+ api_server=api_server,
+ project=project,
+ )
+
+ def get_history_by_revision(self,
+ start_revision,
+ end_revision,
+ test_statuses=DEFAULT_TEST_STATUSES,
+ task_statuses=DEFAULT_TASK_STATUSES):
+ """
+ Returns a list of ReportEntry instances corresponding to each individual test execution
+ between 'start_revision' and 'end_revision'.
+
+ Only tests with status 'test_statuses' are included in the result. Similarly, only tasks
+ with status 'task_statuses' are included in the result. By default, both passing and failing
+ test executions are returned.
+ """
+
+ params = self._history_request_params(test_statuses, task_statuses)
+ params["beforeRevision"] = end_revision
+
history_data = []
- while after_revision != before_revision:
- params["afterRevision"] = after_revision
- response = requests.get(url=url, params=params)
+
+ # Since the API limits the results, with each invocation being distinct, we can simulate
+ # pagination by making subsequent requests using "afterRevision".
+ while start_revision != end_revision:
+ params["afterRevision"] = start_revision
+ response = requests.get(url=self._test_history_url, params=params)
response.raise_for_status()
- if not response.json():
+
+ test_results = response.json()
+ if not test_results:
break
- # The first test will have the latest revision for this result set.
- after_revision = response.json()[0]["revision"]
- history_data.extend(response.json())
+ for test_result in test_results:
+ history_data.append(self._process_test_result(test_result))
+
+ # The first test will have the latest revision for this result set because
+ # TestHistory._history_request_params() sorts by "latest".
+ start_revision = test_results[0]["revision"]
return history_data
- def _get_history_by_date(self, test_statuses):
- """ Returns a list of history data for specified options."""
- # Note this functionality requires EVG-1653
- start_date = self.start
- end_date = self.end
- params = self._history_request_params(test_statuses)
- params["beforeDate"] = end_date + "T23:59:59Z"
- url = "{prefix}/projects/{project}/test_history".format(
- prefix=self.api_prefix,
- project=self.project)
-
- # Since the API limits the results, with each invocation being distinct, we can
- # simulate pagination, by requesting results using afterDate, being careful to
- # filter out possible duplicate entries.
- start_time = start_date + "T00:00:00Z"
- history_data = []
- history_data_set = set()
- last_sorted_tests = []
+ def get_history_by_date(self,
+ start_date,
+ end_date,
+ test_statuses=DEFAULT_TEST_STATUSES,
+ task_statuses=DEFAULT_TASK_STATUSES):
+ """
+ Returns a list of ReportEntry instances corresponding to each individual test execution
+ between 'start_date' and 'end_date'.
+
+ Only tests with status 'test_statuses' are included in the result. Similarly, only tasks
+ with status 'task_statuses' are included in the result. By default, both passing and failing
+ test executions are returned.
+ """
+
+ warnings.warn(
+ "Until https://jira.mongodb.org/browse/EVG-1653 is implemented, pagination using dates"
+ " isn't guaranteed to returned a complete result set. It is possible for the results"
+ " from an Evergreen task that started between the supplied start date and the"
+ " response's latest test start time to be omitted.", RuntimeWarning)
+
+ params = self._history_request_params(test_statuses, task_statuses)
+ params["beforeDate"] = "{:%Y-%m-%d}T23:59:59Z".format(end_date)
+ params["limit"] = self.DEFAULT_LIMIT
+
+ start_time = "{:%Y-%m-%d}T00:00:00Z".format(start_date)
+ history_data = set()
+
+ # Since the API limits the results, with each invocation being distinct, we can simulate
+ # pagination by making subsequent requests using "afterDate" and being careful to filter out
+ # duplicate test results.
while True:
params["afterDate"] = start_time
- response = requests.get(url=url, params=params)
+ response = requests.get(url=self._test_history_url, params=params)
response.raise_for_status()
- if not response.json():
- return history_data
- sorted_tests = sorted(response.json(), key=operator.itemgetter("start_time"))
+ test_results = response.json()
+ if not test_results:
+ break
+
+ original_size = len(history_data)
+ for test_result in test_results:
+ start_time = max(test_result["start_time"], start_time)
+ history_data.add(self._process_test_result(test_result))
- # To prevent an infinite loop, we need to bail out if the result set is the same
- # as the previous one.
- if sorted_tests == last_sorted_tests:
+ # To prevent an infinite loop, we need to bail out if test results returned by the
+ # request were identical to the ones we got back in an earlier request.
+ if original_size == len(history_data):
break
- last_sorted_tests = sorted_tests
+ return list(history_data)
- for test in sorted_tests:
- start_time = test["start_time"]
- # Create a unique hash for the test entry and check if it's been processed.
- test_hash = hash(str(sorted(test.items())))
- if test_hash not in history_data_set:
- history_data_set.add(test_hash)
- history_data.append(test)
+ def _process_test_result(self, test_result):
+ """
+ Returns a ReportEntry() tuple representing the 'test_result' dictionary.
+ """
- return history_data
+ def parse_date(date_str):
+ """
+ Returns a datetime.date() instance representing the specified yyyy-mm-dd date string.
+
+ Note that any time component of 'date_str', including the timezone, is ignored.
+ """
+
+ return datetime.datetime.strptime(date_str.split("T")[0], "%Y-%m-%d").date()
+
+ # For individual test executions, we intentionally use the "start_time" of the test as both
+ # its 'start_date' and 'end_date' to avoid complicating how the test history is potentially
+ # summarized by time. By the time the test has started, the Evergreen task has already been
+ # assigned to a particular machine and is using a specific set of binaries, so there's
+ # unlikely to be any significance to when the test actually finishes.
+ start_date = end_date = parse_date(test_result["start_time"])
+
+ return ReportEntry(
+ test=self._normalize_test_file(test_result["test_file"]),
+ task=test_result["task_name"],
+ variant=test_result["variant"],
+ distro=test_result.get("distro", self._MISSING_DISTRO),
+ start_date=start_date,
+ end_date=end_date,
+ num_pass=(1 if test_result["test_status"] == "pass" else 0),
+ num_fail=(1 if test_result["test_status"] not in ("pass", "skip") else 0))
@staticmethod
- def normalize_test_file(test_file):
- """Normalizes the test_file name:
- - Changes single backslash (\\) to forward slash (/)
- - Removes .exe extension
- Returns normalized string."""
- return test_file.replace("\\", "/").replace(".exe", "")
-
- def generate_report(self):
- """Creates detail for self._report from specified test history options.
- Returns a ViewReport object of self._report."""
-
- if self.period_type == "date":
- report_method = self._get_history_by_date
- else:
- report_method = self._get_history_by_revision
+ def _normalize_test_file(test_file):
+ """
+ If 'test_file' represents a Windows-style path, then it is converted to a POSIX-style path
+ with
- self.tests = self._all_tests()
+ - backslashes (\\) as the path separator replaced with forward slashes (/) and
+ - the ".exe" extension, if present, removed.
- rest_api_report = report_method(test_statuses=["fail", "pass"])
+ If 'test_file' already represents a POSIX-style path, then it is returned unmodified.
+ """
- for record in rest_api_report:
- # Save API record as namedtuple
- self._report_tuples.append(
- self.HistoryReportTuple(
- test=str(HistoryReport.normalize_test_file(record["test_file"])),
- task=str(record["task_name"]),
- variant=str(record["variant"]),
- distro=record.get("distro", _ALL_DISTRO),
- start_dt=datestr_to_date(record["start_time"]),
- test_status=record["test_status"]))
+ if "\\" in test_file:
+ posix_test_file = test_file.replace("\\", "/")
+ (test_file_root, test_file_ext) = os.path.splitext(posix_test_file)
+ if test_file_ext == ".exe":
+ return test_file_root
+ return posix_test_file
- return ViewReport(history_report=self._report_tuples,
- group_period=self.group_period,
- start_day_of_week=self.start_day_of_week)
+ return test_file
+ def _denormalize_test_file(self, test_file):
+ """
+ Returns a list containing 'test_file' as both a POSIX-style path and a Windows-style path.
-def main():
+ The conversion process may involve replacing forward slashes (/) as the path separator
+ with backslashes (\\), as well as adding a ".exe" extension if 'test_file' has no file
+ extension.
+ """
- parser = optparse.OptionParser(description=__doc__,
- usage="Usage: %prog [options] test1 test2 ...")
+ test_file = self._normalize_test_file(test_file)
- parser.add_option("--project", dest="project",
- default=_PROJECT,
- help="Evergreen project to analyze, defaults to '%default'.")
+ if "/" in test_file:
+ windows_test_file = test_file.replace("/", "\\")
+ if not os.path.splitext(test_file)[1]:
+ windows_test_file += ".exe"
+ return [test_file, windows_test_file]
- # TODO EVG-1653: Expose the --sinceDate and --untilDate command line arguments after pagination
- # is made possible using the /test_history Evergreen API endpoint.
- # parser.add_option("--sinceDate", dest="start_date",
- # metavar="YYYY-MM-DD",
- # default="{:%Y-%m-%d}".format(today - datetime.timedelta(days=6)),
- # help="History from this date, defaults to 1 week ago (%default).")
+ return [test_file]
- # parser.add_option("--untilDate", dest="end_date",
- # metavar="YYYY-MM-DD",
- # default="{:%Y-%m-%d}".format(today),
- # help="History up to, and including, this date, defaults to today (%default).")
+ def _history_request_params(self, test_statuses, task_statuses):
+ """
+ Returns the query parameters for /test_history GET request as a dictionary.
+ """
+
+ return {
+ "distros": ",".join(self._distros),
+ "sort": "latest",
+ "tasks": ",".join(self._tasks),
+ "tests": ",".join(self._tests),
+ "taskStatuses": ",".join(task_statuses),
+ "testStatuses": ",".join(test_statuses),
+ "variants": ",".join(self._variants),
+ }
+
+
+def main():
+ """
+ Utility computing test failure rates from the Evergreen API.
+ """
+
+ parser = optparse.OptionParser(description=main.__doc__,
+ usage="Usage: %prog [options] [test1 test2 ...]")
+
+ parser.add_option("--project", dest="project",
+ metavar="<project-name>",
+ default=TestHistory.DEFAULT_PROJECT,
+ help="The Evergreen project to analyze. Defaults to '%default'.")
+
+ today = datetime.datetime.utcnow().replace(microsecond=0, tzinfo=None)
+ parser.add_option("--sinceDate", dest="since_date",
+ metavar="<yyyy-mm-dd>",
+ default="{:%Y-%m-%d}".format(today - datetime.timedelta(days=6)),
+ help=("The starting period as a date in UTC to analyze the test history for,"
+ " including the specified date. Defaults to 1 week ago (%default)."))
+
+ parser.add_option("--untilDate", dest="until_date",
+ metavar="<yyyy-mm-dd>",
+ default="{:%Y-%m-%d}".format(today),
+ help=("The ending period as a date in UTC to analyze the test history for,"
+ " including the specified date. Defaults to today (%default)."))
parser.add_option("--sinceRevision", dest="since_revision",
+ metavar="<gitrevision>",
default=None,
- help="History after this revision."
- # TODO EVG-1653: Uncomment this line once the --sinceDate and --untilDate
- # options are exposed.
- # "History after this revision, overrides --sinceDate & --untilDate."
- " Must be specified with --untilRevision")
+ help=("The starting period as a git revision to analyze the test history for,"
+ " excluding the specified commit. This option must be specified in"
+ " conjuction with --untilRevision and takes precedence over --sinceDate"
+ " and --untilDate."))
parser.add_option("--untilRevision", dest="until_revision",
+ metavar="<gitrevision>",
default=None,
- help="History up to, and including, this revision."
- # TODO EVG-1653: Uncomment this line once the --sinceDate and
- # --untilDate options are exposed.
- # "History up to, and including, this revision, overrides"
- # " --sinceDate & --untilDate."
- " Must be specified with --sinceRevision")
+ help=("The ending period as a git revision to analyze the test history for,"
+ " including the specified commit. This option must be specified in"
+ " conjuction with --sinceRevision and takes precedence over --sinceDate"
+ " and --untilDate."))
parser.add_option("--groupPeriod", dest="group_period",
- type="int",
- default=7,
- help="Set group period days, defaults to '%default'.")
+ metavar="[{}]".format("|".join([Report.DAILY, Report.WEEKLY, "<ndays>"])),
+ default=Report.WEEKLY,
+ help=("The time period over which to group test executions. Defaults to"
+ " '%default'."))
parser.add_option("--weekStartDay", dest="start_day_of_week",
- choices=["sunday", "monday", "first_day"],
- default="first_day",
- help="The group starting day of week, when --groupPeriod is not 1. "
- " Set to 'sunday', 'monday' or 'first_day'."
- " If 'first_day', the group will start on the first day of the"
- " starting date from the history result, defaults to '%default'.")
+ choices=(Report.SUNDAY, Report.MONDAY, Report.FIRST_DAY),
+ metavar="[{}]".format(
+ "|".join([Report.SUNDAY, Report.MONDAY, Report.FIRST_DAY])),
+ default=Report.FIRST_DAY,
+ help=("The day to use as the beginning of the week when grouping over time."
+ " This option is only relevant in conjuction with --groupPeriod={}. If"
+ " '{}' is specified, then the day of week of the earliest date is used"
+ " as the beginning of the week. Defaults to '%default'.".format(
+ Report.WEEKLY, Report.FIRST_DAY)))
parser.add_option("--tasks", dest="tasks",
+ metavar="<task1,task2,...>",
default="",
- help="Comma separated list of task display names to analyze.")
+ help="Comma-separated list of Evergreen task names to analyze.")
parser.add_option("--variants", dest="variants",
+ metavar="<variant1,variant2,...>",
default="",
- help="Comma separated list of build variants to analyze.")
+ help="Comma-separated list of Evergreen build variants to analyze.")
parser.add_option("--distros", dest="distros",
+ metavar="<distro1,distro2,...>",
default="",
- help="Comma separated list of build distros to analyze.")
+ help="Comma-separated list of Evergreen build distros to analyze.")
(options, tests) = parser.parse_args()
- # TODO EVG-1653: Uncomment these lines once the --sinceDate and --untilDate options are
- # exposed.
- # period_type = "date"
- # start = options.start_date
- # end = options.end_date
-
- if options.since_revision and options.until_revision:
- period_type = "revision"
- start = options.since_revision
- end = options.until_revision
- elif options.since_revision or options.until_revision:
- parser.print_help()
- parser.error("Must specify both --sinceRevision & --untilRevision")
- # TODO EVG-1653: Remove this else clause once the --sinceDate and --untilDate options are
- # exposed.
- else:
- parser.print_help()
- parser.error("Must specify both --sinceRevision & --untilRevision")
+ for (option_name, option_dest) in (("--sinceDate", "since_date"),
+ ("--untilDate", "until_date")):
+ option_value = getattr(options, option_dest)
+ try:
+ setattr(options,
+ option_dest,
+ datetime.datetime.strptime(option_value, "%Y-%m-%d").date())
+ except ValueError:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("{} must be specified in yyyy-mm-dd format, but got {}".format(
+ option_name, option_value))
+
+ if options.since_revision and not options.until_revision:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("Must specify --untilRevision in conjuction with --sinceRevision")
+ elif options.until_revision and not options.since_revision:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("Must specify --sinceRevision in conjuction with --untilRevision")
+
+ if options.group_period not in (Report.DAILY, Report.WEEKLY):
+ try:
+ options.group_period = datetime.timedelta(days=int(options.group_period))
+ except ValueError:
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
+ parser.error("--groupPeriod must be an integral number, but got {}".format(
+ options.group_period))
if not options.tasks and not tests:
- parser.print_help()
+ parser.print_help(file=sys.stderr)
+ print(file=sys.stderr)
parser.error("Must specify either --tasks or at least one test")
- report = HistoryReport(period_type=period_type,
- start=start,
- end=end,
- group_period=options.group_period,
- start_day_of_week=options.start_day_of_week,
- project=options.project,
- tests=tests,
- tasks=options.tasks.split(","),
- variants=options.variants.split(","),
- distros=options.distros.split(","),
- evg_cfg=read_evg_config())
- view_report = report.generate_report()
- summ_report = view_report.view_summary(group_on=["test", "task", "variant"])
- for s in sorted(summ_report):
- print(s)
+ def read_evg_config():
+ """
+ Attempts to parse the user's or system's Evergreen configuration from its known locations.
+
+ Returns None if the configuration file wasn't found anywhere.
+ """
+
+ known_locations = [
+ "./.evergreen.yml",
+ os.path.expanduser("~/.evergreen.yml"),
+ os.path.expanduser("~/cli_bin/.evergreen.yml"),
+ ]
+
+ for filename in known_locations:
+ if os.path.isfile(filename):
+ with open(filename, "r") as fstream:
+ return yaml.safe_load(fstream)
+
+ return None
+
+ evg_config = read_evg_config()
+ evg_config = evg_config if evg_config is not None else {}
+ api_server = "{url.scheme}://{url.netloc}".format(
+ url=urlparse(evg_config.get("api_server_host", TestHistory.DEFAULT_API_SERVER)))
+
+ test_history = TestHistory(api_server=api_server,
+ project=options.project,
+ tests=tests,
+ tasks=options.tasks.split(","),
+ variants=options.variants.split(","),
+ distros=options.distros.split(","))
+
+ if options.since_revision:
+ history_data = test_history.get_history_by_revision(
+ start_revision=options.since_revision,
+ end_revision=options.until_revision)
+ elif options.since_date:
+ history_data = test_history.get_history_by_date(
+ start_date=options.since_date,
+ end_date=options.until_date)
+
+ report = Report(history_data)
+ summary = report.summarize_by(Report.TEST_TASK_VARIANT_DISTRO,
+ time_period=options.group_period,
+ start_day_of_week=options.start_day_of_week)
+
+ for entry in summary:
+ print("(test={e.test},"
+ " task={e.task},"
+ " variant={e.variant},"
+ " distro={e.distro},"
+ " start_date={e.start_date:%Y-%m-%d},"
+ " end_date={e.end_date:%Y-%m-%d},"
+ " num_pass={e.num_pass},"
+ " num_fail={e.num_fail},"
+ " fail_rate={e.fail_rate:0.2%})".format(e=entry))
+
if __name__ == "__main__":
main()
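The Wildcard handling in ReportEntry.sum() can be seen in a short, hedged sketch (not part of the commit; the test names, dates, and counts are made up). The new unit tests in buildscripts/tests/test_test_failures.py further below exercise the same behavior in detail.

    import datetime

    from buildscripts.test_failures import ReportEntry, Wildcard

    entry1 = ReportEntry(test="jstests/core/all.js", task="jsCore_WT", variant="linux-64",
                         distro="rhel62", start_date=datetime.date(2017, 6, 1),
                         end_date=datetime.date(2017, 6, 1), num_pass=9, num_fail=1)
    entry2 = entry1._replace(distro="rhel55", num_pass=10, num_fail=0)

    # The two entries only differ in their distro, so the combined entry keeps the
    # common test/task/variant values and uses a Wildcard for the distro.
    combined = ReportEntry.sum([entry1, entry2])
    assert combined.distro == Wildcard("distros")
    assert combined.num_pass == 19 and combined.num_fail == 1
    assert abs(combined.fail_rate - 0.05) < 1e-9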
diff --git a/buildscripts/tests/ciconfig/test_tags.py b/buildscripts/tests/ciconfig/test_tags.py
index 4678bcc1f34..8f9b3932f4a 100644
--- a/buildscripts/tests/ciconfig/test_tags.py
+++ b/buildscripts/tests/ciconfig/test_tags.py
@@ -15,12 +15,12 @@ class TestTagsConfig(unittest.TestCase):
"""Unit tests for the TagsConfig class."""
def setUp(self):
- self.conf = _tags.TagsConfig(TEST_FILE_PATH)
+ self.conf = _tags.TagsConfig.from_file(TEST_FILE_PATH)
def test_invalid_path(self):
invalid_path = "non_existing_file"
with self.assertRaises(IOError):
- _tags.TagsConfig(invalid_path)
+ _tags.TagsConfig.from_file(invalid_path)
def test_list_test_kinds(self):
test_kinds = self.conf.get_test_kinds()
@@ -151,7 +151,7 @@ class TestTagsConfig(unittest.TestCase):
def custom_cmp(tag_a, tag_b):
return cmp(tag_a.split("|"), tag_b.split("|"))
- conf = _tags.TagsConfig(TEST_FILE_PATH, cmp_func=custom_cmp)
+ conf = _tags.TagsConfig.from_file(TEST_FILE_PATH, cmp_func=custom_cmp)
tags = conf.get_tags(test_kind, test_pattern)
self.assertEqual(["tag1", "tag2", "tag3"], tags)
diff --git a/buildscripts/tests/test_test_failures.py b/buildscripts/tests/test_test_failures.py
new file mode 100644
index 00000000000..0a2c570897b
--- /dev/null
+++ b/buildscripts/tests/test_test_failures.py
@@ -0,0 +1,676 @@
+"""
+Tests for buildscripts/test_failures.py.
+"""
+
+from __future__ import absolute_import
+
+import datetime
+import unittest
+
+from buildscripts import test_failures
+
+
+class TestReportEntry(unittest.TestCase):
+ """
+ Tests for the test_failures.ReportEntry class.
+ """
+
+ ENTRY = test_failures.ReportEntry(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
+ def test_fail_rate(self):
+ """
+ Tests for the test_failures.ReportEntry.fail_rate property.
+ """
+
+ entry = self.ENTRY._replace(num_pass=0, num_fail=1)
+ self.assertEqual(1, entry.fail_rate)
+
+ entry = self.ENTRY._replace(num_pass=9, num_fail=1)
+ self.assertAlmostEqual(0.1, entry.fail_rate)
+
+ # Verify that we don't attempt to divide by zero.
+ entry = self.ENTRY._replace(num_pass=0, num_fail=0)
+ self.assertEqual(0, entry.fail_rate)
+
+ def test_week_start_date_with_sunday(self):
+ """
+ Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week
+ specified as different forms of the string "Sunday".
+ """
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3))
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("sunday"))
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("Sunday"))
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date("SUNDAY"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4))
+ self.assertEqual(datetime.date(2017, 6, 4), entry.week_start_date("sunday"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 5))
+ self.assertEqual(datetime.date(2017, 6, 4), entry.week_start_date("sunday"))
+
+ def test_week_start_date_with_monday(self):
+ """
+ Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week
+ specified as different forms of the string "Monday".
+ """
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("monday"))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("Monday"))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("MONDAY"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4))
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date("monday"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 5))
+ self.assertEqual(datetime.date(2017, 6, 5), entry.week_start_date("monday"))
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 6))
+ self.assertEqual(datetime.date(2017, 6, 5), entry.week_start_date("monday"))
+
+ def test_week_start_date_with_date(self):
+ """
+ Tests for test_failures.ReportEntry.week_start_date() with the beginning of the week
+ specified as a datetime.date() value.
+ """
+
+ entry = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3))
+
+ date = datetime.date(2017, 5, 21)
+ self.assertEqual(6, date.weekday(), "2017 May 21 is a Sunday")
+ self.assertEqual(datetime.date(2017, 5, 28), entry.week_start_date(date))
+
+ date = datetime.date(2017, 5, 22)
+ self.assertEqual(0, date.weekday(), "2017 May 22 is a Monday")
+ self.assertEqual(datetime.date(2017, 5, 29), entry.week_start_date(date))
+
+ date = datetime.date(2017, 6, 6)
+ self.assertEqual(1, date.weekday(), "2017 Jun 06 is a Tuesday")
+ self.assertEqual(datetime.date(2017, 5, 30), entry.week_start_date(date))
+
+ date = datetime.date(2017, 6, 9)
+ self.assertEqual(4, date.weekday(), "2017 Jun 09 is a Friday")
+ self.assertEqual(datetime.date(2017, 6, 2), entry.week_start_date(date))
+
+ date = datetime.date(2017, 6, 3)
+ self.assertEqual(5, date.weekday(), "2017 Jun 03 is a Saturday")
+ self.assertEqual(datetime.date(2017, 6, 3), entry.week_start_date(date))
+
+ def test_sum_combines_test_results(self):
+ """
+ Tests for test_failures.ReportEntry.sum() that verify the start_date, end_date, num_pass,
+ and num_fail attributes are accumulated correctly.
+ """
+
+ entry1 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 1),
+ end_date=datetime.date(2017, 6, 1),
+ num_pass=1,
+ num_fail=0)
+
+ entry2 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 2),
+ end_date=datetime.date(2017, 6, 2),
+ num_pass=0,
+ num_fail=3)
+
+ entry3 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
+ entry4 = self.ENTRY._replace(start_date=datetime.date(2017, 6, 4),
+ end_date=datetime.date(2017, 6, 4),
+ num_pass=2,
+ num_fail=2)
+
+ entry_1234 = test_failures.ReportEntry.sum([entry1, entry2, entry3, entry4])
+ entry_1432 = test_failures.ReportEntry.sum([entry1, entry4, entry3, entry2])
+ entry_124 = test_failures.ReportEntry.sum([entry1, entry2, entry4])
+ entry_13 = test_failures.ReportEntry.sum([entry1, entry3])
+ entry_42 = test_failures.ReportEntry.sum([entry4, entry2])
+
+ self.assertEqual(datetime.date(2017, 6, 1), entry_1234.start_date)
+ self.assertEqual(datetime.date(2017, 6, 4), entry_1234.end_date)
+ self.assertEqual(3, entry_1234.num_pass)
+ self.assertEqual(5, entry_1234.num_fail)
+
+ self.assertEqual(entry_1234, entry_1432, "order of arguments shouldn't matter")
+ self.assertEqual(entry_1234, entry_124, "entry3 didn't have any test executions")
+
+ self.assertEqual(datetime.date(2017, 6, 1), entry_13.start_date)
+ self.assertEqual(datetime.date(2017, 6, 3), entry_13.end_date)
+ self.assertEqual(1, entry_13.num_pass)
+ self.assertEqual(0, entry_13.num_fail)
+
+ self.assertEqual(datetime.date(2017, 6, 2), entry_42.start_date)
+ self.assertEqual(datetime.date(2017, 6, 4), entry_42.end_date)
+ self.assertEqual(2, entry_42.num_pass)
+ self.assertEqual(5, entry_42.num_fail)
+
+ def test_sum_combines_test_info(self):
+ """
+ Tests for test_failures.ReportEntry.sum() that verify the test, task, variant, and distro
+ attributes are accumulated correctly.
+ """
+
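+        # Fields on which the summed entries agree are kept as-is; fields on which they differ
+        # are collapsed into a test_failures.Wildcard placeholder.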
+ entry1 = self.ENTRY._replace(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62")
+
+ entry2 = self.ENTRY._replace(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel55")
+
+ entry3 = self.ENTRY._replace(test="jstests/core/all2.js",
+ task="jsCore_WT",
+ variant="linux-64-debug",
+ distro="rhel62")
+
+ entry4 = self.ENTRY._replace(test="jstests/core/all.js",
+ task="jsCore",
+ variant="linux-64-debug",
+ distro="rhel62")
+
+ entry_12 = test_failures.ReportEntry.sum([entry1, entry2])
+ self.assertEqual("jstests/core/all.js", entry_12.test)
+ self.assertEqual("jsCore_WT", entry_12.task)
+ self.assertEqual("linux-64", entry_12.variant)
+ self.assertIsInstance(entry_12.distro, test_failures.Wildcard)
+
+ entry_123 = test_failures.ReportEntry.sum([entry1, entry2, entry3])
+ self.assertIsInstance(entry_123.test, test_failures.Wildcard)
+ self.assertEqual("jsCore_WT", entry_123.task)
+ self.assertIsInstance(entry_123.variant, test_failures.Wildcard)
+ self.assertIsInstance(entry_123.distro, test_failures.Wildcard)
+
+ entry_1234 = test_failures.ReportEntry.sum([entry1, entry2, entry3, entry4])
+ self.assertIsInstance(entry_1234.test, test_failures.Wildcard)
+ self.assertIsInstance(entry_1234.task, test_failures.Wildcard)
+ self.assertIsInstance(entry_1234.variant, test_failures.Wildcard)
+ self.assertIsInstance(entry_1234.distro, test_failures.Wildcard)
+
+ entry_34 = test_failures.ReportEntry.sum([entry3, entry4])
+ self.assertIsInstance(entry_34.test, test_failures.Wildcard)
+ self.assertIsInstance(entry_34.task, test_failures.Wildcard)
+ self.assertEqual("linux-64-debug", entry_34.variant)
+ self.assertEqual("rhel62", entry_34.distro)
+
+
+class TestReportSummarization(unittest.TestCase):
+ """
+ Tests for test_failures.Report.summarize_by().
+ """
+
+ ENTRY = test_failures.ReportEntry(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
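+    # Daily results spanning 2017-06-03 through 2017-06-17 that vary the test, task, variant,
+    # and distro fields, so different groupings collapse different fields into Wildcard values.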
+ ENTRIES = [
+ ENTRY._replace(start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=1,
+ num_fail=0),
+ ENTRY._replace(task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1),
+ ENTRY._replace(start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0),
+ # The following entry is intentionally not in timestamp order to verify that the
+ # 'time_period' parameter becomes part of the sort in summarize_by().
+ ENTRY._replace(start_date=datetime.date(2017, 6, 9),
+ end_date=datetime.date(2017, 6, 9),
+ num_pass=1,
+ num_fail=0),
+ ENTRY._replace(distro="rhel55",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=0,
+ num_fail=1),
+ ENTRY._replace(test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0),
+ ENTRY._replace(variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1),
+ ]
+
+ def test_group_all_by_test_task_variant_distro(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of
+ (test, task, variant, distro).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT_DISTRO)
+ self.assertEqual(5, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro="rhel55",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=3,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[4], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_test_task_variant(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of
+ (test, task, variant).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT)
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=3,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_test_task(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of (test, task).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST_TASK)
+ self.assertEqual(3, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ variant=test_failures.Wildcard("variants"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=3,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of (test,).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST)
+ self.assertEqual(2, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ variant=test_failures.Wildcard("variants"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=3,
+ num_fail=3,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_all_by_variant_task(self):
+ """
+ Tests that summarize_by() correctly accumulates all unique combinations of (variant, task).
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(["variant", "task"])
+ self.assertEqual(3, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ test=test_failures.Wildcard("tests"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=4,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+
+ def test_group_weekly_by_test_starting_on_sunday(self):
+ """
+ Tests that summarize_by() correctly accumulates by week when the beginning of the week is
+ specified as the string "sunday".
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.WEEKLY,
+ start_day_of_week=test_failures.Report.SUNDAY)
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 4),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=2,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 11),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 4),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_weekly_by_test_starting_on_monday(self):
+ """
+ Tests that summarize_by() correctly accumulates by week when the beginning of the week is
+ specified as the string "monday".
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.WEEKLY,
+ start_day_of_week=test_failures.Report.MONDAY)
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 4),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=2,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 12),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_weekly_by_test_starting_on_date(self):
+ """
+ Tests that summarize_by() correctly accumulates by week when the beginning of the week is
+ specified as a datetime.date() value.
+ """
+
+ date = datetime.date(2017, 6, 7)
+ self.assertEqual(2, date.weekday(), "2017 Jun 07 is a Wednesday")
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.WEEKLY,
+ start_day_of_week=date)
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 6),
+ num_pass=1,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 13),
+ num_pass=2,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 14),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 13),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_daily_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates by day.
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=test_failures.Report.DAILY)
+
+ self.assertEqual(6, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ task="jsCore",
+ start_date=datetime.date(2017, 6, 5),
+ end_date=datetime.date(2017, 6, 5),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ start_date=datetime.date(2017, 6, 9),
+ end_date=datetime.date(2017, 6, 9),
+ num_pass=1,
+ num_fail=0,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[4], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 17),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[5], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 10),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_4days_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates by multiple days.
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=datetime.timedelta(days=4))
+
+ self.assertEqual(4, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 6),
+ num_pass=1,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=2,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 15),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[3], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 7),
+ end_date=datetime.date(2017, 6, 10),
+ num_pass=1,
+ num_fail=0,
+ ))
+
+ def test_group_9days_by_test(self):
+ """
+ Tests that summarize_by() correctly accumulates by multiple days, including time periods
+ greater than 1 week.
+ """
+
+ report = test_failures.Report(self.ENTRIES)
+ summed_entries = report.summarize_by(test_failures.Report.TEST,
+ time_period=datetime.timedelta(days=9))
+
+ self.assertEqual(3, len(summed_entries))
+ self.assertEqual(summed_entries[0], self.ENTRY._replace(
+ task=test_failures.Wildcard("tasks"),
+ distro=test_failures.Wildcard("distros"),
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=3,
+ num_fail=2,
+ ))
+ self.assertEqual(summed_entries[1], self.ENTRY._replace(
+ variant="linux-64-debug",
+ start_date=datetime.date(2017, 6, 12),
+ end_date=datetime.date(2017, 6, 17),
+ num_pass=0,
+ num_fail=1,
+ ))
+ self.assertEqual(summed_entries[2], self.ENTRY._replace(
+ test="jstests/core/all2.js",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 11),
+ num_pass=1,
+ num_fail=0,
+ ))
diff --git a/buildscripts/tests/test_update_test_lifecycle.py b/buildscripts/tests/test_update_test_lifecycle.py
new file mode 100644
index 00000000000..145065c2c6c
--- /dev/null
+++ b/buildscripts/tests/test_update_test_lifecycle.py
@@ -0,0 +1,760 @@
+"""
+Tests for buildscripts/update_test_lifecycle.py.
+"""
+
+from __future__ import absolute_import
+
+import collections
+import copy
+import datetime
+import unittest
+
+from buildscripts import test_failures
+from buildscripts import update_test_lifecycle
+from buildscripts.ciconfig import tags as ci_tags
+
+
+class TestValidateConfig(unittest.TestCase):
+ """
+ Tests for the validate_config() function.
+ """
+
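+    # A minimal valid configuration; each test case below uses _replace() to perturb a single
+    # field and asserts that validate_config() rejects the result.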
+ CONFIG = update_test_lifecycle.Config(
+ test_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ task_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ variant_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ distro_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ reliable_min_runs=2,
+ reliable_time_period=datetime.timedelta(days=1),
+ unreliable_min_runs=2,
+ unreliable_time_period=datetime.timedelta(days=1))
+
+ def test_acceptable_test_fail_rate(self):
+ """
+ Tests the validation of the 'test_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_test_fail_rate(self):
+ """
+ Tests the validation of the 'test_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_test_fail_rates(self):
+ """
+ Tests the validation of the 'test_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_acceptable_task_fail_rate(self):
+ """
+        Tests the validation of the 'task_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_task_fail_rate(self):
+ """
+ Tests the validation of the 'task_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_task_fail_rates(self):
+ """
+ Tests the validation of the 'task_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_acceptable_variant_fail_rate(self):
+ """
+ Tests the validation of the 'variant_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(
+ acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_variant_fail_rate(self):
+ """
+ Tests the validation of the 'variant_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(
+ unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_variant_fail_rates(self):
+ """
+ Tests the validation of the 'variant_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_acceptable_distro_fail_rate(self):
+ """
+ Tests the validation of the 'distro_fail_rates.acceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unacceptable_distro_fail_rate(self):
+ """
+ Tests the validation of the 'distro_fail_rates.unacceptable' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(
+ unacceptable="not a number"))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=2))
+ update_test_lifecycle.validate_config(config)
+
+ def test_distro_fail_rates(self):
+ """
+ Tests the validation of the 'distro_fail_rates' attribute.
+ """
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9,
+ unacceptable=0.1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_reliable_min_runs(self):
+ """
+ Tests the validation of the 'reliable_min_runs' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(reliable_min_runs="not a number")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_min_runs=-1)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_min_runs=0)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_min_runs=1.5)
+ update_test_lifecycle.validate_config(config)
+
+ def test_reliable_time_period(self):
+ """
+ Tests the validation of the 'reliable_time_period' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(reliable_time_period="not a datetime.timedelta")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=0))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(reliable_time_period=datetime.timedelta(days=1, hours=1))
+ update_test_lifecycle.validate_config(config)
+
+ def test_unreliable_min_runs(self):
+ """
+ Tests the validation of the 'unreliable_min_runs' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(unreliable_min_runs="not a number")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_min_runs=-1)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_min_runs=0)
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_min_runs=1.5)
+ update_test_lifecycle.validate_config(config)
+
+ def test_unreliable_time_period(self):
+ """
+ Tests the validation of the 'unreliable_time_period' attribute.
+ """
+
+ with self.assertRaises(TypeError):
+ config = self.CONFIG._replace(unreliable_time_period="not a datetime.timedelta")
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=-1))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=0))
+ update_test_lifecycle.validate_config(config)
+
+ with self.assertRaises(ValueError):
+ config = self.CONFIG._replace(
+ unreliable_time_period=datetime.timedelta(days=1, hours=1))
+ update_test_lifecycle.validate_config(config)
+
+
+class TestUpdateTags(unittest.TestCase):
+ """
+ Tests for the update_tags() function.
+ """
+
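+    # Baseline model and report entry; the test cases below adjust thresholds and test results
+    # with _replace() to exercise the different tagging transitions.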
+ CONFIG = update_test_lifecycle.Config(
+ test_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ task_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ variant_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ distro_fail_rates=update_test_lifecycle.Rates(acceptable=0, unacceptable=1),
+ reliable_min_runs=2,
+ reliable_time_period=datetime.timedelta(days=1),
+ unreliable_min_runs=2,
+ unreliable_time_period=datetime.timedelta(days=1))
+
+ ENTRY = test_failures.ReportEntry(test="jstests/core/all.js",
+ task="jsCore_WT",
+ variant="linux-64",
+ distro="rhel62",
+ start_date=datetime.date(2017, 6, 3),
+ end_date=datetime.date(2017, 6, 3),
+ num_pass=0,
+ num_fail=0)
+
+ def assert_has_only_js_tests(self, lifecycle):
+ """
+ Raises an AssertionError exception if 'lifecycle' is not of the following form:
+
+ selector:
+ js_test:
+ ...
+ """
+
+ self.assertIn("selector", lifecycle.raw)
+ self.assertEqual(1, len(lifecycle.raw), msg=str(lifecycle.raw))
+ self.assertIn("js_test", lifecycle.raw["selector"])
+ self.assertEqual(1, len(lifecycle.raw["selector"]), msg=str(lifecycle.raw))
+
+ return lifecycle.raw["selector"]["js_test"]
+
+ def transition_from_reliable_to_unreliable(self, config, expected_tags):
+ """
+ Tests that update_tags() tags a formerly reliable combination as being unreliable.
+ """
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(collections.OrderedDict(), self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, expected_tags)
+
+ def test_transition_test_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test,) combination as being unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable"]),
+ ]))
+
+ def test_transition_task_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test, task) combination as being
+ unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT"]),
+ ]))
+
+ def test_transition_variant_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test, task, variant) combination as being
+ unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64"]),
+ ]))
+
+ def test_transition_distro_from_reliable_to_unreliable(self):
+ """
+ Tests that update_tags() tags a formerly reliable (test, task, variant, distro) combination
+ as being unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64|rhel62"]),
+ ]))
+
+ def test_transition_from_reliable_to_unreliable(self):
+ """
+        Tests that update_tags() tags multiple formerly reliable combinations as being unreliable.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ self.transition_from_reliable_to_unreliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def transition_from_unreliable_to_reliable(self, config, initial_tags):
+ """
+ Tests that update_tags() untags a formerly unreliable combination after it has become
+ reliable again.
+ """
+
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, collections.OrderedDict())
+
+ def test_transition_test_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test,) combination after it has
+ become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable"]),
+ ]))
+
+ def test_transition_task_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test, task) combination after it has
+ become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT"]),
+ ]))
+
+ def test_transition_variant_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test, task, variant) combination
+ after it has become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64"]),
+ ]))
+
+ def test_transition_distro_from_unreliable_to_reliable(self):
+ """
+ Tests that update_tags() untags a formerly unreliable (test, task, variant, distro)
+ combination after it has become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", ["unreliable|jsCore_WT|linux-64|rhel62"]),
+ ]))
+
+ def test_transition_from_unreliable_to_reliable(self):
+ """
+        Tests that update_tags() untags multiple formerly unreliable combinations after they have
+        become reliable again.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def test_remain_reliable(self):
+ """
+ Tests that update_tags() preserves the absence of tags for reliable combinations.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, initial_tags)
+
+ def test_remain_unreliable(self):
+ """
+ Tests that update_tags() preserves the tags for unreliable combinations.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ initial_tags = collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ])
+
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, initial_tags)
+
+ def test_obeys_reliable_min_runs(self):
+ """
+        Tests that update_tags() considers a test reliable if it has fewer than
+        'reliable_min_runs' executions.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9),
+ reliable_min_runs=100)
+
+ self.transition_from_unreliable_to_reliable(config, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def test_obeys_reliable_time_period(self):
+ """
+ Tests that update_tags() ignores passes from before 'reliable_time_period'.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(acceptable=0.9),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(acceptable=0.9),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(acceptable=0.9),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(acceptable=0.9))
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
+ num_pass=1,
+ num_fail=0),
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=2)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=2)),
+ num_pass=1,
+ num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ]))
+
+ def test_obeys_unreliable_min_runs(self):
+ """
+        Tests that update_tags() only considers a test unreliable if it has at least
+        'unreliable_min_runs' executions.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1),
+ unreliable_min_runs=100)
+
+ initial_tags = collections.OrderedDict()
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(num_pass=0, num_fail=1),
+ self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
+ self.ENTRY._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=0, num_fail=1, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, initial_tags)
+
+ def test_obeys_unreliable_time_period(self):
+ """
+ Tests that update_tags() ignores failures from before 'unreliable_time_period'.
+ """
+
+ config = self.CONFIG._replace(
+ test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+ task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+ variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+ distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1))
+
+ initial_tags = collections.OrderedDict([
+ ("jstests/core/all.js", [
+ "unreliable",
+ "unreliable|jsCore_WT",
+ "unreliable|jsCore_WT|linux-64",
+ "unreliable|jsCore_WT|linux-64|rhel62",
+ ]),
+ ])
+
+ lifecycle = ci_tags.TagsConfig.from_dict(
+ dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+ self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+ report = test_failures.Report([
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
+ num_pass=0,
+ num_fail=1),
+ self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=2)),
+ end_date=(self.ENTRY.end_date - datetime.timedelta(days=2)),
+ num_pass=0,
+ num_fail=1),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0),
+ self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
+ self.ENTRY._replace(num_pass=1, num_fail=0, distro="rhel55"),
+ ])
+
+ update_test_lifecycle.validate_config(config)
+ update_test_lifecycle.update_tags(lifecycle, config, report)
+ updated_tags = self.assert_has_only_js_tests(lifecycle)
+ self.assertEqual(updated_tags, collections.OrderedDict())
diff --git a/buildscripts/update_test_lifecycle.py b/buildscripts/update_test_lifecycle.py
index 4b4325b255a..9699a5418d8 100755
--- a/buildscripts/update_test_lifecycle.py
+++ b/buildscripts/update_test_lifecycle.py
@@ -4,30 +4,73 @@
Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates.
"""
+
+from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
-import copy
+import datetime
import optparse
-import os
+import os.path
import subprocess
import sys
+import textwrap
+import warnings
# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
from buildscripts import resmokelib
-from buildscripts.ciconfig import evergreen
-from buildscripts.ciconfig import tags
from buildscripts import test_failures as tf
+from buildscripts.ciconfig import evergreen as ci_evergreen
+from buildscripts.ciconfig import tags as ci_tags
+
+
+if sys.version_info[0] == 2:
+ _NUMBER_TYPES = (int, long, float)
+else:
+ _NUMBER_TYPES = (int, float)
+
+
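+# A pair of failure-rate thresholds; one Rates value is used for each of the (test,),
+# (test, task), (test, task, variant), and (test, task, variant, distro) groupings.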
+Rates = collections.namedtuple("Rates", ["acceptable", "unacceptable"])
+
+
+Config = collections.namedtuple("Config", [
+ "test_fail_rates",
+ "task_fail_rates",
+ "variant_fail_rates",
+ "distro_fail_rates",
+ "reliable_min_runs",
+ "reliable_time_period",
+ "unreliable_min_runs",
+ "unreliable_time_period",
+])
+
+
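+# Default thresholds, minimum run counts, and time periods for the test lifecycle model.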
+DEFAULT_CONFIG = Config(
+ test_fail_rates=Rates(acceptable=0.1, unacceptable=0.3),
+ task_fail_rates=Rates(acceptable=0.1, unacceptable=0.3),
+ variant_fail_rates=Rates(acceptable=0.2, unacceptable=0.4),
+ distro_fail_rates=Rates(acceptable=0.2, unacceptable=0.4),
+ reliable_min_runs=5,
+ reliable_time_period=datetime.timedelta(weeks=1),
+ unreliable_min_runs=20,
+ unreliable_time_period=datetime.timedelta(weeks=4))
+
+
+DEFAULT_PROJECT = "mongodb-mongo-master"
def write_yaml_file(yaml_file, lifecycle):
"""Writes the lifecycle object to yaml_file."""
- comment = ("This file was generated by {} and shouldn't be edited by hand. It was"
- " generated against commit {} with the following invocation: {}.").format(
- sys.argv[0], callo(["git", "rev-parse", "HEAD"]), " ".join(sys.argv))
+
+ comment = (
+ "This file was generated by {} and shouldn't be edited by hand. It was generated against"
+ " commit {} with the following invocation: {}."
+ ).format(sys.argv[0], callo(["git", "rev-parse", "HEAD"]).rstrip(), " ".join(sys.argv))
+
lifecycle.write_file(yaml_file, comment)
@@ -116,7 +159,7 @@ def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run):
A test should be added to the set of tests believed not to run reliably when it has more
than min_run executions with a failure percentage greater than unacceptable_fr.
"""
- return test_runs >= min_run and test_fr > unacceptable_fr
+ return test_runs >= min_run and test_fr >= unacceptable_fr
def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
@@ -125,7 +168,7 @@ def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
     A test should then be removed from the set of tests believed not to run reliably when it has
less than min_run executions or has a failure percentage less than acceptable_fr.
"""
- return test_runs < min_run or test_fr < acceptable_fr
+ return test_runs < min_run or test_fr <= acceptable_fr
def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr):
@@ -141,16 +184,21 @@ def check_days(name, days):
raise ValueError("'{}' days must be greater than 0.".format(name))
-def unreliable_tag(test, task, variant, distro):
+def unreliable_tag(task, variant, distro):
"""Returns the unreliable tag."""
- if distro and variant and task and test:
- return "unreliable|{}|{}|{}".format(task, variant, distro)
- elif variant and task and test:
- return "unreliable|{}|{}".format(task, variant)
- elif task and test:
- return "unreliable|{}".format(task)
- elif test:
- return "unreliable"
+
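+    # The tag's specificity stops at the first component that is a Wildcard or Missing value,
+    # e.g. a Wildcard variant yields "unreliable|<task>".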
+ for (component_name, component_value) in (("task", task),
+ ("variant", variant),
+ ("distro", distro)):
+ if isinstance(component_value, (tf.Wildcard, tf.Missing)):
+ if component_name == "task":
+ return "unreliable"
+ elif component_name == "variant":
+ return "unreliable|{}".format(task)
+ elif component_name == "distro":
+ return "unreliable|{}|{}".format(task, variant)
+
+ return "unreliable|{}|{}|{}".format(task, variant, distro)
def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_run):
@@ -163,8 +211,7 @@ def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_ru
fail_rate,
summary.num_pass + summary.num_fail,
min_run):
- update_tag = unreliable_tag(
- summary.test, summary.task, summary.variant, summary.distro)
+ update_tag = unreliable_tag(summary.task, summary.variant, summary.distro)
if add_tags:
lifecycle.add_tag("js_test", summary.test, update_tag)
else:
@@ -175,101 +222,266 @@ def compare_tags(tag_a, tag_b):
return cmp(tag_a.split("|"), tag_b.split("|"))
+def validate_config(config):
+ """
+ Raises a TypeError or ValueError exception if 'config' isn't a valid model.
+ """
+
+ for (name, fail_rates) in (("test", config.test_fail_rates),
+ ("task", config.task_fail_rates),
+ ("variant", config.variant_fail_rates),
+ ("distro", config.distro_fail_rates)):
+ if not isinstance(fail_rates.acceptable, _NUMBER_TYPES):
+ raise TypeError("The acceptable {} failure rate must be a number, but got {}".format(
+ name, fail_rates.acceptable))
+ elif fail_rates.acceptable < 0 or fail_rates.acceptable > 1:
+ raise ValueError(("The acceptable {} failure rate must be between 0 and 1 (inclusive),"
+ " but got {}").format(name, fail_rates.acceptable))
+ elif not isinstance(fail_rates.unacceptable, _NUMBER_TYPES):
+ raise TypeError("The unacceptable {} failure rate must be a number, but got {}".format(
+ name, fail_rates.unacceptable))
+ elif fail_rates.unacceptable < 0 or fail_rates.unacceptable > 1:
+ raise ValueError(("The unacceptable {} failure rate must be between 0 and 1"
+ " (inclusive), but got {}").format(name, fail_rates.unacceptable))
+ elif fail_rates.acceptable > fail_rates.unacceptable:
+ raise ValueError(
+ ("The acceptable {0} failure rate ({1}) must be no larger than unacceptable {0}"
+ " failure rate ({2})").format(
+ name, fail_rates.acceptable, fail_rates.unacceptable))
+
+ for (name, min_runs) in (("reliable", config.reliable_min_runs),
+ ("unreliable", config.unreliable_min_runs)):
+ if not isinstance(min_runs, _NUMBER_TYPES):
+ raise TypeError(("The minimum number of runs for considering a test {} must be a"
+ " number, but got {}").format(name, min_runs))
+ elif min_runs <= 0:
+ raise ValueError(("The minimum number of runs for considering a test {} must be a"
+ " positive integer, but got {}").format(name, min_runs))
+ elif isinstance(min_runs, float) and not min_runs.is_integer():
+ raise ValueError(("The minimum number of runs for considering a test {} must be an"
+ " integer, but got {}").format(name, min_runs))
+
+ for (name, time_period) in (("reliable", config.reliable_time_period),
+ ("unreliable", config.unreliable_time_period)):
+ if not isinstance(time_period, datetime.timedelta):
+ raise TypeError(
+ "The {} time period must be a datetime.timedelta instance, but got {}".format(
+ name, time_period))
+ elif time_period.days <= 0:
+ raise ValueError(
+ "The {} time period must be a positive number of days, but got {}".format(
+ name, time_period))
+ elif time_period - datetime.timedelta(days=time_period.days) > datetime.timedelta():
+ raise ValueError(
+ "The {} time period must be an integral number of days, but got {}".format(
+ name, time_period))
+
+
+def update_tags(lifecycle, config, report):
+ """
+ Updates the tags in 'lifecycle' based on the historical test failures mentioned in 'report'
+ according to the model described by 'config'.
+ """
+
+    # We initialize 'grouped_entries' so that PyLint doesn't complain about it being used before
+    # assignment.
+ grouped_entries = None
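+    # The groupings are processed from most specific, (test, task, variant, distro), down to
+    # least specific, (test,), so each iteration can reuse the daily summaries computed by the
+    # previous one.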
+ for (i, (components, rates)) in enumerate(
+ ((tf.Report.TEST_TASK_VARIANT_DISTRO, config.distro_fail_rates),
+ (tf.Report.TEST_TASK_VARIANT, config.variant_fail_rates),
+ (tf.Report.TEST_TASK, config.task_fail_rates),
+ (tf.Report.TEST, config.test_fail_rates))):
+ if i > 0:
+ report = tf.Report(grouped_entries)
+
+        # We reassign 'grouped_entries' to take advantage of the fact that data grouped by
+        # (test, task, variant, distro) preserves enough information to be regrouped on any
+        # subset of those components.
+ grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY)
+
+ # Filter out any test executions from prior to 'config.unreliable_time_period'.
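+        # Adding one day makes the window inclusive of 'report.end_date' itself; a one-day time
+        # period keeps only the entries whose start_date equals the report's end date.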
+ unreliable_start_date = (report.end_date - config.unreliable_time_period
+ + datetime.timedelta(days=1))
+ unreliable_report = tf.Report(entry for entry in grouped_entries
+ if entry.start_date >= unreliable_start_date)
+ update_lifecycle(lifecycle,
+ unreliable_report.summarize_by(components),
+ unreliable_test,
+ True,
+ rates.unacceptable,
+ config.unreliable_min_runs)
+
+ # Filter out any test executions from prior to 'config.reliable_time_period'.
+ reliable_start_date = (report.end_date - config.reliable_time_period
+ + datetime.timedelta(days=1))
+ reliable_report = tf.Report(entry for entry in grouped_entries
+ if entry.start_date >= reliable_start_date)
+ update_lifecycle(lifecycle,
+ reliable_report.summarize_by(components),
+ reliable_test,
+ False,
+ rates.acceptable,
+ config.reliable_min_runs)
+
+
def main():
+ """
+ Utility for updating a resmoke.py tag file based on computing test failure rates from the
+ Evergreen API.
+ """
- required_options = ["project",
- "reliable_test_min_run",
- "unreliable_test_min_run",
- "test_fail_rates",
- ]
- parser = optparse.OptionParser(description=__doc__,
- usage="Usage: %prog [options] test1 test2 ...")
- parser.add_option("--project", dest="project",
- default=None,
- help="Evergreen project to analyze [REQUIRED].")
- parser.add_option("--reliableTestMinimumRun", dest="reliable_test_min_run",
- default=None,
- type="int",
- help="Minimum number of tests runs for test to be considered as reliable"
- " [REQUIRED].")
- parser.add_option("--unreliableTestMinimumRun", dest="unreliable_test_min_run",
- default=None,
- type="int",
- help="Minimum number of tests runs for test to be considered as unreliable"
- " [REQUIRED].")
- parser.add_option("--testFailRates", dest="test_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Test fail rates: acceptable fail rate and unacceptable fail rate"
- " Specify floating numbers between 0.0 and 1.0 [REQUIRED].")
- parser.add_option("--taskFailRates", dest="task_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Task fail rates: acceptable fail rate and unacceptable fail rate."
- " Specify floating numbers between 0.0 and 1.0."
- " Uses --test-fail-rates if unspecified.")
- parser.add_option("--variantFailRates", dest="variant_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Variant fail rates: acceptable fail rate and unacceptable fail rate."
- " Specify floating numbers between 0.0 and 1.0."
- " Uses --task-fail-rates if unspecified.")
- parser.add_option("--distroFailRates", dest="distro_fail_rates",
- metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
- default=None,
- type="float",
- nargs=2,
- help="Distro fail rates: acceptable fail rate and unacceptable fail rate."
- " Specify floating numbers between 0.0 and 1.0."
- " Uses --variant-fail-rates if unspecified.")
- parser.add_option("--tasks", dest="tasks",
- default=None,
- help="Names of tasks to analyze for tagging unreliable tests."
- " If specified and no tests are specified, then only tests"
- " associated with the tasks will be analyzed."
- " If unspecified and no tests are specified, the list of tasks will be"
- " the non-excluded list of tasks from the file specified by"
- " '--evergreenYML'.")
- parser.add_option("--variants", dest="variants",
- default="",
- help="Names of variants to analyze for tagging unreliable tests.")
- parser.add_option("--distros", dest="distros",
- default="",
- help="Names of distros to analyze for tagging unreliable tests [UNUSED].")
- parser.add_option("--evergreenYML", dest="evergreen_yml",
- default="etc/evergreen.yml",
- help="Evergreen YML file used to get the list of tasks,"
- " defaults to '%default'.")
- parser.add_option("--lifecycleFile", dest="lifecycle_file",
+ parser = optparse.OptionParser(description=textwrap.dedent(main.__doc__),
+ usage="Usage: %prog [options] [test1 test2 ...]")
+
+ data_options = optparse.OptionGroup(
+ parser,
+ title="Data options",
+ description=("Options used to configure what historical test failure data to retrieve from"
+ " Evergreen."))
+ parser.add_option_group(data_options)
+
+ data_options.add_option(
+ "--project", dest="project",
+ metavar="<project-name>",
+ default=tf.TestHistory.DEFAULT_PROJECT,
+ help="The Evergreen project to analyze. Defaults to '%default'.")
+
+ data_options.add_option(
+ "--tasks", dest="tasks",
+ metavar="<task1,task2,...>",
+ help=("The Evergreen tasks to analyze for tagging unreliable tests. If specified in"
+              " addition to test positional arguments, then only tests that run under the"
+ " specified Evergreen tasks will be analyzed. If omitted, then the list of tasks"
+ " defaults to the non-excluded list of tasks from the specified"
+ " --evergreenProjectConfig file."))
+
+ data_options.add_option(
+ "--variants", dest="variants",
+ metavar="<variant1,variant2,...>",
+ default="",
+ help="The Evergreen build variants to analyze for tagging unreliable tests.")
+
+ data_options.add_option(
+ "--distros", dest="distros",
+ metavar="<distro1,distro2,...>",
+ default="",
+ help="The Evergreen distros to analyze for tagging unreliable tests.")
+
+ data_options.add_option(
+ "--evergreenProjectConfig", dest="evergreen_project_config",
+ metavar="<project-config-file>",
+ default="etc/evergreen.yml",
+ help=("The Evergreen project configuration file used to get the list of tasks if --tasks is"
+ " omitted. Defaults to '%default'."))
+
+ model_options = optparse.OptionGroup(
+ parser,
+ title="Model options",
+ description=("Options used to configure whether (test,), (test, task),"
+ " (test, task, variant), and (test, task, variant, distro) combinations are"
+ " considered unreliable."))
+ parser.add_option_group(model_options)
+
+ model_options.add_option(
+ "--reliableTestMinRuns", type="int", dest="reliable_test_min_runs",
+ metavar="<reliable-min-runs>",
+ default=DEFAULT_CONFIG.reliable_min_runs,
+        help=("The minimum number of test executions required before a test's failure rate is"
+              " used to determine whether the test is considered reliable. If a test has fewer"
+              " than <reliable-min-runs> executions, then it cannot be considered reliable."))
+
+ model_options.add_option(
+ "--unreliableTestMinRuns", type="int", dest="unreliable_test_min_runs",
+ metavar="<unreliable-min-runs>",
+ default=DEFAULT_CONFIG.unreliable_min_runs,
+        help=("The minimum number of test executions required before a test's failure rate is"
+              " used to determine whether the test is considered unreliable. If a test has fewer"
+              " than <unreliable-min-runs> executions, then it cannot be considered unreliable."))
+
+ model_options.add_option(
+ "--testFailRates", type="float", nargs=2, dest="test_fail_rates",
+ metavar="<test-acceptable-fail-rate> <test-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.test_fail_rates,
+ help=("Controls how readily a test is considered unreliable. Each failure rate must be a"
+ " number between 0 and 1 (inclusive) with"
+ " <test-unacceptable-fail-rate> >= <test-acceptable-fail-rate>. If a test fails no"
+ " more than <test-acceptable-fail-rate> in <reliable-days> time, then it is"
+ " considered reliable. Otherwise, if a test fails at least as much as"
+              " <test-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
+ " unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--taskFailRates", type="float", nargs=2, dest="task_fail_rates",
+ metavar="<task-acceptable-fail-rate> <task-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.task_fail_rates,
+ help=("Controls how readily a (test, task) combination is considered unreliable. Each"
+ " failure rate must be a number between 0 and 1 (inclusive) with"
+ " <task-unacceptable-fail-rate> >= <task-acceptable-fail-rate>. If a (test, task)"
+ " combination fails no more than <task-acceptable-fail-rate> in <reliable-days> time,"
+              " then it is considered reliable. Otherwise, if the combination fails at least"
+              " as much as <task-unacceptable-fail-rate> in <unreliable-days> time, then it is"
+              " considered unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--variantFailRates", type="float", nargs=2, dest="variant_fail_rates",
+ metavar="<variant-acceptable-fail-rate> <variant-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.variant_fail_rates,
+ help=("Controls how readily a (test, task, variant) combination is considered unreliable."
+ " Each failure rate must be a number between 0 and 1 (inclusive) with"
+ " <variant-unacceptable-fail-rate> >= <variant-acceptable-fail-rate>. If a"
+ " (test, task, variant) combination fails no more than <variant-acceptable-fail-rate>"
+              " in <reliable-days> time, then it is considered reliable. Otherwise, if the"
+              " combination fails at least as much as <variant-unacceptable-fail-rate> in"
+              " <unreliable-days> time, then it is considered unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--distroFailRates", type="float", nargs=2, dest="distro_fail_rates",
+ metavar="<distro-acceptable-fail-rate> <distro-unacceptable-fail-rate>",
+ default=DEFAULT_CONFIG.distro_fail_rates,
+ help=("Controls how readily a (test, task, variant, distro) combination is considered"
+ " unreliable. Each failure rate must be a number between 0 and 1 (inclusive) with"
+ " <distro-unacceptable-fail-rate> >= <distro-acceptable-fail-rate>. If a"
+ " (test, task, variant, distro) combination fails no more than"
+ " <distro-acceptable-fail-rate> in <reliable-days> time, then it is considered"
+              " reliable. Otherwise, if the combination fails at least as much as"
+ " <distro-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
+ " unreliable. Defaults to %default."))
+
+ model_options.add_option(
+ "--reliableDays", type="int", dest="reliable_days",
+ metavar="<ndays>",
+ default=DEFAULT_CONFIG.reliable_time_period.days,
+ help=("The time period to analyze when determining if a test has become reliable. Defaults"
+ " to %default day(s)."))
+
+ model_options.add_option(
+ "--unreliableDays", type="int", dest="unreliable_days",
+ metavar="<ndays>",
+ default=DEFAULT_CONFIG.unreliable_time_period.days,
+ help=("The time period to analyze when determining if a test has become unreliable."
+ " Defaults to %default day(s)."))
+
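Taken together, the model options describe a simple threshold rule: a combination is tagged as unreliable when, over the unreliable window, it has at least <unreliable-min-runs> executions and fails at a rate of at least the unacceptable fail rate; the tag is removed when, over the reliable window, it has at least <reliable-min-runs> executions and fails at no more than the acceptable fail rate. A rough sketch of that rule under assumed thresholds (0.1 acceptable, 0.3 unacceptable, 2 minimum runs; not necessarily the script's defaults, and the real decision is made by update_lifecycle() on the summaries computed in update_tags()):

    def is_unreliable(num_pass, num_fail, unacceptable_rate=0.3, min_runs=2):
        # Tag as unreliable only when there are enough executions in the unreliable window
        # and the observed failure rate is at least the unacceptable rate.
        num_runs = num_pass + num_fail
        return num_runs >= min_runs and float(num_fail) / num_runs >= unacceptable_rate

    def is_reliable(num_pass, num_fail, acceptable_rate=0.1, min_runs=2):
        # Remove the unreliable tag only when there are enough executions in the reliable window
        # and the observed failure rate is no more than the acceptable rate.
        num_runs = num_pass + num_fail
        return num_runs >= min_runs and float(num_fail) / num_runs <= acceptable_rate

    print(is_unreliable(num_pass=6, num_fail=4))   # True: 4/10 = 40% failures
    print(is_reliable(num_pass=19, num_fail=1))    # True: 1/20 = 5% failures
    print(is_unreliable(num_pass=0, num_fail=1))   # False: only 1 run, below min_runs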
+ parser.add_option("--resmokeTagFile", dest="tag_file",
+ metavar="<tagfile>",
default="etc/test_lifecycle.yml",
- help="Evergreen lifecycle file to update, defaults to '%default'.")
- parser.add_option("--reliableDays", dest="reliable_days",
- default=7,
- type="int",
- help="Number of days to check for reliable tests, defaults to '%default'.")
- parser.add_option("--unreliableDays", dest="unreliable_days",
- default=28,
- type="int",
- help="Number of days to check for unreliable tests, defaults to '%default'.")
- parser.add_option("--batchGroupSize", dest="batch_size",
+ help="The resmoke.py tag file to update. Defaults to '%default'.")
+
+ parser.add_option("--requestBatchSize", type="int", dest="batch_size",
+ metavar="<batch-size>",
default=100,
- type="int",
- help="Size of test batch group, defaults to '%default'.")
+ help=("The maximum number of tests to query the Evergreen API for in a single"
+ " request. A higher value for this option will reduce the number of"
+ " roundtrips between this client and Evergreen. Defaults to %default."))
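The --requestBatchSize option only caps how many tests go into a single Evergreen API request; the batching itself is not shown in this hunk. The idea amounts to plain chunking, sketched below with a hypothetical helper name:

    def create_batches(tests, batch_size):
        # Split the full list of tests into chunks of at most 'batch_size' elements so that each
        # chunk can be fetched from the Evergreen API in one request.
        return [tests[i:i + batch_size] for i in range(0, len(tests), batch_size)]

    batches = create_batches(["test%d.js" % i for i in range(250)], batch_size=100)
    print([len(batch) for batch in batches])  # [100, 100, 50]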
(options, tests) = parser.parse_args()
- for option in required_options:
- if not getattr(options, option):
- parser.print_help()
- parser.error("Missing required option")
+ if options.distros:
+ warnings.warn(
+ ("Until https://jira.mongodb.org/browse/EVG-1665 is implemented, distro information"
+ " isn't returned by the Evergreen API. This option will therefore be ignored."),
+ RuntimeWarning)
- evg_conf = evergreen.EvergreenProjectConfig(options.evergreen_yml)
+ evg_conf = ci_evergreen.EvergreenProjectConfig(options.evergreen_project_config)
use_test_tasks_membership = False
tasks = options.tasks.split(",") if options.tasks else []
@@ -282,25 +494,18 @@ def main():
distros = options.distros.split(",") if options.distros else []
- check_fail_rates("Test", options.test_fail_rates[0], options.test_fail_rates[1])
- # The less specific failures rates are optional and default to a lower level value.
- if not options.task_fail_rates:
- options.task_fail_rates = options.test_fail_rates
- else:
- check_fail_rates("Task", options.task_fail_rates[0], options.task_fail_rates[1])
- if not options.variant_fail_rates:
- options.variant_fail_rates = options.task_fail_rates
- else:
- check_fail_rates("Variant", options.variant_fail_rates[0], options.variant_fail_rates[1])
- if not options.distro_fail_rates:
- options.distro_fail_rates = options.variant_fail_rates
- else:
- check_fail_rates("Distro", options.distro_fail_rates[0], options.distro_fail_rates[1])
-
- check_days("Reliable days", options.reliable_days)
- check_days("Unreliable days", options.unreliable_days)
-
- lifecycle = tags.TagsConfig(options.lifecycle_file, cmp_func=compare_tags)
+ config = Config(
+ test_fail_rates=Rates(*options.test_fail_rates),
+ task_fail_rates=Rates(*options.task_fail_rates),
+ variant_fail_rates=Rates(*options.variant_fail_rates),
+ distro_fail_rates=Rates(*options.distro_fail_rates),
+ reliable_min_runs=options.reliable_test_min_runs,
+ reliable_time_period=datetime.timedelta(days=options.reliable_days),
+ unreliable_min_runs=options.unreliable_test_min_runs,
+ unreliable_time_period=datetime.timedelta(days=options.unreliable_days))
+ validate_config(config)
+
+ lifecycle = ci_tags.TagsConfig.from_file(options.tag_file, cmp_func=compare_tags)
test_tasks_membership = get_test_tasks_membership(evg_conf)
# If no tests are specified then the list of tests is generated from the list of tasks.
@@ -325,58 +530,24 @@ def main():
if not tasks:
print("Warning - No tasks found for tests {}, skipping this group.".format(tests))
continue
- report = tf.HistoryReport(period_type="revision",
- start=commit_prior,
- end=commit_last,
- group_period=options.reliable_days,
- project=options.project,
- tests=tests,
- tasks=tasks,
- variants=variants,
- distros=distros)
- view_report = report.generate_report()
-
- # We build up report_combo to check for more specific test failures rates.
- report_combo = []
- # TODO EVG-1665: Uncomment this line once this has been supported.
- # for combo in ["test", "task", "variant", "distro"]:
- for combo in ["test", "task", "variant"]:
- report_combo.append(combo)
- if combo == "distro":
- acceptable_fail_rate = options.distro_fail_rates[0]
- unacceptable_fail_rate = options.distro_fail_rates[1]
- elif combo == "variant":
- acceptable_fail_rate = options.variant_fail_rates[0]
- unacceptable_fail_rate = options.variant_fail_rates[1]
- elif combo == "task":
- acceptable_fail_rate = options.task_fail_rates[0]
- unacceptable_fail_rate = options.task_fail_rates[1]
- else:
- acceptable_fail_rate = options.test_fail_rates[0]
- unacceptable_fail_rate = options.test_fail_rates[1]
-
- # Unreliable tests are analyzed from the entire period.
- update_lifecycle(lifecycle,
- view_report.view_summary(group_on=report_combo),
- unreliable_test,
- True,
- unacceptable_fail_rate,
- options.unreliable_test_min_run)
-
- # Reliable tests are analyzed from the last period, i.e., last 14 days.
- (reliable_start_date, reliable_end_date) = view_report.last_period()
- update_lifecycle(lifecycle,
- view_report.view_summary(group_on=report_combo,
- start_date=reliable_start_date,
- end_date=reliable_end_date),
- reliable_test,
- False,
- acceptable_fail_rate,
- options.reliable_test_min_run)
-
- # Update the lifecycle_file only if there have been changes.
+
+ test_history = tf.TestHistory(project=options.project,
+ tests=tests,
+ tasks=tasks,
+ variants=variants,
+ distros=distros)
+
+ history_data = test_history.get_history_by_revision(start_revision=commit_prior,
+ end_revision=commit_last)
+
+ report = tf.Report(history_data)
+ update_tags(lifecycle, config, report)
+
+    # We write the 'lifecycle' tag configuration to the 'options.tag_file' file only if there
+    # have been changes to the tags. In particular, we avoid modifying the file when only the
+    # header comment for the YAML file would change.
if lifecycle.is_modified():
- write_yaml_file(options.lifecycle_file, lifecycle)
+ write_yaml_file(options.tag_file, lifecycle)
if __name__ == "__main__":
main()