SERVER-30828 Untag unreliable tests that are not running

author: Yves Duhem <yves.duhem@mongodb.com> 2017-10-06 10:37:38 -0400
committer: Yves Duhem <yves.duhem@mongodb.com> 2017-10-06 10:37:53 -0400
commit: 0875727af8e43230497e9e88222d53020ee9cdf6 (patch)
tree: 2dd6c2abdef98da4b87a935f6aae71949682f100 /buildscripts
parent: aff9c7e6afe0b78e91a724fe5e53ea4717d7d813 (diff)
download: mongo-0875727af8e43230497e9e88222d53020ee9cdf6.tar.gz
2 files changed, 211 insertions, 23 deletions
diff --git a/buildscripts/tests/test_update_test_lifecycle.py b/buildscripts/tests/test_update_test_lifecycle.py
index 0909ee76725..f92c0141419 100644
--- a/buildscripts/tests/test_update_test_lifecycle.py
+++ b/buildscripts/tests/test_update_test_lifecycle.py
@@ -373,6 +373,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(collections.OrderedDict(), self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(num_pass=0, num_fail=1),
             self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
@@ -382,7 +383,7 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, expected_tags)
 
@@ -468,6 +469,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(num_pass=1, num_fail=0),
             self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
@@ -477,10 +479,92 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, collections.OrderedDict())
 
+    def test_non_running_in_reliable_period_is_reliable(self):
+        """
+        Tests that tests that have a failure rate above the unacceptable rate during the unreliable
+        period but haven't run during the reliable period are marked as reliable.
+        """
+        # Unreliable period is 2 days: 2017-06-03 to 2017-06-04.
+        # Reliable period is 1 day: 2017-06-04.
+        reliable_period_date = datetime.date(2017, 6, 4)
+        config = self.CONFIG._replace(
+            test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+            task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+            variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+            distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1),
+            unreliable_time_period=datetime.timedelta(days=2))
+
+        tests = ["jstests/core/all.js"]
+        initial_tags = collections.OrderedDict([
+            ("jstests/core/all.js", [
+                "unreliable",
+                "unreliable|jsCore_WT",
+                "unreliable|jsCore_WT|linux-64",
+                "unreliable|jsCore_WT|linux-64|rhel62",
+            ]),
+        ])
+
+        lifecycle = ci_tags.TagsConfig.from_dict(
+            dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
+        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+        # The test did not run on the reliable period on linux-64.
+        report = test_failures.Report([
+            # Failing.
+            self.ENTRY._replace(num_pass=0,
+                                num_fail=2),
+            # Passing on a different variant.
+            self.ENTRY._replace(start_date=reliable_period_date,
+                                end_date=reliable_period_date,
+                                num_pass=3,
+                                num_fail=0,
+                                variant="linux-alt",
+                                distro="debian7"),
+        ])
+
+        update_test_lifecycle.validate_config(config)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
+        updated_tags = self.assert_has_only_js_tests(lifecycle)
+        # The tags for variant and distro have been removed.
+        self.assertEqual(updated_tags, collections.OrderedDict([
+            ("jstests/core/all.js", ["unreliable", "unreliable|jsCore_WT"])]))
+
+    def test_non_running_at_all_is_reliable(self):
+        """
+        Tests that tests that are tagged as unreliable but no longer running (either during the
+        reliable or the unreliable period) have their tags removed.
+        """
+        config = self.CONFIG
+
+        tests = ["jstests/core/all.js", "jstests/core/all2.js"]
+        initial_tags = collections.OrderedDict([
+            ("jstests/core/all2.js", [
+                "unreliable",
+                "unreliable|jsCore_WT",
+                "unreliable|jsCore_WT|linux-64",
+                "unreliable|jsCore_WT|linux-64|rhel62",
+            ]),
+        ])
+
+        lifecycle = ci_tags.TagsConfig.from_dict(
+            dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
+        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+        # all2.js did not run at all
+        report = test_failures.Report([self.ENTRY])
+
+        update_test_lifecycle.validate_config(config)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
+        updated_tags = self.assert_has_only_js_tests(lifecycle)
+        # All the tags have been removed because the test did not run at all.
+        self.assertEqual(updated_tags, collections.OrderedDict([]))
+
     def test_transition_test_from_unreliable_to_reliable(self):
         """
         Tests that update_tags() untags a formerly unreliable (test,) combination after it has
@@ -571,6 +655,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(num_pass=1, num_fail=0),
             self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
@@ -580,7 +665,7 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, initial_tags)
 
@@ -609,6 +694,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(num_pass=0, num_fail=1),
             self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
@@ -618,7 +704,7 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, initial_tags)
 
@@ -660,6 +746,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
                                 end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
@@ -677,7 +764,7 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, collections.OrderedDict([
             ("jstests/core/all.js", [
@@ -707,6 +794,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(num_pass=0, num_fail=1),
             self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
@@ -716,7 +804,7 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, initial_tags)
 
@@ -745,6 +833,7 @@ class TestUpdateTags(unittest.TestCase):
         summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
         self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
 
+        tests = ["jstests/core/all.js"]
         report = test_failures.Report([
             self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
                                 end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
@@ -762,11 +851,53 @@ class TestUpdateTags(unittest.TestCase):
         ])
 
         update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
         updated_tags = self.assert_has_only_js_tests(lifecycle)
         self.assertEqual(updated_tags, collections.OrderedDict())
 
 
+class TestCombinationHelpers(unittest.TestCase):
+    def test_from_entry(self):
+        entry = test_failures._ReportEntry(
+            "testA", "taskA", "variantA", "distroA",
+            datetime.date.today(),
+            datetime.date.today(), 0, 0)
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST)
+        self.assertEqual(combination, ("testA",))
+
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST_TASK)
+        self.assertEqual(combination, ("testA", "taskA"))
+
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST_TASK_VARIANT)
+        self.assertEqual(combination, ("testA", "taskA", "variantA"))
+
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST_TASK_VARIANT_DISTRO)
+        self.assertEqual(combination, ("testA", "taskA", "variantA", "distroA"))
+
+    def test_make_from_tag(self):
+        test = "testA"
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable")
+        self.assertEqual(combination, ("testA",))
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable|taskA")
+        self.assertEqual(combination, ("testA", "taskA"))
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable|taskA|variantA")
+        self.assertEqual(combination, ("testA", "taskA", "variantA"))
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable|taskA|variantA|distroA")
+        self.assertEqual(combination, ("testA", "taskA", "variantA", "distroA"))
+
+
 class TestCleanUpTags(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -847,11 +978,11 @@ class TestJiraIssueCreator(unittest.TestCase):
         desc = update_test_lifecycle.JiraIssueCreator._make_updated_tags_description(data)
         expected = ("- *js_test*\n"
                     "-- {{testfile1}}\n"
-                    "--- {{tag1}} (0.10 %)\n"
-                    "--- {{tag2}} (0.20 %)\n"
+                    "--- {{tag1}} (0.10)\n"
+                    "--- {{tag2}} (0.20)\n"
                     "-- {{testfile2}}\n"
-                    "--- {{tag1}} (0.10 %)\n"
-                    "--- {{tag3}} (0.30 %)")
+                    "--- {{tag1}} (0.10)\n"
+                    "--- {{tag3}} (0.30)")
         self.assertEqual(expected, desc)
 
     def test_description_empty(self):
diff --git a/buildscripts/update_test_lifecycle.py b/buildscripts/update_test_lifecycle.py
index 7ca17f7d8ed..52bf204edf5 100755
--- a/buildscripts/update_test_lifecycle.py
+++ b/buildscripts/update_test_lifecycle.py
@@ -12,6 +12,7 @@ import collections
 import datetime
 import logging
 import multiprocessing.dummy
+import operator
 import optparse
 import os.path
 import posixpath
@@ -328,10 +329,35 @@ def validate_config(config):
                     name, time_period))
 
 
-def update_tags(lifecycle_tags, config, report):
+def _test_combination_from_entry(entry, components):
+    """Creates a test combination tuple from a tf._ReportEntry and target components.
+
+    Returns:
+        A tuple containing the entry fields specified in components.
     """
-    Updates the tags in 'lifecycle_tags' based on the historical test failures mentioned in
-    'report' according to the model described by 'config'.
+    combination = []
+    for component in components:
+        combination.append(operator.attrgetter(component)(entry))
+    return tuple(combination)
+
+
+def _test_combination_from_tag(test, tag):
+    """Creates a test combination tuple from a test name and a tag.
+
+    Returns:
+        A tuple containing the test name and the components found in the tag.
+    """
+    combination = [test]
+    for element in _split_tag(tag):
+        if element:
+            combination.append(element)
+    return tuple(combination)
+
+
+def update_tags(lifecycle_tags, config, report, tests):
+    """
+    Updates the tags in 'lifecycle_tags' based on the historical test failures of tests 'tests'
+    mentioned in 'report' according to the model described by 'config'.
     """
 
     # We initialize 'grouped_entries' to make PyLint not complain about 'grouped_entries' being used
@@ -350,11 +376,30 @@ def update_tags(lifecycle_tags, config, report):
         # those components, etc.
         grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY)
 
+        # Create the reliable report.
+        # Filter out any test executions from prior to 'config.reliable_time_period'.
+        reliable_start_date = (report.end_date - config.reliable_time_period
+                               + datetime.timedelta(days=1))
+        reliable_entries = [entry for entry in grouped_entries
+                            if entry.start_date >= reliable_start_date]
+        reliable_report = tf.Report(reliable_entries)
+        reliable_combinations = {_test_combination_from_entry(entry, components)
+                                 for entry in reliable_entries}
+
+        # Create the unreliable report.
         # Filter out any test executions from prior to 'config.unreliable_time_period'.
+        # Also filter out any test that is not present in the reliable_report in order
+        # to avoid tagging as unreliable tests that are no longer running.
         unreliable_start_date = (report.end_date - config.unreliable_time_period
                                  + datetime.timedelta(days=1))
-        unreliable_report = tf.Report(entry for entry in grouped_entries
-                                      if entry.start_date >= unreliable_start_date)
+        unreliable_entries = [
+            entry for entry in grouped_entries
+            if (entry.start_date >= unreliable_start_date and
+                _test_combination_from_entry(entry, components) in reliable_combinations)
+        ]
+        unreliable_report = tf.Report(unreliable_entries)
+
+        # Update the tags using the unreliable report.
         update_lifecycle(lifecycle_tags,
                          unreliable_report.summarize_by(components),
                          unreliable_test,
@@ -362,11 +407,7 @@ def update_tags(lifecycle_tags, config, report):
                          rates.unacceptable,
                          config.unreliable_min_runs)
 
-        # Filter out any test executions from prior to 'config.reliable_time_period'.
-        reliable_start_date = (report.end_date - config.reliable_time_period
-                               + datetime.timedelta(days=1))
-        reliable_report = tf.Report(entry for entry in grouped_entries
-                                    if entry.start_date >= reliable_start_date)
+        # Update the tags using the reliable report.
         update_lifecycle(lifecycle_tags,
                          reliable_report.summarize_by(components),
                          reliable_test,
@@ -374,6 +415,22 @@ def update_tags(lifecycle_tags, config, report):
                          rates.acceptable,
                          config.reliable_min_runs)
 
+        def should_be_removed(test, tag):
+            combination = _test_combination_from_tag(test, tag)
+            if len(combination) != len(components):
+                # The tag is not for these components.
+                return False
+            return combination not in reliable_combinations
+
+        # Remove the tags that correspond to tests that have not run during the reliable period.
+        for test in tests:
+            tags = lifecycle_tags.lifecycle.get_tags("js_test", test)
+            for tag in tags[:]:
+                if should_be_removed(test, tag):
+                    LOGGER.info("Removing tag '%s' of test '%s' because the combination did not run"
+                                " during the reliable period", tag, test)
+                    lifecycle_tags.remove_tag("js_test", test, tag, failure_rate=0)
+
 
 def _split_tag(tag):
     """Split a tag into its components.
@@ -566,7 +623,7 @@ class JiraIssueCreator(object):
                 tags_lines.append("-- {0}".format(mono(test)))
                 for tag in sorted(tags.keys()):
                     coefficient = tags[tag]
-                    tags_lines.append("--- {0} ({1:.2})".format(mono(tag), coefficient))
+                    tags_lines.append("--- {0} ({1:.2f})".format(mono(tag), coefficient))
         if tags_lines:
             return "\n".join(tags_lines)
         else:
@@ -1111,7 +1168,7 @@ def main():
             continue
         history_data = test_history_source.get_history_data(tests, tasks)
         report = tf.Report(history_data)
-        update_tags(lifecycle_tags_file.changelog_lifecycle, config, report)
+        update_tags(lifecycle_tags_file.changelog_lifecycle, config, report, tests)
 
     # Remove tags that are no longer relevant
     clean_up_tags(lifecycle_tags_file.changelog_lifecycle, evg_conf)
author	Yves Duhem <yves.duhem@mongodb.com>	2017-10-06 10:37:38 -0400
committer	Yves Duhem <yves.duhem@mongodb.com>	2017-10-06 10:37:53 -0400
commit	0875727af8e43230497e9e88222d53020ee9cdf6 (patch)
tree	2dd6c2abdef98da4b87a935f6aae71949682f100 /buildscripts
parent	aff9c7e6afe0b78e91a724fe5e53ea4717d7d813 (diff)
download	mongo-0875727af8e43230497e9e88222d53020ee9cdf6.tar.gz