SERVER-29999 Implement FSM Scheduler for concurrency_simultaneous

(cherry picked from commit b107fce3ef35a9e234f8169f8cae7cf8d9aa4134)
author: Robert Guo <robert.guo@10gen.com> 2018-05-17 21:29:56 -0400
committer: Robert Guo <robert.guo@10gen.com> 2018-06-04 17:35:21 -0400
commit: 3bc3ea2d2033d12784aebc12544301e11c368e4c (patch)
tree: 28e03fb6b4fc31011b675e0841d300e76e691d3f
parent: bf72dbc9922412b01c0e4d2f485338aa7ae9b76c (diff)
download: mongo-3bc3ea2d2033d12784aebc12544301e11c368e4c.tar.gz
10 files changed, 211 insertions, 39 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_simultaneous.yml b/buildscripts/resmokeconfig/suites/concurrency_simultaneous.yml
index d469cf96882..74e0cf71724 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_simultaneous.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_simultaneous.yml
@@ -1,11 +1,19 @@
-test_kind: js_test
+test_kind: parallel_fsm_workload_test
 
 selector:
   roots:
-  - jstests/concurrency/*.js
+  - jstests/concurrency/fsm_workloads/**/*.js
   exclude_files:
-  # Skip the sharding tests because they're run in different suites.
-  - jstests/concurrency/fsm_all_sharded*.js
+  # These workloads implicitly assume that their tid ranges are [0, $config.threadCount). This
+  # isn't guaranteed to be true when they are run in parallel with other workloads.
+  - jstests/concurrency/fsm_workloads/list_indexes.js
+  - jstests/concurrency/fsm_workloads/update_inc_capped.js
+
+  # These workloads use >100MB of data, which can overwhelm test hosts.
+  - jstests/concurrency/fsm_workloads/agg_group_external.js
+  - jstests/concurrency/fsm_workloads/agg_sort_external.js
+  group_size: 10
+  group_count_multiplier: 2.5
 
 executor:
   archive:
@@ -21,6 +29,7 @@ executor:
       global_vars:
         TestData:
           skipValidationOnNamespaceNotFound: false
+  - class: CleanupConcurrencyWorkloads
   fixture:
     class: MongoDFixture
     mongod_options:
diff --git a/buildscripts/resmokeconfig/suites/no_passthrough.yml b/buildscripts/resmokeconfig/suites/no_passthrough.yml
index bd87e5e56e0..1129ba416cd 100644
--- a/buildscripts/resmokeconfig/suites/no_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/no_passthrough.yml
@@ -3,7 +3,12 @@ test_kind: js_test
 selector:
   roots:
   - jstests/noPassthrough/*.js
+
+  # Self-tests for the Concurrency testing framework are run as part of this test suite.
+  - jstests/concurrency/*.js
   exclude_files:
+  # Exclude files that are not self-tests.
+  - jstests/concurrency/fsm_all*.js
   # Disable inmem_full as per SERVER-27014
   - jstests/noPassthrough/inmem_full.js
 
diff --git a/buildscripts/resmokelib/selector.py b/buildscripts/resmokelib/selector.py
index a4e6f15aa8f..d878710f1d7 100644
--- a/buildscripts/resmokelib/selector.py
+++ b/buildscripts/resmokelib/selector.py
@@ -9,7 +9,9 @@ from __future__ import absolute_import
 import collections
 import errno
 import fnmatch
+import math
 import os.path
+import random
 import subprocess
 import sys
 
@@ -433,7 +435,15 @@ class _Selector(object):
         # 5. Apply the include files last with force=True to take precedence over the tags.
         if self._tests_are_files and selector_config.include_files:
             test_list.include_files(selector_config.include_files, force=True)
-        return test_list.get_tests()
+
+        return self.sort_tests(*test_list.get_tests())
+
+    @staticmethod
+    def sort_tests(tests, excluded):
+        """Sort the tests before returning them."""
+        if config.ORDER_TESTS_BY_NAME:
+            return sorted(tests, key=str.lower), sorted(excluded, key=str.lower)
+        return tests, excluded
 
     @staticmethod
     def get_tags(test_file):  # pylint: disable=unused-argument
@@ -442,7 +452,7 @@ class _Selector(object):
 
 
 class _JSTestSelectorConfig(_SelectorConfig):
-    """_SelectorConfig subclass for js_test tests."""
+    """_SelectorConfig subclass for JavaScript tests."""
 
     def __init__(  # pylint: disable=too-many-arguments
             self, roots=None, include_files=None, exclude_files=None, include_with_any_tags=None,
@@ -455,7 +465,7 @@ class _JSTestSelectorConfig(_SelectorConfig):
 
 
 class _JSTestSelector(_Selector):
-    """_Selector subclass for js_test tests."""
+    """_Selector subclass for JavaScript tests."""
 
     def __init__(self, test_file_explorer):
         _Selector.__init__(self, test_file_explorer)
@@ -469,6 +479,74 @@ class _JSTestSelector(_Selector):
         return file_tags
 
 
+class _MultiJSTestSelectorConfig(_JSTestSelectorConfig):
+    """_SelectorConfig subclass for selecting groups of JavaScript tests."""
+
+    def __init__(self, group_size=None, group_count_multiplier=1, **kwargs):
+        """Init function.
+
+        :param group_size: number of tests in each group.
+        :param group_count_multiplier: number of times to schedule each workload, can be a decimal.
+               E.g. 2.5 means half of the workloads get scheduled twice, and half get scheduled
+               3 times.
+        :param kwargs: arguments forwarded to the superclass.
+        """
+        _JSTestSelectorConfig.__init__(self, **kwargs)
+        self.group_size = group_size
+        self.group_count_multiplier = group_count_multiplier
+
+
+class _MultiJSTestSelector(_JSTestSelector):
+    """_Selector subclass for selecting one group of JavaScript tests at a time.
+
+    Each group can include one or more tests.
+
+    E.g. [[test1.js, test2.js], [test3.js, test4.js]].
+    """
+
+    def select(self, selector_config):
+        """Select the tests as follows.
+
+        1. Create a corpus of tests to group by shuffling the raw tests, reshuffling and
+           reusing already-grouped tests until "num_groups" groups have been created.
+        2. Slice the corpus into "group_size" lists, put these lists in "grouped_tests".
+        """
+        tests, excluded = _JSTestSelector.select(self, selector_config)
+
+        group_size = selector_config.group_size
+        multi = selector_config.group_count_multiplier
+
+        # We use the group size as a sentinel to determine if the tests are coming from
+        # the command line, in which case group_size would be undefined. For command line
+        # tests, we assume the user is trying to repro a certain issue, so we group all
+        # of the tests together.
+        if group_size is None:
+            multi = 1
+            group_size = len(tests)
+
+        grouped_tests = []
+
+        start = 0
+        corpus = tests[:]
+        random.shuffle(corpus)
+
+        num_groups = len(tests) * multi / group_size
+        while len(grouped_tests) < num_groups:
+            if start + group_size > len(corpus):
+                recycled_tests = corpus[:start]
+                random.shuffle(recycled_tests)
+                corpus = corpus[start:] + recycled_tests
+                start = 0
+            grouped_tests.append(corpus[start:start + group_size])
+            start += group_size
+        return grouped_tests, excluded
+
+    @staticmethod
+    def sort_tests(tests, excluded):
+        """There is no need to sort FSM test groups."""
+        return tests, excluded
+
+
 class _CppTestSelectorConfig(_SelectorConfig):
     """_SelectorConfig subclass for cpp_integration_test and cpp_unit_test tests."""
 
@@ -600,6 +678,7 @@ _SELECTOR_REGISTRY = {
     "benchmark_test": (_CppTestSelectorConfig, _CppTestSelector),
     "db_test": (_DbTestSelectorConfig, _DbTestSelector),
     "fsm_workload_test": (_JSTestSelectorConfig, _JSTestSelector),
+    "parallel_fsm_workload_test": (_MultiJSTestSelectorConfig, _MultiJSTestSelector),
     "json_schema_test": (_JsonSchemaTestSelectorConfig, _Selector),
     "js_test": (_JSTestSelectorConfig, _JSTestSelector),
     "multi_stmt_txn_passthrough": (_JSTestSelectorConfig, _JSTestSelector),
diff --git a/buildscripts/resmokelib/suitesconfig.py b/buildscripts/resmokelib/suitesconfig.py
index 87bc1e1e9f5..607edc837ce 100644
--- a/buildscripts/resmokelib/suitesconfig.py
+++ b/buildscripts/resmokelib/suitesconfig.py
@@ -49,7 +49,7 @@ def create_test_membership_map(fail_on_missing_selector=False, test_kind=None):
             raise
 
         for testfile in suite.tests:
-            if isinstance(testfile, dict):
+            if isinstance(testfile, (dict, list)):
                 continue
             test_membership[testfile].append(suite_name)
     return test_membership
diff --git a/buildscripts/resmokelib/testing/hook_test_archival.py b/buildscripts/resmokelib/testing/hook_test_archival.py
index 4189dd0ac43..ad5ac8376e6 100644
--- a/buildscripts/resmokelib/testing/hook_test_archival.py
+++ b/buildscripts/resmokelib/testing/hook_test_archival.py
@@ -13,7 +13,7 @@ from ..utils import globstar
 class HookTestArchival(object):
     """Archive hooks and tests to S3."""
 
-    def __init__(self, suite, hooks, archive_instance, archive_config):
+    def __init__(self, suite, hooks, archive_instance, archive_config):  #pylint: disable=unused-argument
         """Initialize HookTestArchival."""
         self.archive_instance = archive_instance
         archive_config = utils.default_if_none(archive_config, {})
@@ -21,13 +21,14 @@ class HookTestArchival(object):
         self.on_success = archive_config.get("on_success", False)
 
         self.tests = []
+        self.archive_all = False
         if "tests" in archive_config:
             # 'tests' is either a list of tests to archive or a bool (archive all if True).
             if not isinstance(archive_config["tests"], bool):
                 for test in archive_config["tests"]:
                     self.tests += globstar.glob(test)
             elif archive_config["tests"]:
-                self.tests = suite.tests
+                self.archive_all = True
 
         self.hooks = []
         if "hooks" in archive_config:
@@ -57,12 +58,17 @@ class HookTestArchival(object):
     def _archive_test(self, logger, test, success):
         """Provide helper to archive tests."""
         test_name = test.test_name
-        test_match = False
-        for arch_test in self.tests:
-            # Ensure that the test_name is in the same format as the arch_test.
-            if os.path.normpath(test_name) == os.path.normpath(arch_test):
-                test_match = True
-                break
+
+        if self.archive_all:
+            test_match = True
+        else:
+            test_match = False
+            for arch_test in self.tests:
+                # Ensure that the test_name is in the same format as the arch_test.
+                if os.path.normpath(test_name) == os.path.normpath(arch_test):
+                    test_match = True
+                    break
+
         if not test_match or not self._should_archive(success):
             return
 
diff --git a/buildscripts/resmokelib/testing/suite.py b/buildscripts/resmokelib/testing/suite.py
index 1a5c06b09f1..1a57b6c7716 100644
--- a/buildscripts/resmokelib/testing/suite.py
+++ b/buildscripts/resmokelib/testing/suite.py
@@ -53,21 +53,17 @@ class Suite(object):  # pylint: disable=too-many-instance-attributes
 
     def _get_tests_for_kind(self, test_kind):
         """Return the tests to run based on the 'test_kind'-specific filtering policy."""
-        test_info = self.get_selector_config()
+        selector_config = self.get_selector_config()
 
-        # The mongos_test doesn't have to filter anything, the test_info is just the arguments to
-        # the mongos program to be used as the test case.
+        # The mongos_test doesn't have to filter anything, the selector_config is just the
+        # arguments to the mongos program to be used as the test case.
         if test_kind == "mongos_test":
-            mongos_options = test_info  # Just for easier reading.
+            mongos_options = selector_config  # Just for easier reading.
             if not isinstance(mongos_options, dict):
                 raise TypeError("Expected dictionary of arguments to mongos")
             return [mongos_options], []
 
-        tests, excluded = _selector.filter_tests(test_kind, test_info)
-        if _config.ORDER_TESTS_BY_NAME:
-            return sorted(tests, key=str.lower), sorted(excluded, key=str.lower)
-
-        return tests, excluded
+        return _selector.filter_tests(test_kind, selector_config)
 
     def get_name(self):
         """Return the name of the test suite."""
diff --git a/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py b/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py
index 1598273a448..648ae776dbd 100644
--- a/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py
+++ b/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py
@@ -2,7 +2,7 @@
 
 from __future__ import absolute_import
 
-import os.path
+import hashlib
 import threading
 
 from buildscripts.resmokelib.testing.testcases import interface
@@ -18,7 +18,7 @@ class FSMWorkloadTestCase(jsrunnerfile.JSRunnerFileTestCase):
     _COUNTER = 0
 
     def __init__(  #pylint: disable=too-many-arguments
-            self, logger, fsm_workload, shell_executable=None, shell_options=None, same_db=False,
+            self, logger, selected_tests, shell_executable=None, shell_options=None, same_db=False,
             same_collection=False, db_name_prefix=None):
         """Initialize the FSMWorkloadTestCase with the FSM workload file."""
 
@@ -26,8 +26,12 @@ class FSMWorkloadTestCase(jsrunnerfile.JSRunnerFileTestCase):
         self.same_db = same_db or self.same_collection
         self.db_name_prefix = db_name_prefix
         self.dbpath_prefix = None
+        self.fsm_workload_group = self.get_workload_group(selected_tests)
+
+        test_name = self.get_workload_uid(selected_tests)
+
         jsrunnerfile.JSRunnerFileTestCase.__init__(
-            self, logger, "FSM workload", fsm_workload,
+            self, logger, "FSM workload", test_name,
             test_runner_file="jstests/concurrency/fsm_libs/resmoke_runner.js",
             shell_executable=shell_executable, shell_options=shell_options)
 
@@ -45,14 +49,9 @@ class FSMWorkloadTestCase(jsrunnerfile.JSRunnerFileTestCase):
         global_vars["TestData"] = test_data
         self.shell_options["global_vars"] = global_vars
 
-    @property
-    def fsm_workload(self):
-        """Get the test name."""
-        return self.test_name
-
     def _populate_test_data(self, test_data):
-
-        test_data["fsmWorkloads"] = self.fsm_workload
+        test_data["fsmWorkloads"] = self.fsm_workload_group
+        test_data["fsmWorkloads"] = self.fsm_workload_group
         test_data["resmokeDbPathPrefix"] = self.dbpath_prefix
 
         with FSMWorkloadTestCase._COUNTER_LOCK:
@@ -70,3 +69,37 @@ class FSMWorkloadTestCase(jsrunnerfile.JSRunnerFileTestCase):
             test_data["sameDB"] = True
         if not self.same_collection:
             test_data["sameCollection"] = True
+
+    @staticmethod
+    def get_workload_group(selected_tests):
+        """Generate an FSM workload group from tests selected by the selector."""
+        # Selectors for non-parallel FSM suites return the name of a single workload; we
+        # put it into a list to create a workload group of size 1.
+        return [selected_tests]
+
+    @staticmethod
+    def get_workload_uid(selected_tests):
+        """Get a unique identifier for a workload group."""
+        # For non-parallel versions of the FSM framework, the workload group name is just the
+        # name of the workload.
+        return selected_tests
+
+
+class ParallelFSMWorkloadTestCase(FSMWorkloadTestCase):
+    """A group of FSM workloads to execute together."""
+
+    REGISTERED_NAME = "parallel_fsm_workload_test"
+
+    @staticmethod
+    def get_workload_group(selected_tests):
+        """Generate an FSM workload group from tests selected by the selector."""
+        # Just return the list of selected tests as the workload.
+        return selected_tests
+
+    @staticmethod
+    def get_workload_uid(selected_tests):
+        """Get a unique identifier for a workload group."""
+        uid = hashlib.md5()
+        for workload_name in sorted(selected_tests):
+            uid.update(workload_name)
+        return uid.hexdigest()
diff --git a/buildscripts/tests/resmokelib/test_selector.py b/buildscripts/tests/resmokelib/test_selector.py
index 50694c05a3e..edba22ba7b3 100644
--- a/buildscripts/tests/resmokelib/test_selector.py
+++ b/buildscripts/tests/resmokelib/test_selector.py
@@ -108,6 +108,8 @@ class TestTestFileExplorer(unittest.TestCase):
 class MockTestFileExplorer(object):
     """Component giving access to mock test files data."""
 
+    NUM_JS_FILES = 4  # Total number of JS files in self.files.
+
     def __init__(self):
         self.files = [
             "dir/subdir1/test11.js", "dir/subdir1/test12.js", "dir/subdir2/test21.js",
@@ -318,6 +320,12 @@ class TestSelectorConfig(unittest.TestCase):
         with self.assertRaises(ValueError):
             selector._SelectorConfig(include_tags="tag1", exclude_tags="tag2")
 
+    def test_multi_jstest_selector_config(self):
+        sc = selector._MultiJSTestSelectorConfig(roots=["test1", "test2"], group_size=1234,
+                                                 group_count_multiplier=5678)
+        self.assertEqual(sc.group_size, 1234)
+        self.assertEqual(sc.group_count_multiplier, 5678)
+
 
 class TestSelector(unittest.TestCase):
     @classmethod
@@ -377,6 +385,46 @@ class TestSelector(unittest.TestCase):
         ], excluded)
 
 
+class TestMultiJSSelector(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.selector = selector._MultiJSTestSelector(MockTestFileExplorer())
+
+    def test_multi_js_test_selector_normal(self):
+        config = selector._MultiJSTestSelectorConfig(roots=["dir/**/*.js"], group_size=3,
+                                                     group_count_multiplier=2)
+
+        selected, _ = self.selector.select(config)
+        total = 0
+
+        for group in selected[:-1]:
+            self.assertEqual(len(group), 3, "{} did not have 3 unique tests".format(group))
+            total += 3
+
+        self.assertLessEqual(
+            len(selected[-1]), 3, "Last selected group did not have 3 or fewer tests: {}".format(
+                selected[-1]))
+        total += len(selected[-1])
+
+        self.assertEqual(total, MockTestFileExplorer.NUM_JS_FILES * config.group_count_multiplier,
+                         "The total number of workloads is incorrect")
+
+    def test_multi_js_test_selector_one_group(self):
+        """Test we return only one group if the group size equals number of files"""
+        num_files = MockTestFileExplorer.NUM_JS_FILES
+        config = selector._MultiJSTestSelectorConfig(roots=["dir/**/*.js"], group_size=num_files,
+                                                     group_count_multiplier=9999999)
+        selected, _ = self.selector.select(config)
+        self.assertEqual(len(selected), 1)
+        self.assertEqual(len(selected[0]), num_files)
+
+    def test_multi_js_test_selector_group_too_large(self):
+        config = selector._MultiJSTestSelectorConfig(roots=["dir/**/*.js"], group_size=9999999,
+                                                     group_count_multiplier=3)
+        with self.assertRaises(ValueError):
+            self.selector.select(config)
+
+
 class TestFilterTests(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 19ed7e2c8ed..ab689db4bde 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -4778,7 +4778,6 @@ tasks:
   - func: "run tests"
     vars:
       resmoke_args: --suites=concurrency_simultaneous --storageEngine=wiredTiger
-      timeout_secs: 21600 # 6 hour timeout
 
 - <<: *task_template
   name: rlp
diff --git a/jstests/concurrency/fsm_libs/resmoke_runner.js b/jstests/concurrency/fsm_libs/resmoke_runner.js
index 5228d7f16c6..1101bf4cd9f 100644
--- a/jstests/concurrency/fsm_libs/resmoke_runner.js
+++ b/jstests/concurrency/fsm_libs/resmoke_runner.js
@@ -226,9 +226,6 @@
     }
 
     let workloads = TestData.fsmWorkloads;
-    if (!Array.isArray(workloads)) {
-        workloads = [workloads];
-    }
 
     let sessionOptions = {};
     if (TestData.runningWithCausalConsistency) {
author	Robert Guo <robert.guo@10gen.com>	2018-05-17 21:29:56 -0400
committer	Robert Guo <robert.guo@10gen.com>	2018-06-04 17:35:21 -0400
commit	3bc3ea2d2033d12784aebc12544301e11c368e4c (patch)
tree	28e03fb6b4fc31011b675e0841d300e76e691d3f
parent	bf72dbc9922412b01c0e4d2f485338aa7ae9b76c (diff)
download	mongo-3bc3ea2d2033d12784aebc12544301e11c368e4c.tar.gz