author    | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2017-06-14 20:44:52 -0400
committer | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2017-06-14 20:44:52 -0400
commit    | b8f6239094f8159ee011b244d98441633fe7bed1 (patch)
tree      | 529ae78e2fe4a54caf8e20d28ab3c24bf175761e
parent    | f3b60130e2192547a633e28423ef8b1b40984532 (diff)
download  | mongo-b8f6239094f8159ee011b244d98441633fe7bed1.tar.gz
SERVER-29560 Split hooks.py module into separate files.
Also defines a generic Registry metaclass for making class declarations
store a reference to the class along with a name so that they can be
looked up later.
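For readers skimming the diff below, the registration pattern the message describes works as follows. This is a minimal Python 2 sketch (the codebase relies on `__metaclass__`, which only has effect on Python 2); `make_registry_metaclass()`, `REGISTERED_NAME`, and `LEAVE_UNREGISTERED` come from the new registry.py in this commit, while `MyHook` is a hypothetical class used purely for illustration:

```python
from buildscripts.resmokelib.utils import registry

_HOOKS = {}  # Maps registered names to class objects.


class CustomBehavior(object):
    # The metaclass intercepts every subclass declaration and stores the new
    # class in _HOOKS under its registered name.
    __metaclass__ = registry.make_registry_metaclass(_HOOKS)

    # The sentinel keeps this interface-only base class out of the registry.
    REGISTERED_NAME = registry.LEAVE_UNREGISTERED


class MyHook(CustomBehavior):
    pass  # Hypothetical subclass; registers as "MyHook" since REGISTERED_NAME isn't overridden.


assert _HOOKS["MyHook"] is MyHook  # The class can now be looked up by name.
```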
-rw-r--r-- | buildscripts/resmokelib/testing/hooks.py                           | 692
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/__init__.py                  |  15
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/cleanup.py                   |  58
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/dbhash.py                    |  25
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/initialsync.py               | 179
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/interface.py                 |  83
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/jsfile.py                    |  66
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/oplog.py                     |  25
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py | 310
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/validate.py                  |  26
-rw-r--r-- | buildscripts/resmokelib/utils/autoloader.py                        |  27
-rw-r--r-- | buildscripts/resmokelib/utils/registry.py                          |  61
12 files changed, 875 insertions, 692 deletions
diff --git a/buildscripts/resmokelib/testing/hooks.py b/buildscripts/resmokelib/testing/hooks.py
deleted file mode 100644
index c9ea274b4ca..00000000000
--- a/buildscripts/resmokelib/testing/hooks.py
+++ /dev/null
@@ -1,692 +0,0 @@
-"""
-Customize the behavior of a fixture by allowing special code to be
-executed before or after each test, and before or after each suite.
-"""
-
-from __future__ import absolute_import
-
-import os
-import sys
-import time
-
-import bson
-import pymongo
-import pymongo.errors
-import random
-
-from . import fixtures
-from . import testcases
-from .. import errors
-from .. import logging
-from .. import utils
-
-
-def make_custom_behavior(class_name, *args, **kwargs):
-    """
-    Factory function for creating CustomBehavior instances.
-    """
-
-    if class_name not in _CUSTOM_BEHAVIORS:
-        raise ValueError("Unknown custom behavior class '%s'" % (class_name))
-    return _CUSTOM_BEHAVIORS[class_name](*args, **kwargs)
-
-
-class CustomBehavior(object):
-    """
-    The common interface all CustomBehaviors will inherit from.
-    """
-
-    @staticmethod
-    def start_dynamic_test(hook_test_case, test_report):
-        """
-        If a CustomBehavior wants to add a test case that will show up
-        in the test report, it should use this method to add it to the
-        report, since we will need to count it as a dynamic test to get
-        the stats in the summary information right.
-        """
-        test_report.startTest(hook_test_case, dynamic=True)
-
-    def __init__(self, hook_logger, fixture, description):
-        """
-        Initializes the CustomBehavior with the specified fixture.
-        """
-
-        if not isinstance(hook_logger, logging.Logger):
-            raise TypeError("logger must be a Logger instance")
-
-        self.logger = hook_logger
-        self.fixture = fixture
-        self.hook_test_case = None
-        self.logger_name = self.__class__.__name__
-        self.description = description
-
-    def before_suite(self, test_report):
-        """
-        The test runner calls this exactly once before they start
-        running the suite.
-        """
-        pass
-
-    def after_suite(self, test_report):
-        """
-        The test runner calls this exactly once after all tests have
-        finished executing. Be sure to reset the behavior back to its
-        original state so that it can be run again.
-        """
-        pass
-
-    def before_test(self, test, test_report):
-        """
-        Each test will call this before it executes.
-        """
-        pass
-
-    def after_test(self, test, test_report):
-        """
-        Each test will call this after it executes.
-        """
-        pass
-
-
-class CleanEveryN(CustomBehavior):
-    """
-    Restarts the fixture after it has ran 'n' tests.
-    On mongod-related fixtures, this will clear the dbpath.
-    """
-
-    DEFAULT_N = 20
-
-    def __init__(self, hook_logger, fixture, n=DEFAULT_N):
-        description = "CleanEveryN (restarts the fixture after running `n` tests)"
-        CustomBehavior.__init__(self, hook_logger, fixture, description)
-        self.hook_test_case = testcases.TestCase(hook_logger, "Hook", "CleanEveryN")
-
-        # Try to isolate what test triggers the leak by restarting the fixture each time.
-        if "detect_leaks=1" in os.getenv("ASAN_OPTIONS", ""):
-            self.logger.info("ASAN_OPTIONS environment variable set to detect leaks, so restarting"
-                             " the fixture after each test instead of after every %d.", n)
-            n = 1
-
-        self.n = n
-        self.tests_run = 0
-
-    def after_test(self, test, test_report):
-        self.tests_run += 1
-        if self.tests_run < self.n:
-            return
-
-        self.hook_test_case.test_name = test.short_name() + ":" + self.logger_name
-        CustomBehavior.start_dynamic_test(self.hook_test_case, test_report)
-        try:
-            self.logger.info("%d tests have been run against the fixture, stopping it...",
-                             self.tests_run)
-            self.tests_run = 0
-
-            if not self.fixture.teardown():
-                raise errors.ServerFailure("%s did not exit cleanly" % (self.fixture))
-
-            self.logger.info("Starting the fixture back up again...")
-            self.fixture.setup()
-            self.fixture.await_ready()
-
-            self.hook_test_case.return_code = 0
-            test_report.addSuccess(self.hook_test_case)
-        finally:
-            test_report.stopTest(self.hook_test_case)
-
-
-class JsCustomBehavior(CustomBehavior):
-    def __init__(self, hook_logger, fixture, js_filename, description, shell_options=None):
-        CustomBehavior.__init__(self, hook_logger, fixture, description)
-        self.hook_test_case = testcases.JSTestCase(hook_logger,
-                                                   js_filename,
-                                                   shell_options=shell_options,
-                                                   test_kind="Hook")
-        self.test_case_is_configured = False
-
-    def before_suite(self, test_report):
-        if not self.test_case_is_configured:
-            # Configure the test case after the fixture has been set up.
-            self.hook_test_case.configure(self.fixture)
-            self.test_case_is_configured = True
-
-    def _should_run_after_test_impl(self):
-        return True
-
-    def _after_test_impl(self, test, test_report, description):
-        self.hook_test_case.run_test()
-
-    def after_test(self, test, test_report):
-        if not self._should_run_after_test_impl():
-            return
-
-        # Change test_name and description to be more descriptive.
-        description = "{0} after running '{1}'".format(self.description, test.short_name())
-        self.hook_test_case.test_name = test.short_name() + ":" + self.logger_name
-        CustomBehavior.start_dynamic_test(self.hook_test_case, test_report)
-
-        try:
-            self._after_test_impl(test, test_report, description)
-        except pymongo.errors.OperationFailure as err:
-            self.hook_test_case.logger.exception("{0} failed".format(description))
-            self.hook_test_case.return_code = 1
-            test_report.addFailure(self.hook_test_case, sys.exc_info())
-            raise errors.StopExecution(err.args[0])
-        except self.hook_test_case.failureException as err:
-            self.hook_test_case.logger.exception("{0} failed".format(description))
-            test_report.addFailure(self.hook_test_case, sys.exc_info())
-            raise errors.StopExecution(err.args[0])
-        else:
-            self.hook_test_case.return_code = 0
-            test_report.addSuccess(self.hook_test_case)
-        finally:
-            test_report.stopTest(self.hook_test_case)
-
-
-class BackgroundInitialSync(JsCustomBehavior):
-    """
-    After every test, this hook checks if a background node has finished initial sync and if so,
-    validates it, tears it down, and restarts it.
-
-    This test accepts a parameter 'n' that specifies a number of tests after which it will wait for
-    replication to finish before validating and restarting the initial sync node. It also accepts
-    a parameter 'use_resync' for whether to restart the initial sync node with resync or by
-    shutting it down and restarting it.
-
-    This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'. If used
-    at the same time as CleanEveryN, the 'n' value passed to this hook should be equal to the 'n'
-    value for CleanEveryN.
-    """
-
-    DEFAULT_N = CleanEveryN.DEFAULT_N
-
-    def __init__(self, hook_logger, fixture, use_resync=False, n=DEFAULT_N, shell_options=None):
-        description = "Background Initial Sync"
-        js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
-        JsCustomBehavior.__init__(self, hook_logger, fixture, js_filename,
-                                  description, shell_options)
-
-        self.use_resync = use_resync
-        self.n = n
-        self.tests_run = 0
-        self.random_restarts = 0
-
-    # Restarts initial sync by shutting down the node, clearing its data, and restarting it,
-    # or by calling resync if use_resync is specified.
-    def __restart_init_sync(self, test_report, sync_node, sync_node_conn):
-        if self.use_resync:
-            self.hook_test_case.logger.info("Calling resync on initial sync node...")
-            cmd = bson.SON([("resync", 1), ("wait", 0)])
-            sync_node_conn.admin.command(cmd)
-        else:
-            # Tear down and restart the initial sync node to start initial sync again.
-            if not sync_node.teardown():
-                raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))
-
-            self.hook_test_case.logger.info("Starting the initial sync node back up again...")
-            sync_node.setup()
-            sync_node.await_ready()
-
-    def _after_test_impl(self, test, test_report, description):
-        self.tests_run += 1
-        sync_node = self.fixture.get_initial_sync_node()
-        sync_node_conn = utils.new_mongo_client(port=sync_node.port)
-
-        # If it's been 'n' tests so far, wait for the initial sync node to finish syncing.
-        if self.tests_run >= self.n:
-            self.hook_test_case.logger.info(
-                "%d tests have been run against the fixture, waiting for initial sync"
-                " node to go into SECONDARY state",
-                self.tests_run)
-            self.tests_run = 0
-
-            cmd = bson.SON([("replSetTest", 1),
-                            ("waitForMemberState", 2),
-                            ("timeoutMillis", 20 * 60 * 1000)])
-            sync_node_conn.admin.command(cmd)
-
-        # Check if the initial sync node is in SECONDARY state. If it's been 'n' tests, then it
-        # should have waited to be in SECONDARY state and the test should be marked as a failure.
-        # Otherwise, we just skip the hook and will check again after the next test.
-        try:
-            state = sync_node_conn.admin.command("replSetGetStatus").get("myState")
-            if state != 2:
-                if self.tests_run == 0:
-                    msg = "Initial sync node did not catch up after waiting 20 minutes"
-                    self.hook_test_case.logger.exception("{0} failed: {1}".format(description, msg))
-                    raise errors.TestFailure(msg)
-
-                self.hook_test_case.logger.info(
-                    "Initial sync node is in state %d, not state SECONDARY (2)."
-                    " Skipping BackgroundInitialSync hook for %s",
-                    state,
-                    test.short_name())
-
-                # If we have not restarted initial sync since the last time we ran the data
-                # validation, restart initial sync with a 20% probability.
-                if self.random_restarts < 1 and random.random() < 0.2:
-                    hook_type = "resync" if self.use_resync else "initial sync"
-                    self.hook_test_case.logger.info("randomly restarting " + hook_type +
-                                                    " in the middle of " + hook_type)
-                    self.__restart_init_sync(test_report, sync_node, sync_node_conn)
-                    self.random_restarts += 1
-                return
-        except pymongo.errors.OperationFailure:
-            # replSetGetStatus can fail if the node is in STARTUP state. The node will soon go into
-            # STARTUP2 state and replSetGetStatus will succeed after the next test.
-            self.hook_test_case.logger.info(
-                "replSetGetStatus call failed in BackgroundInitialSync hook, skipping hook for %s",
-                test.short_name())
-            return
-
-        self.random_restarts = 0
-
-        # Run data validation and dbhash checking.
-        self.hook_test_case.run_test()
-
-        self.__restart_init_sync(test_report, sync_node, sync_node_conn)
-
-
-class IntermediateInitialSync(JsCustomBehavior):
-    """
-    This hook accepts a parameter 'n' that specifies a number of tests after which it will start up
-    a node to initial sync, wait for replication to finish, and then validate the data. It also
-    accepts a parameter 'use_resync' for whether to restart the initial sync node with resync or by
-    shutting it down and restarting it.
-
-    This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'.
-    """
-
-    DEFAULT_N = CleanEveryN.DEFAULT_N
-
-    def __init__(self, hook_logger, fixture, use_resync=False, n=DEFAULT_N):
-        description = "Intermediate Initial Sync"
-        js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
-        JsCustomBehavior.__init__(self, hook_logger, fixture, js_filename, description)
-
-        self.use_resync = use_resync
-        self.n = n
-        self.tests_run = 0
-
-    def _should_run_after_test_impl(self):
-        self.tests_run += 1
-
-        # If we have not run 'n' tests yet, skip this hook.
-        if self.tests_run < self.n:
-            return False
-
-        self.tests_run = 0
-        return True
-
-    def _after_test_impl(self, test, test_report, description):
-        sync_node = self.fixture.get_initial_sync_node()
-        sync_node_conn = utils.new_mongo_client(port=sync_node.port)
-
-        if self.use_resync:
-            self.hook_test_case.logger.info("Calling resync on initial sync node...")
-            cmd = bson.SON([("resync", 1)])
-            sync_node_conn.admin.command(cmd)
-        else:
-            if not sync_node.teardown():
-                raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))
-
-            self.hook_test_case.logger.info("Starting the initial sync node back up again...")
-            sync_node.setup()
-            sync_node.await_ready()
-
-        # Do initial sync round.
-        self.hook_test_case.logger.info("Waiting for initial sync node to go into SECONDARY state")
-        cmd = bson.SON([("replSetTest", 1),
-                        ("waitForMemberState", 2),
-                        ("timeoutMillis", 20 * 60 * 1000)])
-        sync_node_conn.admin.command(cmd)
-
-        # Run data validation and dbhash checking.
-        self.hook_test_case.run_test()
-
-
-class ValidateCollections(JsCustomBehavior):
-    """
-    Runs full validation on all collections in all databases on every stand-alone
-    node, primary replica-set node, or primary shard node.
-    """
-    def __init__(self, hook_logger, fixture, shell_options=None):
-        description = "Full collection validation"
-        js_filename = os.path.join("jstests", "hooks", "run_validate_collections.js")
-        JsCustomBehavior.__init__(self,
-                                  hook_logger,
-                                  fixture,
-                                  js_filename,
-                                  description,
-                                  shell_options=shell_options)
-
-
-class CheckReplDBHash(JsCustomBehavior):
-    """
-    Checks that the dbhashes of all non-local databases and non-replicated system collections
-    match on the primary and secondaries.
-    """
-    def __init__(self, hook_logger, fixture, shell_options=None):
-        description = "Check dbhashes of all replica set or master/slave members"
-        js_filename = os.path.join("jstests", "hooks", "run_check_repl_dbhash.js")
-        JsCustomBehavior.__init__(self,
-                                  hook_logger,
-                                  fixture,
-                                  js_filename,
-                                  description,
-                                  shell_options=shell_options)
-
-
-class CheckReplOplogs(JsCustomBehavior):
-    """
-    Checks that local.oplog.rs matches on the primary and secondaries.
-    """
-    def __init__(self, hook_logger, fixture, shell_options=None):
-        description = "Check oplogs of all replica set members"
-        js_filename = os.path.join("jstests", "hooks", "run_check_repl_oplogs.js")
-        JsCustomBehavior.__init__(self,
-                                  hook_logger,
-                                  fixture,
-                                  js_filename,
-                                  description,
-                                  shell_options=shell_options)
-
-
-class PeriodicKillSecondaries(CustomBehavior):
-    """
-    Periodically kills the secondaries in a replica set and verifies
-    that they can reach the SECONDARY state without having connectivity
-    to the primary after an unclean shutdown.
-    """
-
-    DEFAULT_PERIOD_SECS = 30
-
-    def __init__(self, hook_logger, fixture, period_secs=DEFAULT_PERIOD_SECS):
-        if not isinstance(fixture, fixtures.ReplicaSetFixture):
-            raise TypeError("%s either does not support replication or does not support writing to"
-                            " its oplog early"
-                            % (fixture.__class__.__name__))
-
-        if fixture.num_nodes <= 1:
-            raise ValueError("PeriodicKillSecondaries requires the replica set to contain at least"
-                             " one secondary")
-
-        description = ("PeriodicKillSecondaries (kills the secondary after running tests for a"
-                       " configurable period of time)")
-        CustomBehavior.__init__(self, hook_logger, fixture, description)
-
-        self._period_secs = period_secs
-        self._start_time = None
-
-    def after_suite(self, test_report):
-        if self._start_time is not None:
-            # Ensure that we test killing the secondary and having it reach state SECONDARY after
-            # being restarted at least once when running the suite.
-            self._run(test_report)
-
-    def before_test(self, test, test_report):
-        if self._start_time is not None:
-            # The "rsSyncApplyStop" failpoint is already enabled.
-            return
-
-        # Enable the "rsSyncApplyStop" failpoint on each of the secondaries to prevent them from
-        # applying any oplog entries while the test is running.
-        for secondary in self.fixture.get_secondaries():
-            client = utils.new_mongo_client(port=secondary.port)
-            try:
-                client.admin.command(bson.SON([
-                    ("configureFailPoint", "rsSyncApplyStop"),
-                    ("mode", "alwaysOn")]))
-            except pymongo.errors.OperationFailure as err:
-                self.logger.exception(
-                    "Unable to disable oplog application on the mongod on port %d", secondary.port)
-                raise errors.ServerFailure(
-                    "Unable to disable oplog application on the mongod on port %d: %s"
-                    % (secondary.port, err.args[0]))
-
-        self._start_time = time.time()
-
-    def after_test(self, test, test_report):
-        self._last_test_name = test.short_name()
-
-        # Kill the secondaries and verify that they can reach the SECONDARY state if the specified
-        # period has elapsed.
-        should_check_secondaries = time.time() - self._start_time >= self._period_secs
-        if not should_check_secondaries:
-            return
-
-        self._run(test_report)
-
-    def _run(self, test_report):
-        self.hook_test_case = testcases.TestCase(
-            self.logger,
-            "Hook",
-            "%s:%s" % (self._last_test_name, self.logger_name))
-        CustomBehavior.start_dynamic_test(self.hook_test_case, test_report)
-
-        try:
-            self._kill_secondaries()
-            self._check_secondaries_and_restart_fixture()
-
-            # Validate all collections on all nodes after having the secondaries reconcile the end
-            # of their oplogs.
-            self._validate_collections(test_report)
-
-            # Verify that the dbhashes match across all nodes after having the secondaries reconcile
-            # the end of their oplogs.
-            self._check_repl_dbhash(test_report)
-
-            self._restart_and_clear_fixture()
-        except Exception as err:
-            self.hook_test_case.logger.exception(
-                "Encountered an error running PeriodicKillSecondaries.")
-            self.hook_test_case.return_code = 2
-            test_report.addFailure(self.hook_test_case, sys.exc_info())
-            raise errors.StopExecution(err.args[0])
-        else:
-            self.hook_test_case.return_code = 0
-            test_report.addSuccess(self.hook_test_case)
-        finally:
-            test_report.stopTest(self.hook_test_case)
-
-            # Set the hook back into a state where it will disable oplog application at the start
-            # of the next test that runs.
-            self._start_time = None
-
-    def _kill_secondaries(self):
-        for secondary in self.fixture.get_secondaries():
-            # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume applying
-            # oplog entries.
-            for secondary in self.fixture.get_secondaries():
-                client = utils.new_mongo_client(port=secondary.port)
-                try:
-                    client.admin.command(bson.SON([
-                        ("configureFailPoint", "rsSyncApplyStop"),
-                        ("mode", "off")]))
-                except pymongo.errors.OperationFailure as err:
-                    self.logger.exception(
-                        "Unable to re-enable oplog application on the mongod on port %d",
-                        secondary.port)
-                    raise errors.ServerFailure(
-                        "Unable to re-enable oplog application on the mongod on port %d: %s"
-                        % (secondary.port, err.args[0]))
-
-            # Wait a little bit for the secondary to start apply oplog entries so that we are more
-            # likely to kill the mongod process while it is partway into applying a batch.
-            time.sleep(0.1)
-
-            # Check that the secondary is still running before forcibly terminating it. This ensures
-            # we still detect some cases in which the secondary has already crashed.
-            if not secondary.is_running():
-                raise errors.ServerFailure(
-                    "mongod on port %d was expected to be running in"
-                    " PeriodicKillSecondaries.after_test(), but wasn't."
-                    % (secondary.port))
-
-            self.hook_test_case.logger.info(
-                "Killing the secondary on port %d..." % (secondary.port))
-            secondary.mongod.stop(kill=True)
-
-        # Teardown may or may not be considered a success as a result of killing a secondary, so we
-        # ignore the return value of Fixture.teardown().
-        self.fixture.teardown()
-
-    def _check_secondaries_and_restart_fixture(self):
-        preserve_dbpaths = []
-        for node in self.fixture.nodes:
-            preserve_dbpaths.append(node.preserve_dbpath)
-            node.preserve_dbpath = True
-
-        for secondary in self.fixture.get_secondaries():
-            self._check_invariants_as_standalone(secondary)
-
-            # Start the 'secondary' mongod back up as part of the replica set and wait for it to
-            # reach state SECONDARY.
-            secondary.setup()
-            secondary.await_ready()
-            self._await_secondary_state(secondary)
-
-            teardown_success = secondary.teardown()
-            if not teardown_success:
-                raise errors.ServerFailure(
-                    "%s did not exit cleanly after reconciling the end of its oplog" % (secondary))
-
-        self.hook_test_case.logger.info(
-            "Starting the fixture back up again with its data files intact...")
-
-        try:
-            self.fixture.setup()
-            self.fixture.await_ready()
-        finally:
-            for (i, node) in enumerate(self.fixture.nodes):
-                node.preserve_dbpath = preserve_dbpaths[i]
-
-    def _validate_collections(self, test_report):
-        validate_test_case = ValidateCollections(self.logger, self.fixture)
-        validate_test_case.before_suite(test_report)
-        validate_test_case.before_test(self.hook_test_case, test_report)
-        validate_test_case.after_test(self.hook_test_case, test_report)
-        validate_test_case.after_suite(test_report)
-
-    def _check_repl_dbhash(self, test_report):
-        dbhash_test_case = CheckReplDBHash(self.logger, self.fixture)
-        dbhash_test_case.before_suite(test_report)
-        dbhash_test_case.before_test(self.hook_test_case, test_report)
-        dbhash_test_case.after_test(self.hook_test_case, test_report)
-        dbhash_test_case.after_suite(test_report)
-
-    def _restart_and_clear_fixture(self):
-        # We restart the fixture after setting 'preserve_dbpath' back to its original value in order
-        # to clear the contents of the data directory if desired. The CleanEveryN hook cannot be
-        # used in combination with the PeriodicKillSecondaries hook because we may attempt to call
-        # Fixture.teardown() while the "rsSyncApplyStop" failpoint is still enabled on the
-        # secondaries, causing them to exit with a non-zero return code.
-        self.hook_test_case.logger.info(
-            "Finished verifying data consistency, stopping the fixture...")
-
-        teardown_success = self.fixture.teardown()
-        if not teardown_success:
-            raise errors.ServerFailure(
-                "%s did not exit cleanly after verifying data consistency"
-                % (self.fixture))
-
-        self.hook_test_case.logger.info("Starting the fixture back up again...")
-        self.fixture.setup()
-        self.fixture.await_ready()
-
-    def _check_invariants_as_standalone(self, secondary):
-        # We remove the --replSet option in order to start the node as a standalone.
-        replset_name = secondary.mongod_options.pop("replSet")
-
-        try:
-            secondary.setup()
-            secondary.await_ready()
-
-            client = utils.new_mongo_client(port=secondary.port)
-            minvalid_doc = client.local["replset.minvalid"].find_one()
-
-            latest_oplog_doc = client.local["oplog.rs"].find_one(
-                sort=[("$natural", pymongo.DESCENDING)])
-
-            if minvalid_doc is not None:
-                # Check the invariants 'begin <= minValid', 'minValid <= oplogDeletePoint', and
-                # 'minValid <= top of oplog' before the secondary has reconciled the end of its
-                # oplog.
-                null_ts = bson.Timestamp(0, 0)
-                begin_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
-                minvalid_ts = minvalid_doc.get("ts", begin_ts)
-                oplog_delete_point_ts = minvalid_doc.get("oplogDeleteFromPoint", minvalid_ts)
-
-                if minvalid_ts == null_ts:
-                    # The server treats the "ts" field in the minValid document as missing when its
-                    # value is the null timestamp.
-                    minvalid_ts = begin_ts
-
-                if oplog_delete_point_ts == null_ts:
-                    # The server treats the "oplogDeleteFromPoint" field as missing when its value
-                    # is the null timestamp.
-                    oplog_delete_point_ts = minvalid_ts
-
-                latest_oplog_entry_ts = latest_oplog_doc.get("ts", oplog_delete_point_ts)
-
-                if not begin_ts <= minvalid_ts:
-                    raise errors.ServerFailure(
-                        "The condition begin <= minValid (%s <= %s) doesn't hold: minValid"
-                        " document=%s, latest oplog entry=%s"
-                        % (begin_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))
-
-                if not minvalid_ts <= oplog_delete_point_ts:
-                    raise errors.ServerFailure(
-                        "The condition minValid <= oplogDeletePoint (%s <= %s) doesn't hold:"
-                        " minValid document=%s, latest oplog entry=%s"
-                        % (minvalid_ts, oplog_delete_point_ts, minvalid_doc, latest_oplog_doc))
-
-                if not minvalid_ts <= latest_oplog_entry_ts:
-                    raise errors.ServerFailure(
-                        "The condition minValid <= top of oplog (%s <= %s) doesn't hold: minValid"
-                        " document=%s, latest oplog entry=%s"
-                        % (minvalid_ts, latest_oplog_entry_ts, minvalid_doc, latest_oplog_doc))
-
-            teardown_success = secondary.teardown()
-            if not teardown_success:
-                raise errors.ServerFailure(
-                    "%s did not exit cleanly after being started up as a standalone" % (secondary))
-        except pymongo.errors.OperationFailure as err:
-            self.hook_test_case.logger.exception(
-                "Failed to read the minValid document or the latest oplog entry from the mongod on"
-                " port %d",
-                secondary.port)
-            raise errors.ServerFailure(
-                "Failed to read the minValid document or the latest oplog entry from the mongod on"
-                " port %d: %s"
-                % (secondary.port, err.args[0]))
-        finally:
-            # Set the secondary's options back to their original values.
-            secondary.mongod_options["replSet"] = replset_name
-
-    def _await_secondary_state(self, secondary):
-        client = utils.new_mongo_client(port=secondary.port)
-        try:
-            client.admin.command(bson.SON([
-                ("replSetTest", 1),
-                ("waitForMemberState", 2),  # 2 = SECONDARY
-                ("timeoutMillis", fixtures.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000)]))
-        except pymongo.errors.OperationFailure as err:
-            self.hook_test_case.logger.exception(
-                "mongod on port %d failed to reach state SECONDARY after %d seconds",
-                secondary.port,
-                fixtures.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60)
-            raise errors.ServerFailure(
-                "mongod on port %d failed to reach state SECONDARY after %d seconds: %s"
-                % (secondary.port, fixtures.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60, err.args[0]))
-
-
-_CUSTOM_BEHAVIORS = {
-    "CleanEveryN": CleanEveryN,
-    "CheckReplDBHash": CheckReplDBHash,
-    "CheckReplOplogs": CheckReplOplogs,
-    "ValidateCollections": ValidateCollections,
-    "IntermediateInitialSync": IntermediateInitialSync,
-    "BackgroundInitialSync": BackgroundInitialSync,
-    "PeriodicKillSecondaries": PeriodicKillSecondaries,
-}
diff --git a/buildscripts/resmokelib/testing/hooks/__init__.py b/buildscripts/resmokelib/testing/hooks/__init__.py
new file mode 100644
index 00000000000..f1d6ec5970c
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/__init__.py
@@ -0,0 +1,15 @@
+"""
+Package containing classes to customize the behavior of a test fixture
+by allowing special code to be executed before or after each test, and
+before or after each suite.
+"""
+
+from __future__ import absolute_import
+
+from .interface import make_custom_behavior
+from ...utils import autoloader as _autoloader
+
+
+# We dynamically load all modules in the hooks/ package so that any CustomBehavior classes declared
+# within them are automatically registered.
+_autoloader.load_all_modules(name=__name__, path=__path__)
diff --git a/buildscripts/resmokelib/testing/hooks/cleanup.py b/buildscripts/resmokelib/testing/hooks/cleanup.py
new file mode 100644
index 00000000000..f468ccd0709
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/cleanup.py
@@ -0,0 +1,58 @@
+"""
+Testing hook for cleaning up data files created by the fixture.
+"""
+
+from __future__ import absolute_import
+
+import os
+
+from . import interface
+from .. import testcases
+from ... import errors
+
+
+class CleanEveryN(interface.CustomBehavior):
+    """
+    Restarts the fixture after it has ran 'n' tests.
+    On mongod-related fixtures, this will clear the dbpath.
+    """
+
+    DEFAULT_N = 20
+
+    def __init__(self, hook_logger, fixture, n=DEFAULT_N):
+        description = "CleanEveryN (restarts the fixture after running `n` tests)"
+        interface.CustomBehavior.__init__(self, hook_logger, fixture, description)
+        self.hook_test_case = testcases.TestCase(hook_logger, "Hook", "CleanEveryN")
+
+        # Try to isolate what test triggers the leak by restarting the fixture each time.
+        if "detect_leaks=1" in os.getenv("ASAN_OPTIONS", ""):
+            self.logger.info("ASAN_OPTIONS environment variable set to detect leaks, so restarting"
+                             " the fixture after each test instead of after every %d.", n)
+            n = 1
+
+        self.n = n
+        self.tests_run = 0
+
+    def after_test(self, test, test_report):
+        self.tests_run += 1
+        if self.tests_run < self.n:
+            return
+
+        self.hook_test_case.test_name = test.short_name() + ":" + self.logger_name
+        interface.CustomBehavior.start_dynamic_test(self.hook_test_case, test_report)
+        try:
+            self.logger.info("%d tests have been run against the fixture, stopping it...",
+                             self.tests_run)
+            self.tests_run = 0
+
+            if not self.fixture.teardown():
+                raise errors.ServerFailure("%s did not exit cleanly" % (self.fixture))
+
+            self.logger.info("Starting the fixture back up again...")
+            self.fixture.setup()
+            self.fixture.await_ready()
+
+            self.hook_test_case.return_code = 0
+            test_report.addSuccess(self.hook_test_case)
+        finally:
+            test_report.stopTest(self.hook_test_case)
diff --git a/buildscripts/resmokelib/testing/hooks/dbhash.py b/buildscripts/resmokelib/testing/hooks/dbhash.py
new file mode 100644
index 00000000000..02b7b18acac
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/dbhash.py
@@ -0,0 +1,25 @@
+"""
+Testing hook for verifying data consistency across a replica set.
+"""
+
+from __future__ import absolute_import
+
+import os.path
+
+from . import jsfile
+
+
+class CheckReplDBHash(jsfile.JsCustomBehavior):
+    """
+    Checks that the dbhashes of all non-local databases and non-replicated system collections
+    match on the primary and secondaries.
+    """
+    def __init__(self, hook_logger, fixture, shell_options=None):
+        description = "Check dbhashes of all replica set or master/slave members"
+        js_filename = os.path.join("jstests", "hooks", "run_check_repl_dbhash.js")
+        jsfile.JsCustomBehavior.__init__(self,
+                                         hook_logger,
+                                         fixture,
+                                         js_filename,
+                                         description,
+                                         shell_options=shell_options)
diff --git a/buildscripts/resmokelib/testing/hooks/initialsync.py b/buildscripts/resmokelib/testing/hooks/initialsync.py
new file mode 100644
index 00000000000..013c4c1d380
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/initialsync.py
@@ -0,0 +1,179 @@
+"""
+Testing hook for verifying correctness of initial sync.
+"""
+
+from __future__ import absolute_import
+
+import os.path
+import random
+
+import bson
+import pymongo
+import pymongo.errors
+
+from . import cleanup
+from . import jsfile
+from ... import errors
+from ... import utils
+
+
+class BackgroundInitialSync(jsfile.JsCustomBehavior):
+    """
+    After every test, this hook checks if a background node has finished initial sync and if so,
+    validates it, tears it down, and restarts it.
+
+    This test accepts a parameter 'n' that specifies a number of tests after which it will wait for
+    replication to finish before validating and restarting the initial sync node. It also accepts
+    a parameter 'use_resync' for whether to restart the initial sync node with resync or by
+    shutting it down and restarting it.
+
+    This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'. If used
+    at the same time as CleanEveryN, the 'n' value passed to this hook should be equal to the 'n'
+    value for CleanEveryN.
+    """
+
+    DEFAULT_N = cleanup.CleanEveryN.DEFAULT_N
+
+    def __init__(self, hook_logger, fixture, use_resync=False, n=DEFAULT_N, shell_options=None):
+        description = "Background Initial Sync"
+        js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
+        jsfile.JsCustomBehavior.__init__(self, hook_logger, fixture, js_filename,
+                                         description, shell_options)
+
+        self.use_resync = use_resync
+        self.n = n
+        self.tests_run = 0
+        self.random_restarts = 0
+
+    # Restarts initial sync by shutting down the node, clearing its data, and restarting it,
+    # or by calling resync if use_resync is specified.
+    def __restart_init_sync(self, test_report, sync_node, sync_node_conn):
+        if self.use_resync:
+            self.hook_test_case.logger.info("Calling resync on initial sync node...")
+            cmd = bson.SON([("resync", 1), ("wait", 0)])
+            sync_node_conn.admin.command(cmd)
+        else:
+            # Tear down and restart the initial sync node to start initial sync again.
+            if not sync_node.teardown():
+                raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))
+
+            self.hook_test_case.logger.info("Starting the initial sync node back up again...")
+            sync_node.setup()
+            sync_node.await_ready()
+
+    def _after_test_impl(self, test, test_report, description):
+        self.tests_run += 1
+        sync_node = self.fixture.get_initial_sync_node()
+        sync_node_conn = utils.new_mongo_client(port=sync_node.port)
+
+        # If it's been 'n' tests so far, wait for the initial sync node to finish syncing.
+        if self.tests_run >= self.n:
+            self.hook_test_case.logger.info(
+                "%d tests have been run against the fixture, waiting for initial sync"
+                " node to go into SECONDARY state",
+                self.tests_run)
+            self.tests_run = 0
+
+            cmd = bson.SON([("replSetTest", 1),
+                            ("waitForMemberState", 2),
+                            ("timeoutMillis", 20 * 60 * 1000)])
+            sync_node_conn.admin.command(cmd)
+
+        # Check if the initial sync node is in SECONDARY state. If it's been 'n' tests, then it
+        # should have waited to be in SECONDARY state and the test should be marked as a failure.
+        # Otherwise, we just skip the hook and will check again after the next test.
+        try:
+            state = sync_node_conn.admin.command("replSetGetStatus").get("myState")
+            if state != 2:
+                if self.tests_run == 0:
+                    msg = "Initial sync node did not catch up after waiting 20 minutes"
+                    self.hook_test_case.logger.exception("{0} failed: {1}".format(description, msg))
+                    raise errors.TestFailure(msg)
+
+                self.hook_test_case.logger.info(
+                    "Initial sync node is in state %d, not state SECONDARY (2)."
+                    " Skipping BackgroundInitialSync hook for %s",
+                    state,
+                    test.short_name())
+
+                # If we have not restarted initial sync since the last time we ran the data
+                # validation, restart initial sync with a 20% probability.
+                if self.random_restarts < 1 and random.random() < 0.2:
+                    hook_type = "resync" if self.use_resync else "initial sync"
+                    self.hook_test_case.logger.info("randomly restarting " + hook_type +
+                                                    " in the middle of " + hook_type)
+                    self.__restart_init_sync(test_report, sync_node, sync_node_conn)
+                    self.random_restarts += 1
+                return
+        except pymongo.errors.OperationFailure:
+            # replSetGetStatus can fail if the node is in STARTUP state. The node will soon go into
+            # STARTUP2 state and replSetGetStatus will succeed after the next test.
+            self.hook_test_case.logger.info(
+                "replSetGetStatus call failed in BackgroundInitialSync hook, skipping hook for %s",
+                test.short_name())
+            return
+
+        self.random_restarts = 0
+
+        # Run data validation and dbhash checking.
+        self.hook_test_case.run_test()
+
+        self.__restart_init_sync(test_report, sync_node, sync_node_conn)
+
+
+class IntermediateInitialSync(jsfile.JsCustomBehavior):
+    """
+    This hook accepts a parameter 'n' that specifies a number of tests after which it will start up
+    a node to initial sync, wait for replication to finish, and then validate the data. It also
+    accepts a parameter 'use_resync' for whether to restart the initial sync node with resync or by
+    shutting it down and restarting it.
+
+    This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'.
+    """
+
+    DEFAULT_N = cleanup.CleanEveryN.DEFAULT_N
+
+    def __init__(self, hook_logger, fixture, use_resync=False, n=DEFAULT_N):
+        description = "Intermediate Initial Sync"
+        js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
+        jsfile.JsCustomBehavior.__init__(self, hook_logger, fixture, js_filename, description)
+
+        self.use_resync = use_resync
+        self.n = n
+        self.tests_run = 0
+
+    def _should_run_after_test_impl(self):
+        self.tests_run += 1
+
+        # If we have not run 'n' tests yet, skip this hook.
+        if self.tests_run < self.n:
+            return False
+
+        self.tests_run = 0
+        return True
+
+    def _after_test_impl(self, test, test_report, description):
+        sync_node = self.fixture.get_initial_sync_node()
+        sync_node_conn = utils.new_mongo_client(port=sync_node.port)
+
+        if self.use_resync:
+            self.hook_test_case.logger.info("Calling resync on initial sync node...")
+            cmd = bson.SON([("resync", 1)])
+            sync_node_conn.admin.command(cmd)
+        else:
+            if not sync_node.teardown():
+                raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))
+
+            self.hook_test_case.logger.info("Starting the initial sync node back up again...")
+            sync_node.setup()
+            sync_node.await_ready()
+
+        # Do initial sync round.
+        self.hook_test_case.logger.info("Waiting for initial sync node to go into SECONDARY state")
+        cmd = bson.SON([("replSetTest", 1),
+                        ("waitForMemberState", 2),
+                        ("timeoutMillis", 20 * 60 * 1000)])
+        sync_node_conn.admin.command(cmd)
+
+        # Run data validation and dbhash checking.
+        self.hook_test_case.run_test()
diff --git a/buildscripts/resmokelib/testing/hooks/interface.py b/buildscripts/resmokelib/testing/hooks/interface.py
new file mode 100644
index 00000000000..6ca4ae79a7d
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/interface.py
@@ -0,0 +1,83 @@
+"""
+Interface for customizing the behavior of a test fixture.
+"""
+
+from __future__ import absolute_import
+
+from ... import logging
+from ...utils import registry
+
+
+_HOOKS = {}
+
+
+def make_custom_behavior(class_name, *args, **kwargs):
+    """
+    Factory function for creating CustomBehavior instances.
+    """
+
+    if class_name not in _HOOKS:
+        raise ValueError("Unknown custom behavior class '%s'" % (class_name))
+
+    return _HOOKS[class_name](*args, **kwargs)
+
+
+class CustomBehavior(object):
+    """
+    The common interface all CustomBehaviors will inherit from.
+    """
+
+    __metaclass__ = registry.make_registry_metaclass(_HOOKS)
+
+    REGISTERED_NAME = registry.LEAVE_UNREGISTERED
+
+    @staticmethod
+    def start_dynamic_test(hook_test_case, test_report):
+        """
+        If a CustomBehavior wants to add a test case that will show up
+        in the test report, it should use this method to add it to the
+        report, since we will need to count it as a dynamic test to get
+        the stats in the summary information right.
+        """
+        test_report.startTest(hook_test_case, dynamic=True)
+
+    def __init__(self, hook_logger, fixture, description):
+        """
+        Initializes the CustomBehavior with the specified fixture.
+        """
+
+        if not isinstance(hook_logger, logging.Logger):
+            raise TypeError("logger must be a Logger instance")
+
+        self.logger = hook_logger
+        self.fixture = fixture
+        self.hook_test_case = None
+        self.logger_name = self.__class__.__name__
+        self.description = description
+
+    def before_suite(self, test_report):
+        """
+        The test runner calls this exactly once before they start
+        running the suite.
+        """
+        pass
+
+    def after_suite(self, test_report):
+        """
+        The test runner calls this exactly once after all tests have
+        finished executing. Be sure to reset the behavior back to its
+        original state so that it can be run again.
+        """
+        pass
+
+    def before_test(self, test, test_report):
+        """
+        Each test will call this before it executes.
+        """
+        pass
+
+    def after_test(self, test, test_report):
+        """
+        Each test will call this after it executes.
+        """
+        pass
diff --git a/buildscripts/resmokelib/testing/hooks/jsfile.py b/buildscripts/resmokelib/testing/hooks/jsfile.py
new file mode 100644
index 00000000000..556ddd2d702
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/jsfile.py
@@ -0,0 +1,66 @@
+"""
+Interface for customizing the behavior of a test fixture by executing a
+JavaScript file.
+"""
+
+from __future__ import absolute_import
+
+import sys
+
+import pymongo
+import pymongo.errors
+
+from . import interface
+from .. import testcases
+from ... import errors
+from ...utils import registry
+
+
+class JsCustomBehavior(interface.CustomBehavior):
+    REGISTERED_NAME = registry.LEAVE_UNREGISTERED
+
+    def __init__(self, hook_logger, fixture, js_filename, description, shell_options=None):
+        interface.CustomBehavior.__init__(self, hook_logger, fixture, description)
+        self.hook_test_case = testcases.JSTestCase(hook_logger,
+                                                   js_filename,
+                                                   shell_options=shell_options,
+                                                   test_kind="Hook")
+        self.test_case_is_configured = False
+
+    def before_suite(self, test_report):
+        if not self.test_case_is_configured:
+            # Configure the test case after the fixture has been set up.
+            self.hook_test_case.configure(self.fixture)
+            self.test_case_is_configured = True
+
+    def _should_run_after_test_impl(self):
+        return True
+
+    def _after_test_impl(self, test, test_report, description):
+        self.hook_test_case.run_test()
+
+    def after_test(self, test, test_report):
+        if not self._should_run_after_test_impl():
+            return
+
+        # Change test_name and description to be more descriptive.
+        description = "{0} after running '{1}'".format(self.description, test.short_name())
+        self.hook_test_case.test_name = test.short_name() + ":" + self.logger_name
+        interface.CustomBehavior.start_dynamic_test(self.hook_test_case, test_report)
+
+        try:
+            self._after_test_impl(test, test_report, description)
+        except pymongo.errors.OperationFailure as err:
+            self.hook_test_case.logger.exception("{0} failed".format(description))
+            self.hook_test_case.return_code = 1
+            test_report.addFailure(self.hook_test_case, sys.exc_info())
+            raise errors.StopExecution(err.args[0])
+        except self.hook_test_case.failureException as err:
+            self.hook_test_case.logger.exception("{0} failed".format(description))
+            test_report.addFailure(self.hook_test_case, sys.exc_info())
+            raise errors.StopExecution(err.args[0])
+        else:
+            self.hook_test_case.return_code = 0
+            test_report.addSuccess(self.hook_test_case)
+        finally:
+            test_report.stopTest(self.hook_test_case)
diff --git a/buildscripts/resmokelib/testing/hooks/oplog.py b/buildscripts/resmokelib/testing/hooks/oplog.py
new file mode 100644
index 00000000000..62fa24026fc
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/oplog.py
@@ -0,0 +1,25 @@
+"""
+Testing hook for verifying members of a replica set have matching
+oplogs.
+"""
+
+from __future__ import absolute_import
+
+import os.path
+
+from . import jsfile
+
+
+class CheckReplOplogs(jsfile.JsCustomBehavior):
+    """
+    Checks that local.oplog.rs matches on the primary and secondaries.
+    """
+    def __init__(self, hook_logger, fixture, shell_options=None):
+        description = "Check oplogs of all replica set members"
+        js_filename = os.path.join("jstests", "hooks", "run_check_repl_oplogs.js")
+        jsfile.JsCustomBehavior.__init__(self,
+                                         hook_logger,
+                                         fixture,
+                                         js_filename,
+                                         description,
+                                         shell_options=shell_options)
diff --git a/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py b/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py
new file mode 100644
index 00000000000..fd81483551b
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py
@@ -0,0 +1,310 @@
+"""
+Testing hook for verifying correctness of a secondary's behavior during
+an unclean shutdown.
+"""
+
+from __future__ import absolute_import
+
+import sys
+import time
+
+import bson
+import pymongo
+import pymongo.errors
+
+from . import dbhash
+from . import interface
+from . import validate
+from .. import fixtures
+from .. import testcases
+from ... import errors
+from ... import utils
+
+
+class PeriodicKillSecondaries(interface.CustomBehavior):
+    """
+    Periodically kills the secondaries in a replica set and verifies
+    that they can reach the SECONDARY state without having connectivity
+    to the primary after an unclean shutdown.
+    """
+
+    DEFAULT_PERIOD_SECS = 30
+
+    def __init__(self, hook_logger, fixture, period_secs=DEFAULT_PERIOD_SECS):
+        if not isinstance(fixture, fixtures.ReplicaSetFixture):
+            raise TypeError("%s either does not support replication or does not support writing to"
+                            " its oplog early"
+                            % (fixture.__class__.__name__))
+
+        if fixture.num_nodes <= 1:
+            raise ValueError("PeriodicKillSecondaries requires the replica set to contain at least"
+                             " one secondary")
+
+        description = ("PeriodicKillSecondaries (kills the secondary after running tests for a"
+                       " configurable period of time)")
+        interface.CustomBehavior.__init__(self, hook_logger, fixture, description)
+
+        self._period_secs = period_secs
+        self._start_time = None
+
+    def after_suite(self, test_report):
+        if self._start_time is not None:
+            # Ensure that we test killing the secondary and having it reach state SECONDARY after
+            # being restarted at least once when running the suite.
+            self._run(test_report)
+
+    def before_test(self, test, test_report):
+        if self._start_time is not None:
+            # The "rsSyncApplyStop" failpoint is already enabled.
+            return
+
+        # Enable the "rsSyncApplyStop" failpoint on each of the secondaries to prevent them from
+        # applying any oplog entries while the test is running.
+        for secondary in self.fixture.get_secondaries():
+            client = utils.new_mongo_client(port=secondary.port)
+            try:
+                client.admin.command(bson.SON([
+                    ("configureFailPoint", "rsSyncApplyStop"),
+                    ("mode", "alwaysOn")]))
+            except pymongo.errors.OperationFailure as err:
+                self.logger.exception(
+                    "Unable to disable oplog application on the mongod on port %d", secondary.port)
+                raise errors.ServerFailure(
+                    "Unable to disable oplog application on the mongod on port %d: %s"
+                    % (secondary.port, err.args[0]))
+
+        self._start_time = time.time()
+
+    def after_test(self, test, test_report):
+        self._last_test_name = test.short_name()
+
+        # Kill the secondaries and verify that they can reach the SECONDARY state if the specified
+        # period has elapsed.
+        should_check_secondaries = time.time() - self._start_time >= self._period_secs
+        if not should_check_secondaries:
+            return
+
+        self._run(test_report)
+
+    def _run(self, test_report):
+        self.hook_test_case = testcases.TestCase(
+            self.logger,
+            "Hook",
+            "%s:%s" % (self._last_test_name, self.logger_name))
+        interface.CustomBehavior.start_dynamic_test(self.hook_test_case, test_report)
+
+        try:
+            self._kill_secondaries()
+            self._check_secondaries_and_restart_fixture()
+
+            # Validate all collections on all nodes after having the secondaries reconcile the end
+            # of their oplogs.
+            self._validate_collections(test_report)
+
+            # Verify that the dbhashes match across all nodes after having the secondaries reconcile
+            # the end of their oplogs.
+            self._check_repl_dbhash(test_report)
+
+            self._restart_and_clear_fixture()
+        except Exception as err:
+            self.hook_test_case.logger.exception(
+                "Encountered an error running PeriodicKillSecondaries.")
+            self.hook_test_case.return_code = 2
+            test_report.addFailure(self.hook_test_case, sys.exc_info())
+            raise errors.StopExecution(err.args[0])
+        else:
+            self.hook_test_case.return_code = 0
+            test_report.addSuccess(self.hook_test_case)
+        finally:
+            test_report.stopTest(self.hook_test_case)
+
+            # Set the hook back into a state where it will disable oplog application at the start
+            # of the next test that runs.
+            self._start_time = None
+
+    def _kill_secondaries(self):
+        for secondary in self.fixture.get_secondaries():
+            # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume applying
+            # oplog entries.
+            for secondary in self.fixture.get_secondaries():
+                client = utils.new_mongo_client(port=secondary.port)
+                try:
+                    client.admin.command(bson.SON([
+                        ("configureFailPoint", "rsSyncApplyStop"),
+                        ("mode", "off")]))
+                except pymongo.errors.OperationFailure as err:
+                    self.logger.exception(
+                        "Unable to re-enable oplog application on the mongod on port %d",
+                        secondary.port)
+                    raise errors.ServerFailure(
+                        "Unable to re-enable oplog application on the mongod on port %d: %s"
+                        % (secondary.port, err.args[0]))
+
+            # Wait a little bit for the secondary to start apply oplog entries so that we are more
+            # likely to kill the mongod process while it is partway into applying a batch.
+            time.sleep(0.1)
+
+            # Check that the secondary is still running before forcibly terminating it. This ensures
+            # we still detect some cases in which the secondary has already crashed.
+            if not secondary.is_running():
+                raise errors.ServerFailure(
+                    "mongod on port %d was expected to be running in"
+                    " PeriodicKillSecondaries.after_test(), but wasn't."
+                    % (secondary.port))
+
+            self.hook_test_case.logger.info(
+                "Killing the secondary on port %d..." % (secondary.port))
+            secondary.mongod.stop(kill=True)
+
+        # Teardown may or may not be considered a success as a result of killing a secondary, so we
+        # ignore the return value of Fixture.teardown().
+        self.fixture.teardown()
+
+    def _check_secondaries_and_restart_fixture(self):
+        preserve_dbpaths = []
+        for node in self.fixture.nodes:
+            preserve_dbpaths.append(node.preserve_dbpath)
+            node.preserve_dbpath = True
+
+        for secondary in self.fixture.get_secondaries():
+            self._check_invariants_as_standalone(secondary)
+
+            # Start the 'secondary' mongod back up as part of the replica set and wait for it to
+            # reach state SECONDARY.
+            secondary.setup()
+            secondary.await_ready()
+            self._await_secondary_state(secondary)
+
+            teardown_success = secondary.teardown()
+            if not teardown_success:
+                raise errors.ServerFailure(
+                    "%s did not exit cleanly after reconciling the end of its oplog" % (secondary))
+
+        self.hook_test_case.logger.info(
+            "Starting the fixture back up again with its data files intact...")
+
+        try:
+            self.fixture.setup()
+            self.fixture.await_ready()
+        finally:
+            for (i, node) in enumerate(self.fixture.nodes):
+                node.preserve_dbpath = preserve_dbpaths[i]
+
+    def _validate_collections(self, test_report):
+        validate_test_case = validate.ValidateCollections(self.logger, self.fixture)
+        validate_test_case.before_suite(test_report)
+        validate_test_case.before_test(self.hook_test_case, test_report)
+        validate_test_case.after_test(self.hook_test_case, test_report)
+        validate_test_case.after_suite(test_report)
+
+    def _check_repl_dbhash(self, test_report):
+        dbhash_test_case = dbhash.CheckReplDBHash(self.logger, self.fixture)
+        dbhash_test_case.before_suite(test_report)
+        dbhash_test_case.before_test(self.hook_test_case, test_report)
+        dbhash_test_case.after_test(self.hook_test_case, test_report)
+        dbhash_test_case.after_suite(test_report)
+
+    def _restart_and_clear_fixture(self):
+        # We restart the fixture after setting 'preserve_dbpath' back to its original value in order
+        # to clear the contents of the data directory if desired. The CleanEveryN hook cannot be
+        # used in combination with the PeriodicKillSecondaries hook because we may attempt to call
+        # Fixture.teardown() while the "rsSyncApplyStop" failpoint is still enabled on the
+        # secondaries, causing them to exit with a non-zero return code.
+        self.hook_test_case.logger.info(
+            "Finished verifying data consistency, stopping the fixture...")
+
+        teardown_success = self.fixture.teardown()
+        if not teardown_success:
+            raise errors.ServerFailure(
+                "%s did not exit cleanly after verifying data consistency"
+                % (self.fixture))
+
+        self.hook_test_case.logger.info("Starting the fixture back up again...")
+        self.fixture.setup()
+        self.fixture.await_ready()
+
+    def _check_invariants_as_standalone(self, secondary):
+        # We remove the --replSet option in order to start the node as a standalone.
+        replset_name = secondary.mongod_options.pop("replSet")
+
+        try:
+            secondary.setup()
+            secondary.await_ready()
+
+            client = utils.new_mongo_client(port=secondary.port)
+            minvalid_doc = client.local["replset.minvalid"].find_one()
+
+            latest_oplog_doc = client.local["oplog.rs"].find_one(
+                sort=[("$natural", pymongo.DESCENDING)])
+
+            if minvalid_doc is not None:
+                # Check the invariants 'begin <= minValid', 'minValid <= oplogDeletePoint', and
+                # 'minValid <= top of oplog' before the secondary has reconciled the end of its
+                # oplog.
+                null_ts = bson.Timestamp(0, 0)
+                begin_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
+                minvalid_ts = minvalid_doc.get("ts", begin_ts)
+                oplog_delete_point_ts = minvalid_doc.get("oplogDeleteFromPoint", minvalid_ts)
+
+                if minvalid_ts == null_ts:
+                    # The server treats the "ts" field in the minValid document as missing when its
+                    # value is the null timestamp.
+                    minvalid_ts = begin_ts
+
+                if oplog_delete_point_ts == null_ts:
+                    # The server treats the "oplogDeleteFromPoint" field as missing when its value
+                    # is the null timestamp.
+                    oplog_delete_point_ts = minvalid_ts
+
+                latest_oplog_entry_ts = latest_oplog_doc.get("ts", oplog_delete_point_ts)
+
+                if not begin_ts <= minvalid_ts:
+                    raise errors.ServerFailure(
+                        "The condition begin <= minValid (%s <= %s) doesn't hold: minValid"
+                        " document=%s, latest oplog entry=%s"
+                        % (begin_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))
+
+                if not minvalid_ts <= oplog_delete_point_ts:
+                    raise errors.ServerFailure(
+                        "The condition minValid <= oplogDeletePoint (%s <= %s) doesn't hold:"
+                        " minValid document=%s, latest oplog entry=%s"
+                        % (minvalid_ts, oplog_delete_point_ts, minvalid_doc, latest_oplog_doc))
+
+                if not minvalid_ts <= latest_oplog_entry_ts:
+                    raise errors.ServerFailure(
+                        "The condition minValid <= top of oplog (%s <= %s) doesn't hold: minValid"
+                        " document=%s, latest oplog entry=%s"
+                        % (minvalid_ts, latest_oplog_entry_ts, minvalid_doc, latest_oplog_doc))
+
+            teardown_success = secondary.teardown()
+            if not teardown_success:
+                raise errors.ServerFailure(
+                    "%s did not exit cleanly after being started up as a standalone" % (secondary))
+        except pymongo.errors.OperationFailure as err:
+            self.hook_test_case.logger.exception(
+                "Failed to read the minValid document or the latest oplog entry from the mongod on"
+                " port %d",
+                secondary.port)
+            raise errors.ServerFailure(
+                "Failed to read the minValid document or the latest oplog entry from the mongod on"
+                " port %d: %s"
+                % (secondary.port, err.args[0]))
+        finally:
+            # Set the secondary's options back to their original values.
+            secondary.mongod_options["replSet"] = replset_name
+
+    def _await_secondary_state(self, secondary):
+        client = utils.new_mongo_client(port=secondary.port)
+        try:
+            client.admin.command(bson.SON([
+                ("replSetTest", 1),
+                ("waitForMemberState", 2),  # 2 = SECONDARY
+                ("timeoutMillis", fixtures.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000)]))
+        except pymongo.errors.OperationFailure as err:
+            self.hook_test_case.logger.exception(
+                "mongod on port %d failed to reach state SECONDARY after %d seconds",
+                secondary.port,
+                fixtures.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60)
+            raise errors.ServerFailure(
+                "mongod on port %d failed to reach state SECONDARY after %d seconds: %s"
+                % (secondary.port, fixtures.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60, err.args[0]))
diff --git a/buildscripts/resmokelib/testing/hooks/validate.py b/buildscripts/resmokelib/testing/hooks/validate.py
new file mode 100644
index 00000000000..367b905d7ed
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/validate.py
@@ -0,0 +1,26 @@
+"""
+Testing hook for verifying the consistency and integrity of collection
+and index data.
+"""
+
+from __future__ import absolute_import
+
+import os.path
+
+from . import jsfile
+
+
+class ValidateCollections(jsfile.JsCustomBehavior):
+    """
+    Runs full validation on all collections in all databases on every stand-alone
+    node, primary replica-set node, or primary shard node.
+    """
+    def __init__(self, hook_logger, fixture, shell_options=None):
+        description = "Full collection validation"
+        js_filename = os.path.join("jstests", "hooks", "run_validate_collections.js")
+        jsfile.JsCustomBehavior.__init__(self,
+                                         hook_logger,
+                                         fixture,
+                                         js_filename,
+                                         description,
+                                         shell_options=shell_options)
diff --git a/buildscripts/resmokelib/utils/autoloader.py b/buildscripts/resmokelib/utils/autoloader.py
new file mode 100644
index 00000000000..1ac58abc892
--- /dev/null
+++ b/buildscripts/resmokelib/utils/autoloader.py
@@ -0,0 +1,27 @@
+"""
+Utility for loading all modules within a package.
+"""
+
+from __future__ import absolute_import
+
+import importlib
+import pkgutil
+
+
+def load_all_modules(name, path):
+    """
+    Dynamically loads all modules in the 'name' package.
+
+    This function is useful in combination with the registry.py module
+    so that any classes declared within the package are automatically
+    registered.
+
+    The following is the intended usage within the __init__.py file for
+    a package:
+
+    from utils import autoloader as _autoloader
+    _autoloader.load_all_modules(name=__name__, path=__path__)
+    """
+
+    for (_, module, _) in pkgutil.walk_packages(path=path):
+        importlib.import_module("." + module, package=name)
diff --git a/buildscripts/resmokelib/utils/registry.py b/buildscripts/resmokelib/utils/registry.py
new file mode 100644
index 00000000000..47d53d9d891
--- /dev/null
+++ b/buildscripts/resmokelib/utils/registry.py
@@ -0,0 +1,61 @@
+"""
+Utility for having class declarations automatically cause a reference to
+the class to be stored along with its name.
+
+This pattern enables the associated class to be looked up later by using
+its name.
+"""
+
+from __future__ import absolute_import
+
+
+# Specifying 'LEAVE_UNREGISTERED' as the "REGISTERED_NAME" attribute will cause the class to be
+# omitted from the registry. This is particularly useful for base classes that define an interface
+# or common functionality, and aren't intended to be constructed explicitly.
+LEAVE_UNREGISTERED = object()
+
+
+def make_registry_metaclass(registry_store):
+    """
+    Returns a new Registry metaclass.
+    """
+
+    if not isinstance(registry_store, dict):
+        raise TypeError("'registry_store' argument must be a dict")
+
+    class Registry(type):
+        """
+        A metaclass that stores a reference to all registered classes.
+        """
+
+        def __new__(meta, class_name, base_classes, class_dict):
+            """
+            Creates and returns a new instance of Registry, which is a
+            class named 'class_name' derived from 'base_classes' that
+            defines 'class_dict' as additional attributes.
+
+            The returned class is added to 'registry_store' using
+            class_dict["REGISTERED_NAME"] as the name, or 'class_name'
+            if the "REGISTERED_NAME" attribute isn't defined. If the
+            sentinel value 'LEAVE_UNREGISTERED' is specified as the
+            name, then the returned class isn't added to
+            'registry_store'.
+
+            The returned class will have the "REGISTERED_NAME" attribute
+            defined either as its associated key in 'registry_store' or
+            the 'LEAVE_UNREGISTERED' sentinel value.
+            """
+
+            registered_name = class_dict.setdefault("REGISTERED_NAME", class_name)
+            cls = type.__new__(meta, class_name, base_classes, class_dict)
+
+            if registered_name is not LEAVE_UNREGISTERED:
+                if registered_name in registry_store:
+                    raise ValueError(
+                        "The name %s is already registered; a different value for the"
+                        " 'REGISTERED_NAME' attribute must be chosen" % (registered_name))
+                registry_store[registered_name] = cls
+
+            return cls
+
+    return Registry
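As a usage note on the new factory path: after this change, constructing a hook is entirely name-driven. The following is a hedged sketch, not part of the commit; the `hook_logger`, `fixture`, and `test_report` objects are placeholders for values resmoke normally supplies, while the import path, `make_custom_behavior()`, and CleanEveryN's signature are taken from the diff above:

```python
from buildscripts.resmokelib.testing import hooks

# Importing the package runs autoloader.load_all_modules(), so every
# CustomBehavior subclass under hooks/ has already registered itself with
# the registry that make_custom_behavior() consults.
hook = hooks.make_custom_behavior("CleanEveryN", hook_logger, fixture, n=5)

hook.before_suite(test_report)
# ... the runner executes each test, calling before_test()/after_test() ...
hook.after_suite(test_report)
```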