diff options
author | Judah Schvimer <judah@mongodb.com> | 2016-09-13 10:06:25 -0400 |
---|---|---|
committer | Judah Schvimer <judah@mongodb.com> | 2016-09-13 10:06:25 -0400 |
commit | 5f40282b37c25719eaaf89ee24ae2d4fa7d537be (patch) | |
tree | 5064035cecafa727394a1304b6cb09b648aaef35 /buildscripts | |
parent | 5fa0cd255af2bf7ba946e74dbe3f725d63bd68c7 (diff) | |
download | mongo-5f40282b37c25719eaaf89ee24ae2d4fa7d537be.tar.gz |
SERVER-25068 Add passthrough hook to include periodic initial sync members
Diffstat (limited to 'buildscripts')
-rw-r--r-- | buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml | 2 | ||||
-rw-r--r-- | buildscripts/resmokelib/testing/hooks.py | 134 |
2 files changed, 128 insertions, 8 deletions
diff --git a/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml b/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml index ead42eb2260..ca6c529b0ca 100644 --- a/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml +++ b/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml @@ -20,7 +20,7 @@ executor: readMode: commands hooks: - class: IntermediateInitialSync - useResync: True + use_resync: True n: *run_hook_interval - class: CleanEveryN n: *run_hook_interval diff --git a/buildscripts/resmokelib/testing/hooks.py b/buildscripts/resmokelib/testing/hooks.py index b67987fb8a0..15e71c8d19d 100644 --- a/buildscripts/resmokelib/testing/hooks.py +++ b/buildscripts/resmokelib/testing/hooks.py @@ -10,6 +10,7 @@ import sys import bson import pymongo +import random from . import fixtures from . import testcases @@ -136,9 +137,9 @@ class JsCustomBehavior(CustomBehavior): def __init__(self, logger, fixture, js_filename, description, shell_options=None): CustomBehavior.__init__(self, logger, fixture, description) self.hook_test_case = testcases.JSTestCase(logger, - js_filename, - shell_options=shell_options, - test_kind="Hook") + js_filename, + shell_options=shell_options, + test_kind="Hook") self.test_case_is_configured = False def before_suite(self, test_report): @@ -163,23 +164,141 @@ class JsCustomBehavior(CustomBehavior): finally: test_report.stopTest(self.hook_test_case) +class BackgroundInitialSync(JsCustomBehavior): + """ + After every test, this hook checks if a background node has finished initial sync and if so, + validates it, tears it down, and restarts it. + + This test accepts a parameter 'n' that specifies a number of tests after which it will wait for + replication to finish before validating and restarting the initial sync node. It also accepts + a parameter 'use_resync' for whether to restart the initial sync node with resync or by + shutting it down and restarting it. + + This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'. If used + at the same time as CleanEveryN, the 'n' value passed to this hook should be equal to the 'n' + value for CleanEveryN. + """ + + DEFAULT_N = CleanEveryN.DEFAULT_N + + def __init__(self, logger, fixture, use_resync=False, n=DEFAULT_N): + description = "Background Initial Sync" + js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js") + JsCustomBehavior.__init__(self, logger, fixture, js_filename, description) + + self.use_resync = use_resync + self.n = n + self.tests_run = 0 + self.random_restarts = 0 + + + def after_test(self, test, test_report): + self.tests_run += 1 + sync_node = self.fixture.get_initial_sync_node(); + sync_node_conn = utils.new_mongo_client(port=sync_node.port) + description = "{0} after running '{1}'".format(self.description, test.short_name()) + + # Restarts initial sync by shutting down the node, clearing its data, and restarting it, + # or by calling resync if use_resync is specified. + def restart_init_sync(): + if self.use_resync: + self.logger.info("Calling resync on initial sync node...") + cmd = bson.SON([("resync", 1), ("wait", 0)]) + try: + sync_node_conn.admin.command(cmd) + except pymongo.errors.OperationFailure as err: + self.logger.exception("{0} failed".format(description)) + test_report.addFailure(self.hook_test_case, sys.exc_info()) + raise errors.TestFailure(err.args[0]) + else: + # Tear down and restart the initial sync node to start initial sync again. + teardown_success = sync_node.teardown() + + self.logger.info("Starting the initial sync node back up again...") + sync_node.setup() + sync_node.await_ready() + if not teardown_success: + raise errors.TestFailure("%s did not exit cleanly" % (sync_node)) + + # If it's been 'n' tests so far, wait for the initial sync node to finish syncing. + if self.tests_run >= self.n: + self.tests_run = 0 + self.logger.info("%d tests have been run against the fixture, waiting for initial sync" + " node to go into SECONDARY state", self.tests_run) + cmd = bson.SON([("replSetTest", 1), + ("waitForMemberState", 2), + ("timeoutMillis", 20 * 60 * 1000)]) + try: + sync_node_conn.admin.command(cmd) + except pymongo.errors.OperationFailure as err: + self.logger.exception("{0} failed".format(description)) + test_report.addFailure(self.hook_test_case, sys.exc_info()) + raise errors.TestFailure(err.args[0]) + + # Check if the initial sync node is in SECONDARY state. If it's been 'n' tests, then it + # should have waited to be in SECONDARY state and the test should be marked as a failure. + # Otherwise, we just skip the hook and will check again after the next test. + try: + state = sync_node_conn.admin.command("replSetGetStatus").get("myState") + if state != 2: + if self.tests_run == 0: + self.logger.exception("{0} failed".format(description)) + test_report.addFailure(self.hook_test_case, sys.exc_info()) + raise errors.TestFailure("Initial sync node did not catch up after waiting" + " 20 minutes") + + self.logger.info("Initial sync node is in state %d, not state SECONDARY (2)." + " Skipping BackgroundInitialSync hook for %s", + state, + test.short_name()) + + # If we have not restarted initial sync since the last time we ran the data + # validation, and if we're using resync for restarts, restart initial sync with a + # 20% probability. + if self.random_restarts < 1 and self.use_resync and random.random() < 0.2: + self.logger.info("Calling resync randomly in the middle of initial sync") + restart_init_sync() + self.random_restarts += 1 + return + except pymongo.errors.OperationFailure: + # replSetGetStatus can fail if the node is in STARTUP state. The node will soon go into + # STARTUP2 state and replSetGetStatus will succeed after the next test. + self.logger.info("replSetGetStatus call failed in BackgroundInitialSync hook," + " skipping hook for %s", test.short_name()) + return + + self.random_restarts = 0 + + # We're in SECONDARY state so validate the data. If there's a failure restart the fixture + # so we don't get multiple occurrences of the same failure, and then rethrow the failure. + try: + JsCustomBehavior.after_test(self, test, test_report) + except errors.TestFailure as err: + self.logger.exception("{0} failed with {1}".format(description, err.args[0])) + restart_init_sync() + raise errors.TestFailure(err.args[0]) + + restart_init_sync() + class IntermediateInitialSync(JsCustomBehavior): """ This hook accepts a parameter 'n' that specifies a number of tests after which it will start up - a node to initial sync, wait for replication to finish, and then validate the data. + a node to initial sync, wait for replication to finish, and then validate the data. It also + accepts a parameter 'use_resync' for whether to restart the initial sync node with resync or by + shutting it down and restarting it. This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'. """ DEFAULT_N = CleanEveryN.DEFAULT_N - def __init__(self, logger, fixture, useResync=False, n=DEFAULT_N): + def __init__(self, logger, fixture, use_resync=False, n=DEFAULT_N): description = "Intermediate Initial Sync" js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js") JsCustomBehavior.__init__(self, logger, fixture, js_filename, description) - self.useResync = useResync + self.use_resync = use_resync self.n = n self.tests_run = 0 @@ -195,7 +314,7 @@ class IntermediateInitialSync(JsCustomBehavior): description = "{0} after running '{1}'".format(self.description, test.short_name()) teardown_success = True - if self.useResync: + if self.use_resync: self.logger.info("Calling resync on initial sync node...") cmd = bson.SON([("resync", 1)]) try: @@ -266,4 +385,5 @@ _CUSTOM_BEHAVIORS = { "CheckReplDBHash": CheckReplDBHash, "ValidateCollections": ValidateCollections, "IntermediateInitialSync": IntermediateInitialSync, + "BackgroundInitialSync": BackgroundInitialSync, } |