summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJudah Schvimer <judah@mongodb.com>2016-09-13 10:06:25 -0400
committerJudah Schvimer <judah@mongodb.com>2016-09-13 10:06:25 -0400
commit5f40282b37c25719eaaf89ee24ae2d4fa7d537be (patch)
tree5064035cecafa727394a1304b6cb09b648aaef35
parent5fa0cd255af2bf7ba946e74dbe3f725d63bd68c7 (diff)
downloadmongo-5f40282b37c25719eaaf89ee24ae2d4fa7d537be.tar.gz
SERVER-25068 Add passthrough hook to include periodic initial sync members
-rw-r--r--buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml2
-rw-r--r--buildscripts/resmokelib/testing/hooks.py134
2 files changed, 128 insertions, 8 deletions
diff --git a/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml b/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml
index ead42eb2260..ca6c529b0ca 100644
--- a/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml
+++ b/buildscripts/resmokeconfig/suites/core_small_oplog_rs_resync_static.yml
@@ -20,7 +20,7 @@ executor:
readMode: commands
hooks:
- class: IntermediateInitialSync
- useResync: True
+ use_resync: True
n: *run_hook_interval
- class: CleanEveryN
n: *run_hook_interval
diff --git a/buildscripts/resmokelib/testing/hooks.py b/buildscripts/resmokelib/testing/hooks.py
index b67987fb8a0..15e71c8d19d 100644
--- a/buildscripts/resmokelib/testing/hooks.py
+++ b/buildscripts/resmokelib/testing/hooks.py
@@ -10,6 +10,7 @@ import sys
import bson
import pymongo
+import random
from . import fixtures
from . import testcases
@@ -136,9 +137,9 @@ class JsCustomBehavior(CustomBehavior):
def __init__(self, logger, fixture, js_filename, description, shell_options=None):
CustomBehavior.__init__(self, logger, fixture, description)
self.hook_test_case = testcases.JSTestCase(logger,
- js_filename,
- shell_options=shell_options,
- test_kind="Hook")
+ js_filename,
+ shell_options=shell_options,
+ test_kind="Hook")
self.test_case_is_configured = False
def before_suite(self, test_report):
@@ -163,23 +164,141 @@ class JsCustomBehavior(CustomBehavior):
finally:
test_report.stopTest(self.hook_test_case)
+class BackgroundInitialSync(JsCustomBehavior):
+ """
+ After every test, this hook checks if a background node has finished initial sync and if so,
+ validates it, tears it down, and restarts it.
+
+ This test accepts a parameter 'n' that specifies a number of tests after which it will wait for
+ replication to finish before validating and restarting the initial sync node. It also accepts
+ a parameter 'use_resync' for whether to restart the initial sync node with resync or by
+ shutting it down and restarting it.
+
+ This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'. If used
+ at the same time as CleanEveryN, the 'n' value passed to this hook should be equal to the 'n'
+ value for CleanEveryN.
+ """
+
+ DEFAULT_N = CleanEveryN.DEFAULT_N
+
+ def __init__(self, logger, fixture, use_resync=False, n=DEFAULT_N):
+ description = "Background Initial Sync"
+ js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
+ JsCustomBehavior.__init__(self, logger, fixture, js_filename, description)
+
+ self.use_resync = use_resync
+ self.n = n
+ self.tests_run = 0
+ self.random_restarts = 0
+
+
+ def after_test(self, test, test_report):
+ self.tests_run += 1
+ sync_node = self.fixture.get_initial_sync_node();
+ sync_node_conn = utils.new_mongo_client(port=sync_node.port)
+ description = "{0} after running '{1}'".format(self.description, test.short_name())
+
+ # Restarts initial sync by shutting down the node, clearing its data, and restarting it,
+ # or by calling resync if use_resync is specified.
+ def restart_init_sync():
+ if self.use_resync:
+ self.logger.info("Calling resync on initial sync node...")
+ cmd = bson.SON([("resync", 1), ("wait", 0)])
+ try:
+ sync_node_conn.admin.command(cmd)
+ except pymongo.errors.OperationFailure as err:
+ self.logger.exception("{0} failed".format(description))
+ test_report.addFailure(self.hook_test_case, sys.exc_info())
+ raise errors.TestFailure(err.args[0])
+ else:
+ # Tear down and restart the initial sync node to start initial sync again.
+ teardown_success = sync_node.teardown()
+
+ self.logger.info("Starting the initial sync node back up again...")
+ sync_node.setup()
+ sync_node.await_ready()
+ if not teardown_success:
+ raise errors.TestFailure("%s did not exit cleanly" % (sync_node))
+
+ # If it's been 'n' tests so far, wait for the initial sync node to finish syncing.
+ if self.tests_run >= self.n:
+ self.tests_run = 0
+ self.logger.info("%d tests have been run against the fixture, waiting for initial sync"
+ " node to go into SECONDARY state", self.tests_run)
+ cmd = bson.SON([("replSetTest", 1),
+ ("waitForMemberState", 2),
+ ("timeoutMillis", 20 * 60 * 1000)])
+ try:
+ sync_node_conn.admin.command(cmd)
+ except pymongo.errors.OperationFailure as err:
+ self.logger.exception("{0} failed".format(description))
+ test_report.addFailure(self.hook_test_case, sys.exc_info())
+ raise errors.TestFailure(err.args[0])
+
+ # Check if the initial sync node is in SECONDARY state. If it's been 'n' tests, then it
+ # should have waited to be in SECONDARY state and the test should be marked as a failure.
+ # Otherwise, we just skip the hook and will check again after the next test.
+ try:
+ state = sync_node_conn.admin.command("replSetGetStatus").get("myState")
+ if state != 2:
+ if self.tests_run == 0:
+ self.logger.exception("{0} failed".format(description))
+ test_report.addFailure(self.hook_test_case, sys.exc_info())
+ raise errors.TestFailure("Initial sync node did not catch up after waiting"
+ " 20 minutes")
+
+ self.logger.info("Initial sync node is in state %d, not state SECONDARY (2)."
+ " Skipping BackgroundInitialSync hook for %s",
+ state,
+ test.short_name())
+
+ # If we have not restarted initial sync since the last time we ran the data
+ # validation, and if we're using resync for restarts, restart initial sync with a
+ # 20% probability.
+ if self.random_restarts < 1 and self.use_resync and random.random() < 0.2:
+ self.logger.info("Calling resync randomly in the middle of initial sync")
+ restart_init_sync()
+ self.random_restarts += 1
+ return
+ except pymongo.errors.OperationFailure:
+ # replSetGetStatus can fail if the node is in STARTUP state. The node will soon go into
+ # STARTUP2 state and replSetGetStatus will succeed after the next test.
+ self.logger.info("replSetGetStatus call failed in BackgroundInitialSync hook,"
+ " skipping hook for %s", test.short_name())
+ return
+
+ self.random_restarts = 0
+
+ # We're in SECONDARY state so validate the data. If there's a failure restart the fixture
+ # so we don't get multiple occurrences of the same failure, and then rethrow the failure.
+ try:
+ JsCustomBehavior.after_test(self, test, test_report)
+ except errors.TestFailure as err:
+ self.logger.exception("{0} failed with {1}".format(description, err.args[0]))
+ restart_init_sync()
+ raise errors.TestFailure(err.args[0])
+
+ restart_init_sync()
+
class IntermediateInitialSync(JsCustomBehavior):
"""
This hook accepts a parameter 'n' that specifies a number of tests after which it will start up
- a node to initial sync, wait for replication to finish, and then validate the data.
+ a node to initial sync, wait for replication to finish, and then validate the data. It also
+ accepts a parameter 'use_resync' for whether to restart the initial sync node with resync or by
+ shutting it down and restarting it.
This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'.
"""
DEFAULT_N = CleanEveryN.DEFAULT_N
- def __init__(self, logger, fixture, useResync=False, n=DEFAULT_N):
+ def __init__(self, logger, fixture, use_resync=False, n=DEFAULT_N):
description = "Intermediate Initial Sync"
js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
JsCustomBehavior.__init__(self, logger, fixture, js_filename, description)
- self.useResync = useResync
+ self.use_resync = use_resync
self.n = n
self.tests_run = 0
@@ -195,7 +314,7 @@ class IntermediateInitialSync(JsCustomBehavior):
description = "{0} after running '{1}'".format(self.description, test.short_name())
teardown_success = True
- if self.useResync:
+ if self.use_resync:
self.logger.info("Calling resync on initial sync node...")
cmd = bson.SON([("resync", 1)])
try:
@@ -266,4 +385,5 @@ _CUSTOM_BEHAVIORS = {
"CheckReplDBHash": CheckReplDBHash,
"ValidateCollections": ValidateCollections,
"IntermediateInitialSync": IntermediateInitialSync,
+ "BackgroundInitialSync": BackgroundInitialSync,
}