summary refs log tree commit diff
diff options
context:
space:
mode:
author Judah Schvimer <judah@mongodb.com> 2017-01-24 17:27:52 -0500
committer Judah Schvimer <judah@mongodb.com> 2017-01-24 17:27:52 -0500
commit 9710251a203ed703055a8435058183c2ddaa4222 (patch)
tree ddac96d95b2325064cf9618c5410746c1ea1b3ee
parent 981bd654b74fac78b1fd7334821f0f6b7e812ee3 (diff)
download mongo-9710251a203ed703055a8435058183c2ddaa4222.tar.gz
SERVER-27551 added retries to replSetInitiate call in python test fixture
-rw-r--r-- buildscripts/resmokelib/testing/fixtures/replicaset.py 24
1 files changed, 23 insertions, 1 deletions
diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py
index 1b59457b181..71e5925679b 100644
--- a/buildscripts/resmokelib/testing/fixtures/replicaset.py
+++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py
@@ -21,6 +21,9 @@ class ReplicaSetFixture(interface.ReplFixture):
Fixture which provides JSTests with a replica set to run against.
"""
+ # Error response codes copied from mongo/base/error_codes.err.
+ _NODE_NOT_FOUND = 74
+
def __init__(self,
logger,
job_num,
@@ -131,7 +134,26 @@ class ReplicaSetFixture(interface.ReplFixture):
initiate_cmd_obj["replSetInitiate"]["settings"] = replset_settings
self.logger.info("Issuing replSetInitiate command...%s", initiate_cmd_obj)
- client.admin.command(initiate_cmd_obj)
+
+ # replSetInitiate and replSetReconfig commands can fail with a NodeNotFound error
+ # if a heartbeat times out during the quorum check. We make up to three attempts
+ # in total to reduce the chance of failing this way.
+ num_initiate_attempts = 3
+ for attempt in range(1, num_initiate_attempts + 1):
+ try:
+ client.admin.command(initiate_cmd_obj)
+ break
+ except pymongo.errors.OperationFailure as err:
+ # Retry on NodeNotFound errors from the "replSetInitiate" command.
+ if err.code != ReplicaSetFixture._NODE_NOT_FOUND:
+ raise
+
+ msg = "replSetInitiate failed attempt {0} of {1} with error: {2}".format(
+ attempt, num_initiate_attempts, err)
+ self.logger.error(msg)
+ if attempt == num_initiate_attempts:
+ raise
+ time.sleep(5) # Wait a little bit before trying again.
def await_ready(self):
# Wait for the primary to be elected.