summaryrefslogtreecommitdiff
path: root/buildscripts/resmokelib/testing/fixtures/replicaset.py
diff options
context:
space:
mode:
authorJudah Schvimer <judah@mongodb.com>2020-06-08 23:20:37 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-07-09 22:39:43 +0000
commit64fcdabe8c7cc38188f31fd606379f935c94555a (patch)
tree8dbc2fc938b52f9c7f410abff410d30beaeaeceb /buildscripts/resmokelib/testing/fixtures/replicaset.py
parent915402884c52da861b1660cd6a7172c552ce1806 (diff)
downloadmongo-64fcdabe8c7cc38188f31fd606379f935c94555a.tar.gz
SERVER-46541 enable automatic reconfigs for initial sync semantics by default
Diffstat (limited to 'buildscripts/resmokelib/testing/fixtures/replicaset.py')
-rw-r--r--buildscripts/resmokelib/testing/fixtures/replicaset.py92
1 files changed, 80 insertions, 12 deletions
diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py
index e5bd42521ee..7f9cb751e00 100644
--- a/buildscripts/resmokelib/testing/fixtures/replicaset.py
+++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py
@@ -20,8 +20,12 @@ from buildscripts.resmokelib.testing.fixtures import standalone
class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-instance-attributes
"""Fixture which provides JSTests with a replica set to run against."""
- # Error response codes copied from mongo/base/error_codes.err.
+ # Error response codes copied from mongo/base/error_codes.yml.
_NODE_NOT_FOUND = 74
+ _NEW_REPLICA_SET_CONFIGURATION_INCOMPATIBLE = 103
+ _CONFIGURATION_IN_PROGRESS = 109
+ _CURRENT_CONFIG_NOT_COMMITTED_YET = 308
+ _INTERRUPTED_DUE_TO_REPL_STATE_CHANGE = 11602
def __init__( # pylint: disable=too-many-arguments, too-many-locals
self, logger, job_num, mongod_options=None, dbpath_prefix=None, preserve_dbpath=False,
@@ -108,7 +112,7 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
self.initial_sync_node = None
self.initial_sync_node_idx = -1
- def setup(self): # pylint: disable=too-many-branches,too-many-statements
+ def setup(self): # pylint: disable=too-many-branches,too-many-statements,too-many-locals
"""Set up the replica set."""
self.replset_name = self.mongod_options.get("replSet", "rs")
if not self.nodes:
@@ -232,18 +236,44 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
node.await_ready()
# Add in the members one at a time, since non force reconfigs can only add/remove a
# single voting member at a time.
- repl_config["version"] = client.admin.command(
- {"replSetGetConfig": 1})['config']['version']
for ind in range(2, len(members) + 1):
- repl_config["version"] = repl_config["version"] + 1
- repl_config["members"] = members[:ind]
- self.logger.info("Issuing replSetReconfig command: %s", repl_config)
- self._configure_repl_set(
- client, {
- "replSetReconfig": repl_config,
- "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
- })
+ self.logger.info("Adding in node %d: %s", ind, members[ind - 1])
+ while True:
+ try:
+ # 'newlyAdded' removal reconfigs could bump the version.
+ # Get the current version to be safe.
+ curr_version = client.admin.command(
+ {"replSetGetConfig": 1})['config']['version']
+ repl_config["version"] = curr_version + 1
+ repl_config["members"] = members[:ind]
+ self.logger.info("Issuing replSetReconfig command: %s", repl_config)
+ self._configure_repl_set(
+ client, {
+ "replSetReconfig": repl_config,
+ "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
+ })
+ break
+ except pymongo.errors.OperationFailure as err:
+ # These error codes may be transient, and so we retry the reconfig with a
+ # (potentially) higher config version. We should not receive these codes
+ # indefinitely.
+ if (err.code !=
+ ReplicaSetFixture._NEW_REPLICA_SET_CONFIGURATION_INCOMPATIBLE
+ and err.code != ReplicaSetFixture._CURRENT_CONFIG_NOT_COMMITTED_YET
+ and err.code != ReplicaSetFixture._CONFIGURATION_IN_PROGRESS
+ and err.code != ReplicaSetFixture._NODE_NOT_FOUND and err.code !=
+ ReplicaSetFixture._INTERRUPTED_DUE_TO_REPL_STATE_CHANGE):
+ msg = ("Operation failure while setting up the "
+ "replica set fixture: {}").format(err)
+ self.logger.error(msg)
+ raise errors.ServerFailure(msg)
+
+ msg = ("Retrying failed attempt to add new node to fixture: {}").format(err)
+ self.logger.error(msg)
+ time.sleep(0.1) # Wait a little bit before trying again.
+
self._await_secondaries()
+ self._await_newly_added_removals()
def pids(self):
""":return: all pids owned by this fixture if any."""
@@ -407,6 +437,44 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
break
time.sleep(0.1) # Wait a little bit before trying again.
+ def _should_await_newly_added_removals_longer(self, client):
+ """
+ Return whether the current replica set config has any 'newlyAdded' fields.
+
+ Return true if the current config is not committed.
+ """
+
+ get_config_res = client.admin.command(
+ {"replSetGetConfig": 1, "commitmentStatus": True, "$_internalIncludeNewlyAdded": True})
+ for member in get_config_res["config"]["members"]:
+ if "newlyAdded" in member:
+ self.logger.info(
+ "Waiting longer for 'newlyAdded' removals, " +
+ "member %d is still 'newlyAdded'", member["_id"])
+ return True
+ if not get_config_res["commitmentStatus"]:
+ self.logger.info("Waiting longer for 'newlyAdded' removals, " +
+ "config is not yet committed")
+ return True
+
+ return False
+
+ def _await_newly_added_removals(self):
+ """
+ Wait for all 'newlyAdded' fields to be removed from the replica set config.
+
+ Additionally, wait for that config to be committed, and for the in-memory
+ and on-disk configs to match.
+ """
+
+ self.logger.info("Waiting to remove all 'newlyAdded' fields")
+ primary = self.get_primary()
+ client = primary.mongo_client()
+ self.auth(client, self.auth_options)
+ while self._should_await_newly_added_removals_longer(client):
+ time.sleep(0.1) # Wait a little bit before trying again.
+ self.logger.info("All 'newlyAdded' fields removed")
+
def _setup_cwrwc_defaults(self):
"""Set up the cluster-wide read/write concern defaults."""
if self.default_read_concern is None and self.default_write_concern is None: