summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Brett Nawrocki <brett.nawrocki@mongodb.com> 2021-10-15 19:02:39 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-11-23 22:53:36 +0000
commit 417283d903c8234f7c444851a438275fa8a67e1b (patch)
tree 8308313ffd2111aae50aace0825fca632f423e71
parent 0abc2f80baa4a9f77955e4c64419bff29799301a (diff)
download mongo-417283d903c8234f7c444851a438275fa8a67e1b.tar.gz
SERVER-54623 Wait longer for session collection in ShardedClusterFixture
Test suites using the ShardedClusterFixture could time out waiting for replication after calling refreshLogicalSessionCacheNow during setup. This was most commonly seen during periods of high disk utilization. Instead of failing the test, wait however long it takes. (cherry picked from commit 4dd7f84be70ee95893ecf863f2d82c79edb2b8df)
-rw-r--r-- buildscripts/resmokelib/testing/fixtures/interface.py 8
-rw-r--r-- buildscripts/resmokelib/testing/fixtures/replicaset.py 12
-rw-r--r-- buildscripts/resmokelib/testing/fixtures/shardedcluster.py 20
3 files changed, 29 insertions, 11 deletions
diff --git a/buildscripts/resmokelib/testing/fixtures/interface.py b/buildscripts/resmokelib/testing/fixtures/interface.py
index 321955bcddd..c56353d60c3 100644
--- a/buildscripts/resmokelib/testing/fixtures/interface.py
+++ b/buildscripts/resmokelib/testing/fixtures/interface.py
@@ -26,6 +26,14 @@ def make_fixture(class_name, *args, **kwargs):
class Fixture(object, metaclass=registry.make_registry_metaclass(_FIXTURES)):
"""Base class for all fixtures."""
+ # Error response codes copied from mongo/base/error_codes.yml.
+ _WRITE_CONCERN_FAILED = 64
+ _NODE_NOT_FOUND = 74
+ _NEW_REPLICA_SET_CONFIGURATION_INCOMPATIBLE = 103
+ _CONFIGURATION_IN_PROGRESS = 109
+ _CURRENT_CONFIG_NOT_COMMITTED_YET = 308
+ _INTERRUPTED_DUE_TO_REPL_STATE_CHANGE = 11602
+
# We explicitly set the 'REGISTERED_NAME' attribute so that PyLint realizes that the attribute
# is defined for all subclasses of Fixture.
REGISTERED_NAME = "Fixture"
diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py
index 9c449dbc432..0f0996c5d76 100644
--- a/buildscripts/resmokelib/testing/fixtures/replicaset.py
+++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py
@@ -19,9 +19,6 @@ from ... import utils
class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-instance-attributes
"""Fixture which provides JSTests with a replica set to run against."""
- # Error response codes copied from mongo/base/error_codes.err.
- _NODE_NOT_FOUND = 74
-
def __init__( # pylint: disable=too-many-arguments, too-many-locals
self, logger, job_num, mongod_executable=None, mongod_options=None, dbpath_prefix=None,
preserve_dbpath=False, num_nodes=2, start_initial_sync_node=False,
@@ -196,7 +193,7 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
raise errors.ServerFailure(msg)
time.sleep(5) # Wait a little bit before trying again.
- def await_last_op_committed(self):
+ def await_last_op_committed(self, timeout_secs=None):
"""Wait for the last majority committed op to be visible."""
primary_client = self.get_primary().mongo_client()
self.auth(primary_client, self.auth_options)
@@ -221,7 +218,8 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
return len(up_to_date_nodes) == len(self.nodes)
- self._await_cmd_all_nodes(check_rcmaj_optime, "waiting for last committed optime")
+ self._await_cmd_all_nodes(check_rcmaj_optime, "waiting for last committed optime",
+ timeout_secs)
def await_ready(self):
"""Wait for replica set to be ready."""
@@ -391,7 +389,7 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
return self._await_cmd_all_nodes(is_primary, "waiting for a primary", timeout_secs)
- def _await_cmd_all_nodes(self, fn, msg, timeout_secs=30):
+ def _await_cmd_all_nodes(self, fn, msg, timeout_secs=None):
"""Run `fn` on all nodes until it returns a truthy value.
Return the node for which makes `fn` become truthy.
@@ -400,6 +398,8 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
the MongoDFixture corresponding to that node.
"""
+ if timeout_secs is None:
+ timeout_secs = self.AWAIT_REPL_TIMEOUT_MINS * 60
start = time.time()
clients = {}
while True:
diff --git a/buildscripts/resmokelib/testing/fixtures/shardedcluster.py b/buildscripts/resmokelib/testing/fixtures/shardedcluster.py
index f2a6814b171..50f1573381d 100644
--- a/buildscripts/resmokelib/testing/fixtures/shardedcluster.py
+++ b/buildscripts/resmokelib/testing/fixtures/shardedcluster.py
@@ -82,6 +82,19 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst
for shard in self.shards:
shard.setup()
+ def refresh_logical_session_cache(self, target):
+ """Refresh logical session cache with no timeout."""
+ primary = (target.mongo_client()
+ if self.num_rs_nodes_per_shard is None else target.get_primary().mongo_client())
+ try:
+ primary.admin.command({"refreshLogicalSessionCacheNow": 1})
+ except pymongo.errors.OperationFailure as err:
+ if err.code != self._WRITE_CONCERN_FAILED:
+ raise err
+ self.logger.info("Ignoring write concern timeout for refreshLogicalSessionCacheNow "
+ "command and continuing to wait")
+ target.await_last_op_committed(target.AWAIT_REPL_TIMEOUT_FOREVER_MINS * 60)
+
def await_ready(self):
"""Block until the fixture can be used for testing."""
# Wait for the config server
@@ -137,13 +150,10 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst
# Ensure that the sessions collection gets auto-sharded by the config server
if self.configsvr is not None:
- primary = self.configsvr.get_primary().mongo_client()
- primary.admin.command({"refreshLogicalSessionCacheNow": 1})
+ self.refresh_logical_session_cache(self.configsvr)
for shard in self.shards:
- primary = (shard.mongo_client() if self.num_rs_nodes_per_shard is None else
- shard.get_primary().mongo_client())
- primary.admin.command({"refreshLogicalSessionCacheNow": 1})
+ self.refresh_logical_session_cache(shard)
def _auth_to_db(self, client):
"""Authenticate client for the 'authenticationDatabase'."""