diff options
author | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2018-07-02 23:56:37 -0400 |
---|---|---|
committer | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2018-07-02 23:56:37 -0400 |
commit | 393c0da8464d2e3aea20d17168f529c00466d6f3 (patch) | |
tree | 007b00e9748fc0baaebe4480c7093c829a4107ec | |
parent | ac99fe1f00339897b162d41625982298ba4eb34c (diff) | |
download | mongo-393c0da8464d2e3aea20d17168f529c00466d6f3.tar.gz |
SERVER-35124 Tolerate replSetStepDown failing in stepdown suites.
It is possible for a database operation to prevent the global X lock
from being acquired within 10 seconds, which causes the replSetStepDown
command to fail. We'll simply retry 8 seconds later.
(cherry picked from commit d7ed31017007fd5963390247e6ae68714cb6a61c)
-rw-r--r-- | buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml | 2 | ||||
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/stepdown.py | 9 |
2 files changed, 10 insertions, 1 deletion
diff --git a/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml b/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml index 58cd51f8c5f..5b1bfda7cd0 100644 --- a/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml +++ b/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml @@ -45,7 +45,7 @@ executor: enableTestCommands: 1 numInitialSyncAttempts: 1 writePeriodicNoops: 1 - num_rs_nodes_per_shard: 2 + num_rs_nodes_per_shard: 3 num_shards: 2 configsvr_options: all_nodes_electable: true diff --git a/buildscripts/resmokelib/testing/hooks/stepdown.py b/buildscripts/resmokelib/testing/hooks/stepdown.py index 4ada20bebff..7f4eaa0e8c7 100644 --- a/buildscripts/resmokelib/testing/hooks/stepdown.py +++ b/buildscripts/resmokelib/testing/hooks/stepdown.py @@ -189,6 +189,7 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at self._step_down(rs_fixture) self._is_idle_evt.set() + # pylint: disable=R0912,R0915 def _step_down(self, rs_fixture): try: primary = rs_fixture.get_primary(timeout_secs=self._stepdown_interval_secs) @@ -223,6 +224,14 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at except pymongo.errors.AutoReconnect: # AutoReconnect exceptions are expected as connections are closed during stepdown. pass + except pymongo.errors.ExecutionTimeout as err: + # ExecutionTimeout exceptions are expected when the election attempt fails due to + # not being able to acquire the global X lock within self._stepdown_duration_secs + # seconds. We'll try again after self._stepdown_interval_secs seconds. + self.logger.info( + "Failed to step down the primary on port %d of replica set '%s': %s", + primary.port, rs_fixture.replset_name, err) + return except pymongo.errors.PyMongoError: self.logger.exception( "Error while stepping down the primary on port %d of replica set '%s'.", |