summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Max Hirschhorn <max.hirschhorn@mongodb.com> 2018-07-02 23:56:37 -0400
committer: Max Hirschhorn <max.hirschhorn@mongodb.com> 2018-07-02 23:56:37 -0400
commit: 393c0da8464d2e3aea20d17168f529c00466d6f3 (patch)
tree: 007b00e9748fc0baaebe4480c7093c829a4107ec
parent: ac99fe1f00339897b162d41625982298ba4eb34c (diff)
download: mongo-393c0da8464d2e3aea20d17168f529c00466d6f3.tar.gz
SERVER-35124 Tolerate replSetStepDown failing in stepdown suites.
It is possible for a database operation to prevent the global X lock from being acquired within 10 seconds. We'll simply retry 8 seconds later. (cherry picked from commit d7ed31017007fd5963390247e6ae68714cb6a61c)
-rw-r--r-- buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml | 2
-rw-r--r-- buildscripts/resmokelib/testing/hooks/stepdown.py | 9
2 files changed, 10 insertions, 1 deletions
diff --git a/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml b/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml
index 58cd51f8c5f..5b1bfda7cd0 100644
--- a/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/jstestfuzz_sharded_continuous_stepdown.yml
@@ -45,7 +45,7 @@ executor:
enableTestCommands: 1
numInitialSyncAttempts: 1
writePeriodicNoops: 1
- num_rs_nodes_per_shard: 2
+ num_rs_nodes_per_shard: 3
num_shards: 2
configsvr_options:
all_nodes_electable: true
diff --git a/buildscripts/resmokelib/testing/hooks/stepdown.py b/buildscripts/resmokelib/testing/hooks/stepdown.py
index 4ada20bebff..7f4eaa0e8c7 100644
--- a/buildscripts/resmokelib/testing/hooks/stepdown.py
+++ b/buildscripts/resmokelib/testing/hooks/stepdown.py
@@ -189,6 +189,7 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
self._step_down(rs_fixture)
self._is_idle_evt.set()
+ # pylint: disable=R0912,R0915
def _step_down(self, rs_fixture):
try:
primary = rs_fixture.get_primary(timeout_secs=self._stepdown_interval_secs)
@@ -223,6 +224,14 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
except pymongo.errors.AutoReconnect:
# AutoReconnect exceptions are expected as connections are closed during stepdown.
pass
+ except pymongo.errors.ExecutionTimeout as err:
+ # ExecutionTimeout exceptions are expected when the election attempt fails due to
+ # not being able to acquire the global X lock within self._stepdown_duration_secs
+ # seconds. We'll try again after self._stepdown_interval_secs seconds.
+ self.logger.info(
+ "Failed to step down the primary on port %d of replica set '%s': %s",
+ primary.port, rs_fixture.replset_name, err)
+ return
except pymongo.errors.PyMongoError:
self.logger.exception(
"Error while stepping down the primary on port %d of replica set '%s'.",