SERVER-36817 replSetFreeze command run by stepdown thread may fail when server is already primary

(cherry picked from commit 0c0a4acea4a1c7bb579f5aaaa89a6f1545cf22ef)
author: Jonathan Abrahams <jonathan@mongodb.com> 2018-12-21 21:41:03 -0500
committer: Max Hirschhorn <max.hirschhorn@mongodb.com> 2018-12-21 21:41:03 -0500
commit: ccbe71910133c84645496ce360f6b65564d415fa (patch)
tree: 9ee61be0c428188779770616fe95a023a96e4933 /buildscripts/resmokelib
parent: d6b697e0c546521e5c3739ec45160512d149a368 (diff)
download: mongo-ccbe71910133c84645496ce360f6b65564d415fa.tar.gz
1 files changed, 9 insertions, 17 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/stepdown.py b/buildscripts/resmokelib/testing/hooks/stepdown.py
index d856850c134..47bd3e5720a 100644
--- a/buildscripts/resmokelib/testing/hooks/stepdown.py
+++ b/buildscripts/resmokelib/testing/hooks/stepdown.py
@@ -25,7 +25,7 @@ class ContinuousStepdown(interface.Hook):  # pylint: disable=too-many-instance-a
 
     def __init__(  # pylint: disable=too-many-arguments
             self, hook_logger, fixture, config_stepdown=True, shard_stepdown=True,
-            stepdown_duration_secs=10, stepdown_interval_ms=8000, terminate=False, kill=False,
+            stepdown_interval_ms=8000, terminate=False, kill=False,
             use_stepdown_permitted_file=False, use_stepping_down_file=False,
             wait_for_mongos_retarget=False):
         """Initialize the ContinuousStepdown.
@@ -35,7 +35,6 @@ class ContinuousStepdown(interface.Hook):  # pylint: disable=too-many-instance-a
             fixture: the target fixture (a replica set or sharded cluster).
             config_stepdown: whether to stepdown the CSRS.
             shard_stepdown: whether to stepdown the shard replica sets in a sharded cluster.
-            stepdown_duration_secs: the number of seconds to step down the primary.
             stepdown_interval_ms: the number of milliseconds between stepdowns.
             terminate: shut down the node cleanly as a means of stepping it down.
             kill: With a 50% probability, kill the node instead of shutting it down cleanly.
@@ -53,7 +52,6 @@ class ContinuousStepdown(interface.Hook):  # pylint: disable=too-many-instance-a
         self._fixture = fixture
         self._config_stepdown = config_stepdown
         self._shard_stepdown = shard_stepdown
-        self._stepdown_duration_secs = stepdown_duration_secs
         self._stepdown_interval_secs = float(stepdown_interval_ms) / 1000
         self._wait_for_mongos_retarget = wait_for_mongos_retarget
 
@@ -88,8 +86,8 @@ class ContinuousStepdown(interface.Hook):  # pylint: disable=too-many-instance-a
         utils.remove_if_exists(self._stepping_down_file)
         self._stepdown_thread = _StepdownThread(
             self.logger, self._mongos_fixtures, self._rs_fixtures, self._stepdown_interval_secs,
-            self._stepdown_duration_secs, self._terminate, self._kill,
-            self._stepdown_permitted_file, self._stepping_down_file, self._wait_for_mongos_retarget)
+            self._terminate, self._kill, self._stepdown_permitted_file, self._stepping_down_file,
+            self._wait_for_mongos_retarget)
         self.logger.info("Starting the stepdown thread.")
         self._stepdown_thread.start()
 
@@ -142,9 +140,8 @@ class ContinuousStepdown(interface.Hook):  # pylint: disable=too-many-instance-a
 
 class _StepdownThread(threading.Thread):  # pylint: disable=too-many-instance-attributes
     def __init__(  # pylint: disable=too-many-arguments
-            self, logger, mongos_fixtures, rs_fixtures, stepdown_interval_secs,
-            stepdown_duration_secs, terminate, kill, stepdown_permitted_file, stepping_down_file,
-            wait_for_mongos_retarget):
+            self, logger, mongos_fixtures, rs_fixtures, stepdown_interval_secs, terminate, kill,
+            stepdown_permitted_file, stepping_down_file, wait_for_mongos_retarget):
         """Initialize _StepdownThread."""
         threading.Thread.__init__(self, name="StepdownThread")
         self.daemon = True
@@ -152,7 +149,10 @@ class _StepdownThread(threading.Thread):  # pylint: disable=too-many-instance-at
         self._mongos_fixtures = mongos_fixtures
         self._rs_fixtures = rs_fixtures
         self._stepdown_interval_secs = stepdown_interval_secs
-        self._stepdown_duration_secs = stepdown_duration_secs
+        # We set self._stepdown_duration_secs to a very long time to ensure that the former
+        # primary will not step back up on its own; instead, the stepdown thread will cause it to
+        # step up via replSetStepUp.
+        self._stepdown_duration_secs = 24 * 60 * 60  # 24 hours
         self._terminate = terminate
         self._kill = kill
         self._stepdown_permitted_file = stepdown_permitted_file
@@ -293,14 +293,6 @@ class _StepdownThread(threading.Thread):  # pylint: disable=too-many-instance-at
             except pymongo.errors.AutoReconnect:
                 # AutoReconnect exceptions are expected as connections are closed during stepdown.
                 pass
-            except pymongo.errors.ExecutionTimeout as err:
-                # ExecutionTimeout exceptions are expected when the election attempt fails due to
-                # not being able to acquire the global X lock within self._stepdown_duration_secs
-                # seconds. We'll try again after self._stepdown_interval_secs seconds.
-                self.logger.info(
-                    "Failed to step down the primary on port %d of replica set '%s': %s",
-                    primary.port, rs_fixture.replset_name, err)
-                return
             except pymongo.errors.PyMongoError:
                 self.logger.exception(
                     "Error while stepping down the primary on port %d of replica set '%s'.",
author	Jonathan Abrahams <jonathan@mongodb.com>	2018-12-21 21:41:03 -0500
committer	Max Hirschhorn <max.hirschhorn@mongodb.com>	2018-12-21 21:41:03 -0500
commit	ccbe71910133c84645496ce360f6b65564d415fa (patch)
tree	9ee61be0c428188779770616fe95a023a96e4933 /buildscripts/resmokelib
parent	d6b697e0c546521e5c3739ec45160512d149a368 (diff)
download	mongo-ccbe71910133c84645496ce360f6b65564d415fa.tar.gz