summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Siyuan Zhou <siyuan.zhou@mongodb.com> 2020-05-21 19:11:57 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-06-19 02:27:52 +0000
commit: c9454a38845a3755e6ec638dea570122cc68ae61 (patch)
tree: 842196d94e4b6d4fcc3db6c019d35c8e996e87c4
parent: 239975ac9ca4f5f31adc3b4a727ba516f6587aac (diff)
download: mongo-c9454a38845a3755e6ec638dea570122cc68ae61.tar.gz
SERVER-47950 Continuous stepdown thread should fail resmoke job loudly on exceptions
(cherry picked from commit 116d80c8267425fb71043f5a1200993355917707)
-rw-r--r-- buildscripts/resmokelib/testing/hooks/stepdown.py | 18
-rw-r--r-- buildscripts/tests/resmokelib/testing/hooks/test_stepdown.py | 2
2 files changed, 12 insertions, 8 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/stepdown.py b/buildscripts/resmokelib/testing/hooks/stepdown.py
index f61d070037e..7f42118a14c 100644
--- a/buildscripts/resmokelib/testing/hooks/stepdown.py
+++ b/buildscripts/resmokelib/testing/hooks/stepdown.py
@@ -101,24 +101,16 @@ class ContinuousStepdown(interface.Hook): # pylint: disable=too-many-instance-a
def before_test(self, test, test_report):
"""Before test."""
- self._check_thread()
self.logger.info("Resuming the stepdown thread.")
self._stepdown_thread.pause()
self._stepdown_thread.resume()
def after_test(self, test, test_report):
"""After test."""
- self._check_thread()
self.logger.info("Pausing the stepdown thread.")
self._stepdown_thread.pause()
self.logger.info("Paused the stepdown thread.")
- def _check_thread(self):
- if not self._stepdown_thread.is_alive():
- msg = "The stepdown thread is not running."
- self.logger.error(msg)
- raise errors.ServerFailure(msg)
-
def _add_fixture(self, fixture):
if isinstance(fixture, replicaset.ReplicaSetFixture):
if not fixture.all_nodes_electable:
@@ -419,6 +411,8 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
# Proactively log the exception when it happens so it will be
# flushed immediately.
self.logger.exception("Stepdown Thread threw exception")
+ # The event should be signaled whenever the thread is not performing stepdowns.
+ self._is_idle_evt.set()
def stop(self):
"""Stop the thread."""
@@ -434,6 +428,8 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
# Wait until we are no longer executing stepdowns.
self._is_idle_evt.wait()
+ # Check if the thread is alive in case it has thrown an exception while running.
+ self._check_thread()
# Wait until all the replica sets have primaries.
self._await_primaries()
# Wait for Mongos to retarget the primary for each shard and the config server.
@@ -463,6 +459,12 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
# Wait until stop or timeout.
self._is_stopped_evt.wait(timeout)
+ def _check_thread(self):
+ if not self.is_alive():
+ msg = "The stepdown thread is not running."
+ self.logger.error(msg)
+ raise errors.ServerFailure(msg)
+
def _await_primaries(self):
for fixture in self._rs_fixtures:
fixture.get_primary()
diff --git a/buildscripts/tests/resmokelib/testing/hooks/test_stepdown.py b/buildscripts/tests/resmokelib/testing/hooks/test_stepdown.py
index c700ab60bf7..32436a4e346 100644
--- a/buildscripts/tests/resmokelib/testing/hooks/test_stepdown.py
+++ b/buildscripts/tests/resmokelib/testing/hooks/test_stepdown.py
@@ -24,6 +24,8 @@ def _get_threading_lock(test_case, MockCondition): # pylint: disable=invalid-na
class TestStepdownThread(unittest.TestCase):
@mock.patch("buildscripts.resmokelib.testing.fixtures.replicaset.ReplicaSetFixture")
@mock.patch("buildscripts.resmokelib.testing.fixtures.shardedcluster.ShardedClusterFixture")
+ @mock.patch("buildscripts.resmokelib.testing.hooks.stepdown._StepdownThread.is_alive",
+ mock.Mock(return_value=True))
def test_pause_throws_error(self, shardcluster_fixture, rs_fixture):
stepdown_thread = _stepdown._StepdownThread(
logger=logging.getLogger("hook_logger"),