summary refs log tree commit diff
diff options
context:
space:
mode:
author Matthew Russotto <matthew.russotto@mongodb.com> 2021-01-25 19:59:16 -0500
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-07-20 14:28:25 +0000
commit 28efeba497f86e7d6c32cba7adeaf9ca04e14704 (patch)
tree b4d94d867c02c8885a2727484837b8d2036973b5
parent 546487fe28e0a5679cecb6fb026692f3798c514a (diff)
download mongo-28efeba497f86e7d6c32cba7adeaf9ca04e14704.tar.gz
SERVER-53431 Server should report itself not writable during stepdown
(cherry picked from commit d73b402b349498d799d4d4458cff9b0c4cea5fb6)
-rw-r--r-- jstests/replsets/step_down_on_secondary.js 3
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 19
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 5
3 files changed, 26 insertions, 1 deletions
diff --git a/jstests/replsets/step_down_on_secondary.js b/jstests/replsets/step_down_on_secondary.js
index 1f11e6c2747..1eeef11aa79 100644
--- a/jstests/replsets/step_down_on_secondary.js
+++ b/jstests/replsets/step_down_on_secondary.js
@@ -51,7 +51,8 @@ const joinStepDownThread = startParallelShell(() => {
waitForCurOpByFailPointNoNS(primaryDB, "stepdownHangBeforeRSTLEnqueue");
jsTestLog("Force reconfig to swap the electable node");
-const newConfig = rst.getReplSetConfigFromNode();
+// We must specify the node in getReplSetConfigFromNode, because we do not have a writable primary.
+const newConfig = rst.getReplSetConfigFromNode(0);
const oldPrimaryId = rst.getNodeId(primary);
const newPrimaryId = rst.getNodeId(secondary);
newConfig.members[newPrimaryId].priority = 1;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index a4845dbd025..837f142d5e9 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -2049,6 +2049,17 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
"not primary so can't step down",
getMemberState().primary());
+ // This makes us tell the 'isMaster' command we can't accept writes (though in fact we can;
+ // it is not valid to disable writes until we actually acquire the RSTL).
+ {
+ stdx::lock_guard lk(_mutex);
+ _waitingForRSTLAtStepDown++;
+ }
+ auto clearStepDownFlag = makeGuard([&] {
+ stdx::lock_guard lk(_mutex);
+ _waitingForRSTLAtStepDown--;
+ });
+
CurOpFailpointHelpers::waitWhileFailPointEnabled(
&stepdownHangBeforeRSTLEnqueue, opCtx, "stepdownHangBeforeRSTLEnqueue");
@@ -2083,6 +2094,10 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
invariant(action == PostMemberStateUpdateAction::kActionNone);
invariant(!_readWriteAbility->canAcceptNonLocalWrites(lk));
+ // We truly cannot accept writes now so no need for this flag any more.
+ _waitingForRSTLAtStepDown--;
+ clearStepDownFlag.dismiss();
+
auto updateMemberState = [&] {
invariant(lk.owns_lock());
invariant(opCtx->lockState()->isRSTLExclusive());
@@ -2499,6 +2514,10 @@ void ReplicationCoordinatorImpl::fillIsMasterForReplSet(
response->setIsSecondary(true);
}
+ if (_waitingForRSTLAtStepDown) {
+ response->setIsMaster(false);
+ }
+
if (_inShutdown) {
response->setIsMaster(false);
response->setIsSecondary(false);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 33d570c51eb..b0babd327a5 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1571,6 +1571,11 @@ private:
// Whether data replication is active.
bool _startedSteadyStateReplication = false; // (M)
+ // Nonzero while we're waiting to get the RSTL at stepdown and therefore should claim we don't
+ // allow writes. This is a counter rather than a flag because there are scenarios where multiple
+ // stepdowns are attempted at once.
+ short _waitingForRSTLAtStepDown = 0;
+
// If we're in terminal shutdown. If true, we'll refuse to vote in elections.
bool _inTerminalShutdown = false; // (M)
};