diff options
author | Matthew Russotto <matthew.russotto@mongodb.com> | 2021-01-25 19:59:16 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-07-20 14:28:25 +0000 |
commit | 28efeba497f86e7d6c32cba7adeaf9ca04e14704 (patch) | |
tree | b4d94d867c02c8885a2727484837b8d2036973b5 | |
parent | 546487fe28e0a5679cecb6fb026692f3798c514a (diff) | |
download | mongo-28efeba497f86e7d6c32cba7adeaf9ca04e14704.tar.gz |
SERVER-53431 Server should report itself not writable during stepdown
(cherry picked from commit d73b402b349498d799d4d4458cff9b0c4cea5fb6)
-rw-r--r-- | jstests/replsets/step_down_on_secondary.js | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 5 |
3 files changed, 26 insertions, 1 deletion
diff --git a/jstests/replsets/step_down_on_secondary.js b/jstests/replsets/step_down_on_secondary.js index 1f11e6c2747..1eeef11aa79 100644 --- a/jstests/replsets/step_down_on_secondary.js +++ b/jstests/replsets/step_down_on_secondary.js @@ -51,7 +51,8 @@ const joinStepDownThread = startParallelShell(() => { waitForCurOpByFailPointNoNS(primaryDB, "stepdownHangBeforeRSTLEnqueue"); jsTestLog("Force reconfig to swap the electable node"); -const newConfig = rst.getReplSetConfigFromNode(); +// We must specify the node in getReplSetConfigFromNode, because we do not have a writable primary. +const newConfig = rst.getReplSetConfigFromNode(0); const oldPrimaryId = rst.getNodeId(primary); const newPrimaryId = rst.getNodeId(secondary); newConfig.members[newPrimaryId].priority = 1; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index a4845dbd025..837f142d5e9 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -2049,6 +2049,17 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, "not primary so can't step down", getMemberState().primary()); + // This makes us tell the 'isMaster' command we can't accept writes (though in fact we can, + // it is not valid to disable writes until we actually acquire the RSTL). + { + stdx::lock_guard lk(_mutex); + _waitingForRSTLAtStepDown++; + } + auto clearStepDownFlag = makeGuard([&] { + stdx::lock_guard lk(_mutex); + _waitingForRSTLAtStepDown--; + }); + CurOpFailpointHelpers::waitWhileFailPointEnabled( &stepdownHangBeforeRSTLEnqueue, opCtx, "stepdownHangBeforeRSTLEnqueue"); @@ -2083,6 +2094,10 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, invariant(action == PostMemberStateUpdateAction::kActionNone); invariant(!_readWriteAbility->canAcceptNonLocalWrites(lk)); + // We truly cannot accept writes now so no need for this flag any more. 
+ _waitingForRSTLAtStepDown--; + clearStepDownFlag.dismiss(); + auto updateMemberState = [&] { invariant(lk.owns_lock()); invariant(opCtx->lockState()->isRSTLExclusive()); @@ -2499,6 +2514,10 @@ void ReplicationCoordinatorImpl::fillIsMasterForReplSet( response->setIsSecondary(true); } + if (_waitingForRSTLAtStepDown) { + response->setIsMaster(false); + } + if (_inShutdown) { response->setIsMaster(false); response->setIsSecondary(false); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 33d570c51eb..b0babd327a5 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -1571,6 +1571,11 @@ private: // Whether data replication is active. bool _startedSteadyStateReplication = false; // (M) + // If we're waiting to get the RSTL at stepdown and therefore should claim we don't allow + // writes. This is a counter rather than a flag because there are scenarios where multiple + // stepdowns are attempted at once. + short _waitingForRSTLAtStepDown = 0; + // If we're in terminal shutdown. If true, we'll refuse to vote in elections. bool _inTerminalShutdown = false; // (M) }; |