diff options
author | Wenbin Zhu <wenbin.zhu@mongodb.com> | 2023-02-09 21:39:49 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-10 00:49:23 +0000 |
commit | 6b19e54d461bab075ade6e3e05767a881ee37597 (patch) | |
tree | c0e39abeebfc3b116a8685894bc7b1926fa762dd /src/mongo/db/repl | |
parent | 8969fd59cb4e056c37c5a24b3f2e69822b6587f7 (diff) | |
download | mongo-6b19e54d461bab075ade6e3e05767a881ee37597.tar.gz |
SERVER-72774 Prevent a node in quiesce mode from winning an election.
Diffstat (limited to 'src/mongo/db/repl')
3 files changed, 18 insertions, 4 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index e6af5c08838..d4bd43a16ff 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1007,6 +1007,10 @@ bool ReplicationCoordinatorImpl::enterQuiesceModeIfSecondary(Milliseconds quiesc return false; } + // Cancel any ongoing election so that the node cannot become primary once in quiesce mode, + // and do not wait for cancellation to complete. + _cancelElectionIfNeeded(lk); + _inQuiesceMode = true; _quiesceDeadline = _replExecutor->now() + quiesceTime; diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp index 57959db018b..a064fedf1e7 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp @@ -49,6 +49,8 @@ namespace mongo { namespace repl { MONGO_FAIL_POINT_DEFINE(hangInWritingLastVoteForDryRun); +MONGO_FAIL_POINT_DEFINE(electionHangsBeforeUpdateMemberState); +MONGO_FAIL_POINT_DEFINE(hangBeforeOnVoteRequestCompleteCallback); class ReplicationCoordinatorImpl::ElectionState::LoseElectionGuardV1 { LoseElectionGuardV1(const LoseElectionGuardV1&) = delete; @@ -137,7 +139,12 @@ ReplicationCoordinatorImpl::ElectionState::getElectionDryRunFinishedEvent(WithLo void ReplicationCoordinatorImpl::ElectionState::cancel(WithLock) { _isCanceled = true; - _voteRequester->cancel(); + // This check is necessary because _voteRequester is only initialized in _startVoteRequester. + // Since we don't hold mutex during the entire election process, it is possible to get here + // before _startVoteRequester is ever called. 
+ if (_voteRequester) { + _voteRequester->cancel(); + } } void ReplicationCoordinatorImpl::ElectionState::start(WithLock lk, StartElectionReasonEnum reason) { @@ -396,13 +403,16 @@ void ReplicationCoordinatorImpl::ElectionState::_requestVotesForRealElection( _replExecutor ->onEvent(nextPhaseEvh.getValue(), [=](const executor::TaskExecutor::CallbackArgs&) { + if (MONGO_unlikely(hangBeforeOnVoteRequestCompleteCallback.shouldFail())) { + LOGV2(7277400, + "Hang due to hangBeforeOnVoteRequestCompleteCallback failpoint"); + hangBeforeOnVoteRequestCompleteCallback.pauseWhileSet(); + } _onVoteRequestComplete(newTerm, reason); }) .status_with_transitional_ignore(); } -MONGO_FAIL_POINT_DEFINE(electionHangsBeforeUpdateMemberState); - void ReplicationCoordinatorImpl::ElectionState::_onVoteRequestComplete( long long newTerm, StartElectionReasonEnum reason) { stdx::lock_guard<Latch> lk(_repl->_mutex); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 83f39f24cd1..4330baaf036 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -1327,7 +1327,7 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(WithLock lk, _cancelCatchupTakeover_inlock(); _cancelPriorityTakeover_inlock(); _cancelAndRescheduleElectionTimeout_inlock(); - if (_inShutdown) { + if (_inShutdown || _inQuiesceMode) { LOGV2_FOR_ELECTION(4615654, 0, "Not starting an election, since we are shutting down"); return; } |