summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lingzhi Deng <lingzhi.deng@mongodb.com> 2020-03-13 22:10:43 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-03-25 15:55:41 +0000
commit: b973f465871f19815f8a5b60b6aeffb10bd1cb78 (patch)
tree: bb88d8c5b67731be93128da3e5a459ffea470717
parent: 09ea2c315d2e7a44a49a990ed7649af3919acd4d (diff)
download: mongo-b973f465871f19815f8a5b60b6aeffb10bd1cb78.tar.gz
SERVER-46517: Move the update of readWriteAbility out of _updateMemberStateFromTopologyCoordinator
(cherry picked from commit 6d0a10abd1e6f222bc16c59afc28dcfb9613b86f)
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 73
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 11
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp 11
3 files changed, 52 insertions, 43 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 44b1723a268..8efa495886b 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1031,7 +1031,13 @@ Status ReplicationCoordinatorImpl::_setFollowerMode(OperationContext* opCtx,
_topCoord->setFollowerMode(newState.s);
- const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ if (opCtx && _memberState.secondary() && newState == MemberState::RS_ROLLBACK) {
+ // If we are switching out of SECONDARY and to ROLLBACK, we must make sure that we hold the
+ // RSTL in mode X to prevent readers that have the RSTL in intent mode from reading.
+ _readWriteAbility->setCanServeNonLocalReads(opCtx, 0U);
+ }
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk);
lk.unlock();
_performPostMemberStateUpdateAction(action);
@@ -1156,8 +1162,9 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
_updateLastCommittedOpTimeAndWallTime(lk);
_wakeReadyWaiters(lk);
- // Update _canAcceptNonLocalWrites
- _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
+ _updateMemberStateFromTopologyCoordinator(lk);
LOGV2_OPTIONS(21331,
{logv2::LogTag::kRS},
@@ -2473,16 +2480,18 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
// of a stepdown attempt. This will prevent us from accepting writes so that if our stepdown
// attempt fails later we can release the RSTL and go to sleep to allow secondaries to
// catch up without allowing new writes in.
- auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
+ auto action = _updateMemberStateFromTopologyCoordinator(lk);
invariant(action == PostMemberStateUpdateAction::kActionNone);
invariant(!_readWriteAbility->canAcceptNonLocalWrites(lk));
- // Make sure that we leave _canAcceptNonLocalWrites in the proper state.
auto updateMemberState = [&] {
invariant(lk.owns_lock());
invariant(opCtx->lockState()->isRSTLExclusive());
- auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ // Make sure that we leave _canAcceptNonLocalWrites in the proper state.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
+ auto action = _updateMemberStateFromTopologyCoordinator(lk);
lk.unlock();
if (MONGO_unlikely(stepdownHangBeforePerformingPostMemberStateUpdateActions.shouldFail())) {
@@ -3029,8 +3038,7 @@ Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
return Status(ErrorCodes::OperationFailed, "already out of maintenance mode");
}
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator(lk, nullptr);
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk);
lk.unlock();
_performPostMemberStateUpdateAction(action);
return Status::OK();
@@ -3314,6 +3322,9 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(OperationContext* opCtx,
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx).clearElectionCandidateMetrics();
+
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
} else {
// Release the rstl lock as the node might have stepped down due to
// other unconditional step down code paths like learning new term via heartbeat &
@@ -3565,24 +3576,20 @@ void ReplicationCoordinatorImpl::incrementTopologyVersion(OperationContext* opCt
_fulfillTopologyChangePromise(opCtx, lk);
}
-ReplicationCoordinatorImpl::PostMemberStateUpdateAction
-ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock lk,
- OperationContext* opCtx) {
- {
- // We have to do this check even if our current and target state are the same as we might
- // have just failed a stepdown attempt and thus are staying in PRIMARY state but restoring
- // our ability to accept writes.
- bool canAcceptWrites = _topCoord->canAcceptWrites();
- _readWriteAbility->setCanAcceptNonLocalWrites(lk, opCtx, canAcceptWrites);
- }
+void ReplicationCoordinatorImpl::_updateWriteAbilityFromTopologyCoordinator(
+ WithLock lk, OperationContext* opCtx) {
+ bool canAcceptWrites = _topCoord->canAcceptWrites();
+ _readWriteAbility->setCanAcceptNonLocalWrites(lk, opCtx, canAcceptWrites);
+}
+ReplicationCoordinatorImpl::PostMemberStateUpdateAction
+ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock lk) {
// We want to respond to any waiting isMasters even if our current and target state are the
// same as it is possible writes have been disabled during a stepDown but the primary has yet
// to transition to SECONDARY state.
ON_BLOCK_EXIT([&] {
if (_rsConfig.isInitialized()) {
- _fulfillTopologyChangePromise(opCtx, lk);
- // Use the global ServiceContext here in case the current opCtx is null.
+ _fulfillTopologyChangePromise(nullptr, lk);
IsMasterMetrics::get(getGlobalServiceContext())->resetNumAwaitingTopologyChanges();
}
});
@@ -3608,7 +3615,7 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
_opTimeWaiterList.setErrorAll_inlock(
{ErrorCodes::PrimarySteppedDown, "Primary stepped down while waiting for replication"});
- // _canAcceptNonLocalWrites should already be set above.
+ // _canAcceptNonLocalWrites should already be set.
invariant(!_readWriteAbility->canAcceptNonLocalWrites(lk));
serverGlobalParams.validateFeaturesAsMaster.store(false);
@@ -3636,12 +3643,9 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
_externalState->startProducerIfStopped();
}
- if (_memberState.secondary() && newState.rollback()) {
- // If we are switching out of SECONDARY and to ROLLBACK, we must make sure that we hold the
- // RSTL in mode X to prevent readers that have the RSTL in intent mode from reading.
- _readWriteAbility->setCanServeNonLocalReads(opCtx, 0U);
- } else if (_memberState.secondary() && !newState.primary()) {
- // Switching out of SECONDARY, but not to PRIMARY or ROLLBACK.
+ if (_memberState.secondary() && !newState.primary() && !newState.rollback()) {
+ // Switching out of SECONDARY, but not to PRIMARY or ROLLBACK. Note that ROLLBACK case is
+ // handled separately and requires RSTL lock held, see setFollowerModeStrict.
_readWriteAbility->setCanServeNonLocalReads_UNSAFE(0U);
} else if (!_memberState.primary() && newState.secondary()) {
// Switching into SECONDARY, but not from PRIMARY.
@@ -3756,8 +3760,7 @@ void ReplicationCoordinatorImpl::_postWonElectionUpdateMemberState(WithLock lk)
_electionId = OID::fromTerm(_topCoord->getTerm());
auto ts = LogicalClock::get(getServiceContext())->reserveTicks(1).asTimestamp();
_topCoord->processWinElection(_electionId, ts);
- const PostMemberStateUpdateAction nextAction =
- _updateMemberStateFromTopologyCoordinator(lk, nullptr);
+ const PostMemberStateUpdateAction nextAction = _updateMemberStateFromTopologyCoordinator(lk);
invariant(nextAction == kActionFollowerModeStateChange,
str::stream() << "nextAction == " << static_cast<int>(nextAction));
@@ -4101,7 +4104,7 @@ ReplicationCoordinatorImpl::_setCurrentRSConfig(WithLock lk,
_cancelPriorityTakeover_inlock();
_cancelAndRescheduleElectionTimeout_inlock();
- const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk);
if (_selfIndex >= 0) {
// Don't send heartbeats if we're not in the config, if we get re-added one of the
// nodes in the set will contact us.
@@ -5059,15 +5062,13 @@ bool ReplicationCoordinatorImpl::setContainsArbiter() const {
void ReplicationCoordinatorImpl::ReadWriteAbility::setCanAcceptNonLocalWrites(
WithLock lk, OperationContext* opCtx, bool canAcceptWrites) {
- if (canAcceptWrites == canAcceptNonLocalWrites(lk)) {
- return;
- }
-
// We must be holding the RSTL in mode X to change _canAcceptNonLocalWrites.
invariant(opCtx);
- if (opCtx->lockState()->isRSTLExclusive()) {
- _canAcceptNonLocalWrites.store(canAcceptWrites);
+ invariant(opCtx->lockState()->isRSTLExclusive());
+ if (canAcceptWrites == canAcceptNonLocalWrites(lk)) {
+ return;
}
+ _canAcceptNonLocalWrites.store(canAcceptWrites);
}
bool ReplicationCoordinatorImpl::ReadWriteAbility::canAcceptNonLocalWrites(WithLock) const {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 23aa2940b46..e0f975999f0 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1033,18 +1033,19 @@ private:
void _fulfillTopologyChangePromise(OperationContext* opCtx, WithLock);
/**
+ * Update _canAcceptNonLocalWrites based on _topCoord->canAcceptWrites().
+ */
+ void _updateWriteAbilityFromTopologyCoordinator(WithLock lk, OperationContext* opCtx);
+
+ /**
* Updates the cached value, _memberState, to match _topCoord's reported
* member state, from getMemberState().
*
* Returns an enum indicating what action to take after releasing _mutex, if any.
* Call performPostMemberStateUpdateAction on the return value after releasing
* _mutex.
- *
- * Note: opCtx may be null as currently not all paths thread an OperationContext all the way
- * down, but it must be non-null for any calls that change _canAcceptNonLocalWrites.
*/
- PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator(WithLock lk,
- OperationContext* opCtx);
+ PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator(WithLock lk);
/**
* Performs a post member-state update action. Do not call while holding _mutex.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 10c0bc1b0de..3bb13adefc4 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -285,7 +285,7 @@ stdx::unique_lock<Latch> ReplicationCoordinatorImpl::_handleHeartbeatResponseAct
// Update the cached member state if different than the current topology member state
if (_memberState != _topCoord->getMemberState()) {
const PostMemberStateUpdateAction postUpdateAction =
- _updateMemberStateFromTopologyCoordinator(lock, nullptr);
+ _updateMemberStateFromTopologyCoordinator(lock);
lock.unlock();
_performPostMemberStateUpdateAction(postUpdateAction);
lock.lock();
@@ -438,7 +438,11 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
_topCoord->finishUnconditionalStepDown();
- const auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx.get());
+
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx.get());
+
+ const auto action = _updateMemberStateFromTopologyCoordinator(lk);
if (_pendingTermUpdateDuringStepDown) {
TopologyCoordinator::UpdateTermResult result;
_updateTerm_inlock(*_pendingTermUpdateDuringStepDown, &result);
@@ -691,6 +695,9 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
+
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx.get());
} else {
// Release the rstl lock as the node might have stepped down due to
// other unconditional step down code paths like learning new term via heartbeat &