author      Lingzhi Deng <lingzhi.deng@mongodb.com>            2020-03-13 22:10:43 -0400
committer   Evergreen Agent <no-reply@evergreen.mongodb.com>   2020-04-07 03:28:36 +0000
commit      8e2737e5b88a0f639b77b51f7b57730f337fea55 (patch)
tree        9990b9d5ed73ec219115a882fa56ecf5fbee6306
parent      f9170b2a35d3ab9d1d6d7669d1bacf9da785a94d (diff)
download    mongo-8e2737e5b88a0f639b77b51f7b57730f337fea55.tar.gz
SERVER-46517: Move the update of readWriteAbility out of _updateMemberStateFromTopologyCoordinator
(cherry picked from commit 6d0a10abd1e6f222bc16c59afc28dcfb9613b86f)
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp             67
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h               11
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp   10
3 files changed, 49 insertions, 39 deletions
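In outline, the patch splits the old _updateMemberStateFromTopologyCoordinator(lk, opCtx) into two pieces: a new _updateWriteAbilityFromTopologyCoordinator(lk, opCtx) that updates _canAcceptNonLocalWrites and therefore needs an OperationContext holding the RSTL in mode X, and a narrowed _updateMemberStateFromTopologyCoordinator(lk) that only updates the cached member state and no longer takes an OperationContext. A simplified sketch of the new shape (member-state body elided, names as in the diff below):

    void ReplicationCoordinatorImpl::_updateWriteAbilityFromTopologyCoordinator(
        WithLock lk, OperationContext* opCtx) {
        // Caller must hold the RSTL in mode X; this is enforced inside
        // ReadWriteAbility::setCanAcceptNonLocalWrites.
        bool canAcceptWrites = _topCoord->canAcceptWrites();
        _readWriteAbility->setCanAcceptNonLocalWrites(lk, opCtx, canAcceptWrites);
    }

    ReplicationCoordinatorImpl::PostMemberStateUpdateAction
    ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock lk) {
        // Updates only _memberState and related bookkeeping; callers that also
        // need to change write ability call the function above first.
        // ... member-state logic unchanged ...
    }

Call sites that can change write ability (stepdown, drain complete, reconfig while primary) now invoke both functions; call sites without an OperationContext call only the second.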
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index fdd4bd983c9..800ef68f611 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1013,7 +1013,13 @@ Status ReplicationCoordinatorImpl::_setFollowerMode(OperationContext* opCtx,
_topCoord->setFollowerMode(newState.s);
- const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ if (opCtx && _memberState.secondary() && newState == MemberState::RS_ROLLBACK) {
+ // If we are switching out of SECONDARY and to ROLLBACK, we must make sure that we hold the
+ // RSTL in mode X to prevent readers that have the RSTL in intent mode from reading.
+ _readWriteAbility->setCanServeNonLocalReads(opCtx, 0U);
+ }
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk);
lk.unlock();
_performPostMemberStateUpdateAction(action);
@@ -1098,8 +1104,9 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
// our election in onTransitionToPrimary(), above.
_updateLastCommittedOpTimeAndWallTime(lk);
- // Update _canAcceptNonLocalWrites
- _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
+ _updateMemberStateFromTopologyCoordinator(lk);
log() << "transition to primary complete; database writes are now permitted" << rsLog;
_drainFinishedCond.notify_all();
@@ -2076,16 +2083,18 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
// of a stepdown attempt. This will prevent us from accepting writes so that if our stepdown
// attempt fails later we can release the RSTL and go to sleep to allow secondaries to
// catch up without allowing new writes in.
- auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
+ auto action = _updateMemberStateFromTopologyCoordinator(lk);
invariant(action == PostMemberStateUpdateAction::kActionNone);
invariant(!_readWriteAbility->canAcceptNonLocalWrites(lk));
- // Make sure that we leave _canAcceptNonLocalWrites in the proper state.
auto updateMemberState = [&] {
invariant(lk.owns_lock());
invariant(opCtx->lockState()->isRSTLExclusive());
- auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ // Make sure that we leave _canAcceptNonLocalWrites in the proper state.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
+ auto action = _updateMemberStateFromTopologyCoordinator(lk);
lk.unlock();
if (MONGO_FAIL_POINT(stepdownHangBeforePerformingPostMemberStateUpdateActions)) {
@@ -2571,8 +2580,7 @@ Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
return Status(ErrorCodes::OperationFailed, "already out of maintenance mode");
}
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator(lk, nullptr);
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk);
lk.unlock();
_performPostMemberStateUpdateAction(action);
return Status::OK();
@@ -2767,6 +2775,9 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(OperationContext* opCtx,
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx).clearElectionCandidateMetrics();
_wMajorityWriteAvailabilityWaiter.reset();
+
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);
} else {
// Release the rstl lock as the node might have stepped down due to
// other unconditional step down code paths like learning new term via heartbeat &
@@ -2920,17 +2931,14 @@ void ReplicationCoordinatorImpl::_setConfigState_inlock(ConfigState newState) {
}
}
-ReplicationCoordinatorImpl::PostMemberStateUpdateAction
-ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock lk,
- OperationContext* opCtx) {
- {
- // We have to do this check even if our current and target state are the same as we might
- // have just failed a stepdown attempt and thus are staying in PRIMARY state but restoring
- // our ability to accept writes.
- bool canAcceptWrites = _topCoord->canAcceptWrites();
- _readWriteAbility->setCanAcceptNonLocalWrites(lk, opCtx, canAcceptWrites);
- }
+void ReplicationCoordinatorImpl::_updateWriteAbilityFromTopologyCoordinator(
+ WithLock lk, OperationContext* opCtx) {
+ bool canAcceptWrites = _topCoord->canAcceptWrites();
+ _readWriteAbility->setCanAcceptNonLocalWrites(lk, opCtx, canAcceptWrites);
+}
+ReplicationCoordinatorImpl::PostMemberStateUpdateAction
+ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock lk) {
const MemberState newState = _topCoord->getMemberState();
if (newState == _memberState) {
if (_topCoord->getRole() == TopologyCoordinator::Role::kCandidate) {
@@ -2949,7 +2957,7 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
// Wake up the optime waiter that is waiting for primary catch-up to finish.
_opTimeWaiterList.signalAll_inlock();
- // _canAcceptNonLocalWrites should already be set above.
+ // _canAcceptNonLocalWrites should already be set.
invariant(!_readWriteAbility->canAcceptNonLocalWrites(lk));
serverGlobalParams.validateFeaturesAsMaster.store(false);
@@ -2977,12 +2985,9 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
_externalState->startProducerIfStopped();
}
- if (_memberState.secondary() && newState.rollback()) {
- // If we are switching out of SECONDARY and to ROLLBACK, we must make sure that we hold the
- // RSTL in mode X to prevent readers that have the RSTL in intent mode from reading.
- _readWriteAbility->setCanServeNonLocalReads(opCtx, 0U);
- } else if (_memberState.secondary() && !newState.primary()) {
- // Switching out of SECONDARY, but not to PRIMARY or ROLLBACK.
+ if (_memberState.secondary() && !newState.primary() && !newState.rollback()) {
+ // Switching out of SECONDARY, but not to PRIMARY or ROLLBACK. Note that ROLLBACK case is
+ // handled separately and requires RSTL lock held, see setFollowerModeStrict.
_readWriteAbility->setCanServeNonLocalReads_UNSAFE(0U);
} else if (!_memberState.primary() && newState.secondary()) {
// Switching into SECONDARY, but not from PRIMARY.
@@ -3091,8 +3096,7 @@ void ReplicationCoordinatorImpl::_postWonElectionUpdateMemberState(WithLock lk)
_electionId = OID::fromTerm(_topCoord->getTerm());
auto ts = LogicalClock::get(getServiceContext())->reserveTicks(1).asTimestamp();
_topCoord->processWinElection(_electionId, ts);
- const PostMemberStateUpdateAction nextAction =
- _updateMemberStateFromTopologyCoordinator(lk, nullptr);
+ const PostMemberStateUpdateAction nextAction = _updateMemberStateFromTopologyCoordinator(lk);
invariant(nextAction == kActionFollowerModeStateChange,
str::stream() << "nextAction == " << static_cast<int>(nextAction));
@@ -3351,7 +3355,7 @@ ReplicationCoordinatorImpl::_setCurrentRSConfig(WithLock lk,
_cancelPriorityTakeover_inlock();
_cancelAndRescheduleElectionTimeout_inlock();
- const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator(lk);
if (_selfIndex >= 0) {
// Don't send heartbeats if we're not in the config, if we get re-added one of the
// nodes in the set will contact us.
@@ -4232,13 +4236,12 @@ bool ReplicationCoordinatorImpl::setContainsArbiter() const {
void ReplicationCoordinatorImpl::ReadWriteAbility::setCanAcceptNonLocalWrites(
WithLock lk, OperationContext* opCtx, bool canAcceptWrites) {
- if (canAcceptWrites == canAcceptNonLocalWrites(lk)) {
- return;
- }
-
// We must be holding the RSTL in mode X to change _canAcceptNonLocalWrites.
invariant(opCtx);
invariant(opCtx->lockState()->isRSTLExclusive());
+ if (canAcceptWrites == canAcceptNonLocalWrites(lk)) {
+ return;
+ }
_canAcceptNonLocalWrites.store(canAcceptWrites);
}
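One behavioral consequence, visible in the last hunk above: the RSTL invariant in ReadWriteAbility::setCanAcceptNonLocalWrites is now checked before the early return, so it fires on every call rather than only when the flag actually flips. The resulting function, simplified from the diff:

    void ReplicationCoordinatorImpl::ReadWriteAbility::setCanAcceptNonLocalWrites(
        WithLock lk, OperationContext* opCtx, bool canAcceptWrites) {
        // We must be holding the RSTL in mode X to change _canAcceptNonLocalWrites,
        // and the invariant now fires even when the value is unchanged.
        invariant(opCtx);
        invariant(opCtx->lockState()->isRSTLExclusive());

        if (canAcceptWrites == canAcceptNonLocalWrites(lk)) {
            return;  // No change, but the caller still had to hold the RSTL.
        }
        _canAcceptNonLocalWrites.store(canAcceptWrites);
    }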
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 73c46dcc4e4..c6ab7789d71 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1014,18 +1014,19 @@ private:
void _setConfigState_inlock(ConfigState newState);
/**
+ * Update _canAcceptNonLocalWrites based on _topCoord->canAcceptWrites().
+ */
+ void _updateWriteAbilityFromTopologyCoordinator(WithLock lk, OperationContext* opCtx);
+
+ /**
* Updates the cached value, _memberState, to match _topCoord's reported
* member state, from getMemberState().
*
* Returns an enum indicating what action to take after releasing _mutex, if any.
* Call performPostMemberStateUpdateAction on the return value after releasing
* _mutex.
- *
- * Note: opCtx may be null as currently not all paths thread an OperationContext all the way
- * down, but it must be non-null for any calls that change _canAcceptNonLocalWrites.
*/
- PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator(WithLock lk,
- OperationContext* opCtx);
+ PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator(WithLock lk);
/**
* Performs a post member-state update action. Do not call while holding _mutex.
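The header comments above imply a caller contract roughly like the following sketch (a hypothetical call site, not verbatim from the patch; it assumes the caller holds _mutex via lk and, when write ability may change, the RSTL in mode X, mirroring the stepdown and drain-complete sites in this change):

    _updateWriteAbilityFromTopologyCoordinator(lk, opCtx);             // needs RSTL mode X
    const auto action = _updateMemberStateFromTopologyCoordinator(lk); // under _mutex
    lk.unlock();                                                       // release _mutex first
    _performPostMemberStateUpdateAction(action);                       // then run the action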
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index a1afae9f3ae..7ec6bd06051 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -273,7 +273,7 @@ stdx::unique_lock<Latch> ReplicationCoordinatorImpl::_handleHeartbeatResponseAct
// Update the cached member state if different than the current topology member state
if (_memberState != _topCoord->getMemberState()) {
const PostMemberStateUpdateAction postUpdateAction =
- _updateMemberStateFromTopologyCoordinator(lock, nullptr);
+ _updateMemberStateFromTopologyCoordinator(lock);
lock.unlock();
_performPostMemberStateUpdateAction(postUpdateAction);
lock.lock();
@@ -430,7 +430,10 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
_topCoord->finishUnconditionalStepDown();
- const auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx.get());
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx.get());
+
+ const auto action = _updateMemberStateFromTopologyCoordinator(lk);
if (_pendingTermUpdateDuringStepDown) {
TopologyCoordinator::UpdateTermResult result;
_updateTerm_inlock(*_pendingTermUpdateDuringStepDown, &result);
@@ -654,6 +657,9 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
_wMajorityWriteAvailabilityWaiter.reset();
+
+ // Update _canAcceptNonLocalWrites.
+ _updateWriteAbilityFromTopologyCoordinator(lk, opCtx.get());
} else {
// Release the rstl lock as the node might have stepped down due to
// other unconditional step down code paths like learning new term via heartbeat &
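Finally, call sites with no OperationContext in hand, such as the heartbeat response handler above, setMaintenanceMode, and the post-election update, simply drop the old nullptr argument: since the member-state update no longer touches _canAcceptNonLocalWrites, those paths do not need the RSTL. A simplified sketch of the heartbeat-handler pattern after this change:

    if (_memberState != _topCoord->getMemberState()) {
        // No OperationContext required: this path does not change write ability.
        const PostMemberStateUpdateAction postUpdateAction =
            _updateMemberStateFromTopologyCoordinator(lock);
        lock.unlock();
        _performPostMemberStateUpdateAction(postUpdateAction);
        lock.lock();
    }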