author     A. Jesse Jiryu Davis <jesse@mongodb.com>    2019-03-18 08:53:15 -0400
committer  A. Jesse Jiryu Davis <jesse@mongodb.com>    2019-03-21 16:50:34 -0400
commit     a1c7c798168f13284a486153dde4b335735cea0a (patch)
tree       2058f50439c0dff139a264104f86e028a74932e6
parent     979d456ccf4e6756e433799a626ca373a493fc8a (diff)
download   mongo-a1c7c798168f13284a486153dde4b335735cea0a.tar.gz
SERVER-37255 Fix invariant when reconfig races with election
-rw-r--r--   buildscripts/resmokeconfig/suites/replica_sets_pv0.yml          2
-rw-r--r--   jstests/replsets/reconfig_during_election.js                    49
-rw-r--r--   src/mongo/db/repl/replication_coordinator_impl.cpp             153
-rw-r--r--   src/mongo/db/repl/replication_coordinator_impl.h                13
-rw-r--r--   src/mongo/db/repl/replication_coordinator_impl_elect.cpp         3
-rw-r--r--   src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp     13
-rw-r--r--   src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp    28
7 files changed, 191 insertions, 70 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml b/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml
index 0131907c81e..bf393221bca 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml
@@ -36,6 +36,8 @@ selector:
- jstests/replsets/catchup_takeover_two_nodes_ahead.js
# electionTimeoutMillis is not available in PV0.
- jstests/replsets/noop_writes_wait_for_write_concern_fcv.js
+ # Tests a race condition in PV1, requires a PV1-specific failpoint.
+ - jstests/replsets/reconfig_during_election.js
# Election handoff is not supported in PV0, since it requires replSetStepUp.
- jstests/replsets/election_handoff*.js
# replSetGetStatus doesn't return read concern majority optime in PV0.
diff --git a/jstests/replsets/reconfig_during_election.js b/jstests/replsets/reconfig_during_election.js
new file mode 100644
index 00000000000..4f58b3cd477
--- /dev/null
+++ b/jstests/replsets/reconfig_during_election.js
@@ -0,0 +1,49 @@
+/**
+ * SERVER-37255: replSetReconfig runs on a node that is concurrently processing an election win.
+ */
+
+(function() {
+ "use strict";
+ load("jstests/replsets/libs/election_handoff.js");
+ load("jstests/libs/check_log.js");
+
+ const rst = ReplSetTest({nodes: 2});
+ const nodes = rst.startSet();
+ const config = rst.getReplSetConfig();
+ // Prevent elections and set heartbeat timeout >> electionHangsBeforeUpdateMemberState.
+ config.settings = {electionTimeoutMillis: 12 * 60 * 60 * 1000, heartbeatTimeoutSecs: 60 * 1000};
+ rst.initiate(config);
+
+ const incumbent = rst.getPrimary();
+ const candidate = rst.getSecondary();
+
+ jsTestLog("Step down");
+
+ assert.commandWorked(candidate.adminCommand({
+ configureFailPoint: "electionHangsBeforeUpdateMemberState",
+ mode: "alwaysOn",
+ data: {waitForMillis: 10 * 1000}
+ }));
+
+ // The incumbent sends replSetStepUp to the candidate for election handoff.
+ assert.adminCommandWorkedAllowingNetworkError(incumbent, {
+ replSetStepDown: ElectionHandoffTest.stepDownPeriodSecs,
+ secondaryCatchUpPeriodSecs: ElectionHandoffTest.stepDownPeriodSecs / 2
+ });
+
+ jsTestLog("Wait for candidate to win the election");
+
+ checkLog.contains(
+ candidate, "election succeeded - electionHangsBeforeUpdateMemberState fail point enabled");
+
+ jsTestLog("Try to interrupt it with a reconfig");
+
+ config.members[nodes.indexOf(candidate)].priority = 2;
+ config.version++;
+ assert.commandWorked(candidate.adminCommand({replSetReconfig: config, force: true}));
+
+ assert.commandWorked(candidate.adminCommand(
+ {configureFailPoint: "electionHangsBeforeUpdateMemberState", mode: "off"}));
+
+ rst.stopSet();
+})();
\ No newline at end of file
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c2b41c82ad3..687c48f6168 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -556,7 +556,7 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
if (localConfig.getReplSetName() != _settings.ourSetName()) {
warning() << "Local replica set configuration document reports set name of "
<< localConfig.getReplSetName() << ", but command line reports "
- << _settings.ourSetName() << "; waitng for reconfig or remote heartbeat";
+ << _settings.ourSetName() << "; waiting for reconfig or remote heartbeat";
myIndex = StatusWith<int>(-1);
}
@@ -639,7 +639,12 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
_updateTerm_inlock(term);
}
LOG(1) << "Current term is now " << term;
- _performPostMemberStateUpdateAction(action);
+ if (action == kActionWinElection) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ _performPostMemberStateUpdateAction(action);
+ }
if (!isArbiter) {
_externalState->startThreads(_settings);
@@ -929,8 +934,13 @@ Status ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState)
const PostMemberStateUpdateAction action =
_updateMemberStateFromTopologyCoordinator_inlock(nullptr);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+
+ if (action == kActionWinElection) {
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ }
return Status::OK();
}
@@ -1783,24 +1793,30 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
invariant(opCtx->lockState()->isW());
auto action = _updateMemberStateFromTopologyCoordinator_inlock(opCtx);
- lk.unlock();
+ // Seems unlikely but handle kActionWinElection in case some surprising sequence leads here.
+ if (action == kActionWinElection) {
+ _postWonElectionUpdateMemberState_inlock();
+ lk.unlock();
+ } else {
+ lk.unlock();
- if (MONGO_FAIL_POINT(stepdownHangBeforePerformingPostMemberStateUpdateActions)) {
- log() << "stepping down from primary - "
- "stepdownHangBeforePerformingPostMemberStateUpdateActions fail point enabled. "
- "Blocking until fail point is disabled.";
- while (MONGO_FAIL_POINT(stepdownHangBeforePerformingPostMemberStateUpdateActions)) {
- mongo::sleepsecs(1);
- {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (_inShutdown) {
- break;
+ if (MONGO_FAIL_POINT(stepdownHangBeforePerformingPostMemberStateUpdateActions)) {
+ log() << "stepping down from primary - "
+ "stepdownHangBeforePerformingPostMemberStateUpdateActions fail point "
+ "enabled. Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(stepdownHangBeforePerformingPostMemberStateUpdateActions)) {
+ mongo::sleepsecs(1);
+ {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ if (_inShutdown) {
+ break;
+ }
}
}
}
- }
- _performPostMemberStateUpdateAction(action);
+ _performPostMemberStateUpdateAction(action);
+ }
};
ScopeGuard onExitGuard = MakeGuard([&] {
abortFn();
@@ -1927,7 +1943,8 @@ void ReplicationCoordinatorImpl::_handleTimePassing(
}();
if (wonSingleNodeElection) {
- _performPostMemberStateUpdateAction(kActionWinElection);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _postWonElectionUpdateMemberState_inlock();
}
}
@@ -2256,8 +2273,14 @@ Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
const PostMemberStateUpdateAction action =
_updateMemberStateFromTopologyCoordinator_inlock(nullptr);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+
+ if (action == kActionWinElection) {
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ }
+
return Status::OK();
}
@@ -2505,13 +2528,20 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(
const PostMemberStateUpdateAction action =
_setCurrentRSConfig_inlock(opCtx.get(), newConfig, myIndex);
- // On a reconfig we drop all snapshots so we don't mistakenely read from the wrong one.
+ // On a reconfig we drop all snapshots so we don't mistakenly read from the wrong one.
// For example, if we change the meaning of the "committed" snapshot from applied -> durable.
_dropAllSnapshots_inlock();
lk.unlock();
_resetElectionInfoOnProtocolVersionUpgrade(opCtx.get(), oldConfig, newConfig);
- _performPostMemberStateUpdateAction(action);
+ if (action == kActionWinElection) {
+ lk.lock();
+ _postWonElectionUpdateMemberState_inlock();
+ lk.unlock();
+ } else {
+ _performPostMemberStateUpdateAction(action);
+ }
+
_replExecutor->signalEvent(finishedEvent);
}
@@ -2619,8 +2649,12 @@ void ReplicationCoordinatorImpl::_finishReplSetInitiate(OperationContext* opCtx,
invariant(_rsConfigState == kConfigInitiating);
invariant(!_rsConfig.isInitialized());
auto action = _setCurrentRSConfig_inlock(opCtx, newConfig, myIndex);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ if (action == kActionWinElection) {
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ }
}
void ReplicationCoordinatorImpl::_setConfigState_inlock(ConfigState newState) {
@@ -2772,52 +2806,21 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock(
void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
PostMemberStateUpdateAction action) {
+ invariant(action != kActionWinElection);
switch (action) {
case kActionNone:
break;
case kActionFollowerModeStateChange:
- // In follower mode, or sub-mode so ensure replication is active
- _externalState->signalApplierToChooseNewSyncSource();
+ _onFollowerModeStateChange();
break;
case kActionCloseAllConnections:
_externalState->closeConnections();
_externalState->shardingOnStepDownHook();
_externalState->stopNoopWriter();
break;
- case kActionWinElection: {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (isV1ElectionProtocol()) {
- invariant(_topCoord->getTerm() != OpTime::kUninitializedTerm);
- _electionId = OID::fromTerm(_topCoord->getTerm());
- } else {
- _electionId = OID::gen();
- }
-
- auto ts = LogicalClock::get(getServiceContext())->reserveTicks(1).asTimestamp();
- _topCoord->processWinElection(_electionId, ts);
- const PostMemberStateUpdateAction nextAction =
- _updateMemberStateFromTopologyCoordinator_inlock(nullptr);
- invariant(nextAction != kActionWinElection);
- lk.unlock();
- _performPostMemberStateUpdateAction(nextAction);
- lk.lock();
- if (!_getMemberState_inlock().primary()) {
- break;
- }
- // Notify all secondaries of the election win.
- _restartHeartbeats_inlock();
- if (isV1ElectionProtocol()) {
- invariant(!_catchupState);
- _catchupState = stdx::make_unique<CatchupState>(this);
- _catchupState->start_inlock();
- } else {
- _enterDrainMode_inlock();
- }
- break;
- }
case kActionStartSingleNodeElection:
- // In protocol version 1, single node replset will run an election instead of
- // kActionWinElection as in protocol version 0.
+ // In protocol version 1, single node replset will run an election instead of directly
+ // calling _postWonElectionUpdateMemberState_inlock as in protocol version 0.
_startElectSelfV1(TopologyCoordinator::StartElectionReason::kElectionTimeout);
break;
default:
@@ -2826,6 +2829,38 @@ void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
}
}
+void ReplicationCoordinatorImpl::_postWonElectionUpdateMemberState_inlock() {
+ if (isV1ElectionProtocol()) {
+ invariant(_topCoord->getTerm() != OpTime::kUninitializedTerm);
+ _electionId = OID::fromTerm(_topCoord->getTerm());
+ } else {
+ _electionId = OID::gen();
+ }
+
+ auto ts = LogicalClock::get(getServiceContext())->reserveTicks(1).asTimestamp();
+ _topCoord->processWinElection(_electionId, ts);
+ const PostMemberStateUpdateAction nextAction =
+ _updateMemberStateFromTopologyCoordinator_inlock(nullptr);
+ invariant(nextAction == kActionFollowerModeStateChange,
+ str::stream() << "nextAction == " << static_cast<int>(nextAction));
+ invariant(_getMemberState_inlock().primary());
+ // Clear the sync source.
+ _onFollowerModeStateChange();
+ // Notify all secondaries of the election win.
+ _restartHeartbeats_inlock();
+ if (isV1ElectionProtocol()) {
+ invariant(!_catchupState);
+ _catchupState = stdx::make_unique<CatchupState>(this);
+ _catchupState->start_inlock();
+ } else {
+ _enterDrainMode_inlock();
+ }
+}
+
+void ReplicationCoordinatorImpl::_onFollowerModeStateChange() {
+ _externalState->signalApplierToChooseNewSyncSource();
+}
+
void ReplicationCoordinatorImpl::CatchupState::start_inlock() {
log() << "Entering primary catch-up mode.";
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 02cf0ee8816..939dcd52233 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -814,11 +814,22 @@ private:
OperationContext* opCtx);
/**
- * Performs a post member-state update action. Do not call while holding _mutex.
+ * Performs a post member-state update action. Do not call while holding _mutex. "action" must
+ * not be kActionWinElection, use _postWonElectionUpdateMemberState_inlock instead.
*/
void _performPostMemberStateUpdateAction(PostMemberStateUpdateAction action);
/**
+ * Update state after winning an election.
+ */
+ void _postWonElectionUpdateMemberState_inlock();
+
+ /**
+ * Helper to select appropriate sync source after transitioning from a follower state.
+ */
+ void _onFollowerModeStateChange();
+
+ /**
* Begins an attempt to elect this node.
* Called after an incoming heartbeat changes this node's view of the set such that it
* believes it can be elected PRIMARY.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
index 07ec5d45930..a4ab4e0de83 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
@@ -272,8 +272,7 @@ void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
_freshnessChecker.reset(NULL);
_electCmdRunner.reset(NULL);
auto electionFinishedEvent = _electionFinishedEvent;
- lk.unlock();
- _performPostMemberStateUpdateAction(kActionWinElection);
+ _postWonElectionUpdateMemberState_inlock();
_replExecutor->signalEvent(electionFinishedEvent);
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
index 4c0c810781a..5b5983789c7 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
@@ -290,6 +290,8 @@ void ReplicationCoordinatorImpl::_startVoteRequester_inlock(long long newTerm) {
.status_with_transitional_ignore();
}
+MONGO_FP_DECLARE(electionHangsBeforeUpdateMemberState);
+
void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long newTerm) {
stdx::unique_lock<stdx::mutex> lk(_mutex);
LoseElectionGuardV1 lossGuard(this);
@@ -329,9 +331,16 @@ void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long newTerm) {
_voteRequester.reset();
auto electionFinishedEvent = _electionFinishedEvent;
- lk.unlock();
- _performPostMemberStateUpdateAction(kActionWinElection);
+ MONGO_FAIL_POINT_BLOCK(electionHangsBeforeUpdateMemberState, customWait) {
+ auto waitForMillis = Milliseconds(customWait.getData()["waitForMillis"].numberInt());
+ log() << "election succeeded - electionHangsBeforeUpdateMemberState fail point "
+ "enabled, sleeping "
+ << waitForMillis;
+ sleepFor(waitForMillis);
+ }
+ _postWonElectionUpdateMemberState_inlock();
+ lk.unlock();
_replExecutor->signalEvent(electionFinishedEvent);
lossGuard.dismiss();
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index cfe87d4a4f3..1934ad0c40b 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -263,9 +263,13 @@ stdx::unique_lock<stdx::mutex> ReplicationCoordinatorImpl::_handleHeartbeatRespo
if (_memberState != _topCoord->getMemberState()) {
const PostMemberStateUpdateAction postUpdateAction =
_updateMemberStateFromTopologyCoordinator_inlock(nullptr);
- lock.unlock();
- _performPostMemberStateUpdateAction(postUpdateAction);
- lock.lock();
+ if (postUpdateAction == kActionWinElection) {
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ lock.unlock();
+ _performPostMemberStateUpdateAction(postUpdateAction);
+ lock.lock();
+ }
}
break;
case HeartbeatResponseAction::Reconfig:
@@ -420,8 +424,14 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
invariant(result != TopologyCoordinator::UpdateTermResult::kTriggerStepDown);
_pendingTermUpdateDuringStepDown = boost::none;
}
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+
+ if (action == kActionWinElection) {
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ }
+
_replExecutor->signalEvent(finishedEvent);
}
@@ -640,9 +650,15 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
const int myIndexValue = myIndex.getStatus().isOK() ? myIndex.getValue() : -1;
const PostMemberStateUpdateAction action =
_setCurrentRSConfig_inlock(opCtx.get(), newConfig, myIndexValue);
+
lk.unlock();
_resetElectionInfoOnProtocolVersionUpgrade(opCtx.get(), oldConfig, newConfig);
- _performPostMemberStateUpdateAction(action);
+ if (action == kActionWinElection) {
+ lk.lock();
+ _postWonElectionUpdateMemberState_inlock();
+ } else {
+ _performPostMemberStateUpdateAction(action);
+ }
}
void ReplicationCoordinatorImpl::_trackHeartbeatHandle_inlock(