summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSpencer T Brody <spencer@mongodb.com>2017-06-16 16:25:39 -0400
committerSpencer T Brody <spencer@mongodb.com>2017-06-28 18:28:57 -0400
commitf577af234306bdceeb27c5ec09606143d7347f9d (patch)
treedf2df74b3a2ce3d44d98763e0058851c3262f32f /src
parent4926efd55328fadd997e69080b1ca544df210e7e (diff)
downloadmongo-f577af234306bdceeb27c5ec09606143d7347f9d.tar.gz
SERVER-28545 Do not wait for election to finish while holding global lock
Diffstat (limited to 'src')
-rw-r--r--src/mongo/base/error_codes.err1
-rw-r--r--src/mongo/db/repl/replication_coordinator.h5
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp10
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp8
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp10
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp3
-rw-r--r--src/mongo/db/repl/rollback_impl_test.cpp6
-rw-r--r--src/mongo/db/repl/rollback_test_fixture.cpp8
-rw-r--r--src/mongo/db/repl/rollback_test_fixture.h9
-rw-r--r--src/mongo/db/repl/rs_rollback_no_uuid_test.cpp8
-rw-r--r--src/mongo/db/repl/rs_rollback_test.cpp4
11 files changed, 48 insertions, 24 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err
index baa1f435077..d5bf1194132 100644
--- a/src/mongo/base/error_codes.err
+++ b/src/mongo/base/error_codes.err
@@ -213,6 +213,7 @@ error_code("IncompatibleRollbackAlgorithm", 212)
error_code("DuplicateSession", 213)
error_code("AuthenticationRestrictionUnmet", 214)
error_code("DatabaseDropPending", 215)
+error_code("ElectionInProgress", 216)
# Error codes 4000-8999 are reserved.
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 822e6cffb70..c9d478a4ec7 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -396,8 +396,9 @@ public:
/**
* Sets this node into a specific follower mode.
*
- * Returns OK if the follower mode was successfully set. Returns IllegalOperation if the
- * node is or becomes a leader before setFollowerMode completes.
+ * Returns OK if the follower mode was successfully set. Returns NotSecondary if the
+ * node is a leader when setFollowerMode is called and ElectionInProgess if the node is in the
+ * process of trying to elect itself primary.
*
* Follower modes are RS_STARTUP2 (initial sync), RS_SECONDARY, RS_ROLLBACK and
* RS_RECOVERING. They are the valid states of a node whose topology coordinator has the
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 5ddf127a464..bb2c96e7d20 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -812,10 +812,12 @@ Status ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState)
// We were a candidate, which means _topCoord believed us to be in state RS_SECONDARY, and
// we know that newState != RS_SECONDARY because we would have returned early, above if
// the old and new state were equal. So, try again after the election is over to
- // finish setting the follower mode.
- lk.unlock();
- _replExecutor->waitForEvent(electionFinishedEvent);
- return setFollowerMode(newState);
+ // finish setting the follower mode. We cannot wait for the election to finish here as we
+ // may be holding a global X lock, so we return a bad status and rely on the caller to
+ // retry.
+ return Status(ErrorCodes::ElectionInProgress,
+ str::stream() << "Cannot set follower mode to " << newState.toString()
+ << " because we are in the middle of running an election");
}
_topCoord->setFollowerMode(newState.s);
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
index 71b9742bf5d..424f8c88689 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
@@ -329,7 +329,7 @@ TEST_F(ReplCoordElectTest, VotesWithStringValuesAreNotCountedAsYeas) {
countLogLinesContaining("wrong type for vote argument in replSetElect command"));
}
-TEST_F(ReplCoordElectTest, ElectionsAbortWhenNodeTransitionsToRollbackState) {
+TEST_F(ReplCoordElectTest, TransitionToRollbackFailsWhenElectionInProgress) {
BSONObj configObj = BSON("_id"
<< "mySet"
<< "version"
@@ -352,11 +352,13 @@ TEST_F(ReplCoordElectTest, ElectionsAbortWhenNodeTransitionsToRollbackState) {
simulateEnoughHeartbeatsForAllNodesUp();
simulateFreshEnoughForElectability();
- ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK));
+ ASSERT_EQUALS(ErrorCodes::ElectionInProgress,
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK));
+
+ ASSERT_FALSE(getReplCoord()->getMemberState().rollback());
// We do not need to respond to any pending network operations because setFollowerMode() will
// cancel the freshness checker and election command runner.
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
}
TEST_F(ReplCoordElectTest, NodeWillNotStandForElectionDuringHeartbeatReconfig) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index 081c64c319b..3bbb4b783d7 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -606,7 +606,7 @@ TEST_F(ReplCoordTest, ElectionFailsWhenInsufficientVotesAreReceivedDuringRequest
countLogLinesContaining("not becoming primary, we received insufficient votes"));
}
-TEST_F(ReplCoordTest, ElectionsAbortWhenNodeTransitionsToRollbackState) {
+TEST_F(ReplCoordTest, TransitionToRollbackFailsWhenElectionInProgress) {
BSONObj configObj = BSON("_id"
<< "mySet"
<< "version"
@@ -632,11 +632,13 @@ TEST_F(ReplCoordTest, ElectionsAbortWhenNodeTransitionsToRollbackState) {
simulateEnoughHeartbeatsForAllNodesUp();
simulateSuccessfulDryRun();
- ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK));
+ ASSERT_EQUALS(ErrorCodes::ElectionInProgress,
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK));
+
+ ASSERT_FALSE(getReplCoord()->getMemberState().rollback());
// We do not need to respond to any pending network operations because setFollowerMode() will
- // cancel the vote requester.
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ // cancel the freshness checker and election command runner.
}
TEST_F(ReplCoordTest, ElectionFailsWhenVoteRequestResponseContainsANewerTerm) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index a2a75255cdc..5a8a7ee8d0a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -2443,7 +2443,8 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection)
// This cancels the actual election.
// We do not need to respond to any pending network operations because setFollowerMode() will
// cancel the vote requester.
- ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK));
+ ASSERT_EQUALS(ErrorCodes::ElectionInProgress,
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK));
}
TEST_F(ReplCoordTest,
diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp
index cdcbe647fd4..0bfc8278db3 100644
--- a/src/mongo/db/repl/rollback_impl_test.cpp
+++ b/src/mongo/db/repl/rollback_impl_test.cpp
@@ -374,7 +374,7 @@ DEATH_TEST_F(RollbackImplTest, RollbackTerminatesIfCompletionCallbackThrowsExcep
}
TEST_F(RollbackImplTest, RollbackReturnsNotSecondaryOnFailingToTransitionToRollback) {
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK;
+ _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary);
ASSERT_OK(_rollback->startup());
_rollback->join();
ASSERT_EQUALS(ErrorCodes::NotSecondary, _onCompletionResult);
@@ -400,7 +400,7 @@ DEATH_TEST_F(
RollbackTriggersFatalAssertionOnFailingToTransitionFromRollbackToSecondaryDuringTearDownPhase,
"Failed to transition into SECONDARY; expected to be in state ROLLBACK; found self in "
"ROLLBACK") {
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_SECONDARY;
+ _coordinator->failSettingFollowerMode(MemberState::RS_SECONDARY, ErrorCodes::IllegalOperation);
ASSERT_OK(_rollback->startup());
@@ -458,7 +458,7 @@ TEST_F(RollbackImplTest, RollbackResetsOnCompletionCallbackFunctionPointerUponCo
// Completion callback will be invoked on errors after startup() returns successfully.
// We cause the the rollback process to error out early by failing to transition to rollback.
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK;
+ _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary);
ASSERT_OK(_rollback->startup());
diff --git a/src/mongo/db/repl/rollback_test_fixture.cpp b/src/mongo/db/repl/rollback_test_fixture.cpp
index 5e090d165a0..2608e2df567 100644
--- a/src/mongo/db/repl/rollback_test_fixture.cpp
+++ b/src/mongo/db/repl/rollback_test_fixture.cpp
@@ -111,10 +111,16 @@ RollbackTest::ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollback
void RollbackTest::ReplicationCoordinatorRollbackMock::resetLastOpTimesFromOplog(
OperationContext* opCtx) {}
+void RollbackTest::ReplicationCoordinatorRollbackMock::failSettingFollowerMode(
+ const MemberState& transitionToFail, ErrorCodes::Error codeToFailWith) {
+ _failSetFollowerModeOnThisMemberState = transitionToFail;
+ _failSetFollowerModeWithThisCode = codeToFailWith;
+}
+
Status RollbackTest::ReplicationCoordinatorRollbackMock::setFollowerMode(
const MemberState& newState) {
if (newState == _failSetFollowerModeOnThisMemberState) {
- return Status(ErrorCodes::IllegalOperation,
+ return Status(_failSetFollowerModeWithThisCode,
str::stream()
<< "ReplicationCoordinatorRollbackMock set to fail on setting state to "
<< _failSetFollowerModeOnThisMemberState.toString());
diff --git a/src/mongo/db/repl/rollback_test_fixture.h b/src/mongo/db/repl/rollback_test_fixture.h
index a00e7e30971..5a9e2a9221f 100644
--- a/src/mongo/db/repl/rollback_test_fixture.h
+++ b/src/mongo/db/repl/rollback_test_fixture.h
@@ -114,8 +114,17 @@ public:
*/
Status setFollowerMode(const MemberState& newState) override;
+ /**
+ * Set this to make transitioning to the given follower mode fail with the given error code.
+ */
+ void failSettingFollowerMode(const MemberState& transitionToFail,
+ ErrorCodes::Error codeToFailWith);
+
+private:
// Override this to make setFollowerMode() fail when called with this state.
MemberState _failSetFollowerModeOnThisMemberState = MemberState::RS_UNKNOWN;
+
+ ErrorCodes::Error _failSetFollowerModeWithThisCode = ErrorCodes::InternalError;
};
} // namespace repl
diff --git a/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp b/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp
index f2291537cc2..81ad629302c 100644
--- a/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp
+++ b/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp
@@ -1205,8 +1205,8 @@ TEST_F(RSRollbackTest, RollbackReturnsImmediatelyOnFailureToTransitionToRollback
// Inject ReplicationCoordinator::setFollowerMode() error. We set the current member state
// because it will be logged by rollback() on failing to transition to ROLLBACK.
- _coordinator->setFollowerMode(MemberState::RS_SECONDARY);
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK;
+ ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_SECONDARY));
+ _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary);
startCapturingLogMessages();
rollbackNoUUID(_opCtx.get(),
@@ -1286,14 +1286,14 @@ DEATH_TEST_F(RSRollbackTest,
DEATH_TEST_F(
RSRollbackTest,
RollbackTriggersFatalAssertionOnFailingToTransitionToRecoveringAfterSyncRollbackReturns,
- "Failed to transition into RECOVERING; expected to be in state ROLLBACK but found self in "
+ "Failed to transition into RECOVERING; expected to be in state ROLLBACK; found self in "
"ROLLBACK") {
auto commonOperation = makeNoopOplogEntryAndRecordId(Seconds(1));
OplogInterfaceMock localOplog({commonOperation});
RollbackSourceMock rollbackSource(
std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation})));
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_RECOVERING;
+ _coordinator->failSettingFollowerMode(MemberState::RS_RECOVERING, ErrorCodes::IllegalOperation);
createOplog(_opCtx.get());
rollbackNoUUID(
diff --git a/src/mongo/db/repl/rs_rollback_test.cpp b/src/mongo/db/repl/rs_rollback_test.cpp
index 83128009bec..c848ebfa00a 100644
--- a/src/mongo/db/repl/rs_rollback_test.cpp
+++ b/src/mongo/db/repl/rs_rollback_test.cpp
@@ -1259,7 +1259,7 @@ TEST_F(RSRollbackTest, RollbackReturnsImmediatelyOnFailureToTransitionToRollback
// Inject ReplicationCoordinator::setFollowerMode() error. We set the current member state
// because it will be logged by rollback() on failing to transition to ROLLBACK.
ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_SECONDARY));
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK;
+ _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary);
startCapturingLogMessages();
rollback(_opCtx.get(),
@@ -1345,7 +1345,7 @@ DEATH_TEST_F(
RollbackSourceMock rollbackSource(
std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation})));
- _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_RECOVERING;
+ _coordinator->failSettingFollowerMode(MemberState::RS_RECOVERING, ErrorCodes::IllegalOperation);
createOplog(_opCtx.get());
rollback(_opCtx.get(), localOplog, rollbackSource, {}, _coordinator, _replicationProcess.get());