diff options
author | Spencer T Brody <spencer@mongodb.com> | 2017-06-16 16:25:39 -0400 |
---|---|---|
committer | Spencer T Brody <spencer@mongodb.com> | 2017-06-28 18:28:57 -0400 |
commit | f577af234306bdceeb27c5ec09606143d7347f9d (patch) | |
tree | df2df74b3a2ce3d44d98763e0058851c3262f32f /src | |
parent | 4926efd55328fadd997e69080b1ca544df210e7e (diff) | |
download | mongo-f577af234306bdceeb27c5ec09606143d7347f9d.tar.gz |
SERVER-28545 Do not wait for election to finish while holding global lock
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/base/error_codes.err | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator.h | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_test_fixture.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_test_fixture.h | 9 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_rollback_no_uuid_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_rollback_test.cpp | 4 |
11 files changed, 48 insertions, 24 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index baa1f435077..d5bf1194132 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -213,6 +213,7 @@ error_code("IncompatibleRollbackAlgorithm", 212) error_code("DuplicateSession", 213) error_code("AuthenticationRestrictionUnmet", 214) error_code("DatabaseDropPending", 215) +error_code("ElectionInProgress", 216) # Error codes 4000-8999 are reserved. diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 822e6cffb70..c9d478a4ec7 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -396,8 +396,9 @@ public: /** * Sets this node into a specific follower mode. * - * Returns OK if the follower mode was successfully set. Returns IllegalOperation if the - * node is or becomes a leader before setFollowerMode completes. + * Returns OK if the follower mode was successfully set. Returns NotSecondary if the + * node is a leader when setFollowerMode is called and ElectionInProgess if the node is in the + * process of trying to elect itself primary. * * Follower modes are RS_STARTUP2 (initial sync), RS_SECONDARY, RS_ROLLBACK and * RS_RECOVERING. They are the valid states of a node whose topology coordinator has the diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 5ddf127a464..bb2c96e7d20 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -812,10 +812,12 @@ Status ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState) // We were a candidate, which means _topCoord believed us to be in state RS_SECONDARY, and // we know that newState != RS_SECONDARY because we would have returned early, above if // the old and new state were equal. So, try again after the election is over to - // finish setting the follower mode. - lk.unlock(); - _replExecutor->waitForEvent(electionFinishedEvent); - return setFollowerMode(newState); + // finish setting the follower mode. We cannot wait for the election to finish here as we + // may be holding a global X lock, so we return a bad status and rely on the caller to + // retry. + return Status(ErrorCodes::ElectionInProgress, + str::stream() << "Cannot set follower mode to " << newState.toString() + << " because we are in the middle of running an election"); } _topCoord->setFollowerMode(newState.s); diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp index 71b9742bf5d..424f8c88689 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp @@ -329,7 +329,7 @@ TEST_F(ReplCoordElectTest, VotesWithStringValuesAreNotCountedAsYeas) { countLogLinesContaining("wrong type for vote argument in replSetElect command")); } -TEST_F(ReplCoordElectTest, ElectionsAbortWhenNodeTransitionsToRollbackState) { +TEST_F(ReplCoordElectTest, TransitionToRollbackFailsWhenElectionInProgress) { BSONObj configObj = BSON("_id" << "mySet" << "version" @@ -352,11 +352,13 @@ TEST_F(ReplCoordElectTest, ElectionsAbortWhenNodeTransitionsToRollbackState) { simulateEnoughHeartbeatsForAllNodesUp(); simulateFreshEnoughForElectability(); - ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK)); + ASSERT_EQUALS(ErrorCodes::ElectionInProgress, + getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK)); + + ASSERT_FALSE(getReplCoord()->getMemberState().rollback()); // We do not need to respond to any pending network operations because setFollowerMode() will // cancel the freshness checker and election command runner. - ASSERT_TRUE(getReplCoord()->getMemberState().rollback()); } TEST_F(ReplCoordElectTest, NodeWillNotStandForElectionDuringHeartbeatReconfig) { diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp index 081c64c319b..3bbb4b783d7 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp @@ -606,7 +606,7 @@ TEST_F(ReplCoordTest, ElectionFailsWhenInsufficientVotesAreReceivedDuringRequest countLogLinesContaining("not becoming primary, we received insufficient votes")); } -TEST_F(ReplCoordTest, ElectionsAbortWhenNodeTransitionsToRollbackState) { +TEST_F(ReplCoordTest, TransitionToRollbackFailsWhenElectionInProgress) { BSONObj configObj = BSON("_id" << "mySet" << "version" @@ -632,11 +632,13 @@ TEST_F(ReplCoordTest, ElectionsAbortWhenNodeTransitionsToRollbackState) { simulateEnoughHeartbeatsForAllNodesUp(); simulateSuccessfulDryRun(); - ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK)); + ASSERT_EQUALS(ErrorCodes::ElectionInProgress, + getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK)); + + ASSERT_FALSE(getReplCoord()->getMemberState().rollback()); // We do not need to respond to any pending network operations because setFollowerMode() will - // cancel the vote requester. - ASSERT_TRUE(getReplCoord()->getMemberState().rollback()); + // cancel the freshness checker and election command runner. } TEST_F(ReplCoordTest, ElectionFailsWhenVoteRequestResponseContainsANewerTerm) { diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index a2a75255cdc..5a8a7ee8d0a 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -2443,7 +2443,8 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection) // This cancels the actual election. // We do not need to respond to any pending network operations because setFollowerMode() will // cancel the vote requester. - ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK)); + ASSERT_EQUALS(ErrorCodes::ElectionInProgress, + getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK)); } TEST_F(ReplCoordTest, diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp index cdcbe647fd4..0bfc8278db3 100644 --- a/src/mongo/db/repl/rollback_impl_test.cpp +++ b/src/mongo/db/repl/rollback_impl_test.cpp @@ -374,7 +374,7 @@ DEATH_TEST_F(RollbackImplTest, RollbackTerminatesIfCompletionCallbackThrowsExcep } TEST_F(RollbackImplTest, RollbackReturnsNotSecondaryOnFailingToTransitionToRollback) { - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK; + _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary); ASSERT_OK(_rollback->startup()); _rollback->join(); ASSERT_EQUALS(ErrorCodes::NotSecondary, _onCompletionResult); @@ -400,7 +400,7 @@ DEATH_TEST_F( RollbackTriggersFatalAssertionOnFailingToTransitionFromRollbackToSecondaryDuringTearDownPhase, "Failed to transition into SECONDARY; expected to be in state ROLLBACK; found self in " "ROLLBACK") { - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_SECONDARY; + _coordinator->failSettingFollowerMode(MemberState::RS_SECONDARY, ErrorCodes::IllegalOperation); ASSERT_OK(_rollback->startup()); @@ -458,7 +458,7 @@ TEST_F(RollbackImplTest, RollbackResetsOnCompletionCallbackFunctionPointerUponCo // Completion callback will be invoked on errors after startup() returns successfully. // We cause the the rollback process to error out early by failing to transition to rollback. - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK; + _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary); ASSERT_OK(_rollback->startup()); diff --git a/src/mongo/db/repl/rollback_test_fixture.cpp b/src/mongo/db/repl/rollback_test_fixture.cpp index 5e090d165a0..2608e2df567 100644 --- a/src/mongo/db/repl/rollback_test_fixture.cpp +++ b/src/mongo/db/repl/rollback_test_fixture.cpp @@ -111,10 +111,16 @@ RollbackTest::ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollback void RollbackTest::ReplicationCoordinatorRollbackMock::resetLastOpTimesFromOplog( OperationContext* opCtx) {} +void RollbackTest::ReplicationCoordinatorRollbackMock::failSettingFollowerMode( + const MemberState& transitionToFail, ErrorCodes::Error codeToFailWith) { + _failSetFollowerModeOnThisMemberState = transitionToFail; + _failSetFollowerModeWithThisCode = codeToFailWith; +} + Status RollbackTest::ReplicationCoordinatorRollbackMock::setFollowerMode( const MemberState& newState) { if (newState == _failSetFollowerModeOnThisMemberState) { - return Status(ErrorCodes::IllegalOperation, + return Status(_failSetFollowerModeWithThisCode, str::stream() << "ReplicationCoordinatorRollbackMock set to fail on setting state to " << _failSetFollowerModeOnThisMemberState.toString()); diff --git a/src/mongo/db/repl/rollback_test_fixture.h b/src/mongo/db/repl/rollback_test_fixture.h index a00e7e30971..5a9e2a9221f 100644 --- a/src/mongo/db/repl/rollback_test_fixture.h +++ b/src/mongo/db/repl/rollback_test_fixture.h @@ -114,8 +114,17 @@ public: */ Status setFollowerMode(const MemberState& newState) override; + /** + * Set this to make transitioning to the given follower mode fail with the given error code. + */ + void failSettingFollowerMode(const MemberState& transitionToFail, + ErrorCodes::Error codeToFailWith); + +private: // Override this to make setFollowerMode() fail when called with this state. MemberState _failSetFollowerModeOnThisMemberState = MemberState::RS_UNKNOWN; + + ErrorCodes::Error _failSetFollowerModeWithThisCode = ErrorCodes::InternalError; }; } // namespace repl diff --git a/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp b/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp index f2291537cc2..81ad629302c 100644 --- a/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp +++ b/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp @@ -1205,8 +1205,8 @@ TEST_F(RSRollbackTest, RollbackReturnsImmediatelyOnFailureToTransitionToRollback // Inject ReplicationCoordinator::setFollowerMode() error. We set the current member state // because it will be logged by rollback() on failing to transition to ROLLBACK. - _coordinator->setFollowerMode(MemberState::RS_SECONDARY); - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK; + ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_SECONDARY)); + _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary); startCapturingLogMessages(); rollbackNoUUID(_opCtx.get(), @@ -1286,14 +1286,14 @@ DEATH_TEST_F(RSRollbackTest, DEATH_TEST_F( RSRollbackTest, RollbackTriggersFatalAssertionOnFailingToTransitionToRecoveringAfterSyncRollbackReturns, - "Failed to transition into RECOVERING; expected to be in state ROLLBACK but found self in " + "Failed to transition into RECOVERING; expected to be in state ROLLBACK; found self in " "ROLLBACK") { auto commonOperation = makeNoopOplogEntryAndRecordId(Seconds(1)); OplogInterfaceMock localOplog({commonOperation}); RollbackSourceMock rollbackSource( std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation}))); - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_RECOVERING; + _coordinator->failSettingFollowerMode(MemberState::RS_RECOVERING, ErrorCodes::IllegalOperation); createOplog(_opCtx.get()); rollbackNoUUID( diff --git a/src/mongo/db/repl/rs_rollback_test.cpp b/src/mongo/db/repl/rs_rollback_test.cpp index 83128009bec..c848ebfa00a 100644 --- a/src/mongo/db/repl/rs_rollback_test.cpp +++ b/src/mongo/db/repl/rs_rollback_test.cpp @@ -1259,7 +1259,7 @@ TEST_F(RSRollbackTest, RollbackReturnsImmediatelyOnFailureToTransitionToRollback // Inject ReplicationCoordinator::setFollowerMode() error. We set the current member state // because it will be logged by rollback() on failing to transition to ROLLBACK. ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_SECONDARY)); - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_ROLLBACK; + _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary); startCapturingLogMessages(); rollback(_opCtx.get(), @@ -1345,7 +1345,7 @@ DEATH_TEST_F( RollbackSourceMock rollbackSource( std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation}))); - _coordinator->_failSetFollowerModeOnThisMemberState = MemberState::RS_RECOVERING; + _coordinator->failSettingFollowerMode(MemberState::RS_RECOVERING, ErrorCodes::IllegalOperation); createOplog(_opCtx.get()); rollback(_opCtx.get(), localOplog, rollbackSource, {}, _coordinator, _replicationProcess.get()); |