diff options
author | Eric Milkie <milkie@10gen.com> | 2015-11-12 14:36:14 -0500 |
---|---|---|
committer | Eric Milkie <milkie@10gen.com> | 2015-11-12 16:23:07 -0500 |
commit | 0307dcde4cf50c4d245a6181e426343874950b4d (patch) | |
tree | cc73ef05155a697eb415ec1deca983635f528446 | |
parent | 80f0a56bfb80c17bf6edc8a35d7cab2f0c6016b0 (diff) | |
download | mongo-0307dcde4cf50c4d245a6181e426343874950b4d.tar.gz |
SERVER-21425 do not update term when triggering stepdown
11 files changed, 78 insertions, 42 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 096dae4dbf5..a4dc7172fce 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -3080,14 +3080,15 @@ void ReplicationCoordinatorImpl::_getTerm_helper(const ReplicationExecutor::Call *term = _topCoord->getTerm(); } -EventHandle ReplicationCoordinatorImpl::updateTerm_forTest(long long term, bool* updated) { +EventHandle ReplicationCoordinatorImpl::updateTerm_forTest( + long long term, TopologyCoordinator::UpdateTermResult* updateResult) { auto finishEvhStatus = _replExecutor.makeEvent(); invariantOK(finishEvhStatus.getStatus()); EventHandle finishEvh = finishEvhStatus.getValue(); auto signalFinishEvent = [this, finishEvh](const CallbackArgs&) { this->_replExecutor.signalEvent(finishEvh); }; - auto work = [this, term, updated, signalFinishEvent](const CallbackArgs& args) { - auto evh = _updateTerm_incallback(term, updated); + auto work = [this, term, updateResult, signalFinishEvent](const CallbackArgs& args) { + auto evh = _updateTerm_incallback(term, updateResult); if (evh.isValid()) { _replExecutor.onEvent(evh, signalFinishEvent); } else { @@ -3111,46 +3112,48 @@ Status ReplicationCoordinatorImpl::updateTerm(OperationContext* txn, long long t // Check we haven't acquired any lock, because potential stepdown needs global lock. dassert(!txn->lockState()->isLocked()); - bool updated = false; + TopologyCoordinator::UpdateTermResult updateTermResult; EventHandle finishEvh; - auto work = [this, term, &updated, &finishEvh](const CallbackArgs&) { - finishEvh = _updateTerm_incallback(term, &updated); + auto work = [this, term, &updateTermResult, &finishEvh](const CallbackArgs&) { + finishEvh = _updateTerm_incallback(term, &updateTermResult); }; _scheduleWorkAndWaitForCompletion(work); // Wait for potential stepdown to finish. if (finishEvh.isValid()) { _replExecutor.waitForEvent(finishEvh); } - if (updated) { + if (updateTermResult == TopologyCoordinator::UpdateTermResult::kUpdatedTerm || + updateTermResult == TopologyCoordinator::UpdateTermResult::kTriggerStepDown) { return {ErrorCodes::StaleTerm, "Replication term of this node was stale; retry query"}; } return Status::OK(); } -EventHandle ReplicationCoordinatorImpl::_updateTerm_incallback(long long term, bool* updated) { +EventHandle ReplicationCoordinatorImpl::_updateTerm_incallback( + long long term, TopologyCoordinator::UpdateTermResult* updateTermResult) { if (!isV1ElectionProtocol()) { LOG(3) << "Cannot update term in election protocol version 0"; return EventHandle(); } auto now = _replExecutor.now(); - bool termUpdated = _topCoord->updateTerm(term, now); + TopologyCoordinator::UpdateTermResult localUpdateTermResult = _topCoord->updateTerm(term, now); { stdx::lock_guard<stdx::mutex> lock(_mutex); _cachedTerm = _topCoord->getTerm(); - if (termUpdated) { + if (localUpdateTermResult == TopologyCoordinator::UpdateTermResult::kUpdatedTerm) { _cancelPriorityTakeover_inlock(); _cancelAndRescheduleElectionTimeout_inlock(); } } - if (updated) { - *updated = termUpdated; + if (updateTermResult) { + *updateTermResult = localUpdateTermResult; } - if (termUpdated && getMemberState().primary()) { + if (localUpdateTermResult == TopologyCoordinator::UpdateTermResult::kTriggerStepDown) { log() << "stepping down from primary, because a new term has begun: " << term; _topCoord->prepareForStepDown(); return _stepDownStart(); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 96c6889c4a2..1b16cddfacc 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -43,6 +43,7 @@ #include "mongo/db/repl/replication_coordinator_external_state.h" #include "mongo/db/repl/replication_executor.h" #include "mongo/db/repl/storage_interface.h" +#include "mongo/db/repl/topology_coordinator.h" #include "mongo/db/repl/update_position_args.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/snapshot_name.h" @@ -364,10 +365,12 @@ public: /** * Non-blocking version of updateTerm. * Returns event handle that we can use to wait for the operation to complete. - * When the operation is complete (waitForEvent() returns), 'updated' will be set to true - * if the term increased and potential stepdown has finished. + * When the operation is complete (waitForEvent() returns), 'updateResult' will be set + * to a status telling if the term increased or a stepdown was triggered. + */ - ReplicationExecutor::EventHandle updateTerm_forTest(long long term, bool* updated); + ReplicationExecutor::EventHandle updateTerm_forTest( + long long term, TopologyCoordinator::UpdateTermResult* updateResult); /** * If called after _startElectSelfV1(), blocks until all asynchronous @@ -1043,11 +1046,12 @@ private: /** * Callback that attempts to set the current term in topology coordinator and * relinquishes primary if the term actually changes and we are primary. - * *updated will be true if the term increased. + * *updateTermResult will be the result of the update term attempt. * Returns the finish event if it does not finish in this function, for example, * due to stepdown, otherwise the returned EventHandle is invalid. */ - EventHandle _updateTerm_incallback(long long term, bool* updated = nullptr); + EventHandle _updateTerm_incallback( + long long term, TopologyCoordinator::UpdateTermResult* updateTermResult = nullptr); /** * Callback that processes the ReplSetMetadata returned from a command run against another diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp index d371a1b221f..1ed1f7769b3 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp @@ -181,9 +181,9 @@ void ReplicationCoordinatorImpl::_onDryRunComplete(long long originalTerm) { log() << "dry election run succeeded, running for election"; // Stepdown is impossible from this term update. - bool updated = false; - _updateTerm_incallback(originalTerm + 1, &updated); - invariant(updated); + TopologyCoordinator::UpdateTermResult updateTermResult; + _updateTerm_incallback(originalTerm + 1, &updateTermResult); + invariant(updateTermResult == TopologyCoordinator::UpdateTermResult::kUpdatedTerm); // Secure our vote for ourself first _topCoord->voteForMyselfV1(); diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp index 50c7a43bf70..5422316f747 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp @@ -637,7 +637,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringDryRun) { auto onDryRunRequest = [this](const RemoteCommandRequest& request) { // Update to a future term before dry run completes. ASSERT_EQUALS(0, request.cmdObj.getIntField("candidateIndex")); - ASSERT_TRUE(getTopoCoord().updateTerm(1000, getNet()->now())); + ASSERT(getTopoCoord().updateTerm(1000, getNet()->now()) == + TopologyCoordinator::UpdateTermResult::kUpdatedTerm); }; simulateSuccessfulDryRun(onDryRunRequest); diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 821c891b824..6693573d8c8 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -1285,8 +1285,13 @@ TEST_F(ReplCoordTest, UpdateTerm) { // higher term, step down and change term Handle cbHandle; ASSERT_EQUALS(ErrorCodes::StaleTerm, getReplCoord()->updateTerm(&txn, 2).code()); - ASSERT_EQUALS(2, getReplCoord()->getTerm()); + // Term hasn't been incremented yet, as we need another try to update it after stepdown. + ASSERT_EQUALS(1, getReplCoord()->getTerm()); ASSERT_TRUE(getReplCoord()->getMemberState().secondary()); + + // Now update term should actually update the term, as stepdown is complete. + ASSERT_EQUALS(ErrorCodes::StaleTerm, getReplCoord()->updateTerm(&txn, 2).code()); + ASSERT_EQUALS(2, getReplCoord()->getTerm()); } TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreThanOnce) { @@ -1322,11 +1327,11 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha }; replExec->scheduleWorkWithGlobalExclusiveLock(stepDownFinishBlocker); - bool termUpdated2 = false; + TopologyCoordinator::UpdateTermResult termUpdated2; auto updateTermEvh2 = getReplCoord()->updateTerm_forTest(2, &termUpdated2); ASSERT(updateTermEvh2.isValid()); - bool termUpdated3 = false; + TopologyCoordinator::UpdateTermResult termUpdated3; auto updateTermEvh3 = getReplCoord()->updateTerm_forTest(3, &termUpdated3); ASSERT(updateTermEvh3.isValid()); @@ -1335,14 +1340,22 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha // Both _updateTerm_incallback tasks should be scheduled. replExec->waitForEvent(updateTermEvh2); - ASSERT_TRUE(termUpdated2); + ASSERT(termUpdated2 == TopologyCoordinator::UpdateTermResult::kTriggerStepDown); replExec->waitForEvent(updateTermEvh3); - ASSERT_TRUE(termUpdated3); + ASSERT(termUpdated3 == TopologyCoordinator::UpdateTermResult::kTriggerStepDown); - ASSERT_EQUALS(3, getReplCoord()->getTerm()); + // Term hasn't updated yet. + ASSERT_EQUALS(1, getReplCoord()->getTerm()); // Update term event handles will wait for potential stepdown. ASSERT_TRUE(getReplCoord()->getMemberState().secondary()); + + TopologyCoordinator::UpdateTermResult termUpdated4; + auto updateTermEvh4 = getReplCoord()->updateTerm_forTest(3, &termUpdated4); + ASSERT(updateTermEvh4.isValid()); + replExec->waitForEvent(updateTermEvh4); + ASSERT(termUpdated4 == TopologyCoordinator::UpdateTermResult::kUpdatedTerm); + ASSERT_EQUALS(3, getReplCoord()->getTerm()); } TEST_F(StepDownTest, StepDownNotPrimary) { diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 069c5b474cb..77a8a0eb696 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -112,12 +112,14 @@ public: */ virtual long long getTerm() = 0; + enum class UpdateTermResult { kAlreadyUpToDate, kTriggerStepDown, kUpdatedTerm }; + /** * Sets the latest term this member is aware of to the higher of its current value and * the value passed in as "term". - * Returns true if the local term value is changed. + * Returns the result of setting the term value, or if a stepdown should be triggered. */ - virtual bool updateTerm(long long term, Date_t now) = 0; + virtual UpdateTermResult updateTerm(long long term, Date_t now) = 0; //////////////////////////////////////////////////////////// // diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp index 90e0ee89be8..40e36edc9ba 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl.cpp @@ -2250,14 +2250,21 @@ int TopologyCoordinatorImpl::getMaintenanceCount() const { return _maintenanceModeCalls; } -bool TopologyCoordinatorImpl::updateTerm(long long term, Date_t now) { +TopologyCoordinator::UpdateTermResult TopologyCoordinatorImpl::updateTerm(long long term, + Date_t now) { if (term <= _term) { - return false; + return TopologyCoordinator::UpdateTermResult::kAlreadyUpToDate; } // Don't run election if we just stood up or learned about a new term. _electionSleepUntil = now + _rsConfig.getElectionTimeoutPeriod(); + + // Don't update the term just yet if we are going to step down, as we don't want to report + // that we are primary in the new term. + if (_iAmPrimary()) { + return TopologyCoordinator::UpdateTermResult::kTriggerStepDown; + } _term = term; - return true; + return TopologyCoordinator::UpdateTermResult::kUpdatedTerm; } diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h index 6ad6b0b712a..f3c673ab030 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.h +++ b/src/mongo/db/repl/topology_coordinator_impl.h @@ -149,7 +149,7 @@ public: virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const; virtual int getMaintenanceCount() const; virtual long long getTerm(); - virtual bool updateTerm(long long term, Date_t now); + virtual UpdateTermResult updateTerm(long long term, Date_t now); virtual void setForceSyncSourceIndex(int index); virtual HostAndPort chooseNewSyncSource(Date_t now, const Timestamp& lastTimestampApplied); virtual void blacklistSyncSource(const HostAndPort& host, Date_t until); diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp index b6a0992cd08..49af187894e 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp @@ -4759,7 +4759,8 @@ TEST_F(TopoCoordTest, ProcessRequestVotesBadCommands) { << "rs0" << "term" << 2 << "winnerId" << 30)); long long responseTerm; - ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); + ASSERT(TopologyCoordinator::UpdateTermResult::kUpdatedTerm == + getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm)); ASSERT_EQUALS(2, responseTerm); @@ -4804,7 +4805,8 @@ TEST_F(TopoCoordTest, ProcessRequestVotesBadCommandsDryRun) { 0); setSelfMemberState(MemberState::RS_SECONDARY); // set term to 1 - ASSERT(getTopoCoord().updateTerm(1, now())); + ASSERT(TopologyCoordinator::UpdateTermResult::kUpdatedTerm == + getTopoCoord().updateTerm(1, now())); // and make sure we voted in term 1 ReplSetRequestVotesArgs argsForRealVote; argsForRealVote.initialize(BSON("replSetRequestVotes" @@ -4917,7 +4919,8 @@ TEST_F(TopoCoordTest, ProcessDeclareElectionWinner) { << "rs0" << "term" << 2 << "winnerId" << 30)); long long responseTerm = -1; - ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); + ASSERT(TopologyCoordinator::UpdateTermResult::kUpdatedTerm == + getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm)); ASSERT_EQUALS(2, responseTerm); diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp index 8d19018e1d2..32e258ee2b0 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp @@ -1957,7 +1957,8 @@ TEST_F(TopoCoordTest, ProcessRequestVotesBadCommands) { << "rs0" << "term" << 2 << "winnerId" << 30)); long long responseTerm; - ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); + ASSERT(TopologyCoordinator::UpdateTermResult::kUpdatedTerm == + getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm)); ASSERT_EQUALS(2, responseTerm); @@ -2002,7 +2003,8 @@ TEST_F(TopoCoordTest, ProcessRequestVotesBadCommandsDryRun) { 0); setSelfMemberState(MemberState::RS_SECONDARY); // set term to 1 - ASSERT(getTopoCoord().updateTerm(1, now())); + ASSERT(TopologyCoordinator::UpdateTermResult::kUpdatedTerm == + getTopoCoord().updateTerm(1, now())); // and make sure we voted in term 1 ReplSetRequestVotesArgs argsForRealVote; argsForRealVote.initialize(BSON("replSetRequestVotes" @@ -2115,7 +2117,8 @@ TEST_F(TopoCoordTest, ProcessDeclareElectionWinner) { << "rs0" << "term" << 2 << "winnerId" << 30)); long long responseTerm = -1; - ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); + ASSERT(TopologyCoordinator::UpdateTermResult::kUpdatedTerm == + getTopoCoord().updateTerm(winnerArgs.getTerm(), now())); ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm)); ASSERT_EQUALS(2, responseTerm); diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp index d69daa1f664..d68c683c62a 100644 --- a/src/mongo/s/client/shard_registry.cpp +++ b/src/mongo/s/client/shard_registry.cpp @@ -506,7 +506,7 @@ StatusWith<ShardRegistry::QueryResponse> ShardRegistry::exhaustiveFindOnConfig( for (int retry = 1; retry <= kOnErrorNumRetries; retry++) { auto result = _exhaustiveFindOnConfig(txn, readPref, nss, query, sort, limit); if (result.isOK()) { - return {std::move(result)}; + return result; } if ((ErrorCodes::isNetworkError(result.getStatus().code()) || @@ -680,7 +680,7 @@ StatusWith<ShardRegistry::CommandResponse> ShardRegistry::_runCommandWithRetries auto response = _runCommandWithMetadata( txn, executor, shard, readPref, dbname, cmdWithMaxTimeMS, metadata, errorsToCheck); if (response.isOK()) { - return {std::move(response)}; + return response; } if (errorsToCheck.count(response.getStatus().code()) && retry < kOnErrorNumRetries) { |