diff options
Diffstat (limited to 'src/mongo/db/repl/replication_coordinator_impl.cpp')
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 54 |
1 files changed, 44 insertions, 10 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 0f686fd0e40..8283b8ddfdc 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -2385,6 +2385,8 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( BSONObj electionCandidateMetrics = ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON(); + BSONObj electionParticipantMetrics = + ReplicationMetrics::get(getServiceContext()).getElectionParticipantMetricsBSON(); stdx::lock_guard<stdx::mutex> lk(_mutex); Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse"); @@ -2395,6 +2397,7 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( _getCurrentCommittedSnapshotOpTimeAndWallTime_inlock(), initialSyncProgress, electionCandidateMetrics, + electionParticipantMetrics, _storage->getLastStableCheckpointTimestampDeprecated(_service), _storage->getLastStableRecoveryTimestamp(_service)}, response, @@ -3033,6 +3036,9 @@ void ReplicationCoordinatorImpl::_onFollowerModeStateChange() { void ReplicationCoordinatorImpl::CatchupState::start_inlock() { log() << "Entering primary catch-up mode."; + // Reset the number of catchup operations performed before starting catchup. + _numCatchUpOps = 0; + // No catchup in single node replica set. if (_repl->_rsConfig.getNumMembers() == 1) { abort_inlock(PrimaryCatchUpConclusionReason::kSkipped); @@ -3076,8 +3082,6 @@ void ReplicationCoordinatorImpl::CatchupState::start_inlock() { return; } _timeoutCbh = status.getValue(); - - _numCatchUpOps = 0; } void ReplicationCoordinatorImpl::CatchupState::abort_inlock(PrimaryCatchUpConclusionReason reason) { @@ -3160,7 +3164,7 @@ void ReplicationCoordinatorImpl::CatchupState::signalHeartbeatUpdate_inlock() { _repl->_opTimeWaiterList.add_inlock(_waiter.get()); } -void ReplicationCoordinatorImpl::CatchupState::incrementNumCatchUpOps_inlock(int numOps) { +void ReplicationCoordinatorImpl::CatchupState::incrementNumCatchUpOps_inlock(long numOps) { _numCatchUpOps += numOps; } @@ -3173,7 +3177,7 @@ Status ReplicationCoordinatorImpl::abortCatchupIfNeeded(PrimaryCatchUpConclusion return Status(ErrorCodes::IllegalOperation, "The node is not in catch-up mode."); } -void ReplicationCoordinatorImpl::incrementNumCatchUpOpsIfCatchingUp(int numOps) { +void ReplicationCoordinatorImpl::incrementNumCatchUpOpsIfCatchingUp(long numOps) { stdx::lock_guard<stdx::mutex> lk(_mutex); if (_catchupState) { _catchupState->incrementNumCatchUpOps_inlock(numOps); @@ -3751,14 +3755,40 @@ Status ReplicationCoordinatorImpl::processReplSetRequestVotes( _topCoord->processReplSetRequestVotes(args, response); } - if (!args.isADryRun() && response->getVoteGranted()) { - LastVote lastVote{args.getTerm(), args.getCandidateIndex()}; + if (!args.isADryRun()) { + const int candidateIndex = args.getCandidateIndex(); + LastVote lastVote{args.getTerm(), candidateIndex}; - Status status = _externalState->storeLocalLastVoteDocument(opCtx, lastVote); - if (!status.isOK()) { - error() << "replSetRequestVotes failed to store LastVote document; " << status; - return status; + const bool votedForCandidate = response->getVoteGranted(); + + if (votedForCandidate) { + Status status = _externalState->storeLocalLastVoteDocument(opCtx, lastVote); + if (!status.isOK()) { + error() << "replSetRequestVotes failed to store LastVote document; " << status; + return status; + } } + + // If the vote was not granted to the candidate, we still want to track metrics around the + // node's participation in the election. + const long long electionTerm = args.getTerm(); + const Date_t lastVoteDate = _replExecutor->now(); + const int electionCandidateMemberId = + _rsConfig.getMemberAt(candidateIndex).getId().getData(); + const std::string voteReason = response->getReason(); + const OpTime lastAppliedOpTime = _topCoord->getMyLastAppliedOpTime(); + const OpTime maxAppliedOpTime = _topCoord->latestKnownOpTime(); + const double priorityAtElection = _rsConfig.getMemberAt(_selfIndex).getPriority(); + + ReplicationMetrics::get(getServiceContext()) + .setElectionParticipantMetrics(votedForCandidate, + electionTerm, + lastVoteDate, + electionCandidateMemberId, + voteReason, + lastAppliedOpTime, + maxAppliedOpTime, + priorityAtElection); } return Status::OK(); } @@ -3898,6 +3928,10 @@ EventHandle ReplicationCoordinatorImpl::_updateTerm_inlock( auto now = _replExecutor->now(); TopologyCoordinator::UpdateTermResult localUpdateTermResult = _topCoord->updateTerm(term, now); if (localUpdateTermResult == TopologyCoordinator::UpdateTermResult::kUpdatedTerm) { + // When the node discovers a new term, the new term date metrics are now out-of-date, so we + // clear them. + ReplicationMetrics::get(getServiceContext()).clearParticipantNewTermDates(); + _termShadow.store(term); _cancelPriorityTakeover_inlock(); _cancelAndRescheduleElectionTimeout_inlock(); |