diff options
author | Samyukta Lanka <samy.lanka@mongodb.com> | 2019-10-16 20:54:38 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-10-16 20:54:38 +0000 |
commit | 9a9b82e95a88c5ce25c958690c2d3365bc62bacc (patch) | |
tree | b7d6ce480dccf68933eaba2340074ba2cf7bbceb | |
parent | c9349a22f68fac52f6056fb08ea3ce0993dd8cbe (diff) | |
download | mongo-9a9b82e95a88c5ce25c958690c2d3365bc62bacc.tar.gz |
SERVER-43239 Fixed bug causing numCatchUpOps in replSetGetStatus to be incorrect
(cherry picked from commit 71e4779b0da9e8d58dbb179c49b1a86c5e48c93d)
SERVER-41512 Added tracking for metrics around a node voting in an election
(cherry picked from commit 7538504cb584720c2cbbc6d44ea62d0743b41fcf)
SERVER-41513 Track the time the new term oplog entry was written by primary and applied in secondary
(cherry picked from commit efde009845f32d8de2d094088628e67608bfa419)
22 files changed, 437 insertions, 76 deletions
diff --git a/jstests/replsets/election_candidate_and_participant_metrics.js b/jstests/replsets/election_candidate_and_participant_metrics.js index 7e19f1a06f1..029db2fc883 100644 --- a/jstests/replsets/election_candidate_and_participant_metrics.js +++ b/jstests/replsets/election_candidate_and_participant_metrics.js @@ -1,17 +1,18 @@ /** - * This test checks that the metrics around election candidates and voters are set correctly. + * This test checks that the metrics around election candidates and participants are set and updated + * correctly. We test this with a two node replica set by forcing multiple election handoffs and + * checking the 'electionCandidateMetrics' and 'electionParticipantMetrics' fields of replSetStatus + * after each handoff. */ (function() { "use strict"; -load("jstests/libs/check_log.js"); -load("jstests/replsets/libs/election_metrics.js"); + load("jstests/replsets/libs/election_handoff.js"); const testName = jsTestName(); const numNodes = 2; const rst = ReplSetTest({name: testName, nodes: numNodes}); -const nodes = rst.nodeList(); rst.startSet(); // Make sure there are no election timeouts firing for the duration of the test. 
This helps @@ -35,10 +36,10 @@ assert.eq(originalPrimaryElectionCandidateMetrics.lastElectionReason, "electionT assert(originalPrimaryElectionCandidateMetrics.lastElectionDate, () => "Response should have an 'electionCandidateMetrics.lastElectionDate' field: " + tojson(originalPrimaryElectionCandidateMetrics)); -assert(originalPrimaryElectionCandidateMetrics.termAtElection, - () => "Response should have an 'electionCandidateMetrics.termAtElection' field: " + +assert(originalPrimaryElectionCandidateMetrics.electionTerm, + () => "Response should have an 'electionCandidateMetrics.electionTerm' field: " + tojson(originalPrimaryElectionCandidateMetrics)); -assert.eq(originalPrimaryElectionCandidateMetrics.termAtElection, 1); +assert.eq(originalPrimaryElectionCandidateMetrics.electionTerm, 1); assert( originalPrimaryElectionCandidateMetrics.lastCommittedOpTimeAtElection, () => @@ -81,10 +82,10 @@ assert.eq(newPrimaryElectionCandidateMetrics.lastElectionReason, "stepUpRequestS assert(newPrimaryElectionCandidateMetrics.lastElectionDate, () => "Response should have an 'electionCandidateMetrics.lastElectionDate' field: " + tojson(newPrimaryElectionCandidateMetrics)); -assert(newPrimaryElectionCandidateMetrics.termAtElection, - () => "Response should have an 'electionCandidateMetrics.termAtElection' field: " + +assert(newPrimaryElectionCandidateMetrics.electionTerm, + () => "Response should have an 'electionCandidateMetrics.electionTerm' field: " + tojson(newPrimaryElectionCandidateMetrics)); -assert.eq(newPrimaryElectionCandidateMetrics.termAtElection, 2); +assert.eq(newPrimaryElectionCandidateMetrics.electionTerm, 2); assert( newPrimaryElectionCandidateMetrics.lastCommittedOpTimeAtElection, () => @@ -109,8 +110,65 @@ assert.eq(newPrimaryElectionCandidateMetrics.electionTimeoutMillis, expectedElec // priorPrimaryMemberId field. 
assert.eq(newPrimaryElectionCandidateMetrics.priorPrimaryMemberId, 0); +let newPrimaryElectionParticipantMetrics = newPrimaryReplSetGetStatus.electionParticipantMetrics; + +// The new primary should not have its 'electionParticipantMetrics' field set, since it was the +// candidate in this election and did not vote for any other node. +assert(!newPrimaryElectionParticipantMetrics, + () => "Response should not have an 'electionParticipantMetric' field: " + + tojson(newPrimaryReplSetGetStatus)); + +originalPrimaryReplSetGetStatus = + assert.commandWorked(originalPrimary.adminCommand({replSetGetStatus: 1})); +let originalPrimaryElectionParticipantMetrics = + originalPrimaryReplSetGetStatus.electionParticipantMetrics; + +// Check that the 'electionParticipantMetrics' section of the replSetGetStatus response for the +// original primary has all of the required fields and that they are set correctly. +assert(originalPrimaryElectionParticipantMetrics.votedForCandidate, + () => "Response should have an 'electionParticipantMetrics.votedForCandidate' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert.eq(originalPrimaryElectionParticipantMetrics.votedForCandidate, true); +assert(originalPrimaryElectionParticipantMetrics.electionTerm, + () => "Response should have an 'electionParticipantMetrics.electionTerm' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert.eq(originalPrimaryElectionParticipantMetrics.electionTerm, 2); +assert(originalPrimaryElectionParticipantMetrics.lastVoteDate, + () => "Response should have an 'electionParticipantMetrics.lastVoteDate' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert( + originalPrimaryElectionParticipantMetrics.electionCandidateMemberId, + () => "Response should have an 'electionParticipantMetrics.electionCandidateMemberId' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert.eq(originalPrimaryElectionParticipantMetrics.electionCandidateMemberId, 1); 
+// Since the node voted for the new primary, we directly assert that its voteReason is equal to +// empty string. +assert.eq(originalPrimaryElectionParticipantMetrics.voteReason, ""); +assert( + originalPrimaryElectionParticipantMetrics.lastAppliedOpTimeAtElection, + () => + "Response should have an 'electionParticipantMetrics.lastAppliedOpTimeAtElection' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert(originalPrimaryElectionParticipantMetrics.maxAppliedOpTimeInSet, + () => "Response should have an 'electionParticipantMetrics.maxAppliedOpTimeInSet' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert(originalPrimaryElectionParticipantMetrics.priorityAtElection, + () => "Response should have an 'electionParticipantMetrics.priorityAtElection' field: " + + tojson(originalPrimaryElectionParticipantMetrics)); +assert.eq(originalPrimaryElectionParticipantMetrics.priorityAtElection, 1); + +originalPrimaryElectionCandidateMetrics = originalPrimaryReplSetGetStatus.electionCandidateMetrics; + +// The original primary should not have its 'electionCandidateMetrics' field set, since it was not a +// candidate in this election. +assert(!originalPrimaryElectionCandidateMetrics, + () => "Response should not have an 'electionCandidateMetrics' field: " + + tojson(originalPrimaryCandidateMetrics)); + +// testElectionHandoff steps down the primary with a non-zero step down period, so we need to +// unfreeze the node to allow it to initiate an election again. +assert.commandWorked(originalPrimary.adminCommand({replSetFreeze: 0})); // Step up the original primary. -sleep(ElectionHandoffTest.stepDownPeriodSecs * 1000); ElectionHandoffTest.testElectionHandoff(rst, 1, 0); originalPrimaryReplSetGetStatus = @@ -119,7 +177,7 @@ originalPrimaryElectionCandidateMetrics = originalPrimaryReplSetGetStatus.electi // Check that the original primary's metrics are also being set properly after the second election. 
assert.eq(originalPrimaryElectionCandidateMetrics.lastElectionReason, "stepUpRequestSkipDryRun"); -assert.eq(originalPrimaryElectionCandidateMetrics.termAtElection, 3); +assert.eq(originalPrimaryElectionCandidateMetrics.electionTerm, 3); assert.eq(originalPrimaryElectionCandidateMetrics.numVotesNeeded, 2); assert.eq(originalPrimaryElectionCandidateMetrics.priorityAtElection, 1); assert.eq(originalPrimaryElectionCandidateMetrics.electionTimeoutMillis, @@ -128,11 +186,45 @@ assert.eq(originalPrimaryElectionCandidateMetrics.priorPrimaryMemberId, 1); newPrimaryReplSetGetStatus = assert.commandWorked(newPrimary.adminCommand({replSetGetStatus: 1})); newPrimaryElectionCandidateMetrics = newPrimaryReplSetGetStatus.electionCandidateMetrics; +newPrimaryElectionParticipantMetrics = newPrimaryReplSetGetStatus.electionParticipantMetrics; // The other node should not have an electionCandidateMetrics, as it just stepped down. assert(!newPrimaryElectionCandidateMetrics, () => "Response should not have an 'electionCandidateMetrics' field: " + tojson(newPrimaryReplSetGetStatus)); +// Check that the primary that just stepped down has its 'electionParticipantMetrics' field set +// correctly. +assert.eq(newPrimaryElectionParticipantMetrics.votedForCandidate, true); +assert.eq(newPrimaryElectionParticipantMetrics.electionTerm, 3); +assert.eq(newPrimaryElectionParticipantMetrics.electionCandidateMemberId, 0); +assert.eq(newPrimaryElectionParticipantMetrics.voteReason, ""); +assert.eq(newPrimaryElectionParticipantMetrics.priorityAtElection, 1); + +// Since the election participant metrics are only set in the real election, set up a failpoint that +// tells a voting node to vote yes in the dry run election and no in the real election. +assert.commandWorked(originalPrimary.adminCommand( + {configureFailPoint: "voteYesInDryRunButNoInRealElection", mode: "alwaysOn"})); + +// Attempt to step up the new primary a second time. 
Due to the failpoint, the current primary +// should vote no, and as a result the election should fail. +assert.commandWorked(newPrimary.adminCommand({replSetFreeze: 0})); +assert.commandFailedWithCode(newPrimary.adminCommand({replSetStepUp: 1}), ErrorCodes.CommandFailed); + +originalPrimaryReplSetGetStatus = + assert.commandWorked(originalPrimary.adminCommand({replSetGetStatus: 1})); +originalPrimaryElectionParticipantMetrics = + originalPrimaryReplSetGetStatus.electionParticipantMetrics; + +// Check that the metrics in 'electionParticipantMetrics' were updated for the original primary +// after the second election that it participated in. +assert.eq(originalPrimaryElectionParticipantMetrics.votedForCandidate, false); +assert.eq(originalPrimaryElectionParticipantMetrics.electionTerm, 4); +assert.eq(originalPrimaryElectionParticipantMetrics.electionCandidateMemberId, 1); +assert.eq( + originalPrimaryElectionParticipantMetrics.voteReason, + "forced to vote no in real election due to failpoint voteYesInDryRunButNoInRealElection set"); +assert.eq(originalPrimaryElectionParticipantMetrics.priorityAtElection, 1); + rst.stopSet(); })();
\ No newline at end of file diff --git a/jstests/replsets/election_participant_new_term_metrics.js b/jstests/replsets/election_participant_new_term_metrics.js new file mode 100644 index 00000000000..aa76bdf4546 --- /dev/null +++ b/jstests/replsets/election_participant_new_term_metrics.js @@ -0,0 +1,109 @@ +/** + * This test checks that the 'newTermStartDate' and 'newTermAppliedDate' metrics in + * 'electionParticipantMetrics' are set and updated correctly. We test this with a three node + * replica set by successfully stepping up one of the secondaries, then failing to step up the + * original primary. We check that the metrics are appropriately set or unset after each election. + */ + +(function() { +"use strict"; + +const testName = jsTestName(); +const rst = ReplSetTest({name: testName, nodes: [{}, {}, {rsConfig: {priority: 0}}]}); +rst.startSet(); + +// Make sure there are no election timeouts firing for the duration of the test. This helps +// ensure that the test will only pass if the election succeeds. +rst.initiateWithHighElectionTimeout(); + +const originalPrimary = rst.getPrimary(); +const newPrimary = rst.getSecondaries()[0]; +const testNode = rst.getSecondaries()[1]; + +// Set up a failpoint that forces the original primary to vote no in this election. This guarantees +// that 'testNode' will be a participant in this election, since its vote will be needed for the new +// primary to win. +assert.commandWorked( + originalPrimary.adminCommand({configureFailPoint: "voteNoInElection", mode: "alwaysOn"})); + +// Step up the new primary. +assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1})); +rst.awaitNodesAgreeOnPrimary(); +assert.eq(newPrimary, rst.getPrimary()); + +// Since the new term oplog entry needs to be replicated onto testNode for the metrics to be set, we +// must await replication before checking the metrics. 
+rst.awaitReplication(); + +let testNodeReplSetGetStatus = assert.commandWorked(testNode.adminCommand({replSetGetStatus: 1})); +let testNodeElectionParticipantMetrics = testNodeReplSetGetStatus.electionParticipantMetrics; + +const originalNewTermStartDate = testNodeElectionParticipantMetrics.newTermStartDate; +const originalNewTermAppliedDate = testNodeElectionParticipantMetrics.newTermAppliedDate; + +// These fields should be set, since testNode has received and applied the new term oplog entry +// after the election. +assert(originalNewTermStartDate, + () => "Response should have an 'electionParticipantMetrics.newTermStartDate' field: " + + tojson(testNodeElectionParticipantMetrics)); +assert(originalNewTermAppliedDate, + () => "Response should have an 'electionParticipantMetrics.newTermAppliedDate' field: " + + tojson(testNodeElectionParticipantMetrics)); + +// Set up a failpoint that forces newPrimary and testNode to vote no in the election, in addition to +// the original primary above. This will cause the dry run to fail for the original primary. +assert.commandWorked( + newPrimary.adminCommand({configureFailPoint: "voteNoInElection", mode: "alwaysOn"})); +assert.commandWorked( + testNode.adminCommand({configureFailPoint: "voteNoInElection", mode: "alwaysOn"})); + +// Attempt to step up the original primary. +assert.commandFailedWithCode(originalPrimary.adminCommand({replSetStepUp: 1}), + ErrorCodes.CommandFailed); + +testNodeReplSetGetStatus = assert.commandWorked(testNode.adminCommand({replSetGetStatus: 1})); +testNodeElectionParticipantMetrics = testNodeReplSetGetStatus.electionParticipantMetrics; + +// The 'newTermStartDate' and 'newTermAppliedDate' fields should not be cleared, since the term is +// not incremented when a dry run election is initiated. 
+assert(testNodeElectionParticipantMetrics.newTermStartDate, + () => "Response should have an 'electionParticipantMetrics.newTermStartDate' field: " + + tojson(testNodeElectionParticipantMetrics)); +assert(testNodeElectionParticipantMetrics.newTermAppliedDate, + () => "Response should have an 'electionParticipantMetrics.newTermAppliedDate' field: " + + tojson(testNodeElectionParticipantMetrics)); + +// The fields should store the same dates, since a new term oplog entry was not received. +assert.eq(originalNewTermStartDate, testNodeElectionParticipantMetrics.newTermStartDate); +assert.eq(originalNewTermAppliedDate, testNodeElectionParticipantMetrics.newTermAppliedDate); + +// Clear the previous failpoint and set up a new one that forces the two current secondaries to vote +// yes for the candidate in the dry run election and no in the real election. This will cause the +// real election to fail. +assert.commandWorked( + newPrimary.adminCommand({configureFailPoint: "voteNoInElection", mode: "off"})); +assert.commandWorked(newPrimary.adminCommand( + {configureFailPoint: "voteYesInDryRunButNoInRealElection", mode: "alwaysOn"})); +assert.commandWorked(testNode.adminCommand({configureFailPoint: "voteNoInElection", mode: "off"})); +assert.commandWorked(testNode.adminCommand( + {configureFailPoint: "voteYesInDryRunButNoInRealElection", mode: "alwaysOn"})); + +// Attempt to step up the original primary. +assert.commandFailedWithCode(originalPrimary.adminCommand({replSetStepUp: 1}), + ErrorCodes.CommandFailed); + +testNodeReplSetGetStatus = assert.commandWorked(testNode.adminCommand({replSetGetStatus: 1})); +testNodeElectionParticipantMetrics = testNodeReplSetGetStatus.electionParticipantMetrics; + +// Since the election succeeded in the dry run, a new term was encountered by the secondary. +// However, because the real election failed, there was no new term oplog entry, so these fields +// should not be set. 
+assert(!testNodeElectionParticipantMetrics.newTermStartDate, + () => "Response should not have an 'electionParticipantMetrics.newTermStartDate' field: " + + tojson(testNodeElectionParticipantMetrics)); +assert(!testNodeElectionParticipantMetrics.newTermAppliedDate, + () => "Response should not have an 'electionParticipantMetrics.newTermAppliedDate' field: " + + tojson(testNodeElectionParticipantMetrics)); + +rst.stopSet(); +})();
\ No newline at end of file diff --git a/src/mongo/db/pipeline/document_source_change_stream_test.cpp b/src/mongo/db/pipeline/document_source_change_stream_test.cpp index 501045ceadf..eb2583f705b 100644 --- a/src/mongo/db/pipeline/document_source_change_stream_test.cpp +++ b/src/mongo/db/pipeline/document_source_change_stream_test.cpp @@ -1423,8 +1423,8 @@ TEST_F(ChangeStreamStageTest, MatchFiltersCreateCollection) { TEST_F(ChangeStreamStageTest, MatchFiltersNoOp) { auto noOp = makeOplogEntry(OpTypeEnum::kNoop, // op type {}, // namespace - BSON("msg" - << "new primary")); // o + BSON(repl::ReplicationCoordinator::newPrimaryMsgField + << repl::ReplicationCoordinator::newPrimaryMsg)); // o checkTransformation(noOp, boost::none); } @@ -2076,8 +2076,8 @@ TEST_F(ChangeStreamStageDBTest, RenameFromUserToSystemCollectionShouldIncludeNot TEST_F(ChangeStreamStageDBTest, MatchFiltersNoOp) { OplogEntry noOp = makeOplogEntry(OpTypeEnum::kNoop, NamespaceString(), - BSON("msg" - << "new primary")); + BSON(repl::ReplicationCoordinator::newPrimaryMsgField + << repl::ReplicationCoordinator::newPrimaryMsg)); checkTransformation(noOp, boost::none); } diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index cf9596327b4..3cd1dbbd937 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -719,6 +719,7 @@ env.Library( 'repl_coordinator_interface', 'repl_settings', 'storage_interface', + 'replication_metrics', ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/commands/mongod_fsync', diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 5c56aac38c5..433249a086f 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -888,7 +888,7 @@ public: * Increment the counter for the number of ops applied during catchup if the node is in catchup * mode. 
*/ - virtual void incrementNumCatchUpOpsIfCatchingUp(int numOps) = 0; + virtual void incrementNumCatchUpOpsIfCatchingUp(long numOps) = 0; /** * Signals that drop pending collections have been removed from storage. @@ -919,6 +919,16 @@ public: virtual void attemptToAdvanceStableTimestamp() = 0; + /** + * Field name of the newPrimaryMsg within the 'o' field in the new term oplog entry. + */ + inline static constexpr StringData newPrimaryMsgField = "msg"_sd; + + /** + * Message string passed in the new term oplog entry after a primary has stepped up. + */ + inline static constexpr StringData newPrimaryMsg = "new primary"_sd; + protected: ReplicationCoordinator(); }; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index bcaf5d94b72..54cf29caf20 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -481,8 +481,8 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC WriteUnitOfWork wuow(opCtx); opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage( opCtx, - BSON("msg" - << "new primary")); + BSON(ReplicationCoordinator::newPrimaryMsgField + << ReplicationCoordinator::newPrimaryMsg)); wuow.commit(); }); const auto loadLastOpTimeAndWallTimeResult = loadLastOpTimeAndWallTime(opCtx); @@ -490,7 +490,7 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC auto opTimeToReturn = loadLastOpTimeAndWallTimeResult.getValue().opTime; auto newTermStartDate = loadLastOpTimeAndWallTimeResult.getValue().wallTime; - ReplicationMetrics::get(opCtx).setNewTermStartDate(newTermStartDate); + ReplicationMetrics::get(opCtx).setCandidateNewTermStartDate(newTermStartDate); auto replCoord = ReplicationCoordinator::get(opCtx); replCoord->createWMajorityWriteAvailabilityDateWaiter(opTimeToReturn); diff --git 
a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 0f686fd0e40..8283b8ddfdc 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -2385,6 +2385,8 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( BSONObj electionCandidateMetrics = ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON(); + BSONObj electionParticipantMetrics = + ReplicationMetrics::get(getServiceContext()).getElectionParticipantMetricsBSON(); stdx::lock_guard<stdx::mutex> lk(_mutex); Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse"); @@ -2395,6 +2397,7 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( _getCurrentCommittedSnapshotOpTimeAndWallTime_inlock(), initialSyncProgress, electionCandidateMetrics, + electionParticipantMetrics, _storage->getLastStableCheckpointTimestampDeprecated(_service), _storage->getLastStableRecoveryTimestamp(_service)}, response, @@ -3033,6 +3036,9 @@ void ReplicationCoordinatorImpl::_onFollowerModeStateChange() { void ReplicationCoordinatorImpl::CatchupState::start_inlock() { log() << "Entering primary catch-up mode."; + // Reset the number of catchup operations performed before starting catchup. + _numCatchUpOps = 0; + // No catchup in single node replica set. 
if (_repl->_rsConfig.getNumMembers() == 1) { abort_inlock(PrimaryCatchUpConclusionReason::kSkipped); @@ -3076,8 +3082,6 @@ void ReplicationCoordinatorImpl::CatchupState::start_inlock() { return; } _timeoutCbh = status.getValue(); - - _numCatchUpOps = 0; } void ReplicationCoordinatorImpl::CatchupState::abort_inlock(PrimaryCatchUpConclusionReason reason) { @@ -3160,7 +3164,7 @@ void ReplicationCoordinatorImpl::CatchupState::signalHeartbeatUpdate_inlock() { _repl->_opTimeWaiterList.add_inlock(_waiter.get()); } -void ReplicationCoordinatorImpl::CatchupState::incrementNumCatchUpOps_inlock(int numOps) { +void ReplicationCoordinatorImpl::CatchupState::incrementNumCatchUpOps_inlock(long numOps) { _numCatchUpOps += numOps; } @@ -3173,7 +3177,7 @@ Status ReplicationCoordinatorImpl::abortCatchupIfNeeded(PrimaryCatchUpConclusion return Status(ErrorCodes::IllegalOperation, "The node is not in catch-up mode."); } -void ReplicationCoordinatorImpl::incrementNumCatchUpOpsIfCatchingUp(int numOps) { +void ReplicationCoordinatorImpl::incrementNumCatchUpOpsIfCatchingUp(long numOps) { stdx::lock_guard<stdx::mutex> lk(_mutex); if (_catchupState) { _catchupState->incrementNumCatchUpOps_inlock(numOps); @@ -3751,14 +3755,40 @@ Status ReplicationCoordinatorImpl::processReplSetRequestVotes( _topCoord->processReplSetRequestVotes(args, response); } - if (!args.isADryRun() && response->getVoteGranted()) { - LastVote lastVote{args.getTerm(), args.getCandidateIndex()}; + if (!args.isADryRun()) { + const int candidateIndex = args.getCandidateIndex(); + LastVote lastVote{args.getTerm(), candidateIndex}; - Status status = _externalState->storeLocalLastVoteDocument(opCtx, lastVote); - if (!status.isOK()) { - error() << "replSetRequestVotes failed to store LastVote document; " << status; - return status; + const bool votedForCandidate = response->getVoteGranted(); + + if (votedForCandidate) { + Status status = _externalState->storeLocalLastVoteDocument(opCtx, lastVote); + if (!status.isOK()) { + 
error() << "replSetRequestVotes failed to store LastVote document; " << status; + return status; + } } + + // If the vote was not granted to the candidate, we still want to track metrics around the + // node's participation in the election. + const long long electionTerm = args.getTerm(); + const Date_t lastVoteDate = _replExecutor->now(); + const int electionCandidateMemberId = + _rsConfig.getMemberAt(candidateIndex).getId().getData(); + const std::string voteReason = response->getReason(); + const OpTime lastAppliedOpTime = _topCoord->getMyLastAppliedOpTime(); + const OpTime maxAppliedOpTime = _topCoord->latestKnownOpTime(); + const double priorityAtElection = _rsConfig.getMemberAt(_selfIndex).getPriority(); + + ReplicationMetrics::get(getServiceContext()) + .setElectionParticipantMetrics(votedForCandidate, + electionTerm, + lastVoteDate, + electionCandidateMemberId, + voteReason, + lastAppliedOpTime, + maxAppliedOpTime, + priorityAtElection); } return Status::OK(); } @@ -3898,6 +3928,10 @@ EventHandle ReplicationCoordinatorImpl::_updateTerm_inlock( auto now = _replExecutor->now(); TopologyCoordinator::UpdateTermResult localUpdateTermResult = _topCoord->updateTerm(term, now); if (localUpdateTermResult == TopologyCoordinator::UpdateTermResult::kUpdatedTerm) { + // When the node discovers a new term, the new term date metrics are now out-of-date, so we + // clear them. 
+ ReplicationMetrics::get(getServiceContext()).clearParticipantNewTermDates(); + _termShadow.store(term); _cancelPriorityTakeover_inlock(); _cancelAndRescheduleElectionTimeout_inlock(); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index ad0273da3b2..503186a1e8e 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -325,7 +325,7 @@ public: virtual Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) override; - virtual void incrementNumCatchUpOpsIfCatchingUp(int numOps) override; + virtual void incrementNumCatchUpOpsIfCatchingUp(long numOps) override; void signalDropPendingCollectionsRemovedFromStorage() final; @@ -677,7 +677,7 @@ private: // Heartbeat calls this function to update the target optime. void signalHeartbeatUpdate_inlock(); // Increment the counter for the number of ops applied during catchup. - void incrementNumCatchUpOps_inlock(int numOps); + void incrementNumCatchUpOps_inlock(long numOps); private: ReplicationCoordinatorImpl* _repl; // Not owned. @@ -687,7 +687,7 @@ private: // we can exit catchup mode. std::unique_ptr<CallbackWaiter> _waiter; // Counter for the number of ops applied during catchup. - int _numCatchUpOps; + long _numCatchUpOps = 0; }; // Inner class to manage the concurrency of _canAcceptNonLocalWrites and _canServeNonLocalReads. 
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 6f95f6a5fd5..be960f2b5b8 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -537,7 +537,7 @@ Status ReplicationCoordinatorMock::abortCatchupIfNeeded(PrimaryCatchUpConclusion return Status::OK(); } -void ReplicationCoordinatorMock::incrementNumCatchUpOpsIfCatchingUp(int numOps) { +void ReplicationCoordinatorMock::incrementNumCatchUpOpsIfCatchingUp(long numOps) { return; } diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index f3cc223981e..8ea9a9ddd8e 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -306,7 +306,7 @@ public: virtual Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) override; - virtual void incrementNumCatchUpOpsIfCatchingUp(int numOps) override; + virtual void incrementNumCatchUpOpsIfCatchingUp(long numOps) override; void signalDropPendingCollectionsRemovedFromStorage() final; diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp index ee000942302..a506da67996 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.cpp +++ b/src/mongo/db/repl/replication_coordinator_noop.cpp @@ -334,7 +334,7 @@ Status ReplicationCoordinatorNoOp::abortCatchupIfNeeded(PrimaryCatchUpConclusion MONGO_UNREACHABLE; } -void ReplicationCoordinatorNoOp::incrementNumCatchUpOpsIfCatchingUp(int numOps) { +void ReplicationCoordinatorNoOp::incrementNumCatchUpOpsIfCatchingUp(long numOps) { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h index 412057ab92a..e6b1b3ecd43 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.h +++ b/src/mongo/db/repl/replication_coordinator_noop.h @@ -245,7 
+245,7 @@ public: Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) final; - void incrementNumCatchUpOpsIfCatchingUp(int numOps) final; + void incrementNumCatchUpOpsIfCatchingUp(long numOps) final; void signalDropPendingCollectionsRemovedFromStorage() final; diff --git a/src/mongo/db/repl/replication_metrics.cpp b/src/mongo/db/repl/replication_metrics.cpp index 0794c43fd0c..e7d55c50660 100644 --- a/src/mongo/db/repl/replication_metrics.cpp +++ b/src/mongo/db/repl/replication_metrics.cpp @@ -264,7 +264,7 @@ long ReplicationMetrics::getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_f void ReplicationMetrics::setElectionCandidateMetrics( const StartElectionReasonEnum reason, const Date_t lastElectionDate, - const long long termAtElection, + const long long electionTerm, const OpTime lastCommittedOpTime, const OpTime lastSeenOpTime, const int numVotesNeeded, @@ -277,7 +277,7 @@ void ReplicationMetrics::setElectionCandidateMetrics( _nodeIsCandidateOrPrimary = true; _electionCandidateMetrics.setLastElectionReason(reason); _electionCandidateMetrics.setLastElectionDate(lastElectionDate); - _electionCandidateMetrics.setTermAtElection(termAtElection); + _electionCandidateMetrics.setElectionTerm(electionTerm); _electionCandidateMetrics.setLastCommittedOpTimeAtElection(lastCommittedOpTime); _electionCandidateMetrics.setLastSeenOpTimeAtElection(lastSeenOpTime); _electionCandidateMetrics.setNumVotesNeeded(numVotesNeeded); @@ -292,14 +292,15 @@ void ReplicationMetrics::setTargetCatchupOpTime(OpTime opTime) { _electionCandidateMetrics.setTargetCatchupOpTime(opTime); } -void ReplicationMetrics::setNumCatchUpOps(int numCatchUpOps) { +void ReplicationMetrics::setNumCatchUpOps(long numCatchUpOps) { stdx::lock_guard<stdx::mutex> lk(_mutex); + invariant(numCatchUpOps >= 0); _electionCandidateMetrics.setNumCatchUpOps(numCatchUpOps); _totalNumCatchUpOps += numCatchUpOps; _updateAverageCatchUpOps(lk); } -void ReplicationMetrics::setNewTermStartDate(Date_t 
newTermStartDate) { +void ReplicationMetrics::setCandidateNewTermStartDate(Date_t newTermStartDate) { stdx::lock_guard<stdx::mutex> lk(_mutex); _electionCandidateMetrics.setNewTermStartDate(newTermStartDate); } @@ -336,6 +337,48 @@ void ReplicationMetrics::clearElectionCandidateMetrics() { _nodeIsCandidateOrPrimary = false; } +void ReplicationMetrics::setElectionParticipantMetrics(const bool votedForCandidate, + const long long electionTerm, + const Date_t lastVoteDate, + const int electionCandidateMemberId, + const std::string voteReason, + const OpTime lastAppliedOpTime, + const OpTime maxAppliedOpTimeInSet, + const double priorityAtElection) { + stdx::lock_guard<stdx::mutex> lk(_mutex); + + _nodeHasVotedInElection = true; + _electionParticipantMetrics.setVotedForCandidate(votedForCandidate); + _electionParticipantMetrics.setElectionTerm(electionTerm); + _electionParticipantMetrics.setLastVoteDate(lastVoteDate); + _electionParticipantMetrics.setElectionCandidateMemberId(electionCandidateMemberId); + _electionParticipantMetrics.setVoteReason(voteReason); + _electionParticipantMetrics.setLastAppliedOpTimeAtElection(lastAppliedOpTime); + _electionParticipantMetrics.setMaxAppliedOpTimeInSet(maxAppliedOpTimeInSet); + _electionParticipantMetrics.setPriorityAtElection(priorityAtElection); +} + +BSONObj ReplicationMetrics::getElectionParticipantMetricsBSON() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + if (_nodeHasVotedInElection) { + return _electionParticipantMetrics.toBSON(); + } + return BSONObj(); +} + +void ReplicationMetrics::setParticipantNewTermDates(Date_t newTermStartDate, + Date_t newTermAppliedDate) { + stdx::lock_guard<stdx::mutex> lk(_mutex); + _electionParticipantMetrics.setNewTermStartDate(newTermStartDate); + _electionParticipantMetrics.setNewTermAppliedDate(newTermAppliedDate); +} + +void ReplicationMetrics::clearParticipantNewTermDates() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + _electionParticipantMetrics.setNewTermStartDate(boost::none); + 
_electionParticipantMetrics.setNewTermAppliedDate(boost::none); +} + void ReplicationMetrics::_updateAverageCatchUpOps(WithLock lk) { long numCatchUps = _electionMetrics.getNumCatchUps(); if (numCatchUps > 0) { diff --git a/src/mongo/db/repl/replication_metrics.h b/src/mongo/db/repl/replication_metrics.h index 0d8c025e18b..59d27ace445 100644 --- a/src/mongo/db/repl/replication_metrics.h +++ b/src/mongo/db/repl/replication_metrics.h @@ -83,7 +83,7 @@ public: // consistent state. void setElectionCandidateMetrics(const StartElectionReasonEnum reason, const Date_t lastElectionDate, - const long long termAtElection, + const long long electionTerm, const OpTime lastCommittedOpTime, const OpTime lastSeenOpTime, const int numVotesNeeded, @@ -91,8 +91,8 @@ public: const Milliseconds electionTimeoutMillis, const boost::optional<int> priorPrimary); void setTargetCatchupOpTime(OpTime opTime); - void setNumCatchUpOps(int numCatchUpOps); - void setNewTermStartDate(Date_t newTermStartDate); + void setNumCatchUpOps(long numCatchUpOps); + void setCandidateNewTermStartDate(Date_t newTermStartDate); void setWMajorityWriteAvailabilityDate(Date_t wMajorityWriteAvailabilityDate); boost::optional<OpTime> getTargetCatchupOpTime_forTesting(); @@ -101,6 +101,25 @@ public: BSONObj getElectionCandidateMetricsBSON(); void clearElectionCandidateMetrics(); + // Election participant metrics + + // All the election participant metrics that should be set when a node votes in an election are + // set in this one function, so that the 'electionParticipantMetrics' section of replSetStatus + // shows a consistent state. 
+ void setElectionParticipantMetrics(const bool votedForCandidate, + const long long electionTerm, + const Date_t lastVoteDate, + const int electionCandidateMemberId, + const std::string voteReason, + const OpTime lastAppliedOpTime, + const OpTime maxAppliedOpTimeInSet, + const double priorityAtElection); + + BSONObj getElectionParticipantMetricsBSON(); + void setParticipantNewTermDates(Date_t newTermStartDate, Date_t newTermAppliedDate); + void clearParticipantNewTermDates(); + + private: class ElectionMetricsSSS; @@ -112,6 +131,7 @@ private: ElectionParticipantMetrics _electionParticipantMetrics; bool _nodeIsCandidateOrPrimary = false; + bool _nodeHasVotedInElection = false; // This field is a double so that the division result in _updateAverageCatchUpOps will be a // double without any casting. diff --git a/src/mongo/db/repl/replication_metrics.idl b/src/mongo/db/repl/replication_metrics.idl index e214c2b0dc8..4fb062411ed 100644 --- a/src/mongo/db/repl/replication_metrics.idl +++ b/src/mongo/db/repl/replication_metrics.idl @@ -144,8 +144,8 @@ structs: lastElectionDate: description: "Time the node called for the election" type: date - termAtElection: - description: "New term at the time of election" + electionTerm: + description: "Proposed new term for this election" type: long lastCommittedOpTimeAtElection: description: "Last OpTime the node committed before calling the election" @@ -189,6 +189,36 @@ structs: description: "Stores metrics that are specific to the last election in which the node voted" strict: true fields: + votedForCandidate: + description: "States if the node has voted yes or no for the candidate in this election" + type: bool + electionTerm: + description: "The term of the candidate that is running for election" + type: long + lastVoteDate: + description: "Time the node voted" + type: date + electionCandidateMemberId: + description: "MemberId of the node requesting a vote" + type: int + voteReason: + description: "Reason why the node voted the 
way it did" + type: string + lastAppliedOpTimeAtElection: + description: "Latest applied OpTime at the time of voting" + type: optime + maxAppliedOpTimeInSet: + description: "Highest applied time of any node in this replica set, as currently + known by this node" + type: optime priorityAtElection: description: "The node's priority at the time of the election" type: double + newTermStartDate: + description: "Time the new term oplog entry was written by the primary" + type: date + optional: true + newTermAppliedDate: + description: "Time this node applied the new term oplog entry" + type: date + optional: true diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 6938895799c..50d648a5790 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -69,6 +69,7 @@ #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/repl_set_config.h" #include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/repl/replication_metrics.h" #include "mongo/db/repl/transaction_oplog_application.h" #include "mongo/db/session.h" #include "mongo/db/session_txn_record_gen.h" @@ -327,6 +328,18 @@ Status SyncTail::syncApply(OperationContext* opCtx, if (opType == OpTypeEnum::kNoop) { incrementOpsAppliedStats(); + + auto oplogEntry = OplogEntryBase::parse(IDLParserErrorContext("syncApply"), op); + auto opObj = oplogEntry.getObject(); + if (opObj.hasField(ReplicationCoordinator::newPrimaryMsgField) && + opObj.getField(ReplicationCoordinator::newPrimaryMsgField).str() == + ReplicationCoordinator::newPrimaryMsg) { + + invariant(oplogEntry.getWallClockTime()); + ReplicationMetrics::get(opCtx).setParticipantNewTermDates( + oplogEntry.getWallClockTime().get(), applyStartTime); + } + return Status::OK(); } else if (OplogEntry::isCrudOpType(opType)) { return finishApply(writeConflictRetry(opCtx, "syncApply_CRUD", nss.ns(), [&] { diff --git a/src/mongo/db/repl/sync_tail_test.cpp b/src/mongo/db/repl/sync_tail_test.cpp index 
e9fb2cb0fb1..188e00875a9 100644 --- a/src/mongo/db/repl/sync_tail_test.cpp +++ b/src/mongo/db/repl/sync_tail_test.cpp @@ -232,32 +232,6 @@ auto parseFromOplogEntryArray(const BSONObj& obj, int elem) { return OpTime(tsArray.Array()[elem].timestamp(), termArray.Array()[elem].Long()); }; -TEST_F(SyncTailTest, SyncApplyNoNamespaceBadOp) { - const BSONObj op = BSON("op" - << "x"); - ASSERT_THROWS( - SyncTail::syncApply(_opCtx.get(), op, OplogApplication::Mode::kInitialSync, boost::none), - ExceptionFor<ErrorCodes::BadValue>); -} - -TEST_F(SyncTailTest, SyncApplyNoNamespaceNoOp) { - ASSERT_OK(SyncTail::syncApply(_opCtx.get(), - BSON("op" - << "n"), - OplogApplication::Mode::kInitialSync, - boost::none)); -} - -TEST_F(SyncTailTest, SyncApplyBadOp) { - const BSONObj op = BSON("op" - << "x" - << "ns" - << "test.t"); - ASSERT_THROWS( - SyncTail::syncApply(_opCtx.get(), op, OplogApplication::Mode::kInitialSync, boost::none), - ExceptionFor<ErrorCodes::BadValue>); -} - TEST_F(SyncTailTest, SyncApplyInsertDocumentDatabaseMissing) { NamespaceString nss("test.t"); auto op = makeOplogEntry(OpTypeEnum::kInsert, nss, {}); diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 8ee1a117226..b58f91471b1 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -65,6 +65,8 @@ namespace mongo { namespace repl { MONGO_FAIL_POINT_DEFINE(forceSyncSourceCandidate); +MONGO_FAIL_POINT_DEFINE(voteNoInElection); +MONGO_FAIL_POINT_DEFINE(voteYesInDryRunButNoInRealElection); // If this fail point is enabled, TopologyCoordinator::shouldChangeSyncSource() will ignore // the option TopologyCoordinator::Options::maxSyncSourceLagSecs. The sync source will not be @@ -1388,7 +1390,7 @@ std::string TopologyCoordinator::_getReplSetStatusString() { // Construct a ReplSetStatusArgs using default parameters. Missing parameters will not be // included in the status string. 
ReplSetStatusArgs rsStatusArgs{ - Date_t::now(), 0U, OpTimeAndWallTime(), BSONObj(), BSONObj(), boost::none}; + Date_t::now(), 0U, OpTimeAndWallTime(), BSONObj(), BSONObj(), BSONObj(), boost::none}; BSONObjBuilder builder; Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse"); prepareStatusResponse(rsStatusArgs, &builder, &result); @@ -1411,6 +1413,7 @@ void TopologyCoordinator::prepareStatusResponse(const ReplSetStatusArgs& rsStatu const Date_t lastOpDurableWall = getMyLastDurableOpTimeAndWallTime().wallTime; const BSONObj& initialSyncStatus = rsStatusArgs.initialSyncStatus; const BSONObj& electionCandidateMetrics = rsStatusArgs.electionCandidateMetrics; + const BSONObj& electionParticipantMetrics = rsStatusArgs.electionParticipantMetrics; const boost::optional<Timestamp>& lastStableRecoveryTimestamp = rsStatusArgs.lastStableRecoveryTimestamp; const boost::optional<Timestamp>& lastStableCheckpointTimestampDeprecated = @@ -1617,6 +1620,10 @@ void TopologyCoordinator::prepareStatusResponse(const ReplSetStatusArgs& rsStatu response->append("electionCandidateMetrics", electionCandidateMetrics); } + if (!electionParticipantMetrics.isEmpty()) { + response->append("electionParticipantMetrics", electionParticipantMetrics); + } + response->append("members", membersOut); *result = Status::OK(); } @@ -2690,6 +2697,29 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr ReplSetRequestVotesResponse* response) { response->setTerm(_term); + if (MONGO_unlikely(voteNoInElection.shouldFail())) { + log() << "failpoint voteNoInElection enabled"; + response->setVoteGranted(false); + response->setReason(str::stream() << "forced to vote no during dry run election due to " + "failpoint voteNoInElection set"); + return; + } + + if (MONGO_unlikely(voteYesInDryRunButNoInRealElection.shouldFail())) { + log() << "failpoint voteYesInDryRunButNoInRealElection enabled"; + if (args.isADryRun()) { + response->setVoteGranted(true); + 
response->setReason(str::stream() << "forced to vote yes in dry run due to failpoint " + "voteYesInDryRunButNoInRealElection set"); + } else { + response->setVoteGranted(false); + response->setReason(str::stream() + << "forced to vote no in real election due to failpoint " + "voteYesInDryRunButNoInRealElection set"); + } + return; + } + if (args.getTerm() < _term) { response->setVoteGranted(false); response->setReason(str::stream() << "candidate's term (" << args.getTerm() diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index e23f820fef3..db0bf31428d 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -306,6 +306,7 @@ public: const OpTimeAndWallTime readConcernMajorityOpTime; const BSONObj initialSyncStatus; const BSONObj electionCandidateMetrics; + const BSONObj electionParticipantMetrics; // boost::none if the storage engine does not support RTT, or if it does but does not // persist data to necessitate taking checkpoints. 
Timestamp::min() if a checkpoint is yet diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 04d0a152219..170bfc1587b 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -1538,7 +1538,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { Timestamp lastStableRecoveryTimestamp(2, 2); Timestamp lastStableCheckpointTimestampDeprecated(2, 2); BSONObj initialSyncStatus = BSON("failedInitialSyncAttempts" << 1); - BSONObj electionCandidateMetrics = BSON("DummyElectionMetrics" << 1); + BSONObj electionCandidateMetrics = BSON("DummyElectionCandidateMetrics" << 1); + BSONObj electionParticipantMetrics = BSON("DummyElectionParticipantMetrics" << 1); std::string setName = "mySet"; ReplSetHeartbeatResponse hb; @@ -1595,6 +1596,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { {readConcernMajorityOpTime, readConcernMajorityWallTime}, initialSyncStatus, electionCandidateMetrics, + electionParticipantMetrics, lastStableCheckpointTimestampDeprecated, lastStableRecoveryTimestamp}, &statusBuilder, @@ -1708,6 +1710,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { ASSERT_EQUALS(3, rsStatus["writeMajorityCount"].numberInt()); ASSERT_BSONOBJ_EQ(initialSyncStatus, rsStatus["initialSyncStatus"].Obj()); ASSERT_BSONOBJ_EQ(electionCandidateMetrics, rsStatus["electionCandidateMetrics"].Obj()); + ASSERT_BSONOBJ_EQ(electionParticipantMetrics, rsStatus["electionParticipantMetrics"].Obj()); // Test no lastStableRecoveryTimestamp field. 
BSONObjBuilder statusBuilder2; @@ -1727,6 +1730,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { ASSERT_FALSE(rsStatus.hasField("lastStableRecoveryTimestamp")); ASSERT_FALSE(rsStatus.hasField("lastStableCheckpointTimestamp")); ASSERT_FALSE(rsStatus.hasField("electionCandidateMetrics")); + ASSERT_FALSE(rsStatus.hasField("electionParticipantMetrics")); } TEST_F(TopoCoordTest, ReplSetGetStatusWriteMajorityDifferentFromMajorityVoteCount) { diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index 767a8369d31..be0946a5058 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -359,7 +359,7 @@ Status ReplicationCoordinatorEmbedded::abortCatchupIfNeeded(PrimaryCatchUpConclu UASSERT_NOT_IMPLEMENTED; } -void ReplicationCoordinatorEmbedded::incrementNumCatchUpOpsIfCatchingUp(int numOps) { +void ReplicationCoordinatorEmbedded::incrementNumCatchUpOpsIfCatchingUp(long numOps) { UASSERT_NOT_IMPLEMENTED; } diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index e39955ea365..8d7788a0f41 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -253,7 +253,7 @@ public: Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) override; - void incrementNumCatchUpOpsIfCatchingUp(int numOps) override; + void incrementNumCatchUpOpsIfCatchingUp(long numOps) override; void signalDropPendingCollectionsRemovedFromStorage() final; |