diff options
author | Medha Potluri <medha.potluri@mongodb.com> | 2019-06-14 11:13:14 -0400 |
---|---|---|
committer | Medha Potluri <medha.potluri@mongodb.com> | 2019-07-03 15:30:12 -0400 |
commit | 8071e24f0f7a4fa83015daa59d828c11c246423c (patch) | |
tree | b8b680b2dd7e7bff065c67e4da9ab15f48074ac0 /src/mongo | |
parent | 558bc6285f0410c9d48fb491173212408bc6b48d (diff) | |
download | mongo-8071e24f0f7a4fa83015daa59d828c11c246423c.tar.gz |
SERVER-41499 Track number of elections called for each reason in serverStatus
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/repl/SConscript | 31 | ||||
-rw-r--r-- | src/mongo/db/repl/election_reason_counter.cpp | 42 | ||||
-rw-r--r-- | src/mongo/db/repl/election_reason_counter.h | 53 | ||||
-rw-r--r-- | src/mongo/db/repl/election_reason_counter.idl | 51 | ||||
-rw-r--r-- | src/mongo/db/repl/election_reason_counter_parser.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/repl/election_reason_counter_parser.h | 44 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_metrics.cpp | 91 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_metrics.h | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_metrics.idl | 37 |
13 files changed, 478 insertions, 16 deletions
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 41aa13dddd3..926f35031e2 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -635,6 +635,7 @@ env.Library( '$BUILD_DIR/mongo/rpc/metadata', '$BUILD_DIR/mongo/transport/transport_layer_common', '$BUILD_DIR/mongo/util/fail_point', + '$BUILD_DIR/mongo/db/repl/replication_metrics', 'collection_cloner', 'initial_syncer', 'data_replicator_external_state_initial_sync', @@ -1361,9 +1362,33 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', - ], - LIBDEPS_PRIVATE=[ - '$BUILD_DIR/mongo/db/commands/server_status', '$BUILD_DIR/mongo/db/service_context', + '$BUILD_DIR/mongo/db/commands/server_status', + 'topology_coordinator', + 'election_reason_counter', + 'election_reason_counter_parser', + ], +) + +env.Library( + target='election_reason_counter', + source=[ + 'election_reason_counter.cpp', + env.Idlc('election_reason_counter.idl')[0], + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/idl/idl_parser', + ], +) + +env.Library( + target='election_reason_counter_parser', + source=[ + 'election_reason_counter_parser.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + 'election_reason_counter', ], )
\ No newline at end of file diff --git a/src/mongo/db/repl/election_reason_counter.cpp b/src/mongo/db/repl/election_reason_counter.cpp new file mode 100644 index 00000000000..1ca6db64d64 --- /dev/null +++ b/src/mongo/db/repl/election_reason_counter.cpp @@ -0,0 +1,42 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/repl/election_reason_counter.h" + +namespace mongo { +namespace repl { + +ElectionReasonCounter ElectionReasonCounter::parse(const IDLParserErrorContext& ctxt, + const BSONObj& bsonObject) { + this->parseProtected(ctxt, bsonObject); + return *this; +} + +} // namespace repl +} // namespace mongo diff --git a/src/mongo/db/repl/election_reason_counter.h b/src/mongo/db/repl/election_reason_counter.h new file mode 100644 index 00000000000..8dd068181b5 --- /dev/null +++ b/src/mongo/db/repl/election_reason_counter.h @@ -0,0 +1,53 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/repl/election_reason_counter_gen.h" + +namespace mongo { +namespace repl { + +/** + * Wrapper around the IDL struct ElectionReasonCounterBase that has increment methods. + */ +class ElectionReasonCounter : public ElectionReasonCounterBase { +public: + using ElectionReasonCounterBase::getCalled; + using ElectionReasonCounterBase::setCalled; + + void incrementCalled() { + setCalled(getCalled() + 1); + } + + ElectionReasonCounter parse(const IDLParserErrorContext& ctxt, const BSONObj& bsonObject); +}; + +} // namespace repl +} // namespace mongo diff --git a/src/mongo/db/repl/election_reason_counter.idl b/src/mongo/db/repl/election_reason_counter.idl new file mode 100644 index 00000000000..3cc0059eb13 --- /dev/null +++ b/src/mongo/db/repl/election_reason_counter.idl @@ -0,0 +1,51 @@ +# Copyright (C) 2019-present MongoDB, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the Server Side Public License, version 1, +# as published by MongoDB, Inc. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Server Side Public License for more details. +# +# You should have received a copy of the Server Side Public License +# along with this program. If not, see +# <http://www.mongodb.com/licensing/server-side-public-license>. +# +# As a special exception, the copyright holders give permission to link the +# code of portions of this program with the OpenSSL library under certain +# conditions as described in each individual source file and distribute +# linked combinations including the program with the OpenSSL library. You +# must comply with the Server Side Public License in all respects for +# all of the code used other than as permitted herein. If you modify file(s) +# with this exception, you may extend this exception to your version of the +# file(s), but you are not obligated to do so. If you do not wish to do so, +# delete this exception statement from your version. If you delete this +# exception statement from all source files in the program, then also delete +# it in the license file. + +# This IDL file describes the BSON format for ElectionReasonCounterBase and +# handles the serialization to and deserialization from its BSON +# representations for that class. + +global: + cpp_namespace: "mongo::repl" + +imports: + - "mongo/idl/basic_types.idl" + +structs: + ElectionReasonCounterBase: + description: "Stores the total number of elections a node has called for a given reason and + the number of those elections that were successful" + strict: true + fields: + called: + description: "Number of elections this node has called for the given reason" + type: long + default: 0 + successful: + description: "Number of successful elections for the given reason" + type: long + default: 0 diff --git a/src/mongo/db/repl/election_reason_counter_parser.cpp b/src/mongo/db/repl/election_reason_counter_parser.cpp new file mode 100644 index 00000000000..4543b828fd1 --- /dev/null +++ b/src/mongo/db/repl/election_reason_counter_parser.cpp @@ -0,0 +1,50 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/repl/election_reason_counter_parser.h" + +namespace mongo { +namespace repl { + + +ElectionReasonCounter parseElectionReasonCounter(const BSONElement& element) { + ElectionReasonCounter counter; + IDLParserErrorContext ctxt = IDLParserErrorContext("ElectionReasonCounter"); + + return counter.parse(ctxt, element.Obj()); +} + +void serializeElectionReasonCounterToBSON(ElectionReasonCounter counter, + StringData fieldName, + BSONObjBuilder* builder) { + builder->append(fieldName, counter.toBSON()); +} + +} // namespace repl +} // namespace mongo diff --git a/src/mongo/db/repl/election_reason_counter_parser.h b/src/mongo/db/repl/election_reason_counter_parser.h new file mode 100644 index 00000000000..960021ced2b --- /dev/null +++ b/src/mongo/db/repl/election_reason_counter_parser.h @@ -0,0 +1,44 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/repl/election_reason_counter.h" + +namespace mongo { +namespace repl { + +ElectionReasonCounter parseElectionReasonCounter(const BSONElement& element); + +void serializeElectionReasonCounterToBSON(ElectionReasonCounter counter, + StringData fieldName, + BSONObjBuilder* builder); + +} // namespace repl +} // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 2c7c1b7727a..18636ca66f5 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -1017,13 +1017,15 @@ private: * "originalTerm" was the term during which the dry run began, if the term has since * changed, do not run for election. */ - void _processDryRunResult(long long originalTerm); + void _processDryRunResult(long long originalTerm, + TopologyCoordinator::StartElectionReason reason); /** * Begins executing a real election. This is called either a successful dry run, or when the * dry run was skipped (which may be specified for a ReplSetStepUp). */ - void _startRealElection_inlock(long long originalTerm); + void _startRealElection_inlock(long long originalTerm, + TopologyCoordinator::StartElectionReason reason); /** * Writes the last vote in persistent storage after completing dry run successfully. diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp index ec622e52ba3..b93ba668eb1 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp @@ -34,6 +34,7 @@ #include <memory> #include "mongo/db/repl/replication_coordinator_impl.h" +#include "mongo/db/repl/replication_metrics.h" #include "mongo/db/repl/topology_coordinator.h" #include "mongo/db/repl/vote_requester.h" #include "mongo/stdx/mutex.h" @@ -143,7 +144,7 @@ void ReplicationCoordinatorImpl::_startElectSelfV1_inlock( if (reason == TopologyCoordinator::StartElectionReason::kStepUpRequestSkipDryRun) { long long newTerm = term + 1; log() << "skipping dry run and running for election in term " << newTerm; - _startRealElection_inlock(newTerm); + _startRealElection_inlock(newTerm, reason); lossGuard.dismiss(); return; } @@ -169,12 +170,15 @@ void ReplicationCoordinatorImpl::_startElectSelfV1_inlock( fassert(28685, nextPhaseEvh.getStatus()); _replExecutor ->onEvent(nextPhaseEvh.getValue(), - [=](const executor::TaskExecutor::CallbackArgs&) { _processDryRunResult(term); }) + [=](const executor::TaskExecutor::CallbackArgs&) { + _processDryRunResult(term, reason); + }) .status_with_transitional_ignore(); lossGuard.dismiss(); } -void ReplicationCoordinatorImpl::_processDryRunResult(long long originalTerm) { +void ReplicationCoordinatorImpl::_processDryRunResult( + long long originalTerm, TopologyCoordinator::StartElectionReason reason) { stdx::lock_guard<stdx::mutex> lk(_mutex); LoseElectionDryRunGuardV1 lossGuard(this); @@ -205,11 +209,14 @@ void ReplicationCoordinatorImpl::_processDryRunResult(long long originalTerm) { long long newTerm = originalTerm + 1; log() << "dry election run succeeded, running for election in term " << newTerm; - _startRealElection_inlock(newTerm); + _startRealElection_inlock(newTerm, reason); lossGuard.dismiss(); } -void ReplicationCoordinatorImpl::_startRealElection_inlock(long long newTerm) { +void ReplicationCoordinatorImpl::_startRealElection_inlock( + long long newTerm, TopologyCoordinator::StartElectionReason reason) { + ReplicationMetrics::get(getServiceContext()).incrementNumElectionsCalledForReason(reason); + LoseElectionDryRunGuardV1 lossGuard(this); TopologyCoordinator::UpdateTermResult updateTermResult; diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp index dae9acd510a..27bd7a4653d 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp @@ -41,6 +41,7 @@ #include "mongo/db/repl/replication_coordinator_external_state_mock.h" #include "mongo/db/repl/replication_coordinator_impl.h" #include "mongo/db/repl/replication_coordinator_test_fixture.h" +#include "mongo/db/repl/replication_metrics.h" #include "mongo/db/repl/topology_coordinator.h" #include "mongo/executor/network_interface_mock.h" #include "mongo/unittest/unittest.h" @@ -274,6 +275,16 @@ TEST_F(ReplCoordTest, ElectionSucceedsWhenAllNodesVoteYea) { stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("election succeeded")); + + // Check that the numElectionTimeoutsCalled election metric has been incremented, and that none + // of the metrics that track the number of elections called for other reasons has been + // incremented. + ServiceContext* svcCtx = getServiceContext(); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting()); } TEST_F(ReplCoordTest, ElectionSucceedsWhenMaxSevenNodesVoteYea) { @@ -1483,6 +1494,16 @@ TEST_F(TakeoverTest, SuccessfulCatchupTakeover) { performSuccessfulTakeover(catchupTakeoverTime, TopologyCoordinator::StartElectionReason::kCatchupTakeover, lastVoteExpected); + + // Check that the numCatchUpTakeoversCalled election metric has been incremented, and that none + // of the metrics that track the number of elections called for other reasons has been + // incremented. + ServiceContext* svcCtx = getServiceContext(); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting()); } TEST_F(TakeoverTest, CatchupTakeoverDryRunFailsPrimarySaysNo) { @@ -1840,6 +1861,16 @@ TEST_F(TakeoverTest, SuccessfulPriorityTakeover) { performSuccessfulTakeover(priorityTakeoverTime, TopologyCoordinator::StartElectionReason::kPriorityTakeover, lastVoteExpected); + + // Check that the numPriorityTakeoversCalled election metric has been incremented, and that none + // of the metrics that track the number of elections called for other reasons has been + // incremented. + ServiceContext* svcCtx = getServiceContext(); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting()); } TEST_F(TakeoverTest, DontCallForPriorityTakeoverWhenLaggedSameSecond) { diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 6d5a67909b2..1d2fbf52199 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -55,6 +55,7 @@ #include "mongo/db/repl/replication_coordinator_external_state_mock.h" #include "mongo/db/repl/replication_coordinator_impl.h" #include "mongo/db/repl/replication_coordinator_test_fixture.h" +#include "mongo/db/repl/replication_metrics.h" #include "mongo/db/repl/storage_interface_mock.h" #include "mongo/db/repl/topology_coordinator.h" #include "mongo/db/repl/update_position_args.h" @@ -2298,6 +2299,16 @@ TEST_F(ReplCoordTest, SingleNodeReplSetUnfreeze) { ASSERT_TRUE(getTopoCoord().getMemberState().primary()); getNet()->exitNetwork(); ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + + // Check that the numFreezeTimeoutsCalled election metric has been incremented, and that none + // of the metrics that track the number of elections called for other reasons has been + // incremented. + ServiceContext* svcCtx = getServiceContext(); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting()); } TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingleNodeSet) { @@ -2314,6 +2325,16 @@ TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingle auto opCtx = makeOperationContext(); runSingleNodeElection(opCtx.get()); + // Check that the numElectionTimeoutsCalled election metric has been incremented, and that none + // of the metrics that track the number of elections called for other reasons has been + // incremented. + ServiceContext* svcCtx = getServiceContext(); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting()); + getReplCoord()->stepDown(opCtx.get(), true, Milliseconds(0), Milliseconds(1000)); getNet()->enterNetwork(); // Must do this before inspecting the topocoord Date_t stepdownUntil = getNet()->now() + Seconds(1); @@ -2328,6 +2349,16 @@ TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingle ASSERT_TRUE(getTopoCoord().getMemberState().primary()); getNet()->exitNetwork(); ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + + // Check that the numFreezeTimeoutsCalled election metric has been incremented, and that none + // of the metrics that track the number of elections called for other reasons has been + // incremented. When a stepdown timeout expires in a single node replica set, an election is + // called for the same reason as is used when a freeze timeout expires. + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting()); + ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting()); + ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting()); } TEST_F( diff --git a/src/mongo/db/repl/replication_metrics.cpp b/src/mongo/db/repl/replication_metrics.cpp index 1c1c4075d68..a60d7762a87 100644 --- a/src/mongo/db/repl/replication_metrics.cpp +++ b/src/mongo/db/repl/replication_metrics.cpp @@ -27,9 +27,10 @@ * it in the license file. */ -#include "src/mongo/db/repl/replication_metrics.h" +#include "mongo/db/repl/replication_metrics.h" #include "mongo/db/commands/server_status.h" +#include "mongo/db/repl/election_reason_counter.h" namespace mongo { namespace repl { @@ -46,9 +47,97 @@ ReplicationMetrics& ReplicationMetrics::get(OperationContext* opCtx) { return get(opCtx->getServiceContext()); } +ReplicationMetrics::ReplicationMetrics() + : _electionMetrics(ElectionReasonCounter(), + ElectionReasonCounter(), + ElectionReasonCounter(), + ElectionReasonCounter(), + ElectionReasonCounter()) {} + +ReplicationMetrics::~ReplicationMetrics() {} + +void ReplicationMetrics::incrementNumElectionsCalledForReason( + TopologyCoordinator::StartElectionReason reason) { + stdx::lock_guard<stdx::mutex> lk(_mutex); + switch (reason) { + case TopologyCoordinator::StartElectionReason::kStepUpRequest: + case TopologyCoordinator::StartElectionReason::kStepUpRequestSkipDryRun: { + ElectionReasonCounter& stepUpCmd = _electionMetrics.getStepUpCmd(); + stepUpCmd.incrementCalled(); + _electionMetrics.setStepUpCmd(stepUpCmd); + break; + } + case TopologyCoordinator::StartElectionReason::kPriorityTakeover: { + ElectionReasonCounter& priorityTakeover = _electionMetrics.getPriorityTakeover(); + priorityTakeover.incrementCalled(); + _electionMetrics.setPriorityTakeover(priorityTakeover); + break; + } + case TopologyCoordinator::StartElectionReason::kCatchupTakeover: { + ElectionReasonCounter& catchUpTakeover = _electionMetrics.getCatchUpTakeover(); + catchUpTakeover.incrementCalled(); + _electionMetrics.setCatchUpTakeover(catchUpTakeover); + break; + } + case TopologyCoordinator::StartElectionReason::kElectionTimeout: { + ElectionReasonCounter& electionTimeout = _electionMetrics.getElectionTimeout(); + electionTimeout.incrementCalled(); + _electionMetrics.setElectionTimeout(electionTimeout); + break; + } + case TopologyCoordinator::StartElectionReason::kSingleNodePromptElection: { + ElectionReasonCounter& freezeTimeout = _electionMetrics.getFreezeTimeout(); + freezeTimeout.incrementCalled(); + _electionMetrics.setFreezeTimeout(freezeTimeout); + break; + } + } +} + +int ReplicationMetrics::getNumStepUpCmdsCalled_forTesting() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _electionMetrics.getStepUpCmd().getCalled(); +} + +int ReplicationMetrics::getNumPriorityTakeoversCalled_forTesting() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _electionMetrics.getPriorityTakeover().getCalled(); +} + +int ReplicationMetrics::getNumCatchUpTakeoversCalled_forTesting() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _electionMetrics.getCatchUpTakeover().getCalled(); +} + +int ReplicationMetrics::getNumElectionTimeoutsCalled_forTesting() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _electionMetrics.getElectionTimeout().getCalled(); +} + +int ReplicationMetrics::getNumFreezeTimeoutsCalled_forTesting() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _electionMetrics.getFreezeTimeout().getCalled(); +} + +BSONObj ReplicationMetrics::getElectionMetricsBSON() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _electionMetrics.toBSON(); +} + class ReplicationMetrics::ElectionMetricsSSS : public ServerStatusSection { public: ElectionMetricsSSS() : ServerStatusSection("electionMetrics") {} + + bool includeByDefault() const override { + return true; + } + + BSONObj generateSection(OperationContext* opCtx, + const BSONElement& configElement) const override { + ReplicationMetrics& replicationMetrics = ReplicationMetrics::get(opCtx); + + return replicationMetrics.getElectionMetricsBSON(); + } } electionMetricsSSS; } // namespace repl diff --git a/src/mongo/db/repl/replication_metrics.h b/src/mongo/db/repl/replication_metrics.h index 7012f4802cf..f6c269e7ba0 100644 --- a/src/mongo/db/repl/replication_metrics.h +++ b/src/mongo/db/repl/replication_metrics.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/db/repl/replication_metrics_gen.h" +#include "mongo/db/repl/topology_coordinator.h" #include "mongo/db/service_context.h" #include "mongo/stdx/mutex.h" @@ -47,6 +48,15 @@ public: ReplicationMetrics(); ~ReplicationMetrics(); + void incrementNumElectionsCalledForReason(TopologyCoordinator::StartElectionReason reason); + int getNumStepUpCmdsCalled_forTesting(); + int getNumPriorityTakeoversCalled_forTesting(); + int getNumCatchUpTakeoversCalled_forTesting(); + int getNumElectionTimeoutsCalled_forTesting(); + int getNumFreezeTimeoutsCalled_forTesting(); + + BSONObj getElectionMetricsBSON(); + private: class ElectionMetricsSSS; diff --git a/src/mongo/db/repl/replication_metrics.idl b/src/mongo/db/repl/replication_metrics.idl index 566d2ee665b..6612eb0216f 100644 --- a/src/mongo/db/repl/replication_metrics.idl +++ b/src/mongo/db/repl/replication_metrics.idl @@ -32,22 +32,49 @@ global: cpp_namespace: "mongo::repl" + cpp_includes: + - "mongo/db/repl/election_reason_counter_parser.h" imports: - "mongo/idl/basic_types.idl" +types: + ElectionReasonCounter: + bson_serialization_type: any + description: "Wrapper around ElectionReasonCounterBase that has increment methods" + cpp_type: ElectionReasonCounter + serializer: "::mongo::repl::serializeElectionReasonCounterToBSON" + deserializer: "::mongo::repl::parseElectionReasonCounter" + structs: ElectionMetrics: description: "Stores metrics related to all the elections a node has called" strict: true fields: - numStepUpsRequested: - description: "Number of elections this node has called due to step up requests" - type: int - + stepUpCmd: + description: "Election reason counter for step up requests" + type: ElectionReasonCounter + non_const_getter: true + priorityTakeover: + description: "Election reason counter for priority takeovers" + type: ElectionReasonCounter + non_const_getter: true + catchUpTakeover: + description: "Election reason counter for catchup takeovers" + type: ElectionReasonCounter + non_const_getter: true + electionTimeout: + description: "Election reason counter for when the node didn't see a primary in the + past electionTimeoutPeriod milliseconds" + type: ElectionReasonCounter + non_const_getter: true + freezeTimeout: + description: "Election reason counter for single node replica set prompt elections" + type: ElectionReasonCounter + non_const_getter: true ElectionCandidateMetrics: - description: "Stores metrics that are specific to the last election in which the node was a + description: "Stores metrics that are specific to the last election in which the node was a candidate" strict: true fields: |