summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorMedha Potluri <medha.potluri@mongodb.com>2019-06-14 11:13:14 -0400
committerMedha Potluri <medha.potluri@mongodb.com>2019-07-03 15:30:12 -0400
commit8071e24f0f7a4fa83015daa59d828c11c246423c (patch)
treeb8b680b2dd7e7bff065c67e4da9ab15f48074ac0 /src/mongo
parent558bc6285f0410c9d48fb491173212408bc6b48d (diff)
downloadmongo-8071e24f0f7a4fa83015daa59d828c11c246423c.tar.gz
SERVER-41499 Track number of elections called for each reason in serverStatus
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/repl/SConscript31
-rw-r--r--src/mongo/db/repl/election_reason_counter.cpp42
-rw-r--r--src/mongo/db/repl/election_reason_counter.h53
-rw-r--r--src/mongo/db/repl/election_reason_counter.idl51
-rw-r--r--src/mongo/db/repl/election_reason_counter_parser.cpp50
-rw-r--r--src/mongo/db/repl/election_reason_counter_parser.h44
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h6
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp17
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp31
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp31
-rw-r--r--src/mongo/db/repl/replication_metrics.cpp91
-rw-r--r--src/mongo/db/repl/replication_metrics.h10
-rw-r--r--src/mongo/db/repl/replication_metrics.idl37
13 files changed, 478 insertions, 16 deletions
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 41aa13dddd3..926f35031e2 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -635,6 +635,7 @@ env.Library(
'$BUILD_DIR/mongo/rpc/metadata',
'$BUILD_DIR/mongo/transport/transport_layer_common',
'$BUILD_DIR/mongo/util/fail_point',
+ '$BUILD_DIR/mongo/db/repl/replication_metrics',
'collection_cloner',
'initial_syncer',
'data_replicator_external_state_initial_sync',
@@ -1361,9 +1362,33 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
- ],
- LIBDEPS_PRIVATE=[
- '$BUILD_DIR/mongo/db/commands/server_status',
'$BUILD_DIR/mongo/db/service_context',
+ '$BUILD_DIR/mongo/db/commands/server_status',
+ 'topology_coordinator',
+ 'election_reason_counter',
+ 'election_reason_counter_parser',
+ ],
+)
+
+# Library containing the ElectionReasonCounter wrapper (election_reason_counter.cpp) together with
+# the first output that Idlc produces from election_reason_counter.idl.
+env.Library(
+ target='election_reason_counter',
+ source=[
+ 'election_reason_counter.cpp',
+ env.Idlc('election_reason_counter.idl')[0],
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/idl/idl_parser',
+ ],
+)
+
+# Library containing the free functions that parse and serialize ElectionReasonCounter values
+# (election_reason_counter_parser.cpp); it depends on the 'election_reason_counter' library.
+env.Library(
+ target='election_reason_counter_parser',
+ source=[
+ 'election_reason_counter_parser.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ 'election_reason_counter',
],
) \ No newline at end of file
diff --git a/src/mongo/db/repl/election_reason_counter.cpp b/src/mongo/db/repl/election_reason_counter.cpp
new file mode 100644
index 00000000000..1ca6db64d64
--- /dev/null
+++ b/src/mongo/db/repl/election_reason_counter.cpp
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/repl/election_reason_counter.h"
+
+namespace mongo {
+namespace repl {
+
+/**
+ * Populates this counter from 'bsonObject' by delegating to the inherited parseProtected(), then
+ * returns a copy of the populated counter.
+ */
+ElectionReasonCounter ElectionReasonCounter::parse(const IDLParserErrorContext& ctxt,
+                                                   const BSONObj& bsonObject) {
+    parseProtected(ctxt, bsonObject);
+    return *this;
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/election_reason_counter.h b/src/mongo/db/repl/election_reason_counter.h
new file mode 100644
index 00000000000..8dd068181b5
--- /dev/null
+++ b/src/mongo/db/repl/election_reason_counter.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/repl/election_reason_counter_gen.h"
+
+namespace mongo {
+namespace repl {
+
+/**
+ * Extends the IDL-generated ElectionReasonCounterBase with an increment for the 'called' count
+ * and a member parse() that fills in an existing instance.
+ */
+class ElectionReasonCounter : public ElectionReasonCounterBase {
+public:
+    using ElectionReasonCounterBase::getCalled;
+    using ElectionReasonCounterBase::setCalled;
+
+    /**
+     * Adds one to the 'called' count.
+     */
+    void incrementCalled() {
+        const auto bumped = getCalled() + 1;
+        setCalled(bumped);
+    }
+
+    /**
+     * Parses 'bsonObject' into this counter and returns a copy of the result.
+     */
+    ElectionReasonCounter parse(const IDLParserErrorContext& ctxt, const BSONObj& bsonObject);
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/election_reason_counter.idl b/src/mongo/db/repl/election_reason_counter.idl
new file mode 100644
index 00000000000..3cc0059eb13
--- /dev/null
+++ b/src/mongo/db/repl/election_reason_counter.idl
@@ -0,0 +1,51 @@
+# Copyright (C) 2019-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+
+# This IDL file describes the BSON format for ElectionReasonCounterBase and
+# handles serialization to and deserialization from the BSON representation
+# of that class.
+
+global:
+ cpp_namespace: "mongo::repl"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+
+structs:
+ # NOTE(review): within this change only 'called' is read and written (through getCalled and
+ # setCalled in election_reason_counter.h); nothing visible here updates 'successful' - confirm
+ # that it is populated elsewhere.
+ ElectionReasonCounterBase:
+ description: "Stores the total number of elections a node has called for a given reason and
+ the number of those elections that were successful"
+ strict: true
+ fields:
+ called:
+ description: "Number of elections this node has called for the given reason"
+ type: long
+ default: 0
+ successful:
+ description: "Number of successful elections for the given reason"
+ type: long
+ default: 0
diff --git a/src/mongo/db/repl/election_reason_counter_parser.cpp b/src/mongo/db/repl/election_reason_counter_parser.cpp
new file mode 100644
index 00000000000..4543b828fd1
--- /dev/null
+++ b/src/mongo/db/repl/election_reason_counter_parser.cpp
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/repl/election_reason_counter_parser.h"
+
+namespace mongo {
+namespace repl {
+
+
+/**
+ * Builds an ElectionReasonCounter from the object held by 'element', parsing it with an
+ * IDLParserErrorContext named "ElectionReasonCounter".
+ */
+ElectionReasonCounter parseElectionReasonCounter(const BSONElement& element) {
+    IDLParserErrorContext errorContext("ElectionReasonCounter");
+    ElectionReasonCounter parsed;
+    return parsed.parse(errorContext, element.Obj());
+}
+
+/**
+ * Appends the BSON form of 'counter' to 'builder' as a field named 'fieldName'.
+ */
+void serializeElectionReasonCounterToBSON(ElectionReasonCounter counter,
+                                          StringData fieldName,
+                                          BSONObjBuilder* builder) {
+    const BSONObj counterObj = counter.toBSON();
+    builder->append(fieldName, counterObj);
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/election_reason_counter_parser.h b/src/mongo/db/repl/election_reason_counter_parser.h
new file mode 100644
index 00000000000..960021ced2b
--- /dev/null
+++ b/src/mongo/db/repl/election_reason_counter_parser.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/repl/election_reason_counter.h"
+
+namespace mongo {
+namespace repl {
+
+/**
+ * Parses the object stored in 'element' into an ElectionReasonCounter and returns it.
+ */
+ElectionReasonCounter parseElectionReasonCounter(const BSONElement& element);
+
+/**
+ * Converts 'counter' to BSON and appends it to 'builder' as a field named 'fieldName'.
+ */
+void serializeElectionReasonCounterToBSON(ElectionReasonCounter counter,
+ StringData fieldName,
+ BSONObjBuilder* builder);
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 2c7c1b7727a..18636ca66f5 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1017,13 +1017,15 @@ private:
* "originalTerm" was the term during which the dry run began, if the term has since
* changed, do not run for election.
*/
- void _processDryRunResult(long long originalTerm);
+ void _processDryRunResult(long long originalTerm,
+ TopologyCoordinator::StartElectionReason reason);
/**
* Begins executing a real election. This is called either after a successful dry run, or when
* the dry run was skipped (which may be specified for a ReplSetStepUp).
*/
- void _startRealElection_inlock(long long originalTerm);
+ void _startRealElection_inlock(long long originalTerm,
+ TopologyCoordinator::StartElectionReason reason);
/**
* Writes the last vote in persistent storage after completing dry run successfully.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
index ec622e52ba3..b93ba668eb1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
@@ -34,6 +34,7 @@
#include <memory>
#include "mongo/db/repl/replication_coordinator_impl.h"
+#include "mongo/db/repl/replication_metrics.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/repl/vote_requester.h"
#include "mongo/stdx/mutex.h"
@@ -143,7 +144,7 @@ void ReplicationCoordinatorImpl::_startElectSelfV1_inlock(
if (reason == TopologyCoordinator::StartElectionReason::kStepUpRequestSkipDryRun) {
long long newTerm = term + 1;
log() << "skipping dry run and running for election in term " << newTerm;
- _startRealElection_inlock(newTerm);
+ _startRealElection_inlock(newTerm, reason);
lossGuard.dismiss();
return;
}
@@ -169,12 +170,15 @@ void ReplicationCoordinatorImpl::_startElectSelfV1_inlock(
fassert(28685, nextPhaseEvh.getStatus());
_replExecutor
->onEvent(nextPhaseEvh.getValue(),
- [=](const executor::TaskExecutor::CallbackArgs&) { _processDryRunResult(term); })
+ [=](const executor::TaskExecutor::CallbackArgs&) {
+ _processDryRunResult(term, reason);
+ })
.status_with_transitional_ignore();
lossGuard.dismiss();
}
-void ReplicationCoordinatorImpl::_processDryRunResult(long long originalTerm) {
+void ReplicationCoordinatorImpl::_processDryRunResult(
+ long long originalTerm, TopologyCoordinator::StartElectionReason reason) {
stdx::lock_guard<stdx::mutex> lk(_mutex);
LoseElectionDryRunGuardV1 lossGuard(this);
@@ -205,11 +209,14 @@ void ReplicationCoordinatorImpl::_processDryRunResult(long long originalTerm) {
long long newTerm = originalTerm + 1;
log() << "dry election run succeeded, running for election in term " << newTerm;
- _startRealElection_inlock(newTerm);
+ _startRealElection_inlock(newTerm, reason);
lossGuard.dismiss();
}
-void ReplicationCoordinatorImpl::_startRealElection_inlock(long long newTerm) {
+void ReplicationCoordinatorImpl::_startRealElection_inlock(
+ long long newTerm, TopologyCoordinator::StartElectionReason reason) {
+ ReplicationMetrics::get(getServiceContext()).incrementNumElectionsCalledForReason(reason);
+
LoseElectionDryRunGuardV1 lossGuard(this);
TopologyCoordinator::UpdateTermResult updateTermResult;
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index dae9acd510a..27bd7a4653d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
+#include "mongo/db/repl/replication_metrics.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/executor/network_interface_mock.h"
#include "mongo/unittest/unittest.h"
@@ -274,6 +275,16 @@ TEST_F(ReplCoordTest, ElectionSucceedsWhenAllNodesVoteYea) {
stopCapturingLogMessages();
ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
+
+ // Check that the numElectionTimeoutsCalled election metric has been incremented, and that none
+ // of the metrics that track the number of elections called for other reasons has been
+ // incremented.
+ ServiceContext* svcCtx = getServiceContext();
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting());
}
TEST_F(ReplCoordTest, ElectionSucceedsWhenMaxSevenNodesVoteYea) {
@@ -1483,6 +1494,16 @@ TEST_F(TakeoverTest, SuccessfulCatchupTakeover) {
performSuccessfulTakeover(catchupTakeoverTime,
TopologyCoordinator::StartElectionReason::kCatchupTakeover,
lastVoteExpected);
+
+ // Check that the numCatchUpTakeoversCalled election metric has been incremented, and that none
+ // of the metrics that track the number of elections called for other reasons has been
+ // incremented.
+ ServiceContext* svcCtx = getServiceContext();
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting());
}
TEST_F(TakeoverTest, CatchupTakeoverDryRunFailsPrimarySaysNo) {
@@ -1840,6 +1861,16 @@ TEST_F(TakeoverTest, SuccessfulPriorityTakeover) {
performSuccessfulTakeover(priorityTakeoverTime,
TopologyCoordinator::StartElectionReason::kPriorityTakeover,
lastVoteExpected);
+
+ // Check that the numPriorityTakeoversCalled election metric has been incremented, and that none
+ // of the metrics that track the number of elections called for other reasons has been
+ // incremented.
+ ServiceContext* svcCtx = getServiceContext();
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting());
}
TEST_F(TakeoverTest, DontCallForPriorityTakeoverWhenLaggedSameSecond) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 6d5a67909b2..1d2fbf52199 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -55,6 +55,7 @@
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
+#include "mongo/db/repl/replication_metrics.h"
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/repl/update_position_args.h"
@@ -2298,6 +2299,16 @@ TEST_F(ReplCoordTest, SingleNodeReplSetUnfreeze) {
ASSERT_TRUE(getTopoCoord().getMemberState().primary());
getNet()->exitNetwork();
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // Check that the numFreezeTimeoutsCalled election metric has been incremented, and that none
+ // of the metrics that track the number of elections called for other reasons has been
+ // incremented.
+ ServiceContext* svcCtx = getServiceContext();
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting());
}
TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingleNodeSet) {
@@ -2314,6 +2325,16 @@ TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingle
auto opCtx = makeOperationContext();
runSingleNodeElection(opCtx.get());
+ // Check that the numElectionTimeoutsCalled election metric has been incremented, and that none
+ // of the metrics that track the number of elections called for other reasons has been
+ // incremented.
+ ServiceContext* svcCtx = getServiceContext();
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting());
+
getReplCoord()->stepDown(opCtx.get(), true, Milliseconds(0), Milliseconds(1000));
getNet()->enterNetwork(); // Must do this before inspecting the topocoord
Date_t stepdownUntil = getNet()->now() + Seconds(1);
@@ -2328,6 +2349,16 @@ TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingle
ASSERT_TRUE(getTopoCoord().getMemberState().primary());
getNet()->exitNetwork();
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // Check that the numFreezeTimeoutsCalled election metric has been incremented, and that none
+ // of the metrics that track the number of elections called for other reasons has been
+ // incremented. When a stepdown timeout expires in a single node replica set, an election is
+ // called for the same reason as is used when a freeze timeout expires.
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumStepUpCmdsCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumPriorityTakeoversCalled_forTesting());
+ ASSERT_EQUALS(0, ReplicationMetrics::get(svcCtx).getNumCatchUpTakeoversCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumElectionTimeoutsCalled_forTesting());
+ ASSERT_EQUALS(1, ReplicationMetrics::get(svcCtx).getNumFreezeTimeoutsCalled_forTesting());
}
TEST_F(
diff --git a/src/mongo/db/repl/replication_metrics.cpp b/src/mongo/db/repl/replication_metrics.cpp
index 1c1c4075d68..a60d7762a87 100644
--- a/src/mongo/db/repl/replication_metrics.cpp
+++ b/src/mongo/db/repl/replication_metrics.cpp
@@ -27,9 +27,10 @@
* it in the license file.
*/
-#include "src/mongo/db/repl/replication_metrics.h"
+#include "mongo/db/repl/replication_metrics.h"
#include "mongo/db/commands/server_status.h"
+#include "mongo/db/repl/election_reason_counter.h"
namespace mongo {
namespace repl {
@@ -46,9 +47,97 @@ ReplicationMetrics& ReplicationMetrics::get(OperationContext* opCtx) {
return get(opCtx->getServiceContext());
}
+// Constructs _electionMetrics from five default-constructed ElectionReasonCounter objects;
+// presumably one per field declared for ElectionMetrics in replication_metrics.idl - confirm
+// against the generated constructor.
+ReplicationMetrics::ReplicationMetrics()
+ : _electionMetrics(ElectionReasonCounter(),
+ ElectionReasonCounter(),
+ ElectionReasonCounter(),
+ ElectionReasonCounter(),
+ ElectionReasonCounter()) {}
+
+// Nothing to release explicitly; the members clean up after themselves.
+ReplicationMetrics::~ReplicationMetrics() = default;
+
+/**
+ * Adds one to the 'called' count of the election reason counter selected by 'reason', while
+ * holding _mutex.
+ *
+ * kStepUpRequest and kStepUpRequestSkipDryRun are both recorded under the stepUpCmd counter, and
+ * kSingleNodePromptElection is recorded under the freezeTimeout counter.
+ */
+void ReplicationMetrics::incrementNumElectionsCalledForReason(
+    TopologyCoordinator::StartElectionReason reason) {
+    stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+    // Each getter hands back a mutable reference to the counter held by _electionMetrics, so
+    // incrementing through it updates the held value directly. Passing that same reference back
+    // to the matching setter would only assign the counter over itself.
+    switch (reason) {
+        case TopologyCoordinator::StartElectionReason::kStepUpRequest:
+        case TopologyCoordinator::StartElectionReason::kStepUpRequestSkipDryRun:
+            _electionMetrics.getStepUpCmd().incrementCalled();
+            break;
+        case TopologyCoordinator::StartElectionReason::kPriorityTakeover:
+            _electionMetrics.getPriorityTakeover().incrementCalled();
+            break;
+        case TopologyCoordinator::StartElectionReason::kCatchupTakeover:
+            _electionMetrics.getCatchUpTakeover().incrementCalled();
+            break;
+        case TopologyCoordinator::StartElectionReason::kElectionTimeout:
+            _electionMetrics.getElectionTimeout().incrementCalled();
+            break;
+        case TopologyCoordinator::StartElectionReason::kSingleNodePromptElection:
+            _electionMetrics.getFreezeTimeout().incrementCalled();
+            break;
+    }
+}
+
+// Test-only accessor: returns the 'called' count of the stepUpCmd counter, read under _mutex.
+int ReplicationMetrics::getNumStepUpCmdsCalled_forTesting() {
+    stdx::lock_guard<stdx::mutex> lock(_mutex);
+    const auto& counter = _electionMetrics.getStepUpCmd();
+    return counter.getCalled();
+}
+
+// Test-only accessor: returns the 'called' count of the priorityTakeover counter, read under
+// _mutex.
+int ReplicationMetrics::getNumPriorityTakeoversCalled_forTesting() {
+    stdx::lock_guard<stdx::mutex> lock(_mutex);
+    const auto& counter = _electionMetrics.getPriorityTakeover();
+    return counter.getCalled();
+}
+
+// Test-only accessor: returns the 'called' count of the catchUpTakeover counter, read under
+// _mutex.
+int ReplicationMetrics::getNumCatchUpTakeoversCalled_forTesting() {
+    stdx::lock_guard<stdx::mutex> lock(_mutex);
+    const auto& counter = _electionMetrics.getCatchUpTakeover();
+    return counter.getCalled();
+}
+
+// Test-only accessor: returns the 'called' count of the electionTimeout counter, read under
+// _mutex.
+int ReplicationMetrics::getNumElectionTimeoutsCalled_forTesting() {
+    stdx::lock_guard<stdx::mutex> lock(_mutex);
+    const auto& counter = _electionMetrics.getElectionTimeout();
+    return counter.getCalled();
+}
+
+// Test-only accessor: returns the 'called' count of the freezeTimeout counter, read under _mutex.
+int ReplicationMetrics::getNumFreezeTimeoutsCalled_forTesting() {
+    stdx::lock_guard<stdx::mutex> lock(_mutex);
+    const auto& counter = _electionMetrics.getFreezeTimeout();
+    return counter.getCalled();
+}
+
+// Returns the BSON form of _electionMetrics, produced while holding _mutex.
+BSONObj ReplicationMetrics::getElectionMetricsBSON() {
+    stdx::lock_guard<stdx::mutex> lock(_mutex);
+    BSONObj snapshot = _electionMetrics.toBSON();
+    return snapshot;
+}
+
class ReplicationMetrics::ElectionMetricsSSS : public ServerStatusSection {
public:
ElectionMetricsSSS() : ServerStatusSection("electionMetrics") {}
+
+    // Always answers true.
+    bool includeByDefault() const override {
+        return true;
+    }
+
+    // Produces the section contents from the ReplicationMetrics reached through 'opCtx';
+    // 'configElement' is not consulted.
+    BSONObj generateSection(OperationContext* opCtx,
+                            const BSONElement& configElement) const override {
+        return ReplicationMetrics::get(opCtx).getElectionMetricsBSON();
+    }
} electionMetricsSSS;
} // namespace repl
diff --git a/src/mongo/db/repl/replication_metrics.h b/src/mongo/db/repl/replication_metrics.h
index 7012f4802cf..f6c269e7ba0 100644
--- a/src/mongo/db/repl/replication_metrics.h
+++ b/src/mongo/db/repl/replication_metrics.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/repl/replication_metrics_gen.h"
+#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/service_context.h"
#include "mongo/stdx/mutex.h"
@@ -47,6 +48,15 @@ public:
ReplicationMetrics();
~ReplicationMetrics();
+ // Adds one to the 'called' count of the election reason counter that matches 'reason'.
+ void incrementNumElectionsCalledForReason(TopologyCoordinator::StartElectionReason reason);
+ // Test-only accessors; each returns the 'called' count of the counter named in the method.
+ int getNumStepUpCmdsCalled_forTesting();
+ int getNumPriorityTakeoversCalled_forTesting();
+ int getNumCatchUpTakeoversCalled_forTesting();
+ int getNumElectionTimeoutsCalled_forTesting();
+ int getNumFreezeTimeoutsCalled_forTesting();
+
+ // Returns the election metrics converted to BSON.
+ BSONObj getElectionMetricsBSON();
+
private:
class ElectionMetricsSSS;
diff --git a/src/mongo/db/repl/replication_metrics.idl b/src/mongo/db/repl/replication_metrics.idl
index 566d2ee665b..6612eb0216f 100644
--- a/src/mongo/db/repl/replication_metrics.idl
+++ b/src/mongo/db/repl/replication_metrics.idl
@@ -32,22 +32,49 @@
global:
cpp_namespace: "mongo::repl"
+ cpp_includes:
+ - "mongo/db/repl/election_reason_counter_parser.h"
imports:
- "mongo/idl/basic_types.idl"
+types:
+ # The serializer and deserializer named below are declared in
+ # mongo/db/repl/election_reason_counter_parser.h, which is listed under cpp_includes.
+ ElectionReasonCounter:
+ bson_serialization_type: any
+ description: "Wrapper around ElectionReasonCounterBase that has increment methods"
+ cpp_type: ElectionReasonCounter
+ serializer: "::mongo::repl::serializeElectionReasonCounterToBSON"
+ deserializer: "::mongo::repl::parseElectionReasonCounter"
+
structs:
ElectionMetrics:
description: "Stores metrics related to all the elections a node has called"
strict: true
fields:
- numStepUpsRequested:
- description: "Number of elections this node has called due to step up requests"
- type: int
-
+ # ReplicationMetrics increments each counter through the reference returned by its getter,
+ # which is presumably why every field sets non_const_getter.
+ stepUpCmd:
+ description: "Election reason counter for step up requests"
+ type: ElectionReasonCounter
+ non_const_getter: true
+ priorityTakeover:
+ description: "Election reason counter for priority takeovers"
+ type: ElectionReasonCounter
+ non_const_getter: true
+ catchUpTakeover:
+ description: "Election reason counter for catchup takeovers"
+ type: ElectionReasonCounter
+ non_const_getter: true
+ electionTimeout:
+ description: "Election reason counter for when the node didn't see a primary in the
+ past electionTimeoutPeriod milliseconds"
+ type: ElectionReasonCounter
+ non_const_getter: true
+ freezeTimeout:
+ description: "Election reason counter for single node replica set prompt elections"
+ type: ElectionReasonCounter
+ non_const_getter: true
ElectionCandidateMetrics:
- description: "Stores metrics that are specific to the last election in which the node was a
+ description: "Stores metrics that are specific to the last election in which the node was a
candidate"
strict: true
fields: