author     Kaitlin Mahar <kaitlin.mahar@mongodb.com>          2023-03-02 19:58:52 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2023-03-22 20:18:43 +0000
commit     75a60fabbfd7cb3df17cc509f8babf38a1087a52 (patch)
tree       af80a3920f3f9f1651c29927e5c6b9654f65deee /src/mongo
parent     5767f6d16fbd191e3ebc3d0b929b398774a80fb6 (diff)
download   mongo-75a60fabbfd7cb3df17cc509f8babf38a1087a52.tar.gz
SERVER-66996 Log warnings when ReplicaSetAwareServices and PrimaryOnlyServices block stepUp from completing
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/repl/SConscript2
-rw-r--r--src/mongo/db/repl/primary_only_service.cpp31
-rw-r--r--src/mongo/db/repl/primary_only_service.h3
-rw-r--r--src/mongo/db/repl/primary_only_service_test.cpp51
-rw-r--r--src/mongo/db/repl/repl_server_parameters.idl46
-rw-r--r--src/mongo/db/repl/replica_set_aware_service.cpp61
-rw-r--r--src/mongo/db/repl/replica_set_aware_service.h19
-rw-r--r--src/mongo/db/repl/replica_set_aware_service_test.cpp129
-rw-r--r--src/mongo/db/repl/tenant_file_importer_service.h4
-rw-r--r--src/mongo/db/s/balancer/balancer.h3
-rw-r--r--src/mongo/db/s/balancer_stats_registry.h3
-rw-r--r--src/mongo/db/s/migration_destination_manager.h3
-rw-r--r--src/mongo/db/s/query_analysis_coordinator.h4
-rw-r--r--src/mongo/db/s/query_analysis_writer.h3
-rw-r--r--src/mongo/db/s/range_deleter_service.h3
-rw-r--r--src/mongo/db/s/sharding_initialization_mongod.h3
-rw-r--r--src/mongo/db/s/sharding_recovery_service.h3
-rw-r--r--src/mongo/db/s/user_writes_recoverable_critical_section_service.h3
-rw-r--r--src/mongo/db/transaction/internal_transactions_reap_service.h3
-rw-r--r--src/mongo/db/ttl.cpp3
-rw-r--r--src/mongo/db/vector_clock_mongod.cpp3
-rw-r--r--src/mongo/idl/cluster_server_parameter_initializer.h3
22 files changed, 384 insertions, 2 deletions
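
The additions below all follow one pattern: start a timer on entry to the step-up hook pass, then use a scope guard to emit a warning on exit whenever the elapsed time exceeds a configurable threshold, once per individual service and once for the whole registry. What follows is a minimal standalone sketch of that pattern using only std::chrono and a small RAII guard; ScopedSlowWarning, runStepUpHooks, and the hard-coded 200ms/500ms values are illustrative stand-ins for the Timer, ON_BLOCK_EXIT, LOGV2, and server-parameter machinery used in the actual diff.

// Sketch only: time a block of work and warn if it ran longer than a threshold.
#include <chrono>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class ScopedSlowWarning {
public:
    ScopedSlowWarning(std::string label, std::chrono::milliseconds threshold)
        : _label(std::move(label)),
          _threshold(threshold),
          _start(std::chrono::steady_clock::now()) {}

    ~ScopedSlowWarning() {
        auto spent = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - _start);
        if (spent > _threshold) {
            std::cerr << "warning: " << _label << " took " << spent.count()
                      << "ms (threshold " << _threshold.count() << "ms)\n";
        }
    }

private:
    std::string _label;
    std::chrono::milliseconds _threshold;
    std::chrono::steady_clock::time_point _start;
};

// Hypothetical registry pass: one guard per service plus one for the total,
// mirroring the onStepUpBegin/onStepUpComplete changes in this commit.
void runStepUpHooks(const std::vector<std::pair<std::string, std::function<void()>>>& services) {
    ScopedSlowWarning total("all services", std::chrono::milliseconds{500});
    for (const auto& [name, hook] : services) {
        ScopedSlowWarning perService(name, std::chrono::milliseconds{200});
        hook();
    }
}

One difference from the sketch: the real code reloads the thresholds from atomic server parameters on every call, so runtime changes are picked up by the next step-up.
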
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 637097b7f65..df1452fbb7d 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -826,6 +826,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/service_context',
+ 'repl_server_parameters',
],
)
@@ -2050,6 +2051,7 @@ env.Library(
'$BUILD_DIR/mongo/executor/thread_pool_task_executor',
'$BUILD_DIR/mongo/util/concurrency/thread_pool',
'repl_coordinator_interface',
+ 'repl_server_parameters',
'replica_set_aware_service',
'wait_for_majority_service',
],
diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp
index fd00380c380..b3513e481fa 100644
--- a/src/mongo/db/repl/primary_only_service.cpp
+++ b/src/mongo/db/repl/primary_only_service.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/replica_set_aware_service.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/wait_for_majority_service.h"
@@ -206,7 +207,37 @@ void PrimaryOnlyServiceRegistry::onStepUpComplete(OperationContext* opCtx, long
str::stream() << "Term from last optime (" << stepUpOpTime.getTerm()
<< ") doesn't match the term we're stepping up in (" << term << ")");
+
+ // Since this method is called before we mark the node writable in
+ // ReplicationCoordinatorImpl::signalDrainComplete and therefore can block the new primary from
+ // starting to receive writes, generate a warning if we are spending too long here.
+ Timer totalTime{};
+ ON_BLOCK_EXIT([&] {
+ auto timeSpent = totalTime.millis();
+ auto threshold = slowTotalOnStepUpCompleteThresholdMS.load();
+ if (timeSpent > threshold) {
+ LOGV2(6699604,
+ "Duration spent in PrimaryOnlyServiceRegistry::onStepUpComplete for all services "
+ "exceeded slowTotalOnStepUpCompleteThresholdMS",
+ "thresholdMillis"_attr = threshold,
+ "durationMillis"_attr = timeSpent);
+ }
+ });
for (auto& service : _servicesByName) {
+ // Additionally, generate a warning if any individual service is taking too long.
+ Timer t{};
+ ON_BLOCK_EXIT([&] {
+ auto timeSpent = t.millis();
+ auto threshold = slowServiceOnStepUpCompleteThresholdMS.load();
+ if (timeSpent > threshold) {
+ LOGV2(6699605,
+ "Duration spent in PrimaryOnlyServiceRegistry::onStepUpComplete "
+ "for service exceeded slowServiceOnStepUpCompleteThresholdMS",
+ "thresholdMillis"_attr = threshold,
+ "durationMillis"_attr = timeSpent,
+ "serviceName"_attr = service.first);
+ }
+ });
service.second->onStepUp(stepUpOpTime);
}
}
diff --git a/src/mongo/db/repl/primary_only_service.h b/src/mongo/db/repl/primary_only_service.h
index 207e75c30cb..8d5c30856b4 100644
--- a/src/mongo/db/repl/primary_only_service.h
+++ b/src/mongo/db/repl/primary_only_service.h
@@ -585,6 +585,9 @@ public:
void onBecomeArbiter() final {}
void onStepUpComplete(OperationContext*, long long term) final;
void onStepDown() final;
+ inline std::string getServiceName() const override final {
+ return "PrimaryOnlyServiceRegistry";
+ }
private:
StringMap<std::unique_ptr<PrimaryOnlyService>> _servicesByName;
diff --git a/src/mongo/db/repl/primary_only_service_test.cpp b/src/mongo/db/repl/primary_only_service_test.cpp
index b65a54592d7..277400040fa 100644
--- a/src/mongo/db/repl/primary_only_service_test.cpp
+++ b/src/mongo/db/repl/primary_only_service_test.cpp
@@ -42,6 +42,7 @@
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/rpc/metadata/egress_metadata_hook_list.h"
#include "mongo/unittest/death_test.h"
+#include "mongo/unittest/log_test.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/fail_point.h"
@@ -1152,3 +1153,53 @@ TEST_F(PrimaryOnlyServiceTest, StateTransitionFromRebuildingShouldWakeUpConditio
stepUpThread.join();
lookUpInstanceThread.join();
}
+
+TEST_F(PrimaryOnlyServiceTest, PrimaryOnlyServiceLogSlowServices) {
+ std::string slowSingleServiceStepUpCompleteMsg =
+ "Duration spent in PrimaryOnlyServiceRegistry::onStepUpComplete for service "
+ "exceeded slowServiceOnStepUpCompleteThresholdMS";
+ std::string slowTotalTimeStepUpCompleteMsg =
+ "Duration spent in PrimaryOnlyServiceRegistry::onStepUpComplete for all services "
+ "exceeded slowTotalOnStepUpCompleteThresholdMS";
+
+ stepDown();
+
+ // First test a stepUp with no hanging. We shouldn't log anything.
+ startCapturingLogMessages();
+ stepUp();
+ stopCapturingLogMessages();
+ ASSERT_EQ(0, countTextFormatLogLinesContaining(slowSingleServiceStepUpCompleteMsg));
+ ASSERT_EQ(0, countTextFormatLogLinesContaining(slowTotalTimeStepUpCompleteMsg));
+
+ // Use the "PrimaryOnlyServiceHangBeforeLaunchingStepUpLogic", which is encountered
+ // via PrimaryOnlyServiceRegistry::onStepUpComplete, to simulate a slow primary-only
+ // service.
+ auto doStepUpWithHang = [this](long long ms) {
+ stepDown();
+
+ auto stepUpTimesEntered =
+ PrimaryOnlyServiceHangBeforeLaunchingStepUpLogic.setMode(FailPoint::alwaysOn);
+
+ // Start an async task to step up, which will block on the fail point.
+ auto stepUpFuture = ExecutorFuture<void>(_testExecutor).then([this]() { stepUp(); });
+
+ // Once stepUp has hit the failpoint, sleep for the specified amount of ms so the service
+ // hangs, before disabling the failpoint and allowing stepUp to proceed.
+ PrimaryOnlyServiceHangBeforeLaunchingStepUpLogic.waitForTimesEntered(++stepUpTimesEntered);
+ sleepmillis(ms);
+ PrimaryOnlyServiceHangBeforeLaunchingStepUpLogic.setMode(FailPoint::off);
+ stepUpFuture.wait();
+ };
+
+ // Hang for long enough to generate a message about the single service being slow.
+ startCapturingLogMessages();
+ doStepUpWithHang(repl::slowServiceOnStepUpCompleteThresholdMS.load() + 1);
+ stopCapturingLogMessages();
+ ASSERT_EQ(1, countTextFormatLogLinesContaining(slowSingleServiceStepUpCompleteMsg));
+
+ // Hang for long enough to generate a message about the total time being slow.
+ startCapturingLogMessages();
+ doStepUpWithHang(repl::slowTotalOnStepUpCompleteThresholdMS.load() + 1);
+ stopCapturingLogMessages();
+ ASSERT_EQ(1, countTextFormatLogLinesContaining(slowTotalTimeStepUpCompleteMsg));
+}
diff --git a/src/mongo/db/repl/repl_server_parameters.idl b/src/mongo/db/repl/repl_server_parameters.idl
index 85b3a072c91..50959deefe0 100644
--- a/src/mongo/db/repl/repl_server_parameters.idl
+++ b/src/mongo/db/repl/repl_server_parameters.idl
@@ -693,6 +693,52 @@ server_parameters:
default:
expr: 15 * 60 * 1000
+ slowServiceOnStepUpBeginThresholdMS:
+ description: >-
+ The maximum amount of time, in milliseconds, that a single replica set aware
+ service's onStepUpBegin method can take without a warning being logged.
+ set_at: [ startup, runtime ]
+ cpp_vartype: AtomicWord<int>
+ cpp_varname: slowServiceOnStepUpBeginThresholdMS
+ default: 200
+ validator:
+ gte: 0
+
+ slowTotalOnStepUpBeginThresholdMS:
+ description: >-
+ The maximum amount of time, in milliseconds, that the total of all replica set
+ aware services' onStepUpBegin methods can take without a warning being logged.
+ set_at: [ startup, runtime ]
+ cpp_vartype: AtomicWord<int>
+ cpp_varname: slowTotalOnStepUpBeginThresholdMS
+ default: 500
+ validator:
+ gte: 0
+
+ slowServiceOnStepUpCompleteThresholdMS:
+ description: >-
+ The maximum amount of time, in milliseconds, that a single replica set aware
+ service or primary-only service's onStepUpComplete method can take without a warning
+ being logged.
+ set_at: [ startup, runtime ]
+ cpp_vartype: AtomicWord<int>
+ cpp_varname: slowServiceOnStepUpCompleteThresholdMS
+ default: 200
+ validator:
+ gte: 0
+
+ slowTotalOnStepUpCompleteThresholdMS:
+ description: >-
+ The maximum amount of time, in milliseconds, that the total of all replica set
+ aware services' and primary-only services' onStepUpComplete methods can take
+ without a warning being logged.
+ set_at: [ startup, runtime ]
+ cpp_vartype: AtomicWord<int>
+ cpp_varname: slowTotalOnStepUpCompleteThresholdMS
+ default: 500
+ validator:
+ gte: 0
+
feature_flags:
featureFlagRetryableFindAndModify:
description: >-
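
Each of the four entries above generates an AtomicWord<int> variable (named by cpp_varname) in the IDL-generated repl_server_parameters_gen.h, and because set_at covers both startup and runtime they can be changed on a live node via --setParameter or the setParameter command. The callers in this commit re-read them with .load() on every step-up, so a runtime change takes effect at the next election. A hedged sketch of the generated shape and a hypothetical consumer (the free function is illustrative, not part of the diff):

// Sketch: approximate shape of the generated declarations; the real ones live in
// the IDL-generated repl_server_parameters_gen.h.
#include "mongo/platform/atomic_word.h"

namespace mongo::repl {
extern AtomicWord<int> slowServiceOnStepUpBeginThresholdMS;     // default 200
extern AtomicWord<int> slowTotalOnStepUpBeginThresholdMS;       // default 500
extern AtomicWord<int> slowServiceOnStepUpCompleteThresholdMS;  // default 200
extern AtomicWord<int> slowTotalOnStepUpCompleteThresholdMS;    // default 500
}  // namespace mongo::repl

// Hypothetical consumer: loads the current value on each call, so a setParameter
// issued at runtime is observed without a restart.
inline bool exceededPerServiceStepUpBeginThreshold(long long spentMillis) {
    return spentMillis > mongo::repl::slowServiceOnStepUpBeginThresholdMS.load();
}
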
diff --git a/src/mongo/db/repl/replica_set_aware_service.cpp b/src/mongo/db/repl/replica_set_aware_service.cpp
index 01a3035d376..7b83cfb3d25 100644
--- a/src/mongo/db/repl/replica_set_aware_service.cpp
+++ b/src/mongo/db/repl/replica_set_aware_service.cpp
@@ -28,8 +28,10 @@
*/
+#include "mongo/logv2/log.h"
#include "mongo/platform/basic.h"
+#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/replica_set_aware_service.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
@@ -78,13 +80,72 @@ void ReplicaSetAwareServiceRegistry::onShutdown() {
}
void ReplicaSetAwareServiceRegistry::onStepUpBegin(OperationContext* opCtx, long long term) {
+ // Since this method is run during drain mode and can block state transition, generate a warning
+ // if we are spending too long here.
+ Timer totalTime{};
+ ON_BLOCK_EXIT([&] {
+ auto timeSpent = totalTime.millis();
+ auto threshold = repl::slowTotalOnStepUpBeginThresholdMS.load();
+ if (timeSpent > threshold) {
+ LOGV2(
+ 6699600,
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpBegin for all services "
+ "exceeded slowTotalOnStepUpBeginThresholdMS",
+ "thresholdMillis"_attr = threshold,
+ "durationMillis"_attr = timeSpent);
+ }
+ });
+
std::for_each(_services.begin(), _services.end(), [&](ReplicaSetAwareInterface* service) {
+ // Additionally, generate a warning if any individual service is taking too long.
+ Timer t{};
+ ON_BLOCK_EXIT([&] {
+ auto timeSpent = t.millis();
+ auto threshold = repl::slowServiceOnStepUpBeginThresholdMS.load();
+ if (timeSpent > threshold) {
+ LOGV2(6699601,
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpBegin "
+ "for service exceeded slowServiceOnStepUpBeginThresholdMS",
+ "thresholdMillis"_attr = threshold,
+ "durationMillis"_attr = timeSpent,
+ "serviceName"_attr = service->getServiceName());
+ }
+ });
service->onStepUpBegin(opCtx, term);
});
}
void ReplicaSetAwareServiceRegistry::onStepUpComplete(OperationContext* opCtx, long long term) {
+ // Since this method is called before we mark the node writable in
+ // ReplicationCoordinatorImpl::signalDrainComplete and therefore can block the new primary from
+ // starting to receive writes, generate a warning if we are spending too long here.
+ Timer totalTime{};
+ ON_BLOCK_EXIT([&] {
+ auto timeSpent = totalTime.millis();
+ auto threshold = repl::slowTotalOnStepUpCompleteThresholdMS.load();
+ if (timeSpent > threshold) {
+ LOGV2(6699602,
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpComplete "
+ "for all services exceeded slowTotalOnStepUpCompleteThresholdMS",
+ "thresholdMills"_attr = threshold,
+ "durationMillis"_attr = timeSpent);
+ }
+ });
std::for_each(_services.begin(), _services.end(), [&](ReplicaSetAwareInterface* service) {
+ // Additionally, generate a warning if any individual service is taking too long.
+ Timer t{};
+ ON_BLOCK_EXIT([&] {
+ auto timeSpent = t.millis();
+ auto threshold = repl::slowServiceOnStepUpCompleteThresholdMS.load();
+ if (timeSpent > threshold) {
+ LOGV2(6699603,
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpComplete "
+ "for service exceeded slowServiceOnStepUpCompleteThresholdMS",
+ "thresholdMills"_attr = threshold,
+ "durationMillis"_attr = timeSpent,
+ "serviceName"_attr = service->getServiceName());
+ }
+ });
service->onStepUpComplete(opCtx, term);
});
}
diff --git a/src/mongo/db/repl/replica_set_aware_service.h b/src/mongo/db/repl/replica_set_aware_service.h
index 6618b7453aa..f1af24148a4 100644
--- a/src/mongo/db/repl/replica_set_aware_service.h
+++ b/src/mongo/db/repl/replica_set_aware_service.h
@@ -146,12 +146,19 @@ public:
virtual void onShutdown() = 0;
/**
- * Called prior to stepping up as PRIMARY, i.e. after drain mode has completed.
+ * Called prior to stepping up as PRIMARY, i.e. after drain mode has completed but before
+ * the RSTL is acquired.
+ * Implementations of this method should be short-running in order to prevent blocking
+ * the stepUp from completing.
*/
virtual void onStepUpBegin(OperationContext* opCtx, long long term) = 0;
/**
- * Called after the node has transitioned to PRIMARY.
+ * Called after the node has transitioned to PRIMARY, i.e. after stepUp reconfig and after
+ * writing the first oplog entry with the new term, but before the node starts accepting
+ * writes.
+ * Implementations of this method should be short-running in order to prevent blocking
+ * the stepUp from completing.
*/
virtual void onStepUpComplete(OperationContext* opCtx, long long term) = 0;
@@ -168,6 +175,11 @@ public:
* Called when the node commences being an arbiter.
*/
virtual void onBecomeArbiter() = 0;
+
+ /**
+ * Returns the name of the service. Used for logging purposes.
+ */
+ virtual std::string getServiceName() const = 0;
};
@@ -223,6 +235,9 @@ public:
void onStepUpComplete(OperationContext* opCtx, long long term) final;
void onStepDown() final;
void onBecomeArbiter() final;
+ inline std::string getServiceName() const override final {
+ return "ReplicaSetAwareServiceRegistry";
+ }
private:
void _registerService(ReplicaSetAwareInterface* service);
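
The new documentation makes the contract explicit: onStepUpBegin and onStepUpComplete run on the step-up path and must stay short, and getServiceName exists so the registry can name a slow service in its warning. A hedged sketch of a conforming service follows; the class name, the executor member, and the elided overrides are hypothetical and not part of this commit.

// Hypothetical service: implements getServiceName() for the new warning logs and
// keeps its step-up hook short by deferring heavy work to an executor.
class ExampleRSAwareService : public ReplicaSetAwareService<ExampleRSAwareService> {
public:
    std::string getServiceName() const final {
        return "ExampleRSAwareService";
    }

    void onStepUpComplete(OperationContext* opCtx, long long term) final {
        // Keep the hook itself cheap: schedule the per-term initialization on an
        // executor instead of blocking the new primary from accepting writes.
        _executor->schedule([term](Status s) {
            if (s.isOK()) {
                // ... long-running per-term initialization ...
            }
        });
    }

    // ... remaining ReplicaSetAwareInterface overrides (onStartup, onShutdown,
    // onStepUpBegin, onStepDown, onBecomeArbiter, ...) elided ...

private:
    bool shouldRegisterReplicaSetAwareService() const final {
        return true;
    }

    std::shared_ptr<executor::TaskExecutor> _executor;  // assumed to be initialized elsewhere
};
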
diff --git a/src/mongo/db/repl/replica_set_aware_service_test.cpp b/src/mongo/db/repl/replica_set_aware_service_test.cpp
index d9a397f7240..ccfa7b3e4f2 100644
--- a/src/mongo/db/repl/replica_set_aware_service_test.cpp
+++ b/src/mongo/db/repl/replica_set_aware_service_test.cpp
@@ -35,6 +35,7 @@
#include "mongo/db/repl/replica_set_aware_service.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/unittest/log_test.h"
namespace mongo {
@@ -94,6 +95,10 @@ private:
bool shouldRegisterReplicaSetAwareService() const final {
return false;
}
+
+ std::string getServiceName() const final override {
+ return "ServiceA";
+ }
};
const auto getServiceA = ServiceContext::declareDecoration<ServiceA>();
@@ -116,6 +121,10 @@ private:
bool shouldRegisterReplicaSetAwareService() const final {
return true;
}
+
+ std::string getServiceName() const final override {
+ return "ServiceB";
+ }
};
const auto getServiceB = ServiceContext::declareDecoration<ServiceB>();
@@ -141,6 +150,10 @@ private:
return true;
}
+ std::string getServiceName() const final override {
+ return "ServiceC";
+ }
+
void onStartup(OperationContext* opCtx) final {
ASSERT_EQ(numCallsOnStartup, ServiceB::get(getServiceContext())->numCallsOnStartup - 1);
TestService::onStartup(opCtx);
@@ -194,6 +207,57 @@ ServiceContext* ServiceC::getServiceContext() {
return getServiceC.owner(this);
}
+/*
+ * Service that can be configured to sleep for a specified amount of time in its onStepUpBegin and
+ * onStepUpComplete methods. Used for testing that we log when a service takes a long time.
+ */
+class SlowService : public TestService<SlowService> {
+public:
+ static SlowService* get(ServiceContext* serviceContext);
+
+ void setStepUpBeginSleepDuration(Duration<std::milli> duration) {
+ _stepUpBeginSleepDuration = duration;
+ }
+
+ void setStepUpCompleteSleepDuration(Duration<std::milli> duration) {
+ _stepUpCompleteSleepDuration = duration;
+ }
+
+private:
+ Duration<std::milli> _stepUpBeginSleepDuration = Milliseconds(0);
+ Duration<std::milli> _stepUpCompleteSleepDuration = Milliseconds(0);
+
+ ServiceContext* getServiceContext();
+
+ bool shouldRegisterReplicaSetAwareService() const final {
+ return true;
+ }
+
+ std::string getServiceName() const final override {
+ return "SlowService";
+ }
+
+ void onStepUpBegin(OperationContext* opCtx, long long term) final {
+ sleepFor(_stepUpBeginSleepDuration);
+ TestService::onStepUpBegin(opCtx, term);
+ }
+
+ void onStepUpComplete(OperationContext* opCtx, long long term) final {
+ sleepFor(_stepUpCompleteSleepDuration);
+ TestService::onStepUpComplete(opCtx, term);
+ }
+};
+
+const auto getSlowService = ServiceContext::declareDecoration<SlowService>();
+ReplicaSetAwareServiceRegistry::Registerer<SlowService> slowServiceRegister("SlowService");
+
+SlowService* SlowService::get(ServiceContext* serviceContext) {
+ return &getSlowService(serviceContext);
+}
+
+ServiceContext* SlowService::getServiceContext() {
+ return getSlowService.owner(this);
+}
class ReplicaSetAwareServiceTest : public ServiceContextTest {
public:
@@ -293,6 +357,71 @@ TEST_F(ReplicaSetAwareServiceTest, ReplicaSetAwareService) {
ASSERT_EQ(1, c->numCallsOnBecomeArbiter);
}
+TEST_F(ReplicaSetAwareServiceTest, ReplicaSetAwareServiceLogSlowServices) {
+ std::string slowSingleServiceStepUpBeginMsg =
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpBegin for service exceeded "
+ "slowServiceOnStepUpBeginThresholdMS";
+ std::string slowSingleServiceStepUpCompleteMsg =
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpComplete for service "
+ "exceeded slowServiceOnStepUpCompleteThresholdMS";
+ std::string slowTotalTimeStepUpBeginMsg =
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpBegin for all services "
+ "exceeded slowTotalOnStepUpBeginThresholdMS";
+ std::string slowTotalTimeStepUpCompleteMsg =
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpComplete for all services "
+ "exceeded slowTotalOnStepUpCompleteThresholdMS";
+
+ auto sc = getGlobalServiceContext();
+ auto opCtxHolder = makeOperationContext();
+ auto opCtx = opCtxHolder.get();
+
+ auto slowService = SlowService::get(sc);
+ ASSERT_EQ(0, slowService->numCallsOnStepUpBegin);
+ ASSERT_EQ(0, slowService->numCallsOnStepUpComplete);
+
+ // With the default sleep interval (no sleep) we don't log anything.
+ startCapturingLogMessages();
+ ReplicaSetAwareServiceRegistry::get(sc).onStepUpBegin(opCtx, _term);
+ ReplicaSetAwareServiceRegistry::get(sc).onStepUpComplete(opCtx, _term);
+ stopCapturingLogMessages();
+ ASSERT_EQ(1, slowService->numCallsOnStepUpBegin);
+ ASSERT_EQ(1, slowService->numCallsOnStepUpComplete);
+ ASSERT_EQ(0,
+ countTextFormatLogLinesContaining(
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpBegin"));
+ ASSERT_EQ(0,
+ countTextFormatLogLinesContaining(
+ "Duration spent in ReplicaSetAwareServiceRegistry::onStepUpComplete"));
+
+ // Introduce delays at the minimum thresholds at which we will log for a single service.
+ slowService->setStepUpBeginSleepDuration(
+ Milliseconds(repl::slowServiceOnStepUpBeginThresholdMS.load() + 1));
+ slowService->setStepUpCompleteSleepDuration(
+ Milliseconds(repl::slowServiceOnStepUpCompleteThresholdMS.load() + 1));
+ startCapturingLogMessages();
+ ReplicaSetAwareServiceRegistry::get(sc).onStepUpBegin(opCtx, _term);
+ ReplicaSetAwareServiceRegistry::get(sc).onStepUpComplete(opCtx, _term);
+ stopCapturingLogMessages();
+ ASSERT_EQ(2, slowService->numCallsOnStepUpBegin);
+ ASSERT_EQ(2, slowService->numCallsOnStepUpComplete);
+ ASSERT_EQ(1, countTextFormatLogLinesContaining(slowSingleServiceStepUpBeginMsg));
+ ASSERT_EQ(1, countTextFormatLogLinesContaining(slowSingleServiceStepUpCompleteMsg));
+
+ // Introduce a delay that should cause us to log for the total time across all services.
+ slowService->setStepUpBeginSleepDuration(
+ Milliseconds(repl::slowTotalOnStepUpBeginThresholdMS.load() + 1));
+ slowService->setStepUpCompleteSleepDuration(
+ Milliseconds(repl::slowTotalOnStepUpCompleteThresholdMS.load() + 1));
+ startCapturingLogMessages();
+ ReplicaSetAwareServiceRegistry::get(sc).onStepUpBegin(opCtx, _term);
+ ReplicaSetAwareServiceRegistry::get(sc).onStepUpComplete(opCtx, _term);
+ stopCapturingLogMessages();
+ ASSERT_EQ(3, slowService->numCallsOnStepUpBegin);
+ ASSERT_EQ(3, slowService->numCallsOnStepUpComplete);
+ ASSERT_EQ(1, countTextFormatLogLinesContaining(slowTotalTimeStepUpBeginMsg));
+ ASSERT_EQ(1, countTextFormatLogLinesContaining(slowTotalTimeStepUpCompleteMsg));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/repl/tenant_file_importer_service.h b/src/mongo/db/repl/tenant_file_importer_service.h
index 4a6650da72f..2ac95e3bed6 100644
--- a/src/mongo/db/repl/tenant_file_importer_service.h
+++ b/src/mongo/db/repl/tenant_file_importer_service.h
@@ -101,6 +101,10 @@ private:
void onBecomeArbiter() final {}
+ inline std::string getServiceName() const override final {
+ return "TenantFileImporterService";
+ }
+
/**
* A worker function that waits for ImporterEvents and handles cloning and importing files.
*/
diff --git a/src/mongo/db/s/balancer/balancer.h b/src/mongo/db/s/balancer/balancer.h
index 9e08037c8f1..592204e9bb4 100644
--- a/src/mongo/db/s/balancer/balancer.h
+++ b/src/mongo/db/s/balancer/balancer.h
@@ -173,6 +173,9 @@ private:
void onStepUpComplete(OperationContext* opCtx, long long term) final;
void onStepDown() final;
void onBecomeArbiter() final;
+ inline std::string getServiceName() const override final {
+ return "Balancer";
+ }
/**
* The main balancer loop, which runs in a separate thread.
diff --git a/src/mongo/db/s/balancer_stats_registry.h b/src/mongo/db/s/balancer_stats_registry.h
index aa495fae56c..a5effc6b0e2 100644
--- a/src/mongo/db/s/balancer_stats_registry.h
+++ b/src/mongo/db/s/balancer_stats_registry.h
@@ -113,6 +113,9 @@ private:
void onStartup(OperationContext* opCtx) override final;
void onStepUpComplete(OperationContext* opCtx, long long term) override final;
void onStepDown() override final;
+ inline std::string getServiceName() const override final {
+ return "BalancerStatsRegistry";
+ }
void _loadOrphansCount(OperationContext* opCtx);
bool _isInitialized() const {
diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h
index 827a0206b80..49d1daa0eb8 100644
--- a/src/mongo/db/s/migration_destination_manager.h
+++ b/src/mongo/db/s/migration_destination_manager.h
@@ -279,6 +279,9 @@ private:
void onStepUpComplete(OperationContext* opCtx, long long term) final {}
void onStepDown() final;
void onBecomeArbiter() final {}
+ inline std::string getServiceName() const override final {
+ return "MigrationDestinationManager";
+ }
// The number of session oplog entries received from the source shard. Not all oplog
// entries received from the source shard may be committed
diff --git a/src/mongo/db/s/query_analysis_coordinator.h b/src/mongo/db/s/query_analysis_coordinator.h
index 2525bdcbef5..c275c059c85 100644
--- a/src/mongo/db/s/query_analysis_coordinator.h
+++ b/src/mongo/db/s/query_analysis_coordinator.h
@@ -153,6 +153,10 @@ private:
void onBecomeArbiter() override final {}
+ inline std::string getServiceName() const override final {
+ return "QueryAnalysisCoordinator";
+ }
+
/**
* Returns the minimum last ping time for a sampler to be considered as active.
*/
diff --git a/src/mongo/db/s/query_analysis_writer.h b/src/mongo/db/s/query_analysis_writer.h
index 3ea38483b09..c197e15bdb1 100644
--- a/src/mongo/db/s/query_analysis_writer.h
+++ b/src/mongo/db/s/query_analysis_writer.h
@@ -139,6 +139,9 @@ public:
void onStartup(OperationContext* opCtx);
void onShutdown();
void onStepUpComplete(OperationContext* opCtx, long long term);
+ inline std::string getServiceName() const override final {
+ return "QueryAnalysisWriter";
+ }
ExecutorFuture<void> createTTLIndexes(OperationContext* opCtx);
diff --git a/src/mongo/db/s/range_deleter_service.h b/src/mongo/db/s/range_deleter_service.h
index 93baaf76b70..90745568ce1 100644
--- a/src/mongo/db/s/range_deleter_service.h
+++ b/src/mongo/db/s/range_deleter_service.h
@@ -239,6 +239,9 @@ public:
void onStepUpComplete(OperationContext* opCtx, long long term) override;
void onStepDown() override;
void onShutdown() override;
+ inline std::string getServiceName() const override final {
+ return "RangeDeleterService";
+ }
/*
* Returns the RangeDeleterService state with the following schema:
diff --git a/src/mongo/db/s/sharding_initialization_mongod.h b/src/mongo/db/s/sharding_initialization_mongod.h
index 791d43f05c5..d11d6deaa32 100644
--- a/src/mongo/db/s/sharding_initialization_mongod.h
+++ b/src/mongo/db/s/sharding_initialization_mongod.h
@@ -125,6 +125,9 @@ private:
void onStepUpComplete(OperationContext* opCtx, long long term) override final {}
void onStepDown() override final {}
void onBecomeArbiter() override final {}
+ inline std::string getServiceName() const override final {
+ return "ShardingInitializationMongoD";
+ }
// This mutex ensures that only one thread at a time executes the sharding
diff --git a/src/mongo/db/s/sharding_recovery_service.h b/src/mongo/db/s/sharding_recovery_service.h
index 8127bd7ce33..3ba7a40f6e9 100644
--- a/src/mongo/db/s/sharding_recovery_service.h
+++ b/src/mongo/db/s/sharding_recovery_service.h
@@ -155,6 +155,9 @@ private:
void onStepUpComplete(OperationContext* opCtx, long long term) override final {}
void onStepDown() override final {}
void onBecomeArbiter() override final {}
+ inline std::string getServiceName() const override final {
+ return "ShardingRecoveryService";
+ }
};
} // namespace mongo
diff --git a/src/mongo/db/s/user_writes_recoverable_critical_section_service.h b/src/mongo/db/s/user_writes_recoverable_critical_section_service.h
index 5c72dddd6c4..9df6045575f 100644
--- a/src/mongo/db/s/user_writes_recoverable_critical_section_service.h
+++ b/src/mongo/db/s/user_writes_recoverable_critical_section_service.h
@@ -157,6 +157,9 @@ private:
void onStepUpComplete(OperationContext* opCtx, long long term) override final {}
void onStepDown() override final {}
void onBecomeArbiter() override final {}
+ inline std::string getServiceName() const override final {
+ return "UserWritesRecoverableCriticalSectionService";
+ }
};
} // namespace mongo
diff --git a/src/mongo/db/transaction/internal_transactions_reap_service.h b/src/mongo/db/transaction/internal_transactions_reap_service.h
index 56acae9e632..09da172f687 100644
--- a/src/mongo/db/transaction/internal_transactions_reap_service.h
+++ b/src/mongo/db/transaction/internal_transactions_reap_service.h
@@ -97,6 +97,9 @@ private:
bool isMajorityDataAvailable) override final {}
void onStepUpBegin(OperationContext* opCtx, long long term) override final {}
void onBecomeArbiter() override final {}
+ inline std::string getServiceName() const override final {
+ return "InternalTransactionsReapService";
+ }
std::shared_ptr<ThreadPool> _threadPool;
diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp
index 34e1bc23cc9..844894407ce 100644
--- a/src/mongo/db/ttl.cpp
+++ b/src/mongo/db/ttl.cpp
@@ -277,6 +277,9 @@ private:
}
void onStepDown() override {}
void onBecomeArbiter() override {}
+ inline std::string getServiceName() const override final {
+ return "TTLMonitorService";
+ }
};
const auto _ttlMonitorService = ServiceContext::declareDecoration<TTLMonitorService>();
diff --git a/src/mongo/db/vector_clock_mongod.cpp b/src/mongo/db/vector_clock_mongod.cpp
index e6a5527e584..66628cd312d 100644
--- a/src/mongo/db/vector_clock_mongod.cpp
+++ b/src/mongo/db/vector_clock_mongod.cpp
@@ -103,6 +103,9 @@ private:
void onStepUpComplete(OperationContext* opCtx, long long term) override {}
void onStepDown() override;
void onBecomeArbiter() override;
+ inline std::string getServiceName() const override final {
+ return "VectorClockMongoD";
+ }
/**
* Structure used as keys for the map of waiters for VectorClock durability.
diff --git a/src/mongo/idl/cluster_server_parameter_initializer.h b/src/mongo/idl/cluster_server_parameter_initializer.h
index 7778b7c5295..94e48bdacf5 100644
--- a/src/mongo/idl/cluster_server_parameter_initializer.h
+++ b/src/mongo/idl/cluster_server_parameter_initializer.h
@@ -68,6 +68,9 @@ public:
void onStepUpComplete(OperationContext* opCtx, long long term) override final {}
void onStepDown() override final {}
void onBecomeArbiter() override final {}
+ inline std::string getServiceName() const override final {
+ return "ClusterServerParameterInitializer";
+ }
};
} // namespace mongo