summary refs log tree commit diff
path: root/src/mongo
diff options
context:
space:
mode:
author Brett Nawrocki <brett.nawrocki@mongodb.com> 2022-02-16 20:52:34 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-02-22 17:12:18 +0000
commit 7ad05d39663d30ee9f20afafd499905a165127f9 (patch)
tree 6bb3290b101f1c85744898d546762e3ef86077c9 /src/mongo
parent a3158ab422e4d8091203166c5f2601f9bfa0099d (diff)
download mongo-7ad05d39663d30ee9f20afafd499905a165127f9.tar.gz
SERVER-63619 Add placeholder for reporting server status metrics
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp 197
-rw-r--r-- src/mongo/db/s/sharding_data_transform_cumulative_metrics.h 14
-rw-r--r-- src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h 2
-rw-r--r-- src/mongo/db/s/sharding_server_status.cpp 32
4 files changed, 240 insertions, 5 deletions
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
index 33c5370f75d..ba18bd7d8de 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
@@ -32,8 +32,127 @@
namespace mongo {
+namespace {
+constexpr int32_t kPlaceholderInt = 0;  // Stand-in value appended for every field written by reportCurrentInSteps().
+constexpr int64_t kPlaceholderLong = 0;  // Stand-in value appended for every field written by the other report*() functions.
+} // namespace
+
+namespace {
+constexpr auto kResharding = "resharding";  // Root section name passed to the resharding metrics instance.
+constexpr auto kGlobalIndex = "globalIndex";  // Root section name passed to the global-index metrics instance.
+constexpr auto kCountStarted = "countStarted";  // This and the next four names are top-level fields of the root section.
+constexpr auto kCountSucceeded = "countSucceeded";
+constexpr auto kCountFailed = "countFailed";
+constexpr auto kCountCanceled = "countCanceled";
+constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance";
+constexpr auto kActive = "active";  // Sub-document name; the ten names that follow are its fields.
+constexpr auto kDocumentsCopied = "documentsCopied";
+constexpr auto kBytesCopied = "bytesCopied";
+constexpr auto kOplogEntriesFetched = "oplogEntriesFetched";
+constexpr auto kOplogEntriesApplied = "oplogEntriesApplied";
+constexpr auto kInsertsApplied = "insertsApplied";
+constexpr auto kUpdatesApplied = "updatesApplied";
+constexpr auto kDeletesApplied = "deletesApplied";
+constexpr auto kCountWritesToStashCollections = "countWritesToStashCollections";
+constexpr auto kCountWritesDuringCriticalSection = "countWritesDuringCriticalSection";
+constexpr auto kCountReadsDuringCriticalSection = "countReadsDuringCriticalSection";
+constexpr auto kOldestActive = "oldestActive";  // Sub-document name; the three names that follow are its fields.
+constexpr auto kCoordinatorAllShardsLowestRemainingOperationTimeEstimatedMillis =
+ "coordinatorAllShardsLowestRemainingOperationTimeEstimatedMillis";
+constexpr auto kCoordinatorAllShardsHighestRemainingOperationTimeEstimatedMillis =
+ "coordinatorAllShardsHighestRemainingOperationTimeEstimatedMillis";
+constexpr auto kRecipientRemainingOperationTimeEstimatedMillis =
+ "recipientRemainingOperationTimeEstimatedMillis";
+constexpr auto kLatencies = "latencies";  // Sub-document name; the twelve names that follow are its fields.
+constexpr auto kCollectionCloningTotalRemoteBatchRetrievalTimeMillis =
+ "collectionCloningTotalRemoteBatchRetrievalTimeMillis";
+constexpr auto kCollectionCloningTotalRemoteBatchesRetrieved =
+ "collectionCloningTotalRemoteBatchesRetrieved";
+constexpr auto kCollectionCloningTotalLocalInsertTimeMillis =
+ "collectionCloningTotalLocalInsertTimeMillis";
+constexpr auto kCollectionCloningTotalLocalInserts = "collectionCloningTotalLocalInserts";
+constexpr auto kOplogFetchingTotalRemoteBatchRetrievalTimeMillis =
+ "oplogFetchingTotalRemoteBatchRetrievalTimeMillis";
+constexpr auto kOplogFetchingTotalRemoteBatchesRetrieved =
+ "oplogFetchingTotalRemoteBatchesRetrieved";
+constexpr auto kOplogFetchingTotalLocalInsertTimeMillis = "oplogFetchingTotalLocalInsertTimeMillis";
+constexpr auto kOplogFetchingTotalLocalInserts = "oplogFetchingTotalLocalInserts";
+constexpr auto kOplogApplyingTotalLocalBatchRetrievalTimeMillis =
+ "oplogApplyingTotalLocalBatchRetrievalTimeMillis";
+constexpr auto kOplogApplyingTotalLocalBatchesRetrieved = "oplogApplyingTotalLocalBatchesRetrieved";
+constexpr auto kOplogApplyingTotalLocalBatchApplyTimeMillis =
+ "oplogApplyingTotalLocalBatchApplyTimeMillis";
+constexpr auto kOplogApplyingTotalLocalBatchesApplied = "oplogApplyingTotalLocalBatchesApplied";
+constexpr auto kCurrentInSteps = "currentInSteps";  // Sub-document name; the 21 names that follow are its fields (7 coordinator, 7 recipient, 7 donor).
+constexpr auto kCountInstancesInCoordinatorState1Initializing =
+ "countInstancesInCoordinatorState1Initializing";
+constexpr auto kCountInstancesInCoordinatorState2PreparingToDonate =
+ "countInstancesInCoordinatorState2PreparingToDonate";
+constexpr auto kCountInstancesInCoordinatorState3Cloning =
+ "countInstancesInCoordinatorState3Cloning";
+constexpr auto kCountInstancesInCoordinatorState4Applying =
+ "countInstancesInCoordinatorState4Applying";
+constexpr auto kCountInstancesInCoordinatorState5BlockingWrites =
+ "countInstancesInCoordinatorState5BlockingWrites";
+constexpr auto kCountInstancesInCoordinatorState6Aborting =
+ "countInstancesInCoordinatorState6Aborting";
+constexpr auto kCountInstancesInCoordinatorState7Committing =
+ "countInstancesInCoordinatorState7Committing";
+constexpr auto kCountInstancesInRecipientState1AwaitingFetchTimestamp =
+ "countInstancesInRecipientState1AwaitingFetchTimestamp";
+constexpr auto kCountInstancesInRecipientState2CreatingCollection =
+ "countInstancesInRecipientState2CreatingCollection";
+constexpr auto kCountInstancesInRecipientState3Cloning = "countInstancesInRecipientState3Cloning";
+constexpr auto kCountInstancesInRecipientState4Applying = "countInstancesInRecipientState4Applying";
+constexpr auto kCountInstancesInRecipientState5Error = "countInstancesInRecipientState5Error";
+constexpr auto kCountInstancesInRecipientState6StrictConsistency =
+ "countInstancesInRecipientState6StrictConsistency";
+constexpr auto kCountInstancesInRecipientState7Done = "countInstancesInRecipientState7Done";
+constexpr auto kCountInstancesInDonorState1PreparingToDonate =
+ "countInstancesInDonorState1PreparingToDonate";
+constexpr auto kCountInstancesInDonorState2DonatingInitialData =
+ "countInstancesInDonorState2DonatingInitialData";
+constexpr auto kCountInstancesInDonorState3DonatingOplogEntries =
+ "countInstancesInDonorState3DonatingOplogEntries";
+constexpr auto kCountInstancesInDonorState4PreparingToBlockWrites =
+ "countInstancesInDonorState4PreparingToBlockWrites";
+constexpr auto kCountInstancesInDonorState5Error = "countInstancesInDonorState5Error";
+constexpr auto kCountInstancesInDonorState6BlockingWrites =
+ "countInstancesInDonorState6BlockingWrites";
+constexpr auto kCountInstancesInDonorState7Done = "countInstancesInDonorState7Done";
+
+struct Metrics {  // Bundles the two cumulative-metrics instances stored as a ServiceContext decoration.
+ Metrics() : _resharding(kResharding), _globalIndexes(kGlobalIndex) {}  // Each instance is constructed with its own root section name.
+ ShardingDataTransformCumulativeMetrics _resharding;  // Returned by getForResharding().
+ ShardingDataTransformCumulativeMetrics _globalIndexes;  // Returned by getForGlobalIndexes().
+};
+using MetricsPtr = std::unique_ptr<Metrics>;
+const auto getMetrics = ServiceContext::declareDecoration<MetricsPtr>();  // Accessor for the MetricsPtr decoration of a given ServiceContext.
+
+const auto metricsRegisterer = ServiceContext::ConstructorActionRegisterer{  // Registers the action below under the name "ShardingDataTransformMetrics".
+ "ShardingDataTransformMetrics",
+ [](ServiceContext* ctx) { getMetrics(ctx) = std::make_unique<Metrics>(); }};  // Allocates a fresh Metrics for the context it is invoked with.
+} // namespace
+
+ShardingDataTransformCumulativeMetrics* ShardingDataTransformCumulativeMetrics::getForResharding(
+ ServiceContext* context) {  // Returns a non-owning pointer to the resharding instance held in the context's Metrics decoration.
+ auto& metrics = getMetrics(context);  // NOTE(review): dereferenced below without a null check; presumably the constructor action has already populated it - confirm.
+ return &metrics->_resharding;
+}
+
+ShardingDataTransformCumulativeMetrics* ShardingDataTransformCumulativeMetrics::getForGlobalIndexes(
+ ServiceContext* context) {  // Returns a non-owning pointer to the global-index instance held in the context's Metrics decoration.
+ auto& metrics = getMetrics(context);  // NOTE(review): dereferenced below without a null check; presumably the constructor action has already populated it - confirm.
+ return &metrics->_globalIndexes;
+}
+
+ShardingDataTransformCumulativeMetrics::ShardingDataTransformCumulativeMetrics(
+ const std::string& rootSectionName)  // rootSectionName: name of the sub-object that reportForServerStatus() opens.
+ : _rootSectionName{rootSectionName}, _operationWasAttempted{false} {}  // The flag starts false, so reportForServerStatus() emits nothing until registerInstanceMetrics() sets it.
+
ShardingDataTransformCumulativeMetrics::DeregistrationFunction
ShardingDataTransformCumulativeMetrics::registerInstanceMetrics(const InstanceObserver* metrics) {
+ _operationWasAttempted.store(true);
auto it = insertMetrics(metrics);
return [=] {
stdx::unique_lock guard(_mutex);
@@ -54,6 +173,84 @@ size_t ShardingDataTransformCumulativeMetrics::getObservedMetricsCount() const {
return _instanceMetrics.size();
}
+void ShardingDataTransformCumulativeMetrics::reportForServerStatus(BSONObjBuilder* bob) const {  // Appends this instance's section to bob.
+ if (!_operationWasAttempted.load()) {  // Nothing is emitted until registerInstanceMetrics() has been called at least once.
+ return;
+ }
+ BSONObjBuilder root(bob->subobjStart(_rootSectionName));  // Sub-object named after the root section given at construction.
+ root.append(kCountStarted, kPlaceholderLong);  // All top-level counters are placeholder zeros for now.
+ root.append(kCountSucceeded, kPlaceholderLong);
+ root.append(kCountFailed, kPlaceholderLong);
+ root.append(kCountCanceled, kPlaceholderLong);
+ root.append(kLastOpEndingChunkImbalance, kPlaceholderLong);
+ reportActive(&root);  // Nested sub-documents follow, in this fixed order.
+ reportOldestActive(&root);
+ reportLatencies(&root);
+ reportCurrentInSteps(&root);
+}
+
+void ShardingDataTransformCumulativeMetrics::reportActive(BSONObjBuilder* bob) const {  // Appends the "active" sub-document to bob.
+ BSONObjBuilder s(bob->subobjStart(kActive));
+ s.append(kDocumentsCopied, kPlaceholderLong);  // Every field below is a 64-bit placeholder zero.
+ s.append(kBytesCopied, kPlaceholderLong);
+ s.append(kOplogEntriesFetched, kPlaceholderLong);
+ s.append(kOplogEntriesApplied, kPlaceholderLong);
+ s.append(kInsertsApplied, kPlaceholderLong);
+ s.append(kUpdatesApplied, kPlaceholderLong);
+ s.append(kDeletesApplied, kPlaceholderLong);
+ s.append(kCountWritesToStashCollections, kPlaceholderLong);
+ s.append(kCountWritesDuringCriticalSection, kPlaceholderLong);
+ s.append(kCountReadsDuringCriticalSection, kPlaceholderLong);
+}
+
+void ShardingDataTransformCumulativeMetrics::reportOldestActive(BSONObjBuilder* bob) const {  // Appends the "oldestActive" sub-document to bob.
+ BSONObjBuilder s(bob->subobjStart(kOldestActive));
+ s.append(kCoordinatorAllShardsHighestRemainingOperationTimeEstimatedMillis, kPlaceholderLong);  // Highest is emitted before Lowest; all three values are placeholder zeros.
+ s.append(kCoordinatorAllShardsLowestRemainingOperationTimeEstimatedMillis, kPlaceholderLong);
+ s.append(kRecipientRemainingOperationTimeEstimatedMillis, kPlaceholderLong);
+}
+
+void ShardingDataTransformCumulativeMetrics::reportLatencies(BSONObjBuilder* bob) const {  // Appends the "latencies" sub-document to bob.
+ BSONObjBuilder s(bob->subobjStart(kLatencies));
+ s.append(kCollectionCloningTotalRemoteBatchRetrievalTimeMillis, kPlaceholderLong);  // Collection-cloning fields (placeholder zeros).
+ s.append(kCollectionCloningTotalRemoteBatchesRetrieved, kPlaceholderLong);
+ s.append(kCollectionCloningTotalLocalInsertTimeMillis, kPlaceholderLong);
+ s.append(kCollectionCloningTotalLocalInserts, kPlaceholderLong);
+ s.append(kOplogFetchingTotalRemoteBatchRetrievalTimeMillis, kPlaceholderLong);  // Oplog-fetching fields (placeholder zeros).
+ s.append(kOplogFetchingTotalRemoteBatchesRetrieved, kPlaceholderLong);
+ s.append(kOplogFetchingTotalLocalInsertTimeMillis, kPlaceholderLong);
+ s.append(kOplogFetchingTotalLocalInserts, kPlaceholderLong);
+ s.append(kOplogApplyingTotalLocalBatchRetrievalTimeMillis, kPlaceholderLong);  // Oplog-applying fields (placeholder zeros).
+ s.append(kOplogApplyingTotalLocalBatchesRetrieved, kPlaceholderLong);
+ s.append(kOplogApplyingTotalLocalBatchApplyTimeMillis, kPlaceholderLong);
+ s.append(kOplogApplyingTotalLocalBatchesApplied, kPlaceholderLong);
+}
+
+void ShardingDataTransformCumulativeMetrics::reportCurrentInSteps(BSONObjBuilder* bob) const {  // Appends the "currentInSteps" sub-document to bob.
+ BSONObjBuilder s(bob->subobjStart(kCurrentInSteps));
+ s.append(kCountInstancesInCoordinatorState1Initializing, kPlaceholderInt);  // Coordinator state fields; unlike the other sections these use the 32-bit placeholder.
+ s.append(kCountInstancesInCoordinatorState2PreparingToDonate, kPlaceholderInt);
+ s.append(kCountInstancesInCoordinatorState3Cloning, kPlaceholderInt);
+ s.append(kCountInstancesInCoordinatorState4Applying, kPlaceholderInt);
+ s.append(kCountInstancesInCoordinatorState5BlockingWrites, kPlaceholderInt);
+ s.append(kCountInstancesInCoordinatorState6Aborting, kPlaceholderInt);
+ s.append(kCountInstancesInCoordinatorState7Committing, kPlaceholderInt);
+ s.append(kCountInstancesInRecipientState1AwaitingFetchTimestamp, kPlaceholderInt);  // Recipient state fields.
+ s.append(kCountInstancesInRecipientState2CreatingCollection, kPlaceholderInt);
+ s.append(kCountInstancesInRecipientState3Cloning, kPlaceholderInt);
+ s.append(kCountInstancesInRecipientState4Applying, kPlaceholderInt);
+ s.append(kCountInstancesInRecipientState5Error, kPlaceholderInt);
+ s.append(kCountInstancesInRecipientState6StrictConsistency, kPlaceholderInt);
+ s.append(kCountInstancesInRecipientState7Done, kPlaceholderInt);
+ s.append(kCountInstancesInDonorState1PreparingToDonate, kPlaceholderInt);  // Donor state fields.
+ s.append(kCountInstancesInDonorState2DonatingInitialData, kPlaceholderInt);
+ s.append(kCountInstancesInDonorState3DonatingOplogEntries, kPlaceholderInt);
+ s.append(kCountInstancesInDonorState4PreparingToBlockWrites, kPlaceholderInt);
+ s.append(kCountInstancesInDonorState5Error, kPlaceholderInt);
+ s.append(kCountInstancesInDonorState6BlockingWrites, kPlaceholderInt);
+ s.append(kCountInstancesInDonorState7Done, kPlaceholderInt);
+}
+
ShardingDataTransformCumulativeMetrics::MetricsSet::iterator
ShardingDataTransformCumulativeMetrics::insertMetrics(const InstanceObserver* metrics) {
stdx::unique_lock guard(_mutex);
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
index 9dfe59264ef..10b41a7875a 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
@@ -29,7 +29,10 @@
#pragma once
+#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/s/sharding_data_transform_metrics_observer_interface.h"
+#include "mongo/db/service_context.h"
+#include "mongo/platform/atomic_word.h"
#include "mongo/platform/mutex.h"
#include "mongo/util/functional.h"
#include <set>
@@ -41,9 +44,14 @@ public:
using InstanceObserver = ShardingDataTransformMetricsObserverInterface;
using DeregistrationFunction = unique_function<void()>;
+ static ShardingDataTransformCumulativeMetrics* getForResharding(ServiceContext* context);
+ static ShardingDataTransformCumulativeMetrics* getForGlobalIndexes(ServiceContext* context);
+
+ ShardingDataTransformCumulativeMetrics(const std::string& rootSectionName);
[[nodiscard]] DeregistrationFunction registerInstanceMetrics(const InstanceObserver* metrics);
int64_t getOldestOperationRemainingTimeMillis() const;
size_t getObservedMetricsCount() const;
+ void reportForServerStatus(BSONObjBuilder* bob) const;
private:
struct MetricsComparer {
@@ -58,10 +66,16 @@ private:
};
using MetricsSet = std::set<const InstanceObserver*, MetricsComparer>;
+ void reportActive(BSONObjBuilder* bob) const;
+ void reportOldestActive(BSONObjBuilder* bob) const;
+ void reportLatencies(BSONObjBuilder* bob) const;
+ void reportCurrentInSteps(BSONObjBuilder* bob) const;
MetricsSet::iterator insertMetrics(const InstanceObserver* metrics);
mutable Mutex _mutex;
+ const std::string _rootSectionName;
MetricsSet _instanceMetrics;
+ AtomicWord<bool> _operationWasAttempted;
};
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h b/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h
index fda55a6cefb..ceae902adb6 100644
--- a/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h
+++ b/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h
@@ -70,6 +70,8 @@ protected:
constexpr static int64_t kYoungestTime = std::numeric_limits<int64_t>::max();
constexpr static int64_t kOldestTime = 1;
+ ShardingDataTransformMetricsTestFixture() : _cumulativeMetrics{"testMetrics"} {}  // Initializes the fixture's _cumulativeMetrics member with the string "testMetrics".
+
const ObserverMock* getYoungestObserver() {
static StaticImmortal<ObserverMock> youngest{kYoungestTime, kYoungestTime};
return &youngest.value();
diff --git a/src/mongo/db/s/sharding_server_status.cpp b/src/mongo/db/s/sharding_server_status.cpp
index ffb2e307b6f..5a331364032 100644
--- a/src/mongo/db/s/sharding_server_status.cpp
+++ b/src/mongo/db/s/sharding_server_status.cpp
@@ -34,6 +34,7 @@
#include "mongo/db/s/active_migrations_registry.h"
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/s/resharding/resharding_metrics.h"
+#include "mongo/db/s/sharding_data_transform_cumulative_metrics.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/sharding_statistics.h"
#include "mongo/db/vector_clock.h"
@@ -43,6 +44,7 @@
#include "mongo/s/grid.h"
#include "mongo/s/is_mongos.h"
#include "mongo/s/resharding/resharding_feature_flag_gen.h"
+#include "mongo/s/sharding_feature_flags_gen.h"
namespace mongo {
namespace {
@@ -123,16 +125,36 @@ public:
// The serverStatus command is run before the FCV is initialized so we ignore it when
// checking whether the resharding feature is enabled here.
- if (resharding::gFeatureFlagResharding.isEnabledAndIgnoreFCV() &&
- ReshardingMetrics::get(opCtx->getServiceContext())->wasReshardingEverAttempted()) {
- BSONObjBuilder subObjBuilder(result.subobjStart("resharding"));
- ReshardingMetrics::get(opCtx->getServiceContext())
- ->serializeCumulativeOpMetrics(&subObjBuilder);
+ if (resharding::gFeatureFlagResharding.isEnabledAndIgnoreFCV()) {
+ if (feature_flags::gFeatureFlagShardingDataTransformMetrics.isEnabledAndIgnoreFCV()) {
+ // TODO PM-2664: Switch over to using data transform metrics when they have feature
+ // parity with resharding metrics.
+ reportReshardingMetrics(opCtx, &result);
+ // reportDataTransformMetrics(opCtx, &result);
+ } else {
+ reportReshardingMetrics(opCtx, &result);
+ }
}
return result.obj();
}
+ void reportReshardingMetrics(OperationContext* opCtx, BSONObjBuilder* bob) const {  // Appends a "resharding" sub-object built from ReshardingMetrics to bob.
+ auto metrics = ReshardingMetrics::get(opCtx->getServiceContext());
+ if (!metrics->wasReshardingEverAttempted()) {  // Emit no section at all when this returns false.
+ return;
+ }
+ BSONObjBuilder subObjBuilder(bob->subobjStart("resharding"));
+ metrics->serializeCumulativeOpMetrics(&subObjBuilder);
+ }
+
+ void reportDataTransformMetrics(OperationContext* opCtx, BSONObjBuilder* bob) const {  // Appends the data transform metrics sections to bob; the only call visible in this patch is commented out.
+ auto sCtx = opCtx->getServiceContext();
+ using Metrics = ShardingDataTransformCumulativeMetrics;
+ Metrics::getForResharding(sCtx)->reportForServerStatus(bob);  // Resharding section first, then global indexes.
+ Metrics::getForGlobalIndexes(sCtx)->reportForServerStatus(bob);
+ }
+
} shardingStatisticsServerStatus;
} // namespace