diff options
author | Brett Nawrocki <brett.nawrocki@mongodb.com> | 2022-02-16 20:52:34 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-22 17:12:18 +0000 |
commit | 7ad05d39663d30ee9f20afafd499905a165127f9 (patch) | |
tree | 6bb3290b101f1c85744898d546762e3ef86077c9 /src/mongo | |
parent | a3158ab422e4d8091203166c5f2601f9bfa0099d (diff) | |
download | mongo-7ad05d39663d30ee9f20afafd499905a165127f9.tar.gz |
SERVER-63619 Add placeholder for reporting server status metrics
Diffstat (limited to 'src/mongo')
4 files changed, 240 insertions, 5 deletions
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp index 33c5370f75d..ba18bd7d8de 100644 --- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp +++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp @@ -32,8 +32,127 @@ namespace mongo { +namespace { +constexpr int32_t kPlaceholderInt = 0; +constexpr int64_t kPlaceholderLong = 0; +} // namespace + +namespace { +constexpr auto kResharding = "resharding"; +constexpr auto kGlobalIndex = "globalIndex"; +constexpr auto kCountStarted = "countStarted"; +constexpr auto kCountSucceeded = "countSucceeded"; +constexpr auto kCountFailed = "countFailed"; +constexpr auto kCountCanceled = "countCanceled"; +constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance"; +constexpr auto kActive = "active"; +constexpr auto kDocumentsCopied = "documentsCopied"; +constexpr auto kBytesCopied = "bytesCopied"; +constexpr auto kOplogEntriesFetched = "oplogEntriesFetched"; +constexpr auto kOplogEntriesApplied = "oplogEntriesApplied"; +constexpr auto kInsertsApplied = "insertsApplied"; +constexpr auto kUpdatesApplied = "updatesApplied"; +constexpr auto kDeletesApplied = "deletesApplied"; +constexpr auto kCountWritesToStashCollections = "countWritesToStashCollections"; +constexpr auto kCountWritesDuringCriticalSection = "countWritesDuringCriticalSection"; +constexpr auto kCountReadsDuringCriticalSection = "countReadsDuringCriticalSection"; +constexpr auto kOldestActive = "oldestActive"; +constexpr auto kCoordinatorAllShardsLowestRemainingOperationTimeEstimatedMillis = + "coordinatorAllShardsLowestRemainingOperationTimeEstimatedMillis"; +constexpr auto kCoordinatorAllShardsHighestRemainingOperationTimeEstimatedMillis = + "coordinatorAllShardsHighestRemainingOperationTimeEstimatedMillis"; +constexpr auto kRecipientRemainingOperationTimeEstimatedMillis = + "recipientRemainingOperationTimeEstimatedMillis"; +constexpr auto kLatencies = "latencies"; +constexpr auto kCollectionCloningTotalRemoteBatchRetrievalTimeMillis = + "collectionCloningTotalRemoteBatchRetrievalTimeMillis"; +constexpr auto kCollectionCloningTotalRemoteBatchesRetrieved = + "collectionCloningTotalRemoteBatchesRetrieved"; +constexpr auto kCollectionCloningTotalLocalInsertTimeMillis = + "collectionCloningTotalLocalInsertTimeMillis"; +constexpr auto kCollectionCloningTotalLocalInserts = "collectionCloningTotalLocalInserts"; +constexpr auto kOplogFetchingTotalRemoteBatchRetrievalTimeMillis = + "oplogFetchingTotalRemoteBatchRetrievalTimeMillis"; +constexpr auto kOplogFetchingTotalRemoteBatchesRetrieved = + "oplogFetchingTotalRemoteBatchesRetrieved"; +constexpr auto kOplogFetchingTotalLocalInsertTimeMillis = "oplogFetchingTotalLocalInsertTimeMillis"; +constexpr auto kOplogFetchingTotalLocalInserts = "oplogFetchingTotalLocalInserts"; +constexpr auto kOplogApplyingTotalLocalBatchRetrievalTimeMillis = + "oplogApplyingTotalLocalBatchRetrievalTimeMillis"; +constexpr auto kOplogApplyingTotalLocalBatchesRetrieved = "oplogApplyingTotalLocalBatchesRetrieved"; +constexpr auto kOplogApplyingTotalLocalBatchApplyTimeMillis = + "oplogApplyingTotalLocalBatchApplyTimeMillis"; +constexpr auto kOplogApplyingTotalLocalBatchesApplied = "oplogApplyingTotalLocalBatchesApplied"; +constexpr auto kCurrentInSteps = "currentInSteps"; +constexpr auto kCountInstancesInCoordinatorState1Initializing = + "countInstancesInCoordinatorState1Initializing"; +constexpr auto kCountInstancesInCoordinatorState2PreparingToDonate = + "countInstancesInCoordinatorState2PreparingToDonate"; +constexpr auto kCountInstancesInCoordinatorState3Cloning = + "countInstancesInCoordinatorState3Cloning"; +constexpr auto kCountInstancesInCoordinatorState4Applying = + "countInstancesInCoordinatorState4Applying"; +constexpr auto kCountInstancesInCoordinatorState5BlockingWrites = + "countInstancesInCoordinatorState5BlockingWrites"; +constexpr auto kCountInstancesInCoordinatorState6Aborting = + "countInstancesInCoordinatorState6Aborting"; +constexpr auto kCountInstancesInCoordinatorState7Committing = + "countInstancesInCoordinatorState7Committing"; +constexpr auto kCountInstancesInRecipientState1AwaitingFetchTimestamp = + "countInstancesInRecipientState1AwaitingFetchTimestamp"; +constexpr auto kCountInstancesInRecipientState2CreatingCollection = + "countInstancesInRecipientState2CreatingCollection"; +constexpr auto kCountInstancesInRecipientState3Cloning = "countInstancesInRecipientState3Cloning"; +constexpr auto kCountInstancesInRecipientState4Applying = "countInstancesInRecipientState4Applying"; +constexpr auto kCountInstancesInRecipientState5Error = "countInstancesInRecipientState5Error"; +constexpr auto kCountInstancesInRecipientState6StrictConsistency = + "countInstancesInRecipientState6StrictConsistency"; +constexpr auto kCountInstancesInRecipientState7Done = "countInstancesInRecipientState7Done"; +constexpr auto kCountInstancesInDonorState1PreparingToDonate = + "countInstancesInDonorState1PreparingToDonate"; +constexpr auto kCountInstancesInDonorState2DonatingInitialData = + "countInstancesInDonorState2DonatingInitialData"; +constexpr auto kCountInstancesInDonorState3DonatingOplogEntries = + "countInstancesInDonorState3DonatingOplogEntries"; +constexpr auto kCountInstancesInDonorState4PreparingToBlockWrites = + "countInstancesInDonorState4PreparingToBlockWrites"; +constexpr auto kCountInstancesInDonorState5Error = "countInstancesInDonorState5Error"; +constexpr auto kCountInstancesInDonorState6BlockingWrites = + "countInstancesInDonorState6BlockingWrites"; +constexpr auto kCountInstancesInDonorState7Done = "countInstancesInDonorState7Done"; + +struct Metrics { + Metrics() : _resharding(kResharding), _globalIndexes(kGlobalIndex) {} + ShardingDataTransformCumulativeMetrics _resharding; + ShardingDataTransformCumulativeMetrics _globalIndexes; +}; +using MetricsPtr = std::unique_ptr<Metrics>; +const auto getMetrics = ServiceContext::declareDecoration<MetricsPtr>(); + +const auto metricsRegisterer = ServiceContext::ConstructorActionRegisterer{ + "ShardingDataTransformMetrics", + [](ServiceContext* ctx) { getMetrics(ctx) = std::make_unique<Metrics>(); }}; +} // namespace + +ShardingDataTransformCumulativeMetrics* ShardingDataTransformCumulativeMetrics::getForResharding( + ServiceContext* context) { + auto& metrics = getMetrics(context); + return &metrics->_resharding; +} + +ShardingDataTransformCumulativeMetrics* ShardingDataTransformCumulativeMetrics::getForGlobalIndexes( + ServiceContext* context) { + auto& metrics = getMetrics(context); + return &metrics->_globalIndexes; +} + +ShardingDataTransformCumulativeMetrics::ShardingDataTransformCumulativeMetrics( + const std::string& rootSectionName) + : _rootSectionName{rootSectionName}, _operationWasAttempted{false} {} + ShardingDataTransformCumulativeMetrics::DeregistrationFunction ShardingDataTransformCumulativeMetrics::registerInstanceMetrics(const InstanceObserver* metrics) { + _operationWasAttempted.store(true); auto it = insertMetrics(metrics); return [=] { stdx::unique_lock guard(_mutex); @@ -54,6 +173,84 @@ size_t ShardingDataTransformCumulativeMetrics::getObservedMetricsCount() const { return _instanceMetrics.size(); } +void ShardingDataTransformCumulativeMetrics::reportForServerStatus(BSONObjBuilder* bob) const { + if (!_operationWasAttempted.load()) { + return; + } + BSONObjBuilder root(bob->subobjStart(_rootSectionName)); + root.append(kCountStarted, kPlaceholderLong); + root.append(kCountSucceeded, kPlaceholderLong); + root.append(kCountFailed, kPlaceholderLong); + root.append(kCountCanceled, kPlaceholderLong); + root.append(kLastOpEndingChunkImbalance, kPlaceholderLong); + reportActive(&root); + reportOldestActive(&root); + reportLatencies(&root); + reportCurrentInSteps(&root); +} + +void ShardingDataTransformCumulativeMetrics::reportActive(BSONObjBuilder* bob) const { + BSONObjBuilder s(bob->subobjStart(kActive)); + s.append(kDocumentsCopied, kPlaceholderLong); + s.append(kBytesCopied, kPlaceholderLong); + s.append(kOplogEntriesFetched, kPlaceholderLong); + s.append(kOplogEntriesApplied, kPlaceholderLong); + s.append(kInsertsApplied, kPlaceholderLong); + s.append(kUpdatesApplied, kPlaceholderLong); + s.append(kDeletesApplied, kPlaceholderLong); + s.append(kCountWritesToStashCollections, kPlaceholderLong); + s.append(kCountWritesDuringCriticalSection, kPlaceholderLong); + s.append(kCountReadsDuringCriticalSection, kPlaceholderLong); +} + +void ShardingDataTransformCumulativeMetrics::reportOldestActive(BSONObjBuilder* bob) const { + BSONObjBuilder s(bob->subobjStart(kOldestActive)); + s.append(kCoordinatorAllShardsHighestRemainingOperationTimeEstimatedMillis, kPlaceholderLong); + s.append(kCoordinatorAllShardsLowestRemainingOperationTimeEstimatedMillis, kPlaceholderLong); + s.append(kRecipientRemainingOperationTimeEstimatedMillis, kPlaceholderLong); +} + +void ShardingDataTransformCumulativeMetrics::reportLatencies(BSONObjBuilder* bob) const { + BSONObjBuilder s(bob->subobjStart(kLatencies)); + s.append(kCollectionCloningTotalRemoteBatchRetrievalTimeMillis, kPlaceholderLong); + s.append(kCollectionCloningTotalRemoteBatchesRetrieved, kPlaceholderLong); + s.append(kCollectionCloningTotalLocalInsertTimeMillis, kPlaceholderLong); + s.append(kCollectionCloningTotalLocalInserts, kPlaceholderLong); + s.append(kOplogFetchingTotalRemoteBatchRetrievalTimeMillis, kPlaceholderLong); + s.append(kOplogFetchingTotalRemoteBatchesRetrieved, kPlaceholderLong); + s.append(kOplogFetchingTotalLocalInsertTimeMillis, kPlaceholderLong); + s.append(kOplogFetchingTotalLocalInserts, kPlaceholderLong); + s.append(kOplogApplyingTotalLocalBatchRetrievalTimeMillis, kPlaceholderLong); + s.append(kOplogApplyingTotalLocalBatchesRetrieved, kPlaceholderLong); + s.append(kOplogApplyingTotalLocalBatchApplyTimeMillis, kPlaceholderLong); + s.append(kOplogApplyingTotalLocalBatchesApplied, kPlaceholderLong); +} + +void ShardingDataTransformCumulativeMetrics::reportCurrentInSteps(BSONObjBuilder* bob) const { + BSONObjBuilder s(bob->subobjStart(kCurrentInSteps)); + s.append(kCountInstancesInCoordinatorState1Initializing, kPlaceholderInt); + s.append(kCountInstancesInCoordinatorState2PreparingToDonate, kPlaceholderInt); + s.append(kCountInstancesInCoordinatorState3Cloning, kPlaceholderInt); + s.append(kCountInstancesInCoordinatorState4Applying, kPlaceholderInt); + s.append(kCountInstancesInCoordinatorState5BlockingWrites, kPlaceholderInt); + s.append(kCountInstancesInCoordinatorState6Aborting, kPlaceholderInt); + s.append(kCountInstancesInCoordinatorState7Committing, kPlaceholderInt); + s.append(kCountInstancesInRecipientState1AwaitingFetchTimestamp, kPlaceholderInt); + s.append(kCountInstancesInRecipientState2CreatingCollection, kPlaceholderInt); + s.append(kCountInstancesInRecipientState3Cloning, kPlaceholderInt); + s.append(kCountInstancesInRecipientState4Applying, kPlaceholderInt); + s.append(kCountInstancesInRecipientState5Error, kPlaceholderInt); + s.append(kCountInstancesInRecipientState6StrictConsistency, kPlaceholderInt); + s.append(kCountInstancesInRecipientState7Done, kPlaceholderInt); + s.append(kCountInstancesInDonorState1PreparingToDonate, kPlaceholderInt); + s.append(kCountInstancesInDonorState2DonatingInitialData, kPlaceholderInt); + s.append(kCountInstancesInDonorState3DonatingOplogEntries, kPlaceholderInt); + s.append(kCountInstancesInDonorState4PreparingToBlockWrites, kPlaceholderInt); + s.append(kCountInstancesInDonorState5Error, kPlaceholderInt); + s.append(kCountInstancesInDonorState6BlockingWrites, kPlaceholderInt); + s.append(kCountInstancesInDonorState7Done, kPlaceholderInt); +} + ShardingDataTransformCumulativeMetrics::MetricsSet::iterator ShardingDataTransformCumulativeMetrics::insertMetrics(const InstanceObserver* metrics) { stdx::unique_lock guard(_mutex); diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h index 9dfe59264ef..10b41a7875a 100644 --- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h +++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h @@ -29,7 +29,10 @@ #pragma once +#include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/s/sharding_data_transform_metrics_observer_interface.h" +#include "mongo/db/service_context.h" +#include "mongo/platform/atomic_word.h" #include "mongo/platform/mutex.h" #include "mongo/util/functional.h" #include <set> @@ -41,9 +44,14 @@ public: using InstanceObserver = ShardingDataTransformMetricsObserverInterface; using DeregistrationFunction = unique_function<void()>; + static ShardingDataTransformCumulativeMetrics* getForResharding(ServiceContext* context); + static ShardingDataTransformCumulativeMetrics* getForGlobalIndexes(ServiceContext* context); + + ShardingDataTransformCumulativeMetrics(const std::string& rootSectionName); [[nodiscard]] DeregistrationFunction registerInstanceMetrics(const InstanceObserver* metrics); int64_t getOldestOperationRemainingTimeMillis() const; size_t getObservedMetricsCount() const; + void reportForServerStatus(BSONObjBuilder* bob) const; private: struct MetricsComparer { @@ -58,10 +66,16 @@ private: }; using MetricsSet = std::set<const InstanceObserver*, MetricsComparer>; + void reportActive(BSONObjBuilder* bob) const; + void reportOldestActive(BSONObjBuilder* bob) const; + void reportLatencies(BSONObjBuilder* bob) const; + void reportCurrentInSteps(BSONObjBuilder* bob) const; MetricsSet::iterator insertMetrics(const InstanceObserver* metrics); mutable Mutex _mutex; + const std::string _rootSectionName; MetricsSet _instanceMetrics; + AtomicWord<bool> _operationWasAttempted; }; } // namespace mongo diff --git a/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h b/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h index fda55a6cefb..ceae902adb6 100644 --- a/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h +++ b/src/mongo/db/s/sharding_data_transform_metrics_test_fixture.h @@ -70,6 +70,8 @@ protected: constexpr static int64_t kYoungestTime = std::numeric_limits<int64_t>::max(); constexpr static int64_t kOldestTime = 1; + ShardingDataTransformMetricsTestFixture() : _cumulativeMetrics{"testMetrics"} {} + const ObserverMock* getYoungestObserver() { static StaticImmortal<ObserverMock> youngest{kYoungestTime, kYoungestTime}; return &youngest.value(); diff --git a/src/mongo/db/s/sharding_server_status.cpp b/src/mongo/db/s/sharding_server_status.cpp index ffb2e307b6f..5a331364032 100644 --- a/src/mongo/db/s/sharding_server_status.cpp +++ b/src/mongo/db/s/sharding_server_status.cpp @@ -34,6 +34,7 @@ #include "mongo/db/s/active_migrations_registry.h" #include "mongo/db/s/collection_sharding_state.h" #include "mongo/db/s/resharding/resharding_metrics.h" +#include "mongo/db/s/sharding_data_transform_cumulative_metrics.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/s/sharding_statistics.h" #include "mongo/db/vector_clock.h" @@ -43,6 +44,7 @@ #include "mongo/s/grid.h" #include "mongo/s/is_mongos.h" #include "mongo/s/resharding/resharding_feature_flag_gen.h" +#include "mongo/s/sharding_feature_flags_gen.h" namespace mongo { namespace { @@ -123,16 +125,36 @@ public: // The serverStatus command is run before the FCV is initialized so we ignore it when // checking whether the resharding feature is enabled here. - if (resharding::gFeatureFlagResharding.isEnabledAndIgnoreFCV() && - ReshardingMetrics::get(opCtx->getServiceContext())->wasReshardingEverAttempted()) { - BSONObjBuilder subObjBuilder(result.subobjStart("resharding")); - ReshardingMetrics::get(opCtx->getServiceContext()) - ->serializeCumulativeOpMetrics(&subObjBuilder); + if (resharding::gFeatureFlagResharding.isEnabledAndIgnoreFCV()) { + if (feature_flags::gFeatureFlagShardingDataTransformMetrics.isEnabledAndIgnoreFCV()) { + // TODO PM-2664: Switch over to using data transform metrics when they have feature + // parity with resharding metrics. + reportReshardingMetrics(opCtx, &result); + // reportDataTransformMetrics(opCtx, &result); + } else { + reportReshardingMetrics(opCtx, &result); + } } return result.obj(); } + void reportReshardingMetrics(OperationContext* opCtx, BSONObjBuilder* bob) const { + auto metrics = ReshardingMetrics::get(opCtx->getServiceContext()); + if (!metrics->wasReshardingEverAttempted()) { + return; + } + BSONObjBuilder subObjBuilder(bob->subobjStart("resharding")); + metrics->serializeCumulativeOpMetrics(&subObjBuilder); + } + + void reportDataTransformMetrics(OperationContext* opCtx, BSONObjBuilder* bob) const { + auto sCtx = opCtx->getServiceContext(); + using Metrics = ShardingDataTransformCumulativeMetrics; + Metrics::getForResharding(sCtx)->reportForServerStatus(bob); + Metrics::getForGlobalIndexes(sCtx)->reportForServerStatus(bob); + } + } shardingStatisticsServerStatus; } // namespace |