diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-11-02 16:36:20 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-02 22:02:31 +0000 |
commit | 12646ec5380bcc0dc10c66069a6583c653683922 (patch) | |
tree | 364c06ade130e8e386ea41fb88cfa90647fb7daa /src | |
parent | bccb5dc1eb8f7b731f4e20a292abea2193182cfd (diff) | |
download | mongo-12646ec5380bcc0dc10c66069a6583c653683922.tar.gz |
SERVER-50865 Collect resource consumption metrics for index builds
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/curop.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/curop.h | 2 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/introspect.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics.cpp | 141 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics.h | 161 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics_test.cpp | 151 |
8 files changed, 301 insertions, 189 deletions
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp index e00d6a3b6ba..9c6a0685565 100644 --- a/src/mongo/db/curop.cpp +++ b/src/mongo/db/curop.cpp @@ -556,7 +556,7 @@ bool CurOp::completeAndLogOperation(OperationContext* opCtx, _debug.prepareConflictDurationMillis = duration_cast<Milliseconds>(prepareConflictDurationMicros); - auto operationMetricsPtr = [&]() -> ResourceConsumption::Metrics* { + auto operationMetricsPtr = [&]() -> ResourceConsumption::OperationMetrics* { auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx); if (metricsCollector.hasCollectedMetrics()) { return &metricsCollector.getMetrics(); @@ -938,7 +938,7 @@ string OpDebug::report(OperationContext* opCtx, const SingleThreadedLockStats* l void OpDebug::report(OperationContext* opCtx, const SingleThreadedLockStats* lockStats, - const ResourceConsumption::Metrics* operationMetrics, + const ResourceConsumption::OperationMetrics* operationMetrics, logv2::DynamicAttributes* pAttrs) const { Client* client = opCtx->getClient(); auto& curop = *CurOp::get(opCtx); @@ -1081,7 +1081,7 @@ void OpDebug::report(OperationContext* opCtx, if (operationMetrics) { BSONObjBuilder builder; - operationMetrics->toFlatBsonNonZeroFields(&builder); + operationMetrics->toBsonNonZeroFields(&builder); pAttrs->add("operationMetrics", builder.obj()); } @@ -1516,7 +1516,7 @@ std::function<BSONObj(ProfileFilter::Args)> OpDebug::appendStaged(StringSet requ auto& metricsCollector = ResourceConsumption::MetricsCollector::get(args.opCtx); if (metricsCollector.hasCollectedMetrics()) { BSONObjBuilder metricsBuilder(b.subobjStart(field)); - metricsCollector.getMetrics().toFlatBsonAllFields(&metricsBuilder); + metricsCollector.getMetrics().toBson(&metricsBuilder); } }); diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index 40205de6dcb..739ba91acc5 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -167,7 +167,7 @@ public: void report(OperationContext* opCtx, const SingleThreadedLockStats* lockStats, - const ResourceConsumption::Metrics* operationMetrics, + const ResourceConsumption::OperationMetrics* operationMetrics, logv2::DynamicAttributes* pAttrs) const; /** diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index bb6e94fe8d2..56c97fc7e93 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2589,6 +2589,22 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide replState->onOplogCommit(isPrimary); } + // While we are still holding the RSTL and before returning, ensure the metrics collected for + // this index build are attributed to the primary that commits or aborts the index build. + auto metricsGuard = makeGuard([&]() { + if (!isPrimary) { + return; + } + + auto& collector = ResourceConsumption::MetricsCollector::get(opCtx); + bool wasCollecting = collector.endScopedCollecting(); + if (!wasCollecting || !ResourceConsumption::isMetricsAggregationEnabled()) { + return; + } + + ResourceConsumption::get(opCtx).merge(opCtx, collector.getDbName(), collector.getMetrics()); + }); + // The collection object should always exist while an index build is registered. CollectionWriter collection(opCtx, replState->collectionUUID); invariant(collection, diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index eef382c66de..e73cbc97c1c 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -45,6 +45,7 @@ #include "mongo/db/operation_context.h" #include "mongo/db/s/operation_sharding_state.h" #include "mongo/db/service_context.h" +#include "mongo/db/stats/resource_consumption_metrics.h" #include "mongo/db/storage/two_phase_index_build_knobs_gen.h" #include "mongo/executor/task_executor.h" #include "mongo/logv2/log.h" @@ -319,6 +320,14 @@ IndexBuildsCoordinatorMongod::_startIndexBuild(OperationContext* opCtx, sleepmillis(100); } + // Start collecting metrics for the index build. The metrics for this operation will only be + // aggregated globally if the node commits or aborts while it is primary. + auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx.get()); + if (ResourceConsumption::shouldCollectMetricsForDatabase(dbName) && + ResourceConsumption::isMetricsCollectionEnabled()) { + metricsCollector.beginScopedCollecting(dbName); + } + // Index builds should never take the PBWM lock, even on a primary. This allows the // index build to continue running after the node steps down to a secondary. ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock( diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp index 89fea0c89cb..12dc4c200f1 100644 --- a/src/mongo/db/introspect.cpp +++ b/src/mongo/db/introspect.cpp @@ -71,7 +71,7 @@ void profile(OperationContext* opCtx, NetworkOp op) { if (metricsCollector.hasCollectedMetrics()) { BSONObjBuilder metricsBuilder = b.subobjStart("operationMetrics"); const auto& metrics = metricsCollector.getMetrics(); - metrics.toFlatBsonAllFields(&metricsBuilder); + metrics.toBson(&metricsBuilder); metricsBuilder.done(); } diff --git a/src/mongo/db/stats/resource_consumption_metrics.cpp b/src/mongo/db/stats/resource_consumption_metrics.cpp index 362ec248703..5e9633856c2 100644 --- a/src/mongo/db/stats/resource_consumption_metrics.cpp +++ b/src/mongo/db/stats/resource_consumption_metrics.cpp @@ -88,56 +88,46 @@ ResourceConsumption::MetricsCollector& ResourceConsumption::MetricsCollector::ge return getMetricsCollector(opCtx); } -void ResourceConsumption::Metrics::toBson(BSONObjBuilder* builder) const { +void ResourceConsumption::ReadMetrics::toBson(BSONObjBuilder* builder) const { + builder->appendNumber(kDocBytesRead, docBytesRead); + builder->appendNumber(kDocUnitsRead, docUnitsRead); + builder->appendNumber(kIdxEntryBytesRead, idxEntryBytesRead); + builder->appendNumber(kIdxEntryUnitsRead, idxEntryUnitsRead); + builder->appendNumber(kKeysSorted, keysSorted); + builder->appendNumber(kDocUnitsReturned, docUnitsReturned); +} + +void ResourceConsumption::WriteMetrics::toBson(BSONObjBuilder* builder) const { + builder->appendNumber(kDocBytesWritten, docBytesWritten); + builder->appendNumber(kDocUnitsWritten, docUnitsWritten); + builder->appendNumber(kIdxEntryBytesWritten, idxEntryBytesWritten); + builder->appendNumber(kIdxEntryUnitsWritten, idxEntryUnitsWritten); +} + +void ResourceConsumption::AggregatedMetrics::toBson(BSONObjBuilder* builder) const { { BSONObjBuilder primaryBuilder = builder->subobjStart(kPrimaryMetrics); - primaryBuilder.appendNumber(kDocBytesRead, primaryMetrics.docBytesRead); - primaryBuilder.appendNumber(kDocUnitsRead, primaryMetrics.docUnitsRead); - primaryBuilder.appendNumber(kIdxEntryBytesRead, primaryMetrics.idxEntryBytesRead); - primaryBuilder.appendNumber(kIdxEntryUnitsRead, primaryMetrics.idxEntryUnitsRead); - primaryBuilder.appendNumber(kKeysSorted, primaryMetrics.keysSorted); - primaryBuilder.appendNumber(kDocUnitsReturned, primaryMetrics.docUnitsReturned); + primaryReadMetrics.toBson(&primaryBuilder); primaryBuilder.done(); } { BSONObjBuilder secondaryBuilder = builder->subobjStart(kSecondaryMetrics); - secondaryBuilder.appendNumber(kDocBytesRead, secondaryMetrics.docBytesRead); - secondaryBuilder.appendNumber(kDocUnitsRead, secondaryMetrics.docUnitsRead); - secondaryBuilder.appendNumber(kIdxEntryBytesRead, secondaryMetrics.idxEntryBytesRead); - secondaryBuilder.appendNumber(kIdxEntryUnitsRead, secondaryMetrics.idxEntryUnitsRead); - secondaryBuilder.appendNumber(kKeysSorted, secondaryMetrics.keysSorted); - secondaryBuilder.appendNumber(kDocUnitsReturned, secondaryMetrics.docUnitsReturned); + secondaryReadMetrics.toBson(&secondaryBuilder); secondaryBuilder.done(); } + writeMetrics.toBson(builder); builder->appendNumber(kCpuMillis, cpuMillis); - builder->appendNumber(kDocBytesWritten, docBytesWritten); - builder->appendNumber(kDocUnitsWritten, docUnitsWritten); - builder->appendNumber(kIdxEntryBytesWritten, idxEntryBytesWritten); - builder->appendNumber(kIdxEntryUnitsWritten, idxEntryUnitsWritten); } -void ResourceConsumption::Metrics::toFlatBsonAllFields(BSONObjBuilder* builder) const { - // Report all read metrics together to generate a flat object. - auto readMetrics = primaryMetrics + secondaryMetrics; - builder->appendNumber(kDocBytesRead, readMetrics.docBytesRead); - builder->appendNumber(kDocUnitsRead, readMetrics.docUnitsRead); - builder->appendNumber(kIdxEntryBytesRead, readMetrics.idxEntryBytesRead); - builder->appendNumber(kIdxEntryUnitsRead, readMetrics.idxEntryUnitsRead); - builder->appendNumber(kKeysSorted, readMetrics.keysSorted); - builder->appendNumber(kDocUnitsReturned, readMetrics.docUnitsReturned); - +void ResourceConsumption::OperationMetrics::toBson(BSONObjBuilder* builder) const { + readMetrics.toBson(builder); + writeMetrics.toBson(builder); builder->appendNumber(kCpuMillis, cpuMillis); - builder->appendNumber(kDocBytesWritten, docBytesWritten); - builder->appendNumber(kDocUnitsWritten, docUnitsWritten); - builder->appendNumber(kIdxEntryBytesWritten, idxEntryBytesWritten); - builder->appendNumber(kIdxEntryUnitsWritten, idxEntryUnitsWritten); } -void ResourceConsumption::Metrics::toFlatBsonNonZeroFields(BSONObjBuilder* builder) const { - // Report all read metrics together to generate a flat object. - auto readMetrics = primaryMetrics + secondaryMetrics; +void ResourceConsumption::OperationMetrics::toBsonNonZeroFields(BSONObjBuilder* builder) const { appendNonZeroMetric(builder, kDocBytesRead, readMetrics.docBytesRead); appendNonZeroMetric(builder, kDocUnitsRead, readMetrics.docUnitsRead); appendNonZeroMetric(builder, kIdxEntryBytesRead, readMetrics.idxEntryBytesRead); @@ -146,10 +136,10 @@ void ResourceConsumption::Metrics::toFlatBsonNonZeroFields(BSONObjBuilder* build appendNonZeroMetric(builder, kDocUnitsReturned, readMetrics.docUnitsReturned); appendNonZeroMetric(builder, kCpuMillis, cpuMillis); - appendNonZeroMetric(builder, kDocBytesWritten, docBytesWritten); - appendNonZeroMetric(builder, kDocUnitsWritten, docUnitsWritten); - appendNonZeroMetric(builder, kIdxEntryBytesWritten, idxEntryBytesWritten); - appendNonZeroMetric(builder, kIdxEntryUnitsWritten, idxEntryUnitsWritten); + appendNonZeroMetric(builder, kDocBytesWritten, writeMetrics.docBytesWritten); + appendNonZeroMetric(builder, kDocUnitsWritten, writeMetrics.docUnitsWritten); + appendNonZeroMetric(builder, kIdxEntryBytesWritten, writeMetrics.idxEntryBytesWritten); + appendNonZeroMetric(builder, kIdxEntryUnitsWritten, writeMetrics.idxEntryUnitsWritten); } template <typename Func> @@ -160,65 +150,47 @@ inline void ResourceConsumption::MetricsCollector::_doIfCollecting(Func&& func) func(); } -void ResourceConsumption::MetricsCollector::_updateReadMetrics(OperationContext* opCtx, - ReadMetricsFunc&& updateFunc) { - _doIfCollecting([&] { - // The RSTL is normally required to check the replication state, but callers may not always - // be holding it. Since we need to attribute this metric to some replication state, and an - // inconsistent state is not impactful for the purposes of metrics collection, perform a - // best-effort check so that we can record metrics for this operation. - if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase_UNSAFE( - opCtx, NamespaceString::kAdminDb)) { - updateFunc(_metrics.primaryMetrics); - } else { - updateFunc(_metrics.secondaryMetrics); - } - }); -} - void ResourceConsumption::MetricsCollector::incrementOneDocRead(OperationContext* opCtx, size_t docBytesRead) { - _updateReadMetrics(opCtx, [&](ReadMetrics& readMetrics) { + _doIfCollecting([&]() { size_t docUnits = std::ceil(docBytesRead / static_cast<float>(gDocumentUnitSizeBytes)); - readMetrics.docBytesRead += docBytesRead; - readMetrics.docUnitsRead += docUnits; + _metrics.readMetrics.docBytesRead += docBytesRead; + _metrics.readMetrics.docUnitsRead += docUnits; }); } void ResourceConsumption::MetricsCollector::incrementOneIdxEntryRead(OperationContext* opCtx, size_t bytesRead) { - _updateReadMetrics(opCtx, [&](ReadMetrics& readMetrics) { + _doIfCollecting([&]() { size_t units = std::ceil(bytesRead / static_cast<float>(gIndexEntryUnitSizeBytes)); - readMetrics.idxEntryBytesRead += bytesRead; - readMetrics.idxEntryUnitsRead += units; + _metrics.readMetrics.idxEntryBytesRead += bytesRead; + _metrics.readMetrics.idxEntryUnitsRead += units; }); } void ResourceConsumption::MetricsCollector::incrementKeysSorted(OperationContext* opCtx, size_t keysSorted) { - _updateReadMetrics(opCtx, - [&](ReadMetrics& readMetrics) { readMetrics.keysSorted += keysSorted; }); + _doIfCollecting([&]() { _metrics.readMetrics.keysSorted += keysSorted; }); } void ResourceConsumption::MetricsCollector::incrementDocUnitsReturned(OperationContext* opCtx, size_t returned) { - _updateReadMetrics(opCtx, - [&](ReadMetrics& readMetrics) { readMetrics.docUnitsReturned += returned; }); + _doIfCollecting([&]() { _metrics.readMetrics.docUnitsReturned += returned; }); } void ResourceConsumption::MetricsCollector::incrementOneDocWritten(size_t bytesWritten) { _doIfCollecting([&] { size_t docUnits = std::ceil(bytesWritten / static_cast<float>(gDocumentUnitSizeBytes)); - _metrics.docBytesWritten += bytesWritten; - _metrics.docUnitsWritten += docUnits; + _metrics.writeMetrics.docBytesWritten += bytesWritten; + _metrics.writeMetrics.docUnitsWritten += docUnits; }); } void ResourceConsumption::MetricsCollector::incrementOneIdxEntryWritten(size_t bytesWritten) { _doIfCollecting([&] { size_t idxUnits = std::ceil(bytesWritten / static_cast<float>(gIndexEntryUnitSizeBytes)); - _metrics.idxEntryBytesWritten += bytesWritten; - _metrics.idxEntryUnitsWritten += idxUnits; + _metrics.writeMetrics.idxEntryBytesWritten += bytesWritten; + _metrics.writeMetrics.idxEntryUnitsWritten += idxUnits; }); } @@ -259,16 +231,12 @@ ResourceConsumption::ScopedMetricsCollector::~ScopedMetricsCollector() { return; } - if (collector.getDbName().empty()) { - return; - } - if (!isMetricsAggregationEnabled()) { return; } auto& globalResourceConsumption = ResourceConsumption::get(_opCtx); - globalResourceConsumption.add(collector); + globalResourceConsumption.merge(_opCtx, collector.getDbName(), collector.getMetrics()); } ResourceConsumption& ResourceConsumption::get(ServiceContext* svcCtx) { @@ -279,10 +247,31 @@ ResourceConsumption& ResourceConsumption::get(OperationContext* opCtx) { return getGlobalResourceConsumption(opCtx->getServiceContext()); } -void ResourceConsumption::add(const MetricsCollector& collector) { - invariant(!collector.getDbName().empty()); +void ResourceConsumption::merge(OperationContext* opCtx, + const std::string& dbName, + const OperationMetrics& metrics) { + invariant(!dbName.empty()); + + // All metrics over the duration of this operation will be attributed to the current state, even + // if it ran accross state transitions. + // The RSTL is normally required to check the replication state, but callers may not always be + // holding it. Since we need to attribute this metric to some replication state, and an + // inconsistent state is not impactful for the purposes of metrics collection, perform a + // best-effort check so that we can record metrics for this operation. + auto isPrimary = repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase_UNSAFE( + opCtx, NamespaceString::kAdminDb); + + // Add all metrics into the the globally-aggregated metrics. stdx::unique_lock<Mutex> lk(_mutex); - _metrics[collector.getDbName()] += collector.getMetrics(); + auto& elem = _metrics[dbName]; + + if (isPrimary) { + elem.primaryReadMetrics += metrics.readMetrics; + } else { + elem.secondaryReadMetrics += metrics.readMetrics; + } + elem.writeMetrics += metrics.writeMetrics; + elem.cpuMillis += metrics.cpuMillis; } ResourceConsumption::MetricsMap ResourceConsumption::getMetrics() const { diff --git a/src/mongo/db/stats/resource_consumption_metrics.h b/src/mongo/db/stats/resource_consumption_metrics.h index 2968619ee1c..05b74c26043 100644 --- a/src/mongo/db/stats/resource_consumption_metrics.h +++ b/src/mongo/db/stats/resource_consumption_metrics.h @@ -49,7 +49,9 @@ public: static ResourceConsumption& get(OperationContext* opCtx); static ResourceConsumption& get(ServiceContext* svcCtx); - struct ReadMetrics { + /** ReadMetrics maintains metrics for read operations. */ + class ReadMetrics { + public: void add(const ReadMetrics& other) { docBytesRead += other.docBytesRead; docUnitsRead += other.docUnitsRead; @@ -59,88 +61,117 @@ public: docUnitsReturned += other.docUnitsReturned; } - ReadMetrics operator+(const ReadMetrics& other) const { - ReadMetrics copy = *this; - copy.add(other); - return copy; - } - ReadMetrics& operator+=(const ReadMetrics& other) { add(other); return *this; } + /** + * Reports all metrics on a BSONObjBuilder. + */ + void toBson(BSONObjBuilder* builder) const; + // Number of document bytes read - long long docBytesRead; + long long docBytesRead = 0; // Number of document units read - long long docUnitsRead; + long long docUnitsRead = 0; // Number of index entry bytes read - long long idxEntryBytesRead; + long long idxEntryBytesRead = 0; // Number of index entries units read - long long idxEntryUnitsRead; + long long idxEntryUnitsRead = 0; // Number of keys sorted for query operations - long long keysSorted; + long long keysSorted = 0; // Number of document units returned by a query - long long docUnitsReturned; + long long docUnitsReturned = 0; }; - /** - * Metrics maintains a set of resource consumption metrics. - */ - class Metrics { + /* WriteMetrics maintains metrics for write operations. */ + class WriteMetrics { public: - /** - * Adds other Metrics to this one. - */ - void add(const Metrics& other) { - primaryMetrics += other.primaryMetrics; - secondaryMetrics += other.secondaryMetrics; - cpuMillis += other.cpuMillis; + void add(const WriteMetrics& other) { docBytesWritten += other.docBytesWritten; docUnitsWritten += other.docUnitsWritten; idxEntryBytesWritten += other.idxEntryBytesWritten; idxEntryUnitsWritten += other.idxEntryUnitsWritten; - }; + } - Metrics& operator+=(const Metrics& other) { + WriteMetrics& operator+=(const WriteMetrics& other) { add(other); return *this; } - // Read metrics recorded for queries processed while this node was primary - ReadMetrics primaryMetrics; - // Read metrics recorded for queries processed while this node was secondary - ReadMetrics secondaryMetrics; - // Amount of CPU time consumed by an operation in milliseconds - long long cpuMillis; + /** + * Reports all metrics on a BSONObjBuilder. + */ + void toBson(BSONObjBuilder* builder) const; + // Number of document bytes written - long long docBytesWritten; + long long docBytesWritten = 0; // Number of document units written - long long docUnitsWritten; + long long docUnitsWritten = 0; // Number of index entry bytes written - long long idxEntryBytesWritten; + long long idxEntryBytesWritten = 0; // Number of index entry units written - long long idxEntryUnitsWritten; + long long idxEntryUnitsWritten = 0; + }; + /** + * OperationMetrics maintains resource consumption metrics for a single operation. + */ + class OperationMetrics { + public: /** - * Reports all metrics on a BSONObjectBuilder. The generated object has nested fields to - * represent the stucture of the data members on this class. + * Reports all metrics on a BSONObjBuilder. */ void toBson(BSONObjBuilder* builder) const; /** - * Reports metrics on a BSONObjectBuilder. This forms a flat object by merging - * primaryMetrics and secondaryMetrics and promotes their members to top-level fields. All - * fields are reported. + * Reports metrics on a BSONObjBuilder. Only non-zero fields are reported. */ - void toFlatBsonAllFields(BSONObjBuilder* builder) const; + void toBsonNonZeroFields(BSONObjBuilder* builder) const; + + // Read and write metrics for this operation + ReadMetrics readMetrics; + WriteMetrics writeMetrics; + + // Amount of CPU time consumed by an operation in milliseconds + long long cpuMillis = 0; + }; + + /** + * AggregatedMetrics maintains a structure of resource consumption metrics designed to be + * aggregated and added together at some global level. + */ + class AggregatedMetrics { + public: + void add(const AggregatedMetrics& other) { + primaryReadMetrics += other.primaryReadMetrics; + secondaryReadMetrics += other.secondaryReadMetrics; + writeMetrics += other.writeMetrics; + cpuMillis += other.cpuMillis; + }; + + AggregatedMetrics& operator+=(const AggregatedMetrics& other) { + add(other); + return *this; + } /** - * Reports metrics on a BSONObjectBuilder. This forms a flat object by merging - * primaryMetrics and secondaryMetrics and promotes their members to top-level fields. Only - * non-zero fields are reported. + * Reports all metrics on a BSONObjBuilder. */ - void toFlatBsonNonZeroFields(BSONObjBuilder* builder) const; + void toBson(BSONObjBuilder* builder) const; + + // Read metrics recorded for queries processed while this node was primary + ReadMetrics primaryReadMetrics; + + // Read metrics recorded for queries processed while this node was secondary + ReadMetrics secondaryReadMetrics; + + // Write metrics recorded for all operations + WriteMetrics writeMetrics; + + // Amount of CPU time consumed by an operation in milliseconds + long long cpuMillis = 0; }; /** @@ -206,12 +237,12 @@ public: * To observe the stored Metrics, the dbName must be set. This prevents "losing" collected * Metrics due to the Collector stopping without being associated with any database yet. */ - Metrics& getMetrics() { + OperationMetrics& getMetrics() { invariant(!_dbName.empty(), "observing Metrics before a dbName has been set"); return _metrics; } - const Metrics& getMetrics() const { + const OperationMetrics& getMetrics() const { invariant(!_dbName.empty(), "observing Metrics before a dbName has been set"); return _metrics; } @@ -225,15 +256,13 @@ public: /** * This should be called once per document read with the number of bytes read for that - * document. This is replication-state aware and increments the metric based on the current - * replication state. This is a no-op when metrics collection is disabled on this operation. + * document. This is a no-op when metrics collection is disabled on this operation. */ void incrementOneDocRead(OperationContext* opCtx, size_t docBytesRead); /** * This should be called once per index entry read with the number of bytes read for that - * entry. This is replication-state aware and increments the metric based on the current - * replication state. This is a no-op when metrics collection is disabled on this operation. + * entry. This is a no-op when metrics collection is disabled on this operation. */ void incrementOneIdxEntryRead(OperationContext* opCtx, size_t idxEntryBytesRead); @@ -242,15 +271,13 @@ public: /** * This should be called once per document written with the number of bytes written for that - * document. This increments the metric independent of replication state, and only when - * metrics collection is enabled for this operation. + * document. This is a no-op when metrics collection is disabled on this operation. */ void incrementOneDocWritten(size_t docBytesWritten); /** * This should be called once per index entry written with the number of bytes written for - * that entry. This increments the metric independent of replication state, and only when - * metrics collection is enabled for this operation. + * that entry. This is a no-op when metrics collection is disabled on this operation. */ void incrementOneIdxEntryWritten(size_t idxEntryBytesWritten); @@ -258,13 +285,6 @@ public: private: /** - * Update the current replication state's ReadMetrics if this operation is currently - * collecting metrics. - */ - using ReadMetricsFunc = std::function<void(ReadMetrics& readMetrics)>; - void _updateReadMetrics(OperationContext* opCtx, ReadMetricsFunc&& updateFunc); - - /** * Helper function that calls the Func when this collector is currently collecting metrics. */ template <typename Func> @@ -284,7 +304,7 @@ public: ScopedCollectionState _collecting = ScopedCollectionState::kInactive; bool _hasCollectedMetrics = false; std::string _dbName; - Metrics _metrics; + OperationMetrics _metrics; }; /** @@ -328,18 +348,19 @@ public: static bool isMetricsAggregationEnabled(); /** - * Adds a MetricsCollector's Metrics to an existing Metrics object in the map, keyed by - * database name. If no Metrics exist for the database, the value is initialized with the - * provided MetricsCollector's Metrics. + * Merges OperationMetrics with a globally-aggregated structure. The OperationMetrics's contents + * are added to existing values in a map keyed by database name. Read metrics will be attributed + * to the current replication state. If no metrics already exist for the database, a new value + * is initialized with the one provided. * - * The MetricsCollector's database name must not be an empty string. + * The database name must not be an empty string. */ - void add(const MetricsCollector& metrics); + void merge(OperationContext* opCtx, const std::string& dbName, const OperationMetrics& metrics); /** * Returns a copy of the Metrics map. */ - using MetricsMap = std::map<std::string, Metrics>; + using MetricsMap = std::map<std::string, AggregatedMetrics>; MetricsMap getMetrics() const; /** diff --git a/src/mongo/db/stats/resource_consumption_metrics_test.cpp b/src/mongo/db/stats/resource_consumption_metrics_test.cpp index 79f23103a29..50c9711df51 100644 --- a/src/mongo/db/stats/resource_consumption_metrics_test.cpp +++ b/src/mongo/db/stats/resource_consumption_metrics_test.cpp @@ -76,7 +76,7 @@ protected: ServiceContext::UniqueOperationContext _opCtx; }; -TEST_F(ResourceConsumptionMetricsTest, Add) { +TEST_F(ResourceConsumptionMetricsTest, Merge) { auto& globalResourceConsumption = ResourceConsumption::get(getServiceContext()); auto [client2, opCtx2] = makeClientAndCtx("opCtx2"); @@ -86,10 +86,14 @@ TEST_F(ResourceConsumptionMetricsTest, Add) { operationMetrics1.beginScopedCollecting("db1"); operationMetrics2.beginScopedCollecting("db2"); - globalResourceConsumption.add(operationMetrics1); - globalResourceConsumption.add(operationMetrics1); - globalResourceConsumption.add(operationMetrics2); - globalResourceConsumption.add(operationMetrics2); + globalResourceConsumption.merge( + _opCtx.get(), operationMetrics1.getDbName(), operationMetrics1.getMetrics()); + globalResourceConsumption.merge( + _opCtx.get(), operationMetrics1.getDbName(), operationMetrics1.getMetrics()); + globalResourceConsumption.merge( + opCtx2.get(), operationMetrics2.getDbName(), operationMetrics2.getMetrics()); + globalResourceConsumption.merge( + opCtx2.get(), operationMetrics2.getDbName(), operationMetrics2.getMetrics()); auto globalMetrics = globalResourceConsumption.getMetrics(); ASSERT_EQ(globalMetrics.count("db1"), 1); @@ -226,12 +230,12 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetrics) { ASSERT(operationMetrics.hasCollectedMetrics()); auto metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docBytesRead, 2); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docUnitsRead, 1); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.idxEntryBytesRead, 8); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.idxEntryUnitsRead, 1); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.keysSorted, 16); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docUnitsReturned, 32); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 2); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 1); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 8); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryUnitsRead, 1); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 16); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsReturned, 32); // Clear metrics so we do not double-count. operationMetrics.reset(); @@ -246,12 +250,12 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetrics) { } metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docBytesRead, 2 + 32); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docUnitsRead, 2); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.idxEntryBytesRead, 8 + 128); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.idxEntryUnitsRead, 1 + 8); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.keysSorted, 16 + 256); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docUnitsReturned, 32 + 512); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 2 + 32); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 2); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 8 + 128); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryUnitsRead, 1 + 8); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 16 + 256); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsReturned, 32 + 512); } TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsSecondary) { @@ -271,12 +275,12 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsSecondary) { } auto metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.docBytesRead, 2); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.docUnitsRead, 1); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.idxEntryBytesRead, 8); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.idxEntryUnitsRead, 1); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.keysSorted, 16); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.docUnitsReturned, 32); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docBytesRead, 2); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsRead, 1); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryBytesRead, 8); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryUnitsRead, 1); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.keysSorted, 16); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsReturned, 32); // Clear metrics so we do not double-count. operationMetrics.reset(); @@ -291,12 +295,85 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsSecondary) { } metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.docBytesRead, 2 + 32); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.docUnitsRead, 2); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.idxEntryBytesRead, 8 + 128); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.idxEntryUnitsRead, 1 + 8); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.keysSorted, 16 + 256); - ASSERT_EQ(metricsCopy["db1"].secondaryMetrics.docUnitsReturned, 32 + 512); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docBytesRead, 2 + 32); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsRead, 2); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryBytesRead, 8 + 128); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryUnitsRead, 1 + 8); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.keysSorted, 16 + 256); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsReturned, 32 + 512); +} + +TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsAcrossStates) { + auto& globalResourceConsumption = ResourceConsumption::get(getServiceContext()); + auto& operationMetrics = ResourceConsumption::MetricsCollector::get(_opCtx.get()); + + // Start collecting metrics in the primary state, then change to secondary. Metrics should be + // attributed to the secondary state, since that is the state where the operation completed. + { + ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1"); + + operationMetrics.incrementOneDocRead(_opCtx.get(), 2); + operationMetrics.incrementOneIdxEntryRead(_opCtx.get(), 8); + operationMetrics.incrementKeysSorted(_opCtx.get(), 16); + operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 32); + + ASSERT_OK(repl::ReplicationCoordinator::get(_opCtx.get()) + ->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + operationMetrics.incrementOneDocRead(_opCtx.get(), 32); + operationMetrics.incrementOneIdxEntryRead(_opCtx.get(), 128); + operationMetrics.incrementKeysSorted(_opCtx.get(), 256); + operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 512); + } + + auto metricsCopy = globalResourceConsumption.getAndClearMetrics(); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 0); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 0); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 0); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryUnitsRead, 0); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 0); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsReturned, 0); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docBytesRead, 2 + 32); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsRead, 2); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryBytesRead, 8 + 128); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryUnitsRead, 1 + 8); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.keysSorted, 16 + 256); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsReturned, 32 + 512); + + operationMetrics.reset(); + + // Start collecting metrics in the secondary state, then change to primary. Metrics should be + // attributed to the secondary state only. + { + ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1"); + + operationMetrics.incrementOneDocRead(_opCtx.get(), 2); + operationMetrics.incrementOneIdxEntryRead(_opCtx.get(), 8); + operationMetrics.incrementKeysSorted(_opCtx.get(), 16); + operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 32); + + ASSERT_OK(repl::ReplicationCoordinator::get(_opCtx.get()) + ->setFollowerMode(repl::MemberState::RS_PRIMARY)); + + operationMetrics.incrementOneDocRead(_opCtx.get(), 32); + operationMetrics.incrementOneIdxEntryRead(_opCtx.get(), 128); + operationMetrics.incrementKeysSorted(_opCtx.get(), 256); + operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 512); + } + + metricsCopy = globalResourceConsumption.getAndClearMetrics(); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 2 + 32); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 2); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 8 + 128); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryUnitsRead, 1 + 8); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 16 + 256); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsReturned, 32 + 512); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docBytesRead, 0); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsRead, 0); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryBytesRead, 0); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryUnitsRead, 0); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.keysSorted, 0); + ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsReturned, 0); } TEST_F(ResourceConsumptionMetricsTest, DocumentUnitsRead) { @@ -330,8 +407,8 @@ TEST_F(ResourceConsumptionMetricsTest, DocumentUnitsRead) { } auto metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docBytesRead, expectedBytes); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.docUnitsRead, expectedUnits); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, expectedBytes); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, expectedUnits); } TEST_F(ResourceConsumptionMetricsTest, DocumentUnitsWritten) { @@ -365,8 +442,8 @@ TEST_F(ResourceConsumptionMetricsTest, DocumentUnitsWritten) { } auto metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].docBytesWritten, expectedBytes); - ASSERT_EQ(metricsCopy["db1"].docUnitsWritten, expectedUnits); + ASSERT_EQ(metricsCopy["db1"].writeMetrics.docBytesWritten, expectedBytes); + ASSERT_EQ(metricsCopy["db1"].writeMetrics.docUnitsWritten, expectedUnits); } TEST_F(ResourceConsumptionMetricsTest, IdxEntryUnitsRead) { @@ -414,8 +491,8 @@ TEST_F(ResourceConsumptionMetricsTest, IdxEntryUnitsRead) { } auto metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.idxEntryBytesRead, expectedBytes); - ASSERT_EQ(metricsCopy["db1"].primaryMetrics.idxEntryUnitsRead, expectedUnits); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, expectedBytes); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryUnitsRead, expectedUnits); } TEST_F(ResourceConsumptionMetricsTest, IdxEntryUnitsWritten) { @@ -463,7 +540,7 @@ TEST_F(ResourceConsumptionMetricsTest, IdxEntryUnitsWritten) { } auto metricsCopy = globalResourceConsumption.getMetrics(); - ASSERT_EQ(metricsCopy["db1"].idxEntryBytesWritten, expectedBytes); - ASSERT_EQ(metricsCopy["db1"].idxEntryUnitsWritten, expectedUnits); + ASSERT_EQ(metricsCopy["db1"].writeMetrics.idxEntryBytesWritten, expectedBytes); + ASSERT_EQ(metricsCopy["db1"].writeMetrics.idxEntryUnitsWritten, expectedUnits); } } // namespace mongo |