diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-11-09 09:17:07 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-09 14:48:16 +0000 |
commit | a09a1afbe18353ac5c865a643c97029e5cba4925 (patch) | |
tree | 7caa77af6a208654628a86271db35756cd07c346 | |
parent | 9363cfebe2ab8123ced86632a6b674fb5903e420 (diff) | |
download | mongo-a09a1afbe18353ac5c865a643c97029e5cba4925.tar.gz |
SERVER-51668 Report total CPU time spent by operations in serverStatus
-rw-r--r-- | jstests/noPassthrough/aggregate_operation_metrics.js | 24 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_operation_metrics.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/stats/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics.cpp | 65 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics.h | 26 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics_test.cpp | 75 |
6 files changed, 140 insertions, 59 deletions
diff --git a/jstests/noPassthrough/aggregate_operation_metrics.js b/jstests/noPassthrough/aggregate_operation_metrics.js index 74d9e649cbe..4a893f59b30 100644 --- a/jstests/noPassthrough/aggregate_operation_metrics.js +++ b/jstests/noPassthrough/aggregate_operation_metrics.js @@ -58,6 +58,12 @@ let getDBMetrics = (adminDB) => { return allMetrics; }; +let getGlobalCpuTime = (db) => { + let ss = db.serverStatus({resourceConsumption: true}); + assert(ss.hasOwnProperty('resourceConsumption'), ss); + return ss.resourceConsumption.cpuNanos; +}; + // Perform very basic reads and writes on two different databases. const db1Name = 'db1'; const primary = rst.getPrimary(); @@ -70,6 +76,10 @@ const db2 = primary.getDB(db2Name); assert.commandWorked(db2.coll1.insert({a: 1})); assert.commandWorked(db2.coll2.insert({a: 1})); +// The 'resourceConsumption' field should not be included in serverStatus by default. +let ss = db1.serverStatus(); +assert(!ss.hasOwnProperty('resourceConsumption'), ss); + const secondary = rst.getSecondary(); [primary, secondary].forEach(function(node) { jsTestLog("Testing node: " + node); @@ -78,6 +88,9 @@ const secondary = rst.getSecondary(); // a previous loop iteration. rst.awaitReplication(); const adminDB = node.getDB('admin'); + + let initialCpuTime = getGlobalCpuTime(adminDB); + adminDB.aggregate([{$operationMetrics: {clearMetrics: true}}]); assert.eq(node.getDB(db1Name).coll1.find({a: 1}).itcount(), 1); @@ -129,6 +142,17 @@ const secondary = rst.getSecondary(); lastDocBytesRead = allMetrics[db1Name].secondaryMetrics.docBytesRead; } + // CPU time aggregation is not supported on Windows. + if (!_isWindows()) { + // Ensure the CPU time is increasing. + let lastCpuTime = getGlobalCpuTime(adminDB); + assert.gt(lastCpuTime, initialCpuTime); + + // Ensure the global CPU time matches the aggregated time for both databases. + assert.eq(lastCpuTime - initialCpuTime, + allMetrics[db1Name].cpuNanos + allMetrics[db2Name].cpuNanos); + } + // Metrics for these databases should not be collected or reported. assert.eq(allMetrics['admin'], undefined); assert.eq(allMetrics['local'], undefined); diff --git a/src/mongo/db/pipeline/document_source_operation_metrics.cpp b/src/mongo/db/pipeline/document_source_operation_metrics.cpp index 5ae4e1e7734..d694b651f16 100644 --- a/src/mongo/db/pipeline/document_source_operation_metrics.cpp +++ b/src/mongo/db/pipeline/document_source_operation_metrics.cpp @@ -56,13 +56,13 @@ static constexpr StringData kDatabaseName = "db"_sd; DocumentSource::GetNextResult DocumentSourceOperationMetrics::doGetNext() { if (_operationMetrics.empty()) { - auto globalMetrics = [&]() { + auto dbMetrics = [&]() { if (_clearMetrics) { - return ResourceConsumption::get(pExpCtx->opCtx).getAndClearMetrics(); + return ResourceConsumption::get(pExpCtx->opCtx).getAndClearDbMetrics(); } - return ResourceConsumption::get(pExpCtx->opCtx).getMetrics(); + return ResourceConsumption::get(pExpCtx->opCtx).getDbMetrics(); }(); - for (auto& [dbName, metrics] : globalMetrics) { + for (auto& [dbName, metrics] : dbMetrics) { BSONObjBuilder builder; builder.append(kDatabaseName, dbName); metrics.toBson(&builder); diff --git a/src/mongo/db/stats/SConscript b/src/mongo/db/stats/SConscript index 65545e1e408..679545ba324 100644 --- a/src/mongo/db/stats/SConscript +++ b/src/mongo/db/stats/SConscript @@ -83,6 +83,7 @@ env.Library( '$BUILD_DIR/mongo/idl/idl_parser', ], LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/commands/server_status', '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', '$BUILD_DIR/mongo/idl/feature_flag', '$BUILD_DIR/mongo/idl/server_parameter', diff --git a/src/mongo/db/stats/resource_consumption_metrics.cpp b/src/mongo/db/stats/resource_consumption_metrics.cpp index 7c40b216987..90d278ea072 100644 --- a/src/mongo/db/stats/resource_consumption_metrics.cpp +++ b/src/mongo/db/stats/resource_consumption_metrics.cpp @@ -33,6 +33,7 @@ #include "mongo/db/stats/resource_consumption_metrics.h" +#include "mongo/db/commands/server_status.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/stats/operation_resource_consumption_gen.h" #include "mongo/logv2/log.h" @@ -63,6 +64,31 @@ inline void appendNonZeroMetric(BSONObjBuilder* builder, const char* name, long builder->append(name, value); } } + +/** + * Reports globally-aggregated CPU time spent by user operations and a specific set of commands. + */ +class ResourceConsumptionSSS : public ServerStatusSection { +public: + ResourceConsumptionSSS() : ServerStatusSection("resourceConsumption") {} + + // Do not include this metric by default. It is likely to be misleading for diagnostic purposes + // because it does not include the CPU time for every operation or every command. + bool includeByDefault() const override { + return false; + } + + BSONObj generateSection(OperationContext* opCtx, const BSONElement& configElem) const override { + auto& resourceConsumption = ResourceConsumption::get(opCtx); + if (!resourceConsumption.isMetricsAggregationEnabled()) { + return BSONObj(); + } + BSONObjBuilder builder; + builder.append(kCpuNanos, durationCount<Nanoseconds>(resourceConsumption.getCpuTime())); + return builder.obj(); + } +} resourceConsumptionMetricSSM; + } // namespace bool ResourceConsumption::isMetricsCollectionEnabled() { @@ -284,31 +310,42 @@ void ResourceConsumption::merge(OperationContext* opCtx, auto isPrimary = repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase_UNSAFE( opCtx, NamespaceString::kAdminDb); - // Add all metrics into the the globally-aggregated metrics. - stdx::unique_lock<Mutex> lk(_mutex); - auto& elem = _metrics[dbName]; - + AggregatedMetrics newMetrics; if (isPrimary) { - elem.primaryReadMetrics += metrics.readMetrics; + newMetrics.primaryReadMetrics = metrics.readMetrics; } else { - elem.secondaryReadMetrics += metrics.readMetrics; + newMetrics.secondaryReadMetrics = metrics.readMetrics; } - elem.writeMetrics += metrics.writeMetrics; + newMetrics.writeMetrics = metrics.writeMetrics; if (metrics.cpuTimer) { - elem.cpuNanos += metrics.cpuTimer->getElapsed(); + newMetrics.cpuNanos = metrics.cpuTimer->getElapsed(); } + + // Add all metrics into the the globally-aggregated metrics. + stdx::lock_guard<Mutex> lk(_mutex); + _dbMetrics[dbName] += newMetrics; + _cpuTime += newMetrics.cpuNanos; } -ResourceConsumption::MetricsMap ResourceConsumption::getMetrics() const { - stdx::unique_lock<Mutex> lk(_mutex); - return _metrics; +ResourceConsumption::MetricsMap ResourceConsumption::getDbMetrics() const { + stdx::lock_guard<Mutex> lk(_mutex); + return _dbMetrics; } -ResourceConsumption::MetricsMap ResourceConsumption::getAndClearMetrics() { - stdx::unique_lock<Mutex> lk(_mutex); +ResourceConsumption::MetricsMap ResourceConsumption::getAndClearDbMetrics() { + stdx::lock_guard<Mutex> lk(_mutex); MetricsMap newMap; - _metrics.swap(newMap); + _dbMetrics.swap(newMap); return newMap; } +Nanoseconds ResourceConsumption::getCpuTime() const { + stdx::lock_guard<Mutex> lk(_mutex); + return _cpuTime; +} + +Nanoseconds ResourceConsumption::getAndClearCpuTime() { + stdx::lock_guard<Mutex> lk(_mutex); + return std::exchange(_cpuTime, {}); +} } // namespace mongo diff --git a/src/mongo/db/stats/resource_consumption_metrics.h b/src/mongo/db/stats/resource_consumption_metrics.h index a4c8e02b247..2f7855dd870 100644 --- a/src/mongo/db/stats/resource_consumption_metrics.h +++ b/src/mongo/db/stats/resource_consumption_metrics.h @@ -350,21 +350,31 @@ public: void merge(OperationContext* opCtx, const std::string& dbName, const OperationMetrics& metrics); /** - * Returns a copy of the Metrics map. + * Returns a copy of the per-database metrics map. */ using MetricsMap = std::map<std::string, AggregatedMetrics>; - MetricsMap getMetrics() const; + MetricsMap getDbMetrics() const; /** - * Returns the Metrics map and then clears the contents. This attempts to swap and return the - * metrics map rather than making a full copy like getMetrics. + * Returns the per-database metrics map and then clears the contents. This attempts to swap and + * return the metrics map rather than making a full copy like getDbMetrics. */ - MetricsMap getAndClearMetrics(); + MetricsMap getAndClearDbMetrics(); + + /** + * Returns the globally-aggregated CPU time. + */ + Nanoseconds getCpuTime() const; + + /** + * Clears the existing CPU time. + */ + Nanoseconds getAndClearCpuTime(); private: - // Protects _metrics + // Protects _dbMetrics and _cpuTime mutable Mutex _mutex = MONGO_MAKE_LATCH("ResourceConsumption::_mutex"); - MetricsMap _metrics; + MetricsMap _dbMetrics; + Nanoseconds _cpuTime; }; - } // namespace mongo diff --git a/src/mongo/db/stats/resource_consumption_metrics_test.cpp b/src/mongo/db/stats/resource_consumption_metrics_test.cpp index a07e76e1ddf..fea3bb0a456 100644 --- a/src/mongo/db/stats/resource_consumption_metrics_test.cpp +++ b/src/mongo/db/stats/resource_consumption_metrics_test.cpp @@ -83,10 +83,10 @@ TEST_F(ResourceConsumptionMetricsTest, Merge) { _opCtx.get(), operationMetrics.getDbName(), operationMetrics.getMetrics()); - auto globalMetrics = globalResourceConsumption.getMetrics(); - ASSERT_EQ(globalMetrics.count("db1"), 1); - ASSERT_EQ(globalMetrics.count("db2"), 0); - ASSERT_EQ(globalMetrics.count("db3"), 0); + auto dbMetrics = globalResourceConsumption.getDbMetrics(); + ASSERT_EQ(dbMetrics.count("db1"), 1); + ASSERT_EQ(dbMetrics.count("db2"), 0); + ASSERT_EQ(dbMetrics.count("db3"), 0); operationMetrics.endScopedCollecting(); operationMetrics.beginScopedCollecting(_opCtx.get(), "db2"); @@ -95,10 +95,10 @@ TEST_F(ResourceConsumptionMetricsTest, Merge) { globalResourceConsumption.merge( _opCtx.get(), operationMetrics.getDbName(), operationMetrics.getMetrics()); - globalMetrics = globalResourceConsumption.getMetrics(); - ASSERT_EQ(globalMetrics.count("db1"), 1); - ASSERT_EQ(globalMetrics.count("db2"), 1); - ASSERT_EQ(globalMetrics.count("db3"), 0); + dbMetrics = globalResourceConsumption.getDbMetrics(); + ASSERT_EQ(dbMetrics.count("db1"), 1); + ASSERT_EQ(dbMetrics.count("db2"), 1); + ASSERT_EQ(dbMetrics.count("db3"), 0); } TEST_F(ResourceConsumptionMetricsTest, ScopedMetricsCollector) { @@ -114,7 +114,7 @@ TEST_F(ResourceConsumptionMetricsTest, ScopedMetricsCollector) { ASSERT_FALSE(operationMetrics.isCollecting()); - auto metricsCopy = globalResourceConsumption.getAndClearMetrics(); + auto metricsCopy = globalResourceConsumption.getAndClearDbMetrics(); ASSERT_EQ(metricsCopy.size(), 1); // Don't collect @@ -126,28 +126,28 @@ TEST_F(ResourceConsumptionMetricsTest, ScopedMetricsCollector) { ASSERT_FALSE(operationMetrics.isCollecting()); - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db1"), 0); // Collect { ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1"); } - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db1"), 1); // Collect on a different database { ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db2"); } - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db1"), 1); ASSERT_EQ(metricsCopy.count("db2"), 1); // Ensure fetch and clear works. - auto metrics = globalResourceConsumption.getAndClearMetrics(); + auto metrics = globalResourceConsumption.getAndClearDbMetrics(); ASSERT_EQ(metrics.count("db1"), 1); ASSERT_EQ(metrics.count("db2"), 1); - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db1"), 0); ASSERT_EQ(metricsCopy.count("db2"), 0); } @@ -173,7 +173,7 @@ TEST_F(ResourceConsumptionMetricsTest, NestedScopedMetricsCollector) { } } - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db1"), 1); ASSERT_EQ(metricsCopy.count("db2"), 0); ASSERT_EQ(metricsCopy.count("db3"), 0); @@ -199,17 +199,17 @@ TEST_F(ResourceConsumptionMetricsTest, NestedScopedMetricsCollector) { } } - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db2"), 0); ASSERT_EQ(metricsCopy.count("db3"), 0); ASSERT_EQ(metricsCopy.count("db4"), 0); // Ensure fetch and clear works. - auto metrics = globalResourceConsumption.getAndClearMetrics(); + auto metrics = globalResourceConsumption.getAndClearDbMetrics(); ASSERT_EQ(metrics.count("db1"), 1); ASSERT_EQ(metrics.count("db2"), 0); - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy.count("db1"), 0); ASSERT_EQ(metricsCopy.count("db2"), 0); } @@ -229,7 +229,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetrics) { ASSERT(operationMetrics.hasCollectedMetrics()); - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 2); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 1); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 8); @@ -249,7 +249,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetrics) { operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 512); } - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 2 + 32); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 2); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 8 + 128); @@ -274,7 +274,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsSecondary) { operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 32); } - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docBytesRead, 2); ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsRead, 1); ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryBytesRead, 8); @@ -294,7 +294,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsSecondary) { operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 512); } - metricsCopy = globalResourceConsumption.getMetrics(); + metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docBytesRead, 2 + 32); ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.docUnitsRead, 2); ASSERT_EQ(metricsCopy["db1"].secondaryReadMetrics.idxEntryBytesRead, 8 + 128); @@ -326,7 +326,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsAcrossStates) { operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 512); } - auto metricsCopy = globalResourceConsumption.getAndClearMetrics(); + auto metricsCopy = globalResourceConsumption.getAndClearDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 0); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 0); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 0); @@ -343,7 +343,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsAcrossStates) { operationMetrics.reset(); // Start collecting metrics in the secondary state, then change to primary. Metrics should be - // attributed to the secondary state only. + // attributed to the primary state only. { ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1"); @@ -361,7 +361,7 @@ TEST_F(ResourceConsumptionMetricsTest, IncrementReadMetricsAcrossStates) { operationMetrics.incrementDocUnitsReturned(_opCtx.get(), 512); } - metricsCopy = globalResourceConsumption.getAndClearMetrics(); + metricsCopy = globalResourceConsumption.getAndClearDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, 2 + 32); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, 2); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, 8 + 128); @@ -406,7 +406,7 @@ TEST_F(ResourceConsumptionMetricsTest, DocumentUnitsRead) { expectedUnits += 8; } - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docBytesRead, expectedBytes); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docUnitsRead, expectedUnits); } @@ -441,7 +441,7 @@ TEST_F(ResourceConsumptionMetricsTest, DocumentUnitsWritten) { expectedUnits += 8; } - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].writeMetrics.docBytesWritten, expectedBytes); ASSERT_EQ(metricsCopy["db1"].writeMetrics.docUnitsWritten, expectedUnits); } @@ -490,7 +490,7 @@ TEST_F(ResourceConsumptionMetricsTest, IdxEntryUnitsRead) { expectedUnits += 6; } - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryBytesRead, expectedBytes); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntryUnitsRead, expectedUnits); } @@ -539,7 +539,7 @@ TEST_F(ResourceConsumptionMetricsTest, IdxEntryUnitsWritten) { expectedUnits += 6; } - auto metricsCopy = globalResourceConsumption.getMetrics(); + auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].writeMetrics.idxEntryBytesWritten, expectedBytes); ASSERT_EQ(metricsCopy["db1"].writeMetrics.idxEntryUnitsWritten, expectedUnits); } @@ -574,8 +574,8 @@ TEST_F(ResourceConsumptionMetricsTest, CpuNanos) { ASSERT_EQ(nanos, operationMetrics.getMetrics().cpuTimer->getElapsed()); // Ensure the CPU time gets aggregated globally. - auto globalMetrics = globalResourceConsumption.getMetrics(); - ASSERT_EQ(globalMetrics["db1"].cpuNanos, nanos); + auto dbMetrics = globalResourceConsumption.getDbMetrics(); + ASSERT_EQ(dbMetrics["db1"].cpuNanos, nanos); { ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1"); @@ -584,7 +584,16 @@ TEST_F(ResourceConsumptionMetricsTest, CpuNanos) { // Ensure the aggregated CPU time increases over time. nanos += operationMetrics.getMetrics().cpuTimer->getElapsed(); - globalMetrics = globalResourceConsumption.getMetrics(); - ASSERT_EQ(globalMetrics["db1"].cpuNanos, nanos); + dbMetrics = globalResourceConsumption.getDbMetrics(); + ASSERT_EQ(dbMetrics["db1"].cpuNanos, nanos); + + // Ensure the CPU time is aggregated globally. + auto globalCpuTime = globalResourceConsumption.getCpuTime(); + ASSERT_EQ(dbMetrics["db1"].cpuNanos, globalCpuTime); + + // Ensure the CPU time can be reset. + globalResourceConsumption.getAndClearCpuTime(); + globalCpuTime = globalResourceConsumption.getCpuTime(); + ASSERT_EQ(Nanoseconds(0), globalCpuTime); } } // namespace mongo |