author     Louis Williams <louis.williams@mongodb.com>       2023-03-28 07:29:27 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-04-14 10:53:47 +0000
commit     ff5e2fc2fde5edf9ccd7c9814734651996f45851
tree       3aae99c42688830d334b5e02ffed6593fd296622
parent     ad66db6e43327ab2763c1d51b74acde64480d97b
SERVER-75277 Clear resource consumption metrics before each TTL index pass (tag: r6.3.1-rc0)
(cherry picked from commit 8e744478f8569c1ead0ffe7b725c89983c886ac6)
-rw-r--r--  etc/backports_required_for_multiversion_tests.yml           4
-rw-r--r--  jstests/noPassthrough/ttl_operation_metrics_multi_dbs.js  148
-rw-r--r--  src/mongo/db/stats/resource_consumption_metrics.cpp        17
-rw-r--r--  src/mongo/db/stats/resource_consumption_metrics.h           4
-rw-r--r--  src/mongo/db/stats/resource_consumption_metrics_test.cpp   61
5 files changed, 233 insertions(+), 1 deletion(-)
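The core of the change is in MetricsCollector::beginScopedCollecting(): metrics left over from a previous scoped collection on the same operation are now cleared before a new collection begins, so work attributed to one database (for example, one TTL deletion pass) no longer leaks into the totals reported for the next database. The following is a minimal, self-contained sketch of that reset-at-begin pattern under simplified assumptions; ToyCollector and ToyMetrics are hypothetical stand-ins, not MongoDB's actual classes.

```cpp
// Minimal sketch of the reset-at-begin pattern applied by this patch.
// ToyCollector/ToyMetrics are hypothetical stand-ins, not MongoDB's classes.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>

struct ToyMetrics {
    std::int64_t docBytesWritten = 0;
};

class ToyCollector {
public:
    void beginScopedCollecting(const std::string& dbName) {
        _dbName = dbName;
        _collecting = true;
        // The fix: start every scoped collection from a clean slate. Clearing
        // here (rather than in endScopedCollecting) keeps the metrics readable
        // after a collection period ends, which consumers rely on.
        _metrics = {};
    }

    void endScopedCollecting() {
        _collecting = false;
        // Intentionally leave _metrics intact so callers can still report them.
    }

    void incrementDocBytesWritten(std::int64_t bytes) {
        if (_collecting)
            _metrics.docBytesWritten += bytes;
    }

    const ToyMetrics& metrics() const {
        return _metrics;
    }

private:
    std::string _dbName;
    bool _collecting = false;
    ToyMetrics _metrics;
};

int main() {
    // One collector reused across passes, as with a long-lived operation such
    // as the TTL monitor visiting several databases.
    ToyCollector collector;

    collector.beginScopedCollecting("db1");
    collector.incrementDocBytesWritten(58);  // TTL deletions on db1.
    collector.endScopedCollecting();
    assert(collector.metrics().docBytesWritten == 58);

    collector.beginScopedCollecting("db2");
    // No TTL work on db2. Without the reset in beginScopedCollecting, db1's 58
    // bytes would still sit in _metrics and be attributed to db2's totals.
    collector.endScopedCollecting();
    assert(collector.metrics().docBytesWritten == 0);

    std::cout << "db2 docBytesWritten: " << collector.metrics().docBytesWritten << "\n";
    return 0;
}
```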
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index f8f1b94de15..bb2261f573c 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -324,6 +324,8 @@ last-continuous:
ticket: SERVER-73482
- test_file: jstests/replsets/session_cache_refresh_write_error_fail.js
ticket: SERVER-73229
+ - test_file: jstests/noPassthrough/ttl_operation_metrics_multi_dbs.js
+ ticket: SERVER-75277
suites: null
last-lts:
all:
@@ -719,4 +721,6 @@ last-lts:
ticket: SERVER-73482
- test_file: jstests/replsets/session_cache_refresh_write_error_fail.js
ticket: SERVER-73229
+ - test_file: jstests/noPassthrough/ttl_operation_metrics_multi_dbs.js
+ ticket: SERVER-75277
suites: null
diff --git a/jstests/noPassthrough/ttl_operation_metrics_multi_dbs.js b/jstests/noPassthrough/ttl_operation_metrics_multi_dbs.js
new file mode 100644
index 00000000000..e6e1bfefdf3
--- /dev/null
+++ b/jstests/noPassthrough/ttl_operation_metrics_multi_dbs.js
@@ -0,0 +1,148 @@
+/**
+ * Tests resource consumption metrics for TTL indexes on multiple databases.
+ *
+ * @tags: [
+ * requires_replication,
+ * ]
+ */
+(function() {
+'use strict';
+
+load('jstests/noPassthrough/libs/index_build.js'); // For IndexBuildTest
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/ttl_util.js");
+
+var rst = new ReplSetTest({
+ nodes: 2,
+ nodeOptions: {
+ setParameter: {
+ "aggregateOperationResourceConsumptionMetrics": true,
+ "ttlMonitorSleepSecs": 1,
+ }
+ }
+});
+rst.startSet();
+rst.initiate();
+
+const dbName1 = 'db1';
+const dbName2 = 'db2';
+const collName = 'coll';
+const primary = rst.getPrimary();
+const secondary = rst.getSecondary();
+const primaryDB1 = primary.getDB(dbName1);
+const primaryDB2 = primary.getDB(dbName2);
+
+const clearMetrics = (conn) => {
+ conn.getDB('admin').aggregate([{$operationMetrics: {clearMetrics: true}}]);
+};
+
+// Get aggregated metrics keyed by database name.
+const getMetrics = (conn) => {
+ const cursor = conn.getDB('admin').aggregate([{$operationMetrics: {}}]);
+
+ let allMetrics = {};
+ while (cursor.hasNext()) {
+ let doc = cursor.next();
+ allMetrics[doc.db] = doc;
+ }
+ return allMetrics;
+};
+
+const assertMetrics = (conn, assertFn) => {
+ let metrics = getMetrics(conn);
+ try {
+ assertFn(metrics);
+ } catch (e) {
+ print("caught exception while checking metrics on " + tojson(conn) +
+ ", metrics: " + tojson(metrics));
+ throw e;
+ }
+};
+
+// Create identical TTL indexes on both databases with immediate expiry.
+assert.commandWorked(primaryDB1[collName].createIndex({x: 1}, {expireAfterSeconds: 0}));
+assert.commandWorked(primaryDB2[collName].createIndex({x: 1}, {expireAfterSeconds: 0}));
+
+const pauseTtl = configureFailPoint(primary, 'hangTTLMonitorWithLock');
+pauseTtl.wait();
+
+clearMetrics(primary);
+
+// On DB 1 we expect all documents to be deleted.
+const now = new Date();
+assert.commandWorked(primaryDB1[collName].insert({_id: 0, x: now}));
+assert.commandWorked(primaryDB1[collName].insert({_id: 1, x: now}));
+
+// On DB2 we expect no documents to be deleted.
+const later = new Date(now.getTime() + 1000 * 60 * 60);
+assert.commandWorked(primaryDB2[collName].insert({_id: 0, x: later}));
+assert.commandWorked(primaryDB2[collName].insert({_id: 1, x: later}));
+
+assertMetrics(primary, (metrics) => {
+ // With replication enabled, oplog writes also count towards bytes written, so only
+ // assert that at least as many bytes as the inserted documents were written.
+ // Each document is 29 bytes.
+ assert.gte(metrics[dbName1].docBytesWritten, 29 * 2);
+ assert.gte(metrics[dbName1].docUnitsWritten, 2);
+ assert.gte(metrics[dbName1].totalUnitsWritten, 2);
+
+ assert.gte(metrics[dbName2].docBytesWritten, 29 * 2);
+ assert.gte(metrics[dbName2].docUnitsWritten, 2);
+ assert.gte(metrics[dbName2].totalUnitsWritten, 2);
+});
+
+// Clear metrics and wait for a TTL pass to delete the documents.
+clearMetrics(primary);
+pauseTtl.off();
+TTLUtil.waitForPass(primaryDB1);
+
+// Ensure that the TTL monitor deleted 2 documents on the primary and recorded read and write
+// metrics.
+assertMetrics(primary, (metrics) => {
+ // The TTL monitor generates oplog entries for each deletion on the primary, so assert
+ // that at least as many bytes as the deleted documents were written. Each document is
+ // 29 bytes.
+ assert.gte(metrics[dbName1].primaryMetrics.docBytesRead, 29 * 2);
+ assert.gte(metrics[dbName1].primaryMetrics.docUnitsRead, 2);
+ assert.gte(metrics[dbName1].docBytesWritten, 29 * 2);
+ assert.gte(metrics[dbName1].docUnitsWritten, 2);
+ assert.gte(metrics[dbName1].totalUnitsWritten, 2);
+ // Key size is 12 bytes.
+ assert.gte(metrics[dbName1].primaryMetrics.idxEntryBytesRead, 12 * 2);
+ assert.gte(metrics[dbName1].primaryMetrics.idxEntryUnitsRead, 2);
+ // At least 2 keys (_id and x) should be deleted for each document.
+ assert.gte(metrics[dbName1].idxEntryUnitsWritten, 2 * 2);
+ assert.gte(metrics[dbName1].idxEntryBytesWritten, 12 * 2);
+
+ assert.eq(metrics[dbName2].primaryMetrics.docBytesRead, 0);
+ assert.eq(metrics[dbName2].primaryMetrics.docUnitsRead, 0);
+ assert.eq(metrics[dbName2].docBytesWritten, 0);
+ assert.eq(metrics[dbName2].docUnitsWritten, 0);
+ assert.eq(metrics[dbName2].totalUnitsWritten, 0);
+
+ // We need to read in a few keys to determine whether there is data to delete. Since we haven't
+ // stopped the TTL monitor, the value can be larger than expected.
+ assert.gte(metrics[dbName2].primaryMetrics.idxEntryBytesRead, 24);
+ assert.gte(metrics[dbName2].primaryMetrics.idxEntryUnitsRead, 2);
+ assert.eq(metrics[dbName2].idxEntryUnitsWritten, 0);
+ assert.eq(metrics[dbName2].idxEntryBytesWritten, 0);
+});
+
+rst.awaitReplication();
+
+// There should be no activity on the secondary.
+assertMetrics(secondary, (metrics) => {
+ assert(!metrics.hasOwnProperty(dbName1));
+ assert(!metrics.hasOwnProperty(dbName2));
+});
+
+// Ensure the correct documents were deleted.
+assert.eq(primaryDB1[collName].count({}), 0);
+assert.eq(primaryDB2[collName].count({}), 2);
+
+const secondaryDB1 = secondary.getDB(dbName1);
+const secondaryDB2 = secondary.getDB(dbName2);
+assert.eq(secondaryDB1[collName].count({}), 0);
+assert.eq(secondaryDB2[collName].count({}), 2);
+
+rst.stopSet();
+}());
diff --git a/src/mongo/db/stats/resource_consumption_metrics.cpp b/src/mongo/db/stats/resource_consumption_metrics.cpp
index 1fccd04fc66..bedfcdb3f2f 100644
--- a/src/mongo/db/stats/resource_consumption_metrics.cpp
+++ b/src/mongo/db/stats/resource_consumption_metrics.cpp
@@ -211,6 +211,12 @@ void ResourceConsumption::OperationMetrics::toBson(BSONObjBuilder* builder) cons
}
}
+BSONObj ResourceConsumption::OperationMetrics::toBson() const {
+ BSONObjBuilder builder;
+ toBson(&builder);
+ return builder.obj();
+}
+
void ResourceConsumption::OperationMetrics::toBsonNonZeroFields(BSONObjBuilder* builder) const {
appendNonZeroMetric(builder, kDocBytesRead, readMetrics.docsRead.bytes());
appendNonZeroMetric(builder, kDocUnitsRead, readMetrics.docsRead.units());
@@ -328,6 +334,11 @@ void ResourceConsumption::MetricsCollector::beginScopedCollecting(OperationConte
_collecting = ScopedCollectionState::kInScopeCollecting;
_hasCollectedMetrics = true;
+ // We must clear the metrics here to ensure we do not accumulate metrics from previous scoped
+ // collections. Note that we can't clear metrics in endScopedCollecting() because consumers
+ // expect metrics to be available after a scoped collection period has ended.
+ _metrics = {};
+
// The OperationCPUTimer may be nullptr on unsupported systems.
_metrics.cpuTimer = OperationCPUTimer::get(opCtx);
if (_metrics.cpuTimer) {
@@ -408,6 +419,12 @@ void ResourceConsumption::merge(OperationContext* opCtx,
const OperationMetrics& metrics) {
invariant(!dbName.empty());
+ LOGV2_DEBUG(7527700,
+ 1,
+ "ResourceConsumption::merge",
+ "dbName"_attr = dbName,
+ "metrics"_attr = metrics.toBson());
+
// All metrics over the duration of this operation will be attributed to the current state, even
// if it ran across state transitions.
// The RSTL is normally required to check the replication state, but callers may not always be
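The remaining changes in resource_consumption_metrics.cpp are debugging aids: a value-returning toBson() overload that wraps the existing builder-based one, and a LOGV2_DEBUG call in merge() that logs the database name and the metrics being merged. Below is a generic sketch of that builder-plus-convenience-overload pattern; Obj and ObjBuilder are hypothetical stand-ins for BSONObj and BSONObjBuilder, not MongoDB's code.

```cpp
// Generic sketch of the builder-plus-convenience-overload pattern used for
// the new toBson(). Obj/ObjBuilder are hypothetical stand-ins.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

struct Obj {
    std::map<std::string, std::int64_t> fields;
};

class ObjBuilder {
public:
    void append(const std::string& name, std::int64_t value) {
        _obj.fields[name] = value;
    }
    Obj obj() {
        return _obj;
    }

private:
    Obj _obj;
};

struct Metrics {
    std::int64_t docBytesRead = 0;

    // Existing style: the caller owns the builder and may append more fields
    // around the metrics.
    void toObj(ObjBuilder* builder) const {
        builder->append("docBytesRead", docBytesRead);
    }

    // Convenience overload: build and return a finished object, handy for
    // one-off uses such as a structured debug-log attribute.
    Obj toObj() const {
        ObjBuilder builder;
        toObj(&builder);
        return builder.obj();
    }
};

int main() {
    Metrics metrics;
    metrics.docBytesRead = 58;
    Obj obj = metrics.toObj();
    std::cout << "docBytesRead: " << obj.fields["docBytesRead"] << "\n";
    return 0;
}
```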
diff --git a/src/mongo/db/stats/resource_consumption_metrics.h b/src/mongo/db/stats/resource_consumption_metrics.h
index f41042b29ab..19b3c67c234 100644
--- a/src/mongo/db/stats/resource_consumption_metrics.h
+++ b/src/mongo/db/stats/resource_consumption_metrics.h
@@ -219,6 +219,7 @@ public:
* Reports all metrics on a BSONObjBuilder.
*/
void toBson(BSONObjBuilder* builder) const;
+ BSONObj toBson() const;
/**
* Reports metrics on a BSONObjBuilder. Only non-zero fields are reported.
@@ -280,7 +281,8 @@ public:
static MetricsCollector& get(OperationContext* opCtx);
/**
- * When called, resource consumption metrics should be recorded for this operation.
+ * When called, resource consumption metrics should be recorded for this operation. Clears
+ * any metrics from previous collection periods.
*/
void beginScopedCollecting(OperationContext* opCtx, const std::string& dbName);
diff --git a/src/mongo/db/stats/resource_consumption_metrics_test.cpp b/src/mongo/db/stats/resource_consumption_metrics_test.cpp
index 9c992bca916..155bb688e11 100644
--- a/src/mongo/db/stats/resource_consumption_metrics_test.cpp
+++ b/src/mongo/db/stats/resource_consumption_metrics_test.cpp
@@ -762,4 +762,65 @@ TEST_F(ResourceConsumptionMetricsTest, PauseMetricsCollectorBlock) {
ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, 1);
}
+TEST_F(ResourceConsumptionMetricsTest, ResetMetricsBetweenCollection) {
+ auto& globalResourceConsumption = ResourceConsumption::get(getServiceContext());
+ auto& operationMetrics = ResourceConsumption::MetricsCollector::get(_opCtx.get());
+
+ {
+ ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1");
+
+ operationMetrics.incrementOneDocRead("", 2);
+ operationMetrics.incrementOneIdxEntryRead("", 4);
+ operationMetrics.incrementKeysSorted(8);
+ operationMetrics.incrementSorterSpills(16);
+ operationMetrics.incrementDocUnitsReturned("", makeDocUnits(32));
+ operationMetrics.incrementOneCursorSeek("");
+ }
+
+ auto metricsCopy = globalResourceConsumption.getAndClearDbMetrics();
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.bytes(), 2);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.units(), 1);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.bytes(), 4);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.units(), 1);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 8);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.sorterSpills, 16);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.bytes(), 32);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.units(), 1);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, 1);
+
+ // We expect this metrics collection to wipe out the metrics from the previous one.
+ {
+ ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db2");
+ operationMetrics.incrementOneDocRead("", 64);
+ operationMetrics.incrementOneIdxEntryRead("", 128);
+ operationMetrics.incrementKeysSorted(256);
+ operationMetrics.incrementSorterSpills(512);
+ operationMetrics.incrementDocUnitsReturned("", makeDocUnits(1024));
+ operationMetrics.incrementOneCursorSeek("");
+ }
+
+ metricsCopy = globalResourceConsumption.getDbMetrics();
+
+ // We should not have any of the metrics from the first collection.
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.bytes(), 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.units(), 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.bytes(), 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.units(), 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.sorterSpills, 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.bytes(), 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.units(), 0);
+ ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, 0);
+
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.docsRead.bytes(), 64);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.docsRead.units(), 1);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.idxEntriesRead.bytes(), 128);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.idxEntriesRead.units(), 8);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.keysSorted, 256);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.sorterSpills, 512);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.docsReturned.bytes(), 1024);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.docsReturned.units(), 8);
+ ASSERT_EQ(metricsCopy["db2"].primaryReadMetrics.cursorSeeks, 1);
+}
+
} // namespace mongo