author    Yu Jin Kang Park <yujin.kang@mongodb.com>    2022-07-13 08:46:10 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>    2022-07-13 09:45:16 +0000
commit    8109993be980df50d937b4eec0c500605bc0dc82 (patch)
tree      10cf05ddef87a8831e5edbd6fab47a0ded61f6ef
parent    04e9dd9003ceae4fb8d4e74295d32f81a1875838 (diff)
SERVER-66502: Pause consumption metrics collection in wait for write concern.
-rw-r--r--  jstests/noPassthrough/write_concern_operation_metrics.js  117
-rw-r--r--  src/mongo/db/ops/write_ops_exec.cpp                          6
-rw-r--r--  src/mongo/db/service_entry_point_mongod.cpp                  5
-rw-r--r--  src/mongo/db/stats/resource_consumption_metrics.h           58
-rw-r--r--  src/mongo/db/stats/resource_consumption_metrics_test.cpp    40
5 files changed, 224 insertions, 2 deletions
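
In brief: the fix adds a _paused flag to the per-operation MetricsCollector, gates
isCollecting() on it, and introduces an RAII guard that sets the flag for the duration of
the write concern wait. A minimal sketch of the pattern, condensed from the header diff
below (the real class carries many more members and the full metrics API):

    // Sketch only: condensed from src/mongo/db/stats/resource_consumption_metrics.h.
    class MetricsCollector {
    public:
        bool isCollecting() const {
            // While paused, every increment helper becomes a no-op, even if a
            // scoped collector is active further down the stack.
            return !_paused && _inScopeCollecting;
        }
        void pause() { _paused = true; }    // the real code invariants against re-pausing
        void resume() { _paused = false; }  // and against resuming a non-paused collector
        bool isPaused() const { return _paused; }

    private:
        bool _inScopeCollecting = false;  // stands in for ScopedCollectionState
        bool _paused = false;
    };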
diff --git a/jstests/noPassthrough/write_concern_operation_metrics.js b/jstests/noPassthrough/write_concern_operation_metrics.js
new file mode 100644
index 00000000000..7af1e14418c
--- /dev/null
+++ b/jstests/noPassthrough/write_concern_operation_metrics.js
@@ -0,0 +1,117 @@
+/**
+ * Tests that operation metrics are not increased while waiting for write concern.
+ *
+ * @tags: [
+ * # j:true requires persistence
+ * requires_persistence,
+ * requires_replication,
+ * ]
+ */
+
+// This test reproduces the diagram below, which shows how Writer 1 can end up reading the oplog
+// entry generated by Writer 2 when waiting for write concern. In serverless environments this can
+// cause a user to be improperly billed for RPUs incurred by reading large entries written by
+// other tenants.
+//
+// | Writer 1          | Writer 2     |
+// |-------------------+--------------|
+// | BeginTxn          |              |
+// | Timestamp 10      |              |
+// |                   | BeginTxn     |
+// | Write A           |              |
+// |                   | Update B     |
+// |                   | Timestamp 11 |
+// | Commit            |              |
+// | OnCommit hooks    |              |
+// |                   | Commit       |
+// | WaitForTopOfOplog |              |
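+//
+// By the time Writer 1 waits for write concern with j: true, the top of the oplog is
+// Writer 2's large entry at timestamp 11 rather than Writer 1's own entry at timestamp
+// 10, so any bytes read during that wait would be charged to Writer 1's operation.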
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+// Returns metrics aggregated by database name.
+const getDBMetrics = (adminDB) => {
+    const cursor = adminDB.aggregate([{$operationMetrics: {}}]);
+    let allMetrics = {};
+    while (cursor.hasNext()) {
+        let doc = cursor.next();
+        // Remove the localTime field: it changes on every read, which would prevent us
+        // from comparing metrics objects directly.
+        delete doc.localTime;
+        allMetrics[doc.db] = doc;
+    }
+    return allMetrics;
+};
+
+const setParams = {
+ "aggregateOperationResourceConsumptionMetrics": true,
+};
+
+const replSet = new ReplSetTest({nodes: 1, nodeOptions: {setParameter: setParams}});
+replSet.startSet();
+replSet.initiate();
+
+const primary = replSet.getPrimary();
+const adminDB = primary.getDB('admin');
+
+const db1Name = "db1";
+const db1 = primary.getDB(db1Name);
+
+// Create coll to avoid implicit creation.
+db1.createCollection("coll");
+// Insert document to be updated by Writer 2.
+primary.getDB("otherDB").othercoll.insert({_id: 1, a: 'a'});
+
+var doInsert = function() {
+ jsTestLog("Writer 1 performing an insert.");
+ assert.commandWorked(
+ db.getSiblingDB("db1").coll.insertOne({a: 'a'}, {writeConcern: {w: "majority", j: true}}));
+};
+
+function doUpdate() {
+ jsTestLog("Writer 2 performing an update.");
+ // Write a large record which is going to be the top of the oplog.
+ assert.commandWorked(
+ db.getSiblingDB("otherDB").othercoll.update({_id: 1}, {a: 'a'.repeat(100 * 1024)}));
+}
+
+const hangAfterReserveOptime =
+    configureFailPoint(primary,
+                       "hangAndFailAfterDocumentInsertsReserveOpTimes",
+                       {collectionNS: db1.coll.getFullName(), skipFail: true});
+
+var joinWriter1 = startParallelShell(doInsert, primary.port);
+// Ensure Writer 1 has fetched the op time before starting up Writer 2.
+hangAfterReserveOptime.wait();
+
+// We want Writer 2 to perform the update after Writer 1 has reserved its optime.
+const hangAfterUpdate = configureFailPoint(primary, "hangAfterBatchUpdate");
+var joinWriter2 = startParallelShell(doUpdate, primary.port);
+hangAfterUpdate.wait();
+
+// Stop the primary from calling into awaitReplication().
+const hangBeforeWaitingForWriteConcern =
+    configureFailPoint(primary, "hangBeforeWaitingForWriteConcern");
+
+// Unblock Writer 1 so it performs the insert and then blocks on hangBeforeWaitingForWriteConcern.
+hangAfterReserveOptime.off();
+// Unblock Writer 2, which should commit after Writer 1.
+hangAfterUpdate.off();
+
+// Wait for Writer 1 to block on the write concern failpoint, then unblock it.
+hangBeforeWaitingForWriteConcern.wait();
+hangBeforeWaitingForWriteConcern.off();
+
+joinWriter1();
+joinWriter2();
+
+const metrics = getDBMetrics(adminDB);
+jsTestLog(metrics);
+// docBytesRead should be much smaller than 100KB; a threshold of 10KB should be enough.
+assert.lt(metrics.db1.primaryMetrics.docBytesRead,
+          10 * 1024,
+          "Writer 1 wait for write concern caused undue consumption metrics increase.");
+
+replSet.stopSet();
+})();
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index 8e02cf04ec7..53c7f0c8a7a 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -373,7 +373,11 @@ void insertDocuments(OperationContext* opCtx,
     hangAndFailAfterDocumentInsertsReserveOpTimes.executeIf(
         [&](const BSONObj& data) {
             hangAndFailAfterDocumentInsertsReserveOpTimes.pauseWhileSet(opCtx);
-            uasserted(51269, "hangAndFailAfterDocumentInsertsReserveOpTimes fail point enabled");
+            const auto skipFail = data["skipFail"];
+            if (!skipFail || !skipFail.boolean()) {
+                uasserted(51269,
+                          "hangAndFailAfterDocumentInsertsReserveOpTimes fail point enabled");
+            }
         },
         [&](const BSONObj& data) {
             // Check if the failpoint specifies no collection or matches the existing one.
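
With skipFail: true, a test can reuse this failpoint purely as a synchronization point: the
inserting thread still hangs while the failpoint is set, but no longer fails with error 51269
once it is released. The new jstest above relies on this to hold Writer 1 after it reserves
its optime without aborting the insert.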
diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp
index 9d6102f7481..4519167240c 100644
--- a/src/mongo/db/service_entry_point_mongod.cpp
+++ b/src/mongo/db/service_entry_point_mongod.cpp
@@ -125,6 +125,11 @@ public:
             return;
         }
 
+        // Do not increase consumption metrics during the wait for write concern: in serverless,
+        // this might cause a tenant to be billed for reading another tenant's oplog entry, which
+        // might be of considerable size.
+        ResourceConsumption::PauseMetricsCollectorBlock pauseMetricsCollection(opCtx);
+
         auto lastOpAfterRun = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
 
         auto waitForWriteConcernAndAppendStatus = [&]() {
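
The guard is purely scope-based: metrics stay paused from the declaration above until the
enclosing function returns, which covers the entire replication wait. A hedged sketch of the
lifetime (the function body here is illustrative, not the actual mongod code):

    void waitForWriteConcern(OperationContext* opCtx) {
        // From here to the end of the scope, this operation's MetricsCollector
        // ignores all metric increments (CPU metrics excepted).
        ResourceConsumption::PauseMetricsCollectorBlock pauseMetricsCollection(opCtx);

        // ... wait for replication; may read the (possibly large) top oplog entry ...
    }  // destructor resumes collection, unless it was already paused on entry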
diff --git a/src/mongo/db/stats/resource_consumption_metrics.h b/src/mongo/db/stats/resource_consumption_metrics.h
index 9acba648b1e..f41042b29ab 100644
--- a/src/mongo/db/stats/resource_consumption_metrics.h
+++ b/src/mongo/db/stats/resource_consumption_metrics.h
@@ -301,7 +301,7 @@ public:
         bool endScopedCollecting();
 
         bool isCollecting() const {
-            return _collecting == ScopedCollectionState::kInScopeCollecting;
+            return !_paused && _collecting == ScopedCollectionState::kInScopeCollecting;
         }
 
         bool isInScope() const {
@@ -391,6 +391,30 @@ public:
          */
         void incrementOneCursorSeek(StringData uri);
 
+        /**
+         * Pause metrics collection, overriding the kInScopeCollecting state. The scope state
+         * may change during a pause, but will not take effect until resume() is called.
+         */
+        void pause() {
+            invariant(!_paused);
+            _paused = true;
+        }
+
+        /**
+         * Resume metrics collection. Attempting to resume a non-paused object trips an
+         * invariant.
+         */
+        void resume() {
+            invariant(_paused);
+            _paused = false;
+        }
+
+        /**
+         * Returns whether the object is currently paused.
+         */
+        bool isPaused() const {
+            return _paused;
+        }
+
     private:
         // Privatize copy constructors to prevent callers from accidentally copying when this is
         // decorated on the OperationContext by reference.
@@ -418,6 +442,7 @@ public:
         bool _hasCollectedMetrics = false;
         std::string _dbName;
         OperationMetrics _metrics;
+        bool _paused = false;
     };
/**
@@ -440,6 +465,37 @@ public:
};
     /**
+     * RAII-style class to temporarily pause the MetricsCollector in the OperationContext. This
+     * applies even if the MetricsCollector was started explicitly at a lower level.
+     *
+     * Exception: CPU metrics are not paused.
+     */
+    class PauseMetricsCollectorBlock {
+        PauseMetricsCollectorBlock(const PauseMetricsCollectorBlock&) = delete;
+        PauseMetricsCollectorBlock& operator=(const PauseMetricsCollectorBlock&) = delete;
+
+    public:
+        explicit PauseMetricsCollectorBlock(OperationContext* opCtx) : _opCtx(opCtx) {
+            auto& metrics = MetricsCollector::get(_opCtx);
+            _wasPaused = metrics.isPaused();
+            if (!_wasPaused) {
+                metrics.pause();
+            }
+        }
+
+        ~PauseMetricsCollectorBlock() {
+            if (!_wasPaused) {
+                auto& metrics = MetricsCollector::get(_opCtx);
+                metrics.resume();
+            }
+        }
+
+    private:
+        OperationContext* _opCtx;
+        bool _wasPaused;
+    };
+
+    /**
      * Returns whether the database's metrics should be collected.
      */
     static bool shouldCollectMetricsForDatabase(StringData dbName) {
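
Because the constructor records whether the collector was already paused, the guard nests
safely: only the outermost guard resumes collection on destruction. A sketch of the
semantics, assuming an opCtx whose collector is actively collecting:

    {
        ResourceConsumption::PauseMetricsCollectorBlock outer(opCtx);      // pauses collection
        {
            ResourceConsumption::PauseMetricsCollectorBlock inner(opCtx);  // no-op: already paused
        }  // inner's destructor sees _wasPaused == true and does not resume
        // collection is still paused here
    }  // outer's destructor resumes collection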
diff --git a/src/mongo/db/stats/resource_consumption_metrics_test.cpp b/src/mongo/db/stats/resource_consumption_metrics_test.cpp
index 25b30274e23..9c992bca916 100644
--- a/src/mongo/db/stats/resource_consumption_metrics_test.cpp
+++ b/src/mongo/db/stats/resource_consumption_metrics_test.cpp
@@ -722,4 +722,44 @@ TEST_F(ResourceConsumptionMetricsTest, CursorSeeks) {
     auto metricsCopy = globalResourceConsumption.getDbMetrics();
     ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, expectedSeeks);
 }
+
+TEST_F(ResourceConsumptionMetricsTest, PauseMetricsCollectorBlock) {
+    auto& globalResourceConsumption = ResourceConsumption::get(getServiceContext());
+    auto& operationMetrics = ResourceConsumption::MetricsCollector::get(_opCtx.get());
+
+    {
+        ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1");
+        {
+            // Metric increases within this scope should not be applied.
+            ResourceConsumption::PauseMetricsCollectorBlock pauseMetricsCollection(_opCtx.get());
+            operationMetrics.incrementOneDocRead("", 2);
+            operationMetrics.incrementOneIdxEntryRead("", 8);
+            operationMetrics.incrementKeysSorted(16);
+            operationMetrics.incrementSorterSpills(32);
+            operationMetrics.incrementDocUnitsReturned("", makeDocUnits(64));
+            operationMetrics.incrementOneCursorSeek("");
+        }
+
+        // Once the PauseMetricsCollectorBlock goes out of scope, metrics collection should
+        // work as normal.
+        operationMetrics.incrementOneDocRead("", 4);
+        operationMetrics.incrementOneIdxEntryRead("", 16);
+        operationMetrics.incrementKeysSorted(32);
+        operationMetrics.incrementSorterSpills(64);
+        operationMetrics.incrementDocUnitsReturned("", makeDocUnits(128));
+        operationMetrics.incrementOneCursorSeek("");
+    }
+
+    auto metricsCopy = globalResourceConsumption.getDbMetrics();
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.bytes(), 4);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.units(), 1);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.bytes(), 16);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.units(), 1);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 32);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.sorterSpills, 64);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.bytes(), 128);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.units(), 1);
+    ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, 1);
+}
+
} // namespace mongo