diff options
author | Yu Jin Kang Park <yujin.kang@mongodb.com> | 2022-07-13 08:46:10 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-13 09:45:16 +0000 |
commit | 8109993be980df50d937b4eec0c500605bc0dc82 (patch) | |
tree | 10cf05ddef87a8831e5edbd6fab47a0ded61f6ef | |
parent | 04e9dd9003ceae4fb8d4e74295d32f81a1875838 (diff) | |
download | mongo-8109993be980df50d937b4eec0c500605bc0dc82.tar.gz |
SERVER-66502: Pause consumption metrics collection in wait for write concern.
-rw-r--r-- | jstests/noPassthrough/write_concern_operation_metrics.js | 117 | ||||
-rw-r--r-- | src/mongo/db/ops/write_ops_exec.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/service_entry_point_mongod.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics.h | 58 | ||||
-rw-r--r-- | src/mongo/db/stats/resource_consumption_metrics_test.cpp | 40 |
5 files changed, 224 insertions, 2 deletions
diff --git a/jstests/noPassthrough/write_concern_operation_metrics.js b/jstests/noPassthrough/write_concern_operation_metrics.js new file mode 100644 index 00000000000..7af1e14418c --- /dev/null +++ b/jstests/noPassthrough/write_concern_operation_metrics.js @@ -0,0 +1,117 @@ +/** + * Tests that operation metrics are not increased while waiting for write concern. + * + * @tags: [ + * # j:true requires persistence + * requires_persistence, + * requires_replication, + * ] + */ + +// This test reproduces the diagram below, which shows how Writer 1 can end up reading the oplog +// entry generated by Writer 2 when waiting for write concern. On serverless environments this can +// cause a user to be improperly billed for RPUs caused by reading large entries written by +// other tenants. +// +// | Writer 1 | Writer 2 | +// |-------------------+--------------| +// | BeginTxn | | +// | Timestamp 10 | | +// | | BeginTxn | +// | Write A | | +// | | Update B | +// | | Timestamp 11 | +// | Commit | | +// | OnCommit hooks | | +// | | Commit | +// | WaitForTopOfOplog | | + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); + +// Returns metrics aggregated by database name. +const getDBMetrics = (adminDB) => { + const cursor = adminDB.aggregate([{$operationMetrics: {}}]); + let allMetrics = {}; + while (cursor.hasNext()) { + let doc = cursor.next(); + // Remove localTime field as it prevents us from comparing objects since it always changes. + delete doc.localTime; + allMetrics[doc.db] = doc; + } + return allMetrics; +}; + +const setParams = { + "aggregateOperationResourceConsumptionMetrics": true, +}; + +const replSet = new ReplSetTest({nodes: 1, nodeOptions: {setParameter: setParams}}); +replSet.startSet(); +replSet.initiate(); + +const primary = replSet.getPrimary(); +const adminDB = primary.getDB('admin'); + +const db1Name = "db1"; +const db1 = primary.getDB(db1Name); + +// Create coll to avoid implicit creation. 
+db1.createCollection("coll"); +// Insert document to be updated by Writer 2. +primary.getDB("otherDB").othercoll.insert({_id: 1, a: 'a'}); + +var doInsert = function() { + jsTestLog("Writer 1 performing an insert."); + assert.commandWorked( + db.getSiblingDB("db1").coll.insertOne({a: 'a'}, {writeConcern: {w: "majority", j: true}})); +}; + +function doUpdate() { + jsTestLog("Writer 2 performing an update."); + // Write a large record which is going to be the top of the oplog. + assert.commandWorked( + db.getSiblingDB("otherDB").othercoll.update({_id: 1}, {a: 'a'.repeat(100 * 1024)})); +} + +const hangAfterReserveOptime = + configureFailPoint(primary, + "hangAndFailAfterDocumentInsertsReserveOpTimes", + {collectionNS: db1.coll.getFullName(), skipFail: true}); + +var joinWriter1 = startParallelShell(doInsert, primary.port); +// Ensure Writer 1 has fetched the op time before starting up Writer 2. +hangAfterReserveOptime.wait(); + +// We want Writer 2 to perform the update after Writer 1 has fetched the oplog. +const hangAfterUpdate = configureFailPoint(primary, "hangAfterBatchUpdate"); +var joinWriter2 = startParallelShell(doUpdate, primary.port); +hangAfterUpdate.wait(); + +// Stop the primary from calling into awaitReplication() +const hangBeforeWaitingForWriteConcern = + configureFailPoint(primary, "hangBeforeWaitingForWriteConcern"); + +// Unblock Writer 1 so it performs the insert and then blocks on hangBeforeWaitingForWriteConcern. +hangAfterReserveOptime.off(); +// Unblock Writer 2, which should commit after Writer 1. +hangAfterUpdate.off(); + +// Unblock write concern wait. +hangBeforeWaitingForWriteConcern.wait(); +hangBeforeWaitingForWriteConcern.off(); + +joinWriter1(); +joinWriter2(); + +const metrics = getDBMetrics(adminDB); +jsTestLog(metrics); +// docBytesRead should be much smaller than 100kb. A threshold at 10kb should be enough. 
+assert.lt(metrics.db1.primaryMetrics.docBytesRead, + 10 * 1024, + "Writer 1 wait for write concern caused undue consumption metrics increase."); + +replSet.stopSet(); +})(); diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp index 8e02cf04ec7..53c7f0c8a7a 100644 --- a/src/mongo/db/ops/write_ops_exec.cpp +++ b/src/mongo/db/ops/write_ops_exec.cpp @@ -373,7 +373,11 @@ void insertDocuments(OperationContext* opCtx, hangAndFailAfterDocumentInsertsReserveOpTimes.executeIf( [&](const BSONObj& data) { hangAndFailAfterDocumentInsertsReserveOpTimes.pauseWhileSet(opCtx); - uasserted(51269, "hangAndFailAfterDocumentInsertsReserveOpTimes fail point enabled"); + const auto skipFail = data["skipFail"]; + if (!skipFail || !skipFail.boolean()) { + uasserted(51269, + "hangAndFailAfterDocumentInsertsReserveOpTimes fail point enabled"); + } }, [&](const BSONObj& data) { // Check if the failpoint specifies no collection or matches the existing one. diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp index 9d6102f7481..4519167240c 100644 --- a/src/mongo/db/service_entry_point_mongod.cpp +++ b/src/mongo/db/service_entry_point_mongod.cpp @@ -125,6 +125,11 @@ public: return; } + // Do not increase consumption metrics during wait for write concern, as in serverless this + // might cause a tenant to be billed for reading the oplog entry (which might be of + // considerable size) of another tenant. 
+ ResourceConsumption::PauseMetricsCollectorBlock pauseMetricsCollection(opCtx); + auto lastOpAfterRun = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); auto waitForWriteConcernAndAppendStatus = [&]() { diff --git a/src/mongo/db/stats/resource_consumption_metrics.h b/src/mongo/db/stats/resource_consumption_metrics.h index 9acba648b1e..f41042b29ab 100644 --- a/src/mongo/db/stats/resource_consumption_metrics.h +++ b/src/mongo/db/stats/resource_consumption_metrics.h @@ -301,7 +301,7 @@ public: bool endScopedCollecting(); bool isCollecting() const { - return _collecting == ScopedCollectionState::kInScopeCollecting; + return !_paused && _collecting == ScopedCollectionState::kInScopeCollecting; } bool isInScope() const { @@ -391,6 +391,30 @@ public: */ void incrementOneCursorSeek(StringData uri); + /** + * Pause metrics collection, overriding kInScopeCollecting status. The scope status may be + * changed during a pause, but will not come into effect until resume() is called. + */ + void pause() { + invariant(!_paused); + _paused = true; + } + + /** + * Resume metrics collection. Trying to resume a non-paused object will invariant. + */ + void resume() { + invariant(_paused); + _paused = false; + } + + /** + * Returns if the current object is in paused state. + */ + bool isPaused() { + return _paused; + } + private: // Privatize copy constructors to prevent callers from accidentally copying when this is // decorated on the OperationContext by reference. @@ -418,6 +442,7 @@ public: bool _hasCollectedMetrics = false; std::string _dbName; OperationMetrics _metrics; + bool _paused = false; }; /** @@ -440,6 +465,37 @@ public: }; /** + * RAII-style class to temporarily pause the MetricsCollector in the OperationContext. This + * applies even if the MetricsCollector is started explicitly in lower levels. + * + * Exception: CPU metrics are not paused. 
+ */ + class PauseMetricsCollectorBlock { + PauseMetricsCollectorBlock(const PauseMetricsCollectorBlock&) = delete; + PauseMetricsCollectorBlock& operator=(const PauseMetricsCollectorBlock&) = delete; + + public: + explicit PauseMetricsCollectorBlock(OperationContext* opCtx) : _opCtx(opCtx) { + auto& metrics = MetricsCollector::get(_opCtx); + _wasPaused = metrics.isPaused(); + if (!_wasPaused) { + metrics.pause(); + } + } + + ~PauseMetricsCollectorBlock() { + if (!_wasPaused) { + auto& metrics = MetricsCollector::get(_opCtx); + metrics.resume(); + } + } + + private: + OperationContext* _opCtx; + bool _wasPaused; + }; + + /** * Returns whether the database's metrics should be collected. */ static bool shouldCollectMetricsForDatabase(StringData dbName) { diff --git a/src/mongo/db/stats/resource_consumption_metrics_test.cpp b/src/mongo/db/stats/resource_consumption_metrics_test.cpp index 25b30274e23..9c992bca916 100644 --- a/src/mongo/db/stats/resource_consumption_metrics_test.cpp +++ b/src/mongo/db/stats/resource_consumption_metrics_test.cpp @@ -722,4 +722,44 @@ TEST_F(ResourceConsumptionMetricsTest, CursorSeeks) { auto metricsCopy = globalResourceConsumption.getDbMetrics(); ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, expectedSeeks); } + +TEST_F(ResourceConsumptionMetricsTest, PauseMetricsCollectorBlock) { + auto& globalResourceConsumption = ResourceConsumption::get(getServiceContext()); + auto& operationMetrics = ResourceConsumption::MetricsCollector::get(_opCtx.get()); + + { + ResourceConsumption::ScopedMetricsCollector scope(_opCtx.get(), "db1"); + { + // Metrics increase within this scope should not be applied. 
+ ResourceConsumption::PauseMetricsCollectorBlock pauseMetricsCollection(_opCtx.get()); + operationMetrics.incrementOneDocRead("", 2); + operationMetrics.incrementOneIdxEntryRead("", 8); + operationMetrics.incrementKeysSorted(16); + operationMetrics.incrementSorterSpills(32); + operationMetrics.incrementDocUnitsReturned("", makeDocUnits(64)); + operationMetrics.incrementOneCursorSeek(""); + } + + // Once PauseMetricsCollectorBlock goes out of scope, metrics collection should work as + // normal. + operationMetrics.incrementOneDocRead("", 4); + operationMetrics.incrementOneIdxEntryRead("", 16); + operationMetrics.incrementKeysSorted(32); + operationMetrics.incrementSorterSpills(64); + operationMetrics.incrementDocUnitsReturned("", makeDocUnits(128)); + operationMetrics.incrementOneCursorSeek(""); + } + + auto metricsCopy = globalResourceConsumption.getDbMetrics(); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.bytes(), 4); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsRead.units(), 1); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.bytes(), 16); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.idxEntriesRead.units(), 1); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.keysSorted, 32); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.sorterSpills, 64); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.bytes(), 128); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.docsReturned.units(), 1); + ASSERT_EQ(metricsCopy["db1"].primaryReadMetrics.cursorSeeks, 1); +} + } // namespace mongo |