author    Louis Williams <louis.williams@mongodb.com>  2020-10-15 10:03:54 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-10-15 14:36:46 +0000
commit    74bcbc442aa3aa6bb188a8c7aa2d12f272a0b8ce
tree      1819ebfd1b3864f0f1879f35adbc7fb8b96732ed
parent    16b59bd6236a90ad3d807a66ec2fe529e09cf0ec
download  mongo-74bcbc442aa3aa6bb188a8c7aa2d12f272a0b8ce.tar.gz
SERVER-51027 Collect document bytes written per operation
 jstests/core/profile_operation_metrics.js                   | 176
 src/mongo/db/stats/resource_consumption_metrics.cpp         |  47
 src/mongo/db/stats/resource_consumption_metrics.h           |  15
 src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp |  27
 4 files changed, 249 insertions(+), 16 deletions(-)
diff --git a/jstests/core/profile_operation_metrics.js b/jstests/core/profile_operation_metrics.js
index 5bba1c03121..3442c646bcf 100644
--- a/jstests/core/profile_operation_metrics.js
+++ b/jstests/core/profile_operation_metrics.js
@@ -7,11 +7,15 @@
* requires_getmore,
* requires_non_retryable_writes,
* requires_profiling,
+ * requires_wiredtiger,
+ * sbe_incompatible,
* ]
*/
(function() {
"use strict";
+load("jstests/libs/fixture_helpers.js"); // For isReplSet().
+
let res = assert.commandWorked(
db.adminCommand({getParameter: 1, measureOperationResourceConsumption: 1}));
if (!res.measureOperationResourceConsumption) {
@@ -23,6 +27,7 @@ const dbName = jsTestName();
const testDB = db.getSiblingDB(dbName);
const collName = 'coll';
const debugBuild = db.adminCommand('buildInfo').debug;
+const isReplSet = FixtureHelpers.isReplSet(db);
testDB.dropDatabase();
@@ -63,6 +68,7 @@ const operations = [
// test run, so only assert this is non-zero.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.gt(profileDoc.docBytesWritten, 0);
}
},
{
@@ -78,6 +84,7 @@ const operations = [
// metrics for index builds.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.gt(profileDoc.docBytesWritten, 0);
}
},
{
@@ -90,6 +97,15 @@ const operations = [
// Insert should not perform any reads.
assert.eq(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted. Some oplog fields like UUID are
+ // randomized between runs, but the types are fixed-length, so we can make strong
+ // assertions about the sizes. If the oplog format changes for any reason, then so
+ // will these assertions.
+ assert.eq(profileDoc.docBytesWritten, 188);
+ } else {
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
}
},
{
@@ -113,6 +129,7 @@ const operations = [
// Should read exactly as many bytes are in the document.
assert.eq(profileDoc.docBytesRead, 29);
assert.eq(profileDoc.idxEntriesRead, 1);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -125,6 +142,7 @@ const operations = [
// Should read exactly as many bytes are in the document.
assert.eq(profileDoc.docBytesRead, 29);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -137,6 +155,7 @@ const operations = [
// Should read exactly as many bytes are in the document.
assert.eq(profileDoc.docBytesRead, 29);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -149,6 +168,7 @@ const operations = [
// Does not read from the collection.
assert.eq(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 1);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -166,6 +186,14 @@ const operations = [
assert.gte(profileDoc.docBytesRead, 29);
}
assert.eq(profileDoc.idxEntriesRead, 1);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted.
+ assert.eq(profileDoc.docBytesWritten, 224);
+ } else {
+ // This update will not be performed in-place because it is too small and affects an
+ // index.
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
}
},
{
@@ -183,6 +211,14 @@ const operations = [
assert.gte(profileDoc.docBytesRead, 29);
}
assert.eq(profileDoc.idxEntriesRead, 1);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted.
+ assert.eq(profileDoc.docBytesWritten, 224);
+ } else {
+ // This update will not be performed in-place because it is too small and affects an
+ // index.
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
}
},
{
@@ -195,6 +231,7 @@ const operations = [
// Reads from the fast-count, not the collection.
assert.eq(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -207,6 +244,7 @@ const operations = [
// Should not read from the collection.
assert.eq(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
// Clear the profile collection so we can easily identify new operations with similar filters as
@@ -222,6 +260,7 @@ const operations = [
// Should read from the collection.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -234,6 +273,7 @@ const operations = [
// This reads from the collection catalog.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -246,6 +286,7 @@ const operations = [
// This reads from the collection catalog.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.gt(profileDoc.docBytesWritten, 0);
}
},
resetProfileColl,
@@ -285,6 +326,13 @@ const operations = [
assert.gte(profileDoc.docBytesRead, 58);
}
assert.eq(profileDoc.idxEntriesRead, 1);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted.
+ assert.eq(profileDoc.docBytesWritten, 177);
+ } else {
+ // Deleted bytes are counted as 'written'.
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
}
},
{
@@ -302,6 +350,13 @@ const operations = [
assert.gte(profileDoc.docBytesRead, 58);
}
assert.eq(profileDoc.idxEntriesRead, 0);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted.
+ assert.eq(profileDoc.docBytesWritten, 177);
+ } else {
+ // Deleted bytes are counted as 'written'.
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
}
},
{
@@ -314,6 +369,7 @@ const operations = [
// Reads from the collection catalog.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.gt(profileDoc.docBytesWritten, 0);
}
},
resetProfileColl,
@@ -332,6 +388,7 @@ const operations = [
// The exact amount of data read is not easily calculable.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
{
@@ -347,26 +404,38 @@ const operations = [
// metrics for index builds.
assert.gt(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 0);
+ assert.eq(profileDoc.docBytesWritten, 0);
}
},
resetProfileColl,
{
name: 'insertUnique',
command: (db) => {
- assert.commandWorked(db[collName].insert({a: 200}));
+ assert.commandWorked(db[collName].insert({_id: 150, a: 150}));
},
profileFilter: {op: 'insert', 'command.insert': collName},
profileAssert: (profileDoc) => {
// Insert should not perform any reads.
assert.eq(profileDoc.docBytesRead, 0);
assert.eq(profileDoc.idxEntriesRead, 1);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted. Some oplog fields like UUID are
+ // randomized between runs, but the types are fixed-length, so we can make strong
+ // assertions about the sizes. If the oplog format changes for any reason, then so
+ // will these assertions.
+ assert.eq(profileDoc.docBytesWritten, 188);
+ } else {
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
}
},
resetProfileColl,
{
name: 'insertDup',
command: (db) => {
- assert.commandFailedWithCode(db[collName].insert({a: 0}), ErrorCodes.DuplicateKey);
+ // Insert a duplicate key on 'a', not _id.
+ assert.commandFailedWithCode(db[collName].insert({_id: 200, a: 0}),
+ ErrorCodes.DuplicateKey);
},
profileFilter: {op: 'insert', 'command.insert': collName},
profileAssert: (profileDoc) => {
@@ -374,6 +443,101 @@ const operations = [
assert.eq(profileDoc.docBytesRead, 0);
// Inserting into a unique index requires reading one key.
assert.eq(profileDoc.idxEntriesRead, 1);
+ // Despite failing to insert keys into the unique index, the operation first succeeded
+ // in writing to the collection. Even though the operation was rolled back, this counts
+ // towards metrics.
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
+ },
+ {
+ name: 'updateWithoutModify',
+ command: (db) => {
+ assert.commandWorked(db[collName].update({_id: 1}, {$set: {a: 151}}));
+ },
+ profileFilter: {op: 'update', 'command.q': {_id: 1}},
+ profileAssert: (profileDoc) => {
+ // Should read exactly as many bytes as are in the document. Debug builds may perform extra
+ // reads of the _mdb_catalog.
+ if (!debugBuild) {
+ assert.eq(profileDoc.docBytesRead, 29);
+ } else {
+ assert.gte(profileDoc.docBytesRead, 29);
+ }
+ // Reads index entries on '_id' for the lookup and 'a' to ensure uniqueness.
+ assert.eq(profileDoc.idxEntriesRead, 2);
+ if (isReplSet) {
+ // Ensure writes to the oplog are counted.
+ assert.eq(profileDoc.docBytesWritten, 224);
+ } else {
+ // This out-of-place update should perform a direct insert because it is not large
+ // enough to qualify for the in-place update path.
+ assert.eq(profileDoc.docBytesWritten, 29);
+ }
+ }
+ },
+ {
+ name: 'updateWithModify',
+ command: (db) => {
+ // WT_MODIFY updates can be used to overwrite small regions of documents rather than
+ // rewriting an entire document. They are only used under the following conditions:
+ // * The collection is not journaled (i.e. it is a replicated user collection)
+ // * The document is at least 1K bytes
+ // * The updated document is no more than 10% larger than the original document
+ assert.commandWorked(db[collName].insert({_id: 200, x: 'x'.repeat(1024)}));
+ assert.commandWorked(db[collName].update({_id: 200}, {$set: {a: 200}}));
+ },
+ profileFilter: {op: 'update', 'command.q': {_id: 200}},
+ profileAssert: (profileDoc) => {
+ // Should read exactly as many bytes as are in the document. Debug builds may perform extra
+ // reads of the _mdb_catalog.
+ if (!debugBuild) {
+ assert.eq(profileDoc.docBytesRead, 1050);
+ } else {
+ assert.gte(profileDoc.docBytesRead, 1050);
+ }
+ // Reads index entries on '_id' for the lookup and 'a' to ensure uniqueness.
+ assert.eq(profileDoc.idxEntriesRead, 2);
+ if (isReplSet) {
+ // When WT_MODIFY is used on a replicated collection, in addition to writing fewer
+ // bytes per the comment about WT_MODIFY above, ensure it also inserts into the
+ // oplog.
+ assert.eq(profileDoc.docBytesWritten, 208);
+ } else {
+ assert.eq(profileDoc.docBytesWritten, 1061);
+ }
+ }
+ },
+ {
+ name: 'updateWithDamages',
+ command: (db) => {
+ // This update behaves differently from the 'updateWithModify' case above. It uses the
+ // same WT_MODIFY update machinery and can be used on un-replicated collections, but is
+ // limited instead to the following conditions:
+ // * A field is replaced such that the total size of the document remains unchanged
+ // * No secondary indexes are affected (e.g. we are not updating 'a')
+ assert.commandWorked(db[collName].insert({_id: 201, b: 0}));
+ assert.commandWorked(db[collName].update({_id: 201}, {$set: {b: 1}}));
+ },
+ profileFilter: {op: 'update', 'command.q': {_id: 201}},
+ profileAssert: (profileDoc) => {
+ // Should read exactly as many bytes as are in the document. Debug builds may perform extra
+ // reads of the _mdb_catalog.
+ if (!debugBuild) {
+ assert.eq(profileDoc.docBytesRead, 29);
+ } else {
+ assert.gte(profileDoc.docBytesRead, 29);
+ }
+ assert.eq(profileDoc.idxEntriesRead, 1);
+ if (isReplSet) {
+ // When WT_MODIFY is used on a replicated collection, in addition to writing fewer
+ // bytes per the comment about WT_MODIFY above, ensure it also inserts into the
+ // oplog.
+ assert.eq(profileDoc.docBytesWritten, 211);
+ } else {
+ // This is calculated as the number of bytes overwritten + the number of bytes
+ // written, and is still less than the full document size.
+ assert.eq(profileDoc.docBytesWritten, 16);
+ }
}
},
];
@@ -398,7 +562,13 @@ let testOperation = (operation) => {
assertMetricsExist(entry);
if (operation.profileAssert) {
- operation.profileAssert(entry.operationMetrics);
+ try {
+ operation.profileAssert(entry.operationMetrics);
+ } catch (e) {
+ print("Caught exception while checking profile entry for '" + operation.name +
+ "' : " + tojson(entry));
+ throw e;
+ }
}
};
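The fixed byte counts asserted above (29, 1050, 1061, 16, and so on) follow directly from the BSON sizes of the documents the test writes; the mongo shell stores plain numbers as 8-byte doubles. The following standalone C++ sketch is not part of the commit, but reproduces a few of those figures from the BSON wire format (4-byte length prefix, per element a type byte plus NUL-terminated name plus value, and a 1-byte terminator):

// Illustrative only: reproduce the docBytesWritten/docBytesRead constants used
// by profile_operation_metrics.js from the BSON encoding rules.
#include <cstdio>

// One BSON element: 1 type byte + (field name + NUL) + value bytes.
constexpr int bsonElement(int nameLen, int valueBytes) {
    return 1 + (nameLen + 1) + valueBytes;
}

// A whole BSON document: 4-byte length prefix + elements + 1-byte terminator.
constexpr int bsonDoc(int elementBytes) {
    return 4 + elementBytes + 1;
}

int main() {
    // {_id: <double>, a: <double>} -> 4 + 13 + 11 + 1 = 29 bytes, the value the
    // insert and delete assertions expect.
    int smallDoc = bsonDoc(bsonElement(3, 8) + bsonElement(1, 8));

    // {_id: <double>, x: <1024-char string>}: a string value is a 4-byte length,
    // the characters, and a NUL -> 1050 bytes, matching docBytesRead in
    // 'updateWithModify'.
    int largeDoc = bsonDoc(bsonElement(3, 8) + bsonElement(1, 4 + 1024 + 1));

    // $set-ing {a: <double>} grows the document by 11 bytes to 1061, the full
    // rewrite size asserted for the non-replica-set 'updateWithModify' case.
    std::printf("%d %d %d\n", smallDoc, largeDoc, largeDoc + bsonElement(1, 8));
    return 0;
}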
diff --git a/src/mongo/db/stats/resource_consumption_metrics.cpp b/src/mongo/db/stats/resource_consumption_metrics.cpp
index 6958b9bd020..4244b845590 100644
--- a/src/mongo/db/stats/resource_consumption_metrics.cpp
+++ b/src/mongo/db/stats/resource_consumption_metrics.cpp
@@ -135,23 +135,28 @@ void ResourceConsumption::Metrics::toFlatBsonNonZeroFields(BSONObjBuilder* build
appendNonZeroMetric(builder, kDocUnitsReturned, docUnitsReturned);
}
-
-void ResourceConsumption::MetricsCollector::_updateReadMetrics(OperationContext* opCtx,
- ReadMetricsFunc&& updateFunc) {
+template <typename Func>
+inline void ResourceConsumption::MetricsCollector::_doIfCollecting(Func&& func) {
if (!isCollecting()) {
return;
}
+ func();
+}
- // The RSTL is normally required to check the replication state, but callers may not always be
- // holding it. Since we need to attribute this metric to some replication state, and an
- // inconsistent state is not impactful for the purposes of metrics collection, perform a
- // best-effort check so that we can record metrics for this operation.
- if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase_UNSAFE(
- opCtx, NamespaceString::kAdminDb)) {
- updateFunc(_metrics.primaryMetrics);
- } else {
- updateFunc(_metrics.secondaryMetrics);
- }
+void ResourceConsumption::MetricsCollector::_updateReadMetrics(OperationContext* opCtx,
+ ReadMetricsFunc&& updateFunc) {
+ _doIfCollecting([&] {
+ // The RSTL is normally required to check the replication state, but callers may not always
+ // be holding it. Since we need to attribute this metric to some replication state, and an
+ // inconsistent state is not impactful for the purposes of metrics collection, perform a
+ // best-effort check so that we can record metrics for this operation.
+ if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase_UNSAFE(
+ opCtx, NamespaceString::kAdminDb)) {
+ updateFunc(_metrics.primaryMetrics);
+ } else {
+ updateFunc(_metrics.secondaryMetrics);
+ }
+ });
}
void ResourceConsumption::MetricsCollector::incrementDocBytesRead(OperationContext* opCtx,
@@ -175,6 +180,22 @@ void ResourceConsumption::MetricsCollector::incrementKeysSorted(OperationContext
[&](ReadMetrics& readMetrics) { readMetrics.keysSorted += keysSorted; });
}
+void ResourceConsumption::MetricsCollector::incrementDocBytesWritten(size_t bytesWritten) {
+ _doIfCollecting([&] { _metrics.docBytesWritten += bytesWritten; });
+}
+
+void ResourceConsumption::MetricsCollector::incrementDocUnitsWritten(size_t unitsWritten) {
+ _doIfCollecting([&] { _metrics.docUnitsWritten += unitsWritten; });
+}
+
+void ResourceConsumption::MetricsCollector::incrementCpuMillis(size_t cpuMillis) {
+ _doIfCollecting([&] { _metrics.cpuMillis += cpuMillis; });
+}
+
+void ResourceConsumption::MetricsCollector::incrementDocUnitsReturned(size_t returned) {
+ _doIfCollecting([&] { _metrics.docUnitsReturned += returned; });
+}
+
ResourceConsumption::ScopedMetricsCollector::ScopedMetricsCollector(OperationContext* opCtx,
const std::string& dbName,
bool commandCollectsMetrics)
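The refactor above replaces the isCollecting() early return that used to be inlined in _updateReadMetrics() with a small template helper, so the new write-side setters can share the same guard. A simplified, self-contained sketch of the pattern (illustrative names, not the server's actual classes):

#include <cstddef>
#include <utility>

class MetricsCollectorSketch {
public:
    void beginCollecting() { _collecting = true; }
    bool isCollecting() const { return _collecting; }

    // Mirrors incrementDocBytesWritten(): unconditional at the call site, a
    // no-op unless this operation is collecting metrics.
    void incrementDocBytesWritten(std::size_t bytes) {
        _doIfCollecting([&] { _docBytesWritten += bytes; });
    }

    std::size_t docBytesWritten() const { return _docBytesWritten; }

private:
    // Invoke 'func' only while this collector is active.
    template <typename Func>
    void _doIfCollecting(Func&& func) {
        if (!isCollecting())
            return;
        std::forward<Func>(func)();
    }

    bool _collecting = false;
    std::size_t _docBytesWritten = 0;
};

Keeping the guard in one place lets storage-layer call sites report writes unconditionally; when an operation is not collecting metrics the calls cost only the branch.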
diff --git a/src/mongo/db/stats/resource_consumption_metrics.h b/src/mongo/db/stats/resource_consumption_metrics.h
index 9f256590959..f665588e8be 100644
--- a/src/mongo/db/stats/resource_consumption_metrics.h
+++ b/src/mongo/db/stats/resource_consumption_metrics.h
@@ -224,6 +224,15 @@ public:
void incrementIdxEntriesRead(OperationContext* opCtx, size_t idxEntriesRead);
void incrementKeysSorted(OperationContext* opCtx, size_t keysSorted);
+ /**
+ * These setters increment the desired metrics independently of replication state, and only
+ * when metrics collection is enabled for this operation.
+ */
+ void incrementDocBytesWritten(size_t docBytesWritten);
+ void incrementDocUnitsWritten(size_t docUnitsWritten);
+ void incrementCpuMillis(size_t cpuMillis);
+ void incrementDocUnitsReturned(size_t docUnitsReturned);
+
private:
/**
* Update the current replication state's ReadMetrics if this operation is currently
@@ -233,6 +242,12 @@ public:
void _updateReadMetrics(OperationContext* opCtx, ReadMetricsFunc&& updateFunc);
/**
+ * Helper function that calls the Func when this collector is currently collecting metrics.
+ */
+ template <typename Func>
+ void _doIfCollecting(Func&& func);
+
+ /**
* Represents the ScopedMetricsCollector state.
*/
enum class ScopedCollectionState {
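For orientation, a hypothetical call site showing how a storage engine reports the new write metric declared above. It mirrors the WiredTigerRecordStore changes in the next file; the function name is made up for illustration and the include paths are assumed from the file names in this diff:

#include <cstddef>

#include "mongo/db/operation_context.h"
#include "mongo/db/stats/resource_consumption_metrics.h"

namespace mongo {

// Hypothetical helper: record that 'oldLength' bytes were removed from a record
// store. Deleted bytes are counted as 'written', matching the test expectations.
void reportRecordDelete(OperationContext* opCtx, std::size_t oldLength) {
    auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx);
    // Unlike the read-side setters, no OperationContext is needed to attribute
    // the metric to a replication state; this is a single per-operation counter.
    metricsCollector.incrementDocBytesWritten(oldLength);
}

}  // namespace mongo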
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index c4605150701..e0d9b0750e3 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -1065,6 +1065,9 @@ void WiredTigerRecordStore::deleteRecord(OperationContext* opCtx, const RecordId
ret = WT_OP_CHECK(wiredTigerCursorRemove(opCtx, c));
invariantWTOK(ret);
+ auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx);
+ metricsCollector.incrementDocBytesWritten(old_length);
+
_changeNumRecords(opCtx, -1);
_increaseDataSize(opCtx, -old_length);
}
@@ -1511,6 +1514,9 @@ Status WiredTigerRecordStore::_insertRecords(OperationContext* opCtx,
_changeNumRecords(opCtx, nRecords);
_increaseDataSize(opCtx, totalLength);
+ auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx);
+ metricsCollector.incrementDocBytesWritten(totalLength);
+
if (_oplogStones) {
_oplogStones->updateCurrentStoneAfterInsertOnCommit(
opCtx, totalLength, highestIdRecord, nRecords);
@@ -1627,6 +1633,7 @@ Status WiredTigerRecordStore::updateRecord(OperationContext* opCtx,
const int kMaxEntries = 16;
const int kMaxDiffBytes = len / 10;
+ auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx);
bool skip_update = false;
if (!_isLogged && len > kMinLengthForDiff && len <= old_length + kMaxDiffBytes) {
int nentries = kMaxEntries;
@@ -1638,6 +1645,17 @@ Status WiredTigerRecordStore::updateRecord(OperationContext* opCtx,
invariantWTOK(WT_OP_CHECK(
nentries == 0 ? c->reserve(c)
: wiredTigerCursorModify(opCtx, c, entries.data(), nentries)));
+
+ size_t modifiedDataSize = 0;
+ // Don't perform a range-based for loop because there may be fewer calculated entries
+ // than the reserved maximum.
+ for (auto i = 0; i < nentries; i++) {
+ // Account for both the amount of old data we are overwriting (size) and new data we
+ // are inserting (data.size).
+ modifiedDataSize += entries[i].size + entries[i].data.size;
+ }
+ metricsCollector.incrementDocBytesWritten(modifiedDataSize);
+
WT_ITEM new_value;
dassert(nentries == 0 ||
(c->get_value(c, &new_value) == 0 && new_value.size == value.size &&
@@ -1651,6 +1669,7 @@ Status WiredTigerRecordStore::updateRecord(OperationContext* opCtx,
if (!skip_update) {
c->set_value(c, value.Get());
ret = WT_OP_CHECK(wiredTigerCursorInsert(opCtx, c));
+ metricsCollector.incrementDocBytesWritten(value.size);
}
invariantWTOK(ret);
@@ -1677,11 +1696,16 @@ StatusWith<RecordData> WiredTigerRecordStore::updateWithDamages(
mutablebson::DamageVector::const_iterator where = damages.begin();
const mutablebson::DamageVector::const_iterator end = damages.cend();
std::vector<WT_MODIFY> entries(nentries);
+ size_t modifiedDataSize = 0;
for (u_int i = 0; where != end; ++i, ++where) {
entries[i].data.data = damageSource + where->sourceOffset;
entries[i].data.size = where->size;
entries[i].offset = where->targetOffset;
entries[i].size = where->size;
+ // Account for both the amount of old data we are overwriting (size) and new data we are
+ // inserting (data.size).
+ modifiedDataSize += entries[i].size;
+ modifiedDataSize += entries[i].data.size;
}
WiredTigerCursor curwrap(_uri, _tableId, true, opCtx);
@@ -1696,6 +1720,9 @@ StatusWith<RecordData> WiredTigerRecordStore::updateWithDamages(
else
invariantWTOK(WT_OP_CHECK(wiredTigerCursorModify(opCtx, c, entries.data(), nentries)));
+ auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx);
+ metricsCollector.incrementDocBytesWritten(modifiedDataSize);
+
WT_ITEM value;
invariantWTOK(c->get_value(c, &value));
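To make the updateWithDamages() accounting concrete: each modify entry contributes both the bytes it overwrites (entries[i].size) and the bytes it writes (entries[i].data.size). A self-contained sketch, using plain structs rather than the WT_MODIFY type, that reproduces the 16-byte figure asserted by the 'updateWithDamages' test case, where the 8-byte double value of 'b' is replaced in place:

#include <cstddef>
#include <cstdio>
#include <vector>

struct ModifyEntrySketch {
    std::size_t size;      // bytes of existing data being overwritten
    std::size_t dataSize;  // bytes of new data being written
};

// Sum overwritten plus written bytes across all modify entries, as the
// updateRecord() and updateWithDamages() changes above do.
std::size_t modifiedDataSize(const std::vector<ModifyEntrySketch>& entries) {
    std::size_t total = 0;
    for (const auto& entry : entries) {
        total += entry.size + entry.dataSize;
    }
    return total;
}

int main() {
    // Replacing the 8-byte double value of 'b' in place: 8 bytes overwritten +
    // 8 bytes written = 16, the non-replica-set docBytesWritten value asserted
    // in the test, and far less than rewriting the whole 29-byte document.
    std::printf("%zu\n", modifiedDataSize({{8, 8}}));
    return 0;
}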