author | Pol Piñol Castuera <67922619+PolPinol@users.noreply.github.com> | 2022-11-11 09:24:33 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-11 10:43:30 +0000 |
commit | b4414b6651c8c815d8629f4655e606d4d2046537 | |
tree | 35e8f68c799b2a7b6dccbd54c98f3c1cb262cf2a | |
parent | 7c2223c9d3357557e2cb28f16ee68f6b268baf79 | |
SERVER-68855 Optimize $collStats for $shardedDataDistribution
13 files changed, 423 insertions, 118 deletions
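The patch below lets `$_internalAllCollectionStats` absorb a trailing `$project` and forward it as a filter to the storage-stats code, so each shard only computes the stats groups the pipeline actually consumes; `$shardedDataDistribution` is rewritten to insert such a projection. As a rough usage sketch (not part of this commit, assuming a mongos on a sharded cluster), the user-facing aggregation this optimizes looks like:

```javascript
// Rough sketch: $shardedDataDistribution runs on the admin database of a mongos.
// Per this patch it expands to $_internalAllCollectionStats followed by a narrow
// $project (ns, shard, storageStats.count, storageStats.numOrphanDocs,
// storageStats.avgObjSize), so only those storage stats are computed per collection.
const adminDb = db.getSiblingDB("admin");
adminDb.aggregate([{$shardedDataDistribution: {}}]).forEach(doc => printjson(doc));
```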
diff --git a/jstests/sharding/all_collection_stats.js b/jstests/sharding/all_collection_stats.js index 22cc1c7f3a7..ac0207bda7b 100644 --- a/jstests/sharding/all_collection_stats.js +++ b/jstests/sharding/all_collection_stats.js @@ -9,6 +9,30 @@ (function() { 'use strict'; +function checkResults(results, checksToDo) { + for (let i = 0; i < numCollections; i++) { + const coll = "coll" + i; + + // To check that the data retrieve from $_internalAllCollectionStats is correct we will call + // $collStats for each namespace to retrieve its storage stats and compare the two outputs. + const expectedResults = + testDb.getCollection(coll).aggregate([{$collStats: {storageStats: {}}}]).toArray(); + assert.neq(null, expectedResults); + assert.eq(expectedResults.length, 1); + + let exists = false; + for (const data of results) { + const ns = data.ns; + if (dbName + "." + coll === ns) { + checksToDo(data, expectedResults); + exists = true; + break; + } + } + assert(exists, "Expected to have $_internalAllCollectionStats results for coll" + i); + } +} + // Configure initial sharding cluster const st = new ShardingTest({shards: 2}); const mongos = st.s; @@ -16,56 +40,140 @@ const mongos = st.s; const dbName = "test"; const testDb = mongos.getDB(dbName); const adminDb = mongos.getDB("admin"); +const numCollections = 20; // Insert sharded collections to validate the aggregation stage -for (let i = 0; i < 10; i++) { +for (let i = 0; i < (numCollections / 2); i++) { const coll = "coll" + i; assert(st.adminCommand({shardcollection: dbName + "." + coll, key: {skey: 1}})); assert.commandWorked(testDb.getCollection(coll).insert({skey: i})); } // Insert some unsharded collections to validate the aggregation stage -for (let i = 10; i < 20; i++) { +for (let i = numCollections / 2; i < numCollections; i++) { const coll = "coll" + i; assert.commandWorked(testDb.getCollection(coll).insert({skey: i})); } -// Get output data -const outputData = - adminDb.aggregate([{$_internalAllCollectionStats: {stats: {storageStats: {}}}}]).toArray(); -assert.gte(outputData.length, 20); - // Testing for comparing each collection returned from $_internalAllCollectionStats to $collStats -for (let i = 0; i < 20; i++) { - const coll = "coll" + i; - const expectedResults = - testDb.getCollection(coll).aggregate([{$collStats: {storageStats: {}}}]).toArray(); - assert.neq(null, expectedResults); - assert.eq(expectedResults.length, 1); - - let exists = false; - for (const data of outputData) { - const ns = data.ns; - if (dbName + "." 
+ coll === ns) { - assert.eq(data.host, expectedResults[0].host); - assert.eq(data.shard, expectedResults[0].shard); - assert.eq(data.storageStats.size, expectedResults[0].storageStats.size); - assert.eq(data.storageStats.count, expectedResults[0].storageStats.count); - assert.eq(data.storageStats.avgObjSize, expectedResults[0].storageStats.avgObjSize); - assert.eq(data.storageStats.storageSize, expectedResults[0].storageStats.storageSize); - assert.eq(data.storageStats.freeStorageSize, - expectedResults[0].storageStats.freeStorageSize); - assert.eq(data.storageStats.nindexes, expectedResults[0].storageStats.nindexes); - assert.eq(data.storageStats.totalIndexSize, - expectedResults[0].storageStats.totalIndexSize); - assert.eq(data.storageStats.totalSize, expectedResults[0].storageStats.totalSize); - exists = true; - break; - } - } +(function testInternalAllCollectionStats() { + const outputData = + adminDb.aggregate([{$_internalAllCollectionStats: {stats: {storageStats: {}}}}]).toArray(); + assert.gte(outputData.length, 20); - assert(exists); -} + const checksToDo = (left, right) => { + const msg = "Expected same output from $_internalAllCollectionStats and $collStats " + + "for same namespace"; + assert.eq(left.host, right[0].host, msg); + assert.eq(left.shard, right[0].shard, msg); + assert.eq(left.storageStats.size, right[0].storageStats.size, msg); + assert.eq(left.storageStats.count, right[0].storageStats.count, msg); + assert.eq(left.storageStats.avgObjSize, right[0].storageStats.avgObjSize, msg); + assert.eq(left.storageStats.storageSize, right[0].storageStats.storageSize, msg); + assert.eq(left.storageStats.freeStorageSize, right[0].storageStats.freeStorageSize, msg); + assert.eq(left.storageStats.nindexes, right[0].storageStats.nindexes, msg); + assert.eq(left.storageStats.totalIndexSize, right[0].storageStats.totalIndexSize, msg); + assert.eq(left.storageStats.totalSize, right[0].storageStats.totalSize, msg); + }; + checkResults(outputData, checksToDo); +})(); + +// Tests to check the correct behaviour of a $project stage after $_internalAllCollectionStats +(function testNumOrphanDocsFieldProject() { + const outputData = adminDb + .aggregate([ + {$_internalAllCollectionStats: {stats: {storageStats: {}}}}, + {$project: {"ns": 1, "storageStats.numOrphanDocs": 1}} + ]) + .toArray(); + assert.gte(outputData.length, numCollections); + + const checksToDo = (left, right) => { + assert.eq(left.storageStats.numOrphanDocs, + right[0].storageStats.numOrphanDocs, + "Expected same output after a projection with storageStats.numOrphanDocs field"); + }; + checkResults(outputData, checksToDo); +})(); + +(function testStorageSizeFieldProject() { + const outputData = adminDb + .aggregate([ + {$_internalAllCollectionStats: {stats: {storageStats: {}}}}, + {$project: {"ns": 1, "storageStats.storageSize": 1}} + ]) + .toArray(); + assert.gte(outputData.length, numCollections); + + const checksToDo = (left, right) => { + assert.eq(left.storageStats.storageSize, + right[0].storageStats.storageSize, + "Expected same output after a projection with storageStats.storageSize field"); + }; + checkResults(outputData, checksToDo); +})(); + +(function testNIndexesFieldProject() { + const outputData = adminDb + .aggregate([ + {$_internalAllCollectionStats: {stats: {storageStats: {}}}}, + {$project: {"ns": 1, "storageStats.nindexes": 1}} + ]) + .toArray(); + assert.gte(outputData.length, numCollections); + + const checksToDo = (left, right) => { + assert.eq(left.storageStats.nindexes, + 
right[0].storageStats.nindexes, + "Expected same output after a projection with storageStats.nindexes field"); + }; + checkResults(outputData, checksToDo); +})(); + +(function testTotalSizeFieldProject() { + const outputData = adminDb + .aggregate([ + {$_internalAllCollectionStats: {stats: {storageStats: {}}}}, + {$project: {"ns": 1, "storageStats.totalSize": 1}} + ]) + .toArray(); + assert.gte(outputData.length, numCollections); + + const checksToDo = (left, right) => { + assert.eq(left.storageStats.totalSize, + right[0].storageStats.totalSize, + "Expected same output after a projection with storageStats.totalSize field"); + }; + checkResults(outputData, checksToDo); +})(); + +(function testProjectingDifferentFields() { + const outputData = adminDb + .aggregate([ + {$_internalAllCollectionStats: {stats: {storageStats: {}}}}, + { + $project: { + "ns": 1, + "storageStats.numOrphanDocs": 1, + "storageStats.storageSize": 1, + "storageStats.nindexes": 1, + "storageStats.totalSize": 1 + } + } + ]) + .toArray(); + assert.gte(outputData.length, numCollections); + + const checksToDo = (left, right) => { + const msg = "Expected same output after a projection with fields from different storage " + + "stats groups"; + assert.eq(left.storageStats.numOrphanDocs, right[0].storageStats.numOrphanDocs, msg); + assert.eq(left.storageStats.storageSize, right[0].storageStats.storageSize, msg); + assert.eq(left.storageStats.nindexes, right[0].storageStats.nindexes, msg); + assert.eq(left.storageStats.totalSize, right[0].storageStats.totalSize, msg); + }; + checkResults(outputData, checksToDo); +})(); // Test invalid queries/values. assert.commandFailedWithCode( diff --git a/src/mongo/db/pipeline/document_source_coll_stats.cpp b/src/mongo/db/pipeline/document_source_coll_stats.cpp index 3f40948ad9c..88e6cf9337b 100644 --- a/src/mongo/db/pipeline/document_source_coll_stats.cpp +++ b/src/mongo/db/pipeline/document_source_coll_stats.cpp @@ -74,7 +74,8 @@ intrusive_ptr<DocumentSource> DocumentSourceCollStats::createFromBson( BSONObj DocumentSourceCollStats::makeStatsForNs( const boost::intrusive_ptr<ExpressionContext>& expCtx, const NamespaceString& nss, - const DocumentSourceCollStatsSpec& spec) { + const DocumentSourceCollStatsSpec& spec, + const boost::optional<BSONObj>& filterObj) { BSONObjBuilder builder; builder.append("ns", nss.ns()); @@ -96,9 +97,10 @@ BSONObj DocumentSourceCollStats::makeStatsForNs( if (auto storageStats = spec.getStorageStats()) { // If the storageStats field exists, it must have been validated as an object when parsing. 
BSONObjBuilder storageBuilder(builder.subobjStart("storageStats")); - uassertStatusOKWithContext(expCtx->mongoProcessInterface->appendStorageStats( - expCtx->opCtx, nss, *storageStats, &storageBuilder), - "Unable to retrieve storageStats in $collStats stage"); + uassertStatusOKWithContext( + expCtx->mongoProcessInterface->appendStorageStats( + expCtx->opCtx, nss, *storageStats, &storageBuilder, filterObj), + "Unable to retrieve storageStats in $collStats stage"); storageBuilder.doneFast(); } diff --git a/src/mongo/db/pipeline/document_source_coll_stats.h b/src/mongo/db/pipeline/document_source_coll_stats.h index f860a1222ee..cafaa111b25 100644 --- a/src/mongo/db/pipeline/document_source_coll_stats.h +++ b/src/mongo/db/pipeline/document_source_coll_stats.h @@ -87,7 +87,8 @@ public: static BSONObj makeStatsForNs(const boost::intrusive_ptr<ExpressionContext>&, const NamespaceString&, - const DocumentSourceCollStatsSpec&); + const DocumentSourceCollStatsSpec&, + const boost::optional<BSONObj>& filterObj = boost::none); DocumentSourceCollStats(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, DocumentSourceCollStatsSpec spec) diff --git a/src/mongo/db/pipeline/document_source_internal_all_collection_stats.cpp b/src/mongo/db/pipeline/document_source_internal_all_collection_stats.cpp index f37206dbafe..73782cd88c4 100644 --- a/src/mongo/db/pipeline/document_source_internal_all_collection_stats.cpp +++ b/src/mongo/db/pipeline/document_source_internal_all_collection_stats.cpp @@ -28,6 +28,7 @@ */ #include "mongo/db/pipeline/document_source_internal_all_collection_stats.h" +#include "mongo/db/pipeline/document_source_project.h" namespace mongo { @@ -63,7 +64,7 @@ DocumentSource::GetNextResult DocumentSourceInternalAllCollectionStats::doGetNex try { return {Document{DocumentSourceCollStats::makeStatsForNs( - pExpCtx, nss, _internalAllCollectionStatsSpec.getStats().get())}}; + pExpCtx, nss, _internalAllCollectionStatsSpec.getStats().get(), _projectFilter)}}; } catch (const ExceptionFor<ErrorCodes::CommandNotSupportedOnView>&) { // We don't want to retrieve data for views, only for collections. continue; @@ -81,6 +82,14 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalAllCollectionStats::do return container->end(); } + // Attempt to internalize any predicates of a $project stage in order to calculate only + // necessary fields. + if (auto nextProject = + dynamic_cast<DocumentSourceSingleDocumentTransformation*>((*std::next(itr)).get())) { + _projectFilter = + nextProject->getTransformer().serializeTransformation(boost::none).toBson(); + } + // Attempt to internalize any predicates of a $match upon the "ns" field. auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get()); diff --git a/src/mongo/db/pipeline/document_source_internal_all_collection_stats.h b/src/mongo/db/pipeline/document_source_internal_all_collection_stats.h index 491b6354c3f..93b1556b94f 100644 --- a/src/mongo/db/pipeline/document_source_internal_all_collection_stats.h +++ b/src/mongo/db/pipeline/document_source_internal_all_collection_stats.h @@ -125,5 +125,10 @@ private: // A $match stage can be absorbed in order to avoid unnecessarily computing the stats for // collections that do not match that predicate. boost::intrusive_ptr<DocumentSourceMatch> _absorbedMatch; + + // If a $project stage exists after $_internalAllCollectionStats, we will peek the BSONObj + // associated with the $project. This BSONObj will be used to avoid calculating + // unnecessary fields. 
+ boost::optional<BSONObj> _projectFilter; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_sharded_data_distribution.cpp b/src/mongo/db/pipeline/document_source_sharded_data_distribution.cpp index 8edacb97b10..660a96c1613 100644 --- a/src/mongo/db/pipeline/document_source_sharded_data_distribution.cpp +++ b/src/mongo/db/pipeline/document_source_sharded_data_distribution.cpp @@ -65,6 +65,15 @@ list<intrusive_ptr<DocumentSource>> DocumentSourceShardedDataDistribution::creat static const BSONObj kAllCollStatsObj = fromjson("{$_internalAllCollectionStats: {stats: {storageStats: {}}}}}"); + static const BSONObj kProjectObj = fromjson(R"({ + $project: { + "ns": 1, + "shard": 1, + "storageStats.count": 1, + "storageStats.numOrphanDocs": 1, + "storageStats.avgObjSize": 1 + } + })"); static const BSONObj kGroupObj = fromjson(R"({ $group: { _id: "$ns", @@ -113,7 +122,7 @@ list<intrusive_ptr<DocumentSource>> DocumentSourceShardedDataDistribution::creat } })"); static const BSONObj kMatchObj = fromjson("{$match: {matchingShardedCollection: {$ne: []}}}"); - static const BSONObj kProjectObj = fromjson(R"({ + static const BSONObj kFinalProjectObj = fromjson(R"({ $project: { _id: 0, ns: "$_id", @@ -123,9 +132,10 @@ list<intrusive_ptr<DocumentSource>> DocumentSourceShardedDataDistribution::creat return {DocumentSourceInternalAllCollectionStats::createFromBsonInternal( kAllCollStatsObj.firstElement(), expCtx), + DocumentSourceProject::createFromBson(kProjectObj.firstElement(), expCtx), DocumentSourceGroup::createFromBson(kGroupObj.firstElement(), expCtx), DocumentSourceLookUp::createFromBson(kLookupObj.firstElement(), expCtx), DocumentSourceMatch::createFromBson(kMatchObj.firstElement(), expCtx), - DocumentSourceProject::createFromBson(kProjectObj.firstElement(), expCtx)}; + DocumentSourceProject::createFromBson(kFinalProjectObj.firstElement(), expCtx)}; } } // namespace mongo diff --git a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp index e8e109aee8f..430378b2a41 100644 --- a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp +++ b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp @@ -349,11 +349,13 @@ void CommonMongodProcessInterface::appendLatencyStats(OperationContext* opCtx, Top::get(opCtx->getServiceContext()).appendLatencyStats(nss, includeHistograms, builder); } -Status CommonMongodProcessInterface::appendStorageStats(OperationContext* opCtx, - const NamespaceString& nss, - const StorageStatsSpec& spec, - BSONObjBuilder* builder) const { - return appendCollectionStorageStats(opCtx, nss, spec, builder); +Status CommonMongodProcessInterface::appendStorageStats( + OperationContext* opCtx, + const NamespaceString& nss, + const StorageStatsSpec& spec, + BSONObjBuilder* builder, + const boost::optional<BSONObj>& filterObj) const { + return appendCollectionStorageStats(opCtx, nss, spec, builder, filterObj); } Status CommonMongodProcessInterface::appendRecordCount(OperationContext* opCtx, diff --git a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.h b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.h index c9878e2aa52..48112e9eb28 100644 --- a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.h +++ b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.h @@ -69,7 +69,8 @@ public: Status appendStorageStats(OperationContext* opCtx, 
const NamespaceString& nss, const StorageStatsSpec& spec, - BSONObjBuilder* builder) const final; + BSONObjBuilder* builder, + const boost::optional<BSONObj>& filterObj) const final; Status appendRecordCount(OperationContext* opCtx, const NamespaceString& nss, BSONObjBuilder* builder) const final; diff --git a/src/mongo/db/pipeline/process_interface/mongo_process_interface.h b/src/mongo/db/pipeline/process_interface/mongo_process_interface.h index 1f2c73d9c6e..5951ca1c4ad 100644 --- a/src/mongo/db/pipeline/process_interface/mongo_process_interface.h +++ b/src/mongo/db/pipeline/process_interface/mongo_process_interface.h @@ -229,12 +229,17 @@ public: BSONObjBuilder* builder) const = 0; /** - * Appends storage statistics for collection "nss" to "builder" + * Appends storage statistics for collection "nss" to "builder". + * + * By passing a BSONObj as the parameter 'filterObj' in this function, the caller can request + * specific stats to be appended to parameter 'builder'. By passing 'boost::none' to + * 'filterObj', the caller is requesting to append all possible storage stats. */ virtual Status appendStorageStats(OperationContext* opCtx, const NamespaceString& nss, const StorageStatsSpec& spec, - BSONObjBuilder* builder) const = 0; + BSONObjBuilder* builder, + const boost::optional<BSONObj>& filterObj) const = 0; /** * Appends the record count for collection "nss" to "builder". @@ -242,6 +247,7 @@ public: virtual Status appendRecordCount(OperationContext* opCtx, const NamespaceString& nss, BSONObjBuilder* builder) const = 0; + /** * Appends the exec stats for the collection 'nss' to 'builder'. */ diff --git a/src/mongo/db/pipeline/process_interface/mongos_process_interface.h b/src/mongo/db/pipeline/process_interface/mongos_process_interface.h index 786440cb5af..caf30df8869 100644 --- a/src/mongo/db/pipeline/process_interface/mongos_process_interface.h +++ b/src/mongo/db/pipeline/process_interface/mongos_process_interface.h @@ -120,7 +120,8 @@ public: Status appendStorageStats(OperationContext* opCtx, const NamespaceString& nss, const StorageStatsSpec& spec, - BSONObjBuilder* builder) const final { + BSONObjBuilder* builder, + const boost::optional<BSONObj>& filterObj) const final { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/pipeline/process_interface/stub_mongo_process_interface.h b/src/mongo/db/pipeline/process_interface/stub_mongo_process_interface.h index 813684b8f8e..1b991a55cd9 100644 --- a/src/mongo/db/pipeline/process_interface/stub_mongo_process_interface.h +++ b/src/mongo/db/pipeline/process_interface/stub_mongo_process_interface.h @@ -124,7 +124,8 @@ public: Status appendStorageStats(OperationContext* opCtx, const NamespaceString& nss, const StorageStatsSpec& spec, - BSONObjBuilder* builder) const override { + BSONObjBuilder* builder, + const boost::optional<BSONObj>& filterObj) const override { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/stats/storage_stats.cpp b/src/mongo/db/stats/storage_stats.cpp index 7b1cd6cc06a..70c096c3178 100644 --- a/src/mongo/db/stats/storage_stats.cpp +++ b/src/mongo/db/stats/storage_stats.cpp @@ -43,6 +43,7 @@ #include "mongo/db/timeseries/timeseries_stats.h" #include "mongo/logv2/log.h" #include "mongo/s/sharding_feature_flags_gen.h" +#include "mongo/stdx/unordered_map.h" #include "mongo/db/stats/storage_stats.h" @@ -50,61 +51,45 @@ namespace mongo { - -Status appendCollectionStorageStats(OperationContext* opCtx, - const NamespaceString& nss, - const StorageStatsSpec& storageStatsSpec, - BSONObjBuilder* result) { +namespace { + +enum class 
StorageStatsGroups { + kRecordStatsField, + kRecordStoreField, + kInProgressIndexesField, + kTotalSizeField, +}; + +// Mapping possible 'filterObj' fields and their corresponding output groups. For a whole group to +// be part of the output, it is only necessary that one field it contains is included in the filter. +const stdx::unordered_map<std::string, StorageStatsGroups> _mapStorageStatsFieldsToGroup = { + {"numOrphanDocs", StorageStatsGroups::kRecordStatsField}, + {"size", StorageStatsGroups::kRecordStatsField}, + {"timeseries", StorageStatsGroups::kRecordStatsField}, + {"count", StorageStatsGroups::kRecordStatsField}, + {"avgObjSize", StorageStatsGroups::kRecordStatsField}, + {"storageSize", StorageStatsGroups::kRecordStoreField}, + {"freeStorageSize", StorageStatsGroups::kRecordStoreField}, + {"capped", StorageStatsGroups::kRecordStoreField}, + {"max", StorageStatsGroups::kRecordStoreField}, + {"maxSize", StorageStatsGroups::kRecordStoreField}, + {"nindexes", StorageStatsGroups::kInProgressIndexesField}, + {"indexDetails", StorageStatsGroups::kInProgressIndexesField}, + {"indexBuilds", StorageStatsGroups::kInProgressIndexesField}, + {"totalIndexSize", StorageStatsGroups::kInProgressIndexesField}, + {"indexSizes", StorageStatsGroups::kInProgressIndexesField}, + {"totalSize", StorageStatsGroups::kTotalSizeField}, + {"scaleFactor", StorageStatsGroups::kTotalSizeField}}; + +// Append to 'result' the stats related to record stats. +void _appendRecordStats(OperationContext* opCtx, + const CollectionPtr& collection, + const NamespaceString& collNss, + bool isNamespaceAlwaysUnsharded, + int scale, + bool isTimeseries, + BSONObjBuilder* result) { static constexpr auto kOrphanCountField = "numOrphanDocs"_sd; - - auto scale = storageStatsSpec.getScale().value_or(1); - bool verbose = storageStatsSpec.getVerbose(); - bool waitForLock = storageStatsSpec.getWaitForLock(); - bool numericOnly = storageStatsSpec.getNumericOnly(); - - const auto bucketNss = nss.makeTimeseriesBucketsNamespace(); - const auto isTimeseries = nss.isTimeseriesBucketsCollection() || - CollectionCatalog::get(opCtx)->lookupCollectionByNamespaceForRead(opCtx, bucketNss); - const auto collNss = - (isTimeseries && !nss.isTimeseriesBucketsCollection()) ? std::move(bucketNss) : nss; - - auto failed = [&](const DBException& ex) { - LOGV2_DEBUG(3088801, - 2, - "Failed to retrieve storage statistics", - logAttrs(collNss), - "error"_attr = ex); - return Status::OK(); - }; - - boost::optional<AutoGetCollectionForReadCommandMaybeLockFree> autoColl; - try { - autoColl.emplace( - opCtx, - collNss, - AutoGetCollection::Options{}.deadline(waitForLock ? 
Date_t::max() : Date_t::now())); - } catch (const ExceptionFor<ErrorCodes::LockTimeout>& ex) { - return failed(ex); - } catch (const ExceptionFor<ErrorCodes::MaxTimeMSExpired>& ex) { - return failed(ex); - } - - const auto& collection = autoColl->getCollection(); // Will be set if present - if (!collection) { - result->appendNumber("size", 0); - result->appendNumber("count", 0); - result->appendNumber(kOrphanCountField, 0); - result->appendNumber("storageSize", 0); - result->append("totalSize", 0); - result->append("nindexes", 0); - result->appendNumber("totalIndexSize", 0); - result->append("indexDetails", BSONObj()); - result->append("indexSizes", BSONObj()); - result->append("scaleFactor", scale); - return {ErrorCodes::NamespaceNotFound, - "Collection [" + collNss.toString() + "] not found."}; - } - long long size = collection->dataSize(opCtx) / scale; result->appendNumber("size", size); @@ -125,8 +110,7 @@ Status appendCollectionStorageStats(OperationContext* opCtx, } } - if (serverGlobalParams.clusterRole == ClusterRole::ShardServer && - !nss.isNamespaceAlwaysUnsharded()) { + if (serverGlobalParams.clusterRole == ClusterRole::ShardServer && !isNamespaceAlwaysUnsharded) { if (serverGlobalParams.featureCompatibility.isVersionInitialized() && feature_flags::gOrphanTracking.isEnabled(serverGlobalParams.featureCompatibility)) { result->appendNumber( @@ -137,7 +121,15 @@ Status appendCollectionStorageStats(OperationContext* opCtx, } else { result->appendNumber(kOrphanCountField, 0); } +} +// Append to 'result' the stats related to record store. +void _appendRecordStore(OperationContext* opCtx, + const CollectionPtr& collection, + bool verbose, + int scale, + bool numericOnly, + BSONObjBuilder* result) { const RecordStore* recordStore = collection->getRecordStore(); auto storageSize = static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 1 : 0)); @@ -157,7 +149,13 @@ Status appendCollectionStorageStats(OperationContext* opCtx, } else { recordStore->appendAllCustomStats(opCtx, result, scale); } +} +// Append to 'result' the stats related to inProgress indexes. +void _appendInProgressIndexesStats(OperationContext* opCtx, + const CollectionPtr& collection, + int scale, + BSONObjBuilder* result) { const IndexCatalog* indexCatalog = collection->getIndexCatalog(); BSONObjBuilder indexDetails; std::vector<std::string> indexBuilds; @@ -207,17 +205,135 @@ Status appendCollectionStorageStats(OperationContext* opCtx, } } + BSONObjBuilder indexSizes; + long long indexSize = collection->getIndexSize(opCtx, &indexSizes, scale); + result->append("indexDetails", indexDetails.obj()); result->append("indexBuilds", indexBuilds); + result->appendNumber("totalIndexSize", indexSize / scale); + result->append("indexSizes", indexSizes.obj()); +} +// Append to 'result' the total size and the scale factor. +void _appendTotalSize(OperationContext* opCtx, + const CollectionPtr& collection, + bool verbose, + int scale, + BSONObjBuilder* result) { + const RecordStore* recordStore = collection->getRecordStore(); + auto storageSize = + static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 
1 : 0)); BSONObjBuilder indexSizes; long long indexSize = collection->getIndexSize(opCtx, &indexSizes, scale); - result->appendNumber("totalIndexSize", indexSize / scale); result->appendNumber("totalSize", (storageSize + indexSize) / scale); - result->append("indexSizes", indexSizes.obj()); result->append("scaleFactor", scale); +} +} // namespace + +Status appendCollectionStorageStats(OperationContext* opCtx, + const NamespaceString& nss, + const StorageStatsSpec& storageStatsSpec, + BSONObjBuilder* result, + const boost::optional<BSONObj>& filterObj) { + auto scale = storageStatsSpec.getScale().value_or(1); + bool verbose = storageStatsSpec.getVerbose(); + bool waitForLock = storageStatsSpec.getWaitForLock(); + bool numericOnly = storageStatsSpec.getNumericOnly(); + static constexpr auto kStorageStatsField = "storageStats"_sd; + + const auto bucketNss = nss.makeTimeseriesBucketsNamespace(); + const auto isTimeseries = nss.isTimeseriesBucketsCollection() || + CollectionCatalog::get(opCtx)->lookupCollectionByNamespaceForRead(opCtx, bucketNss); + const auto collNss = + (isTimeseries && !nss.isTimeseriesBucketsCollection()) ? std::move(bucketNss) : nss; + + auto failed = [&](const DBException& ex) { + LOGV2_DEBUG(3088801, + 2, + "Failed to retrieve storage statistics", + logAttrs(collNss), + "error"_attr = ex); + return Status::OK(); + }; + + boost::optional<AutoGetCollectionForReadCommandMaybeLockFree> autoColl; + try { + autoColl.emplace( + opCtx, + collNss, + AutoGetCollection::Options{}.deadline(waitForLock ? Date_t::max() : Date_t::now())); + } catch (const ExceptionFor<ErrorCodes::LockTimeout>& ex) { + return failed(ex); + } catch (const ExceptionFor<ErrorCodes::MaxTimeMSExpired>& ex) { + return failed(ex); + } + const auto& collection = autoColl->getCollection(); // Will be set if present + if (!collection) { + result->appendNumber("size", 0); + result->appendNumber("count", 0); + result->appendNumber("numOrphanDocs", 0); + result->appendNumber("storageSize", 0); + result->append("totalSize", 0); + result->append("nindexes", 0); + result->appendNumber("totalIndexSize", 0); + result->append("indexDetails", BSONObj()); + result->append("indexSizes", BSONObj()); + result->append("scaleFactor", scale); + return {ErrorCodes::NamespaceNotFound, + "Collection [" + collNss.toString() + "] not found."}; + } + + // We will parse all 'filterObj' into different groups of data to compute. This groups will be + // marked and appended to the vector 'groupsToCompute'. In addition, if the filterObj doesn't + // exist (filterObj == boost::none), we will retrieve all stats for all fields. + std::vector<StorageStatsGroups> groupsToCompute; + if (filterObj) { + // Case where exists a filterObj that specifies one or more groups to compute from the + // storage stats. + BSONObj stats = filterObj.get(); + if (stats.hasField(kStorageStatsField)) { + BSONObj storageStats = stats.getObjectField(kStorageStatsField); + for (const auto& element : storageStats) { + if (element.Bool() && _mapStorageStatsFieldsToGroup.count(element.fieldName())) { + groupsToCompute.push_back( + _mapStorageStatsFieldsToGroup.at(element.fieldName())); + } + } + } + } else { + // Case where filterObj doesn't exist. We will append to 'groupsToCompute' all existing + // groups to retrieve all possible fields. 
+ groupsToCompute = {StorageStatsGroups::kRecordStatsField, + StorageStatsGroups::kRecordStoreField, + StorageStatsGroups::kInProgressIndexesField, + StorageStatsGroups::kTotalSizeField}; + } + + // Iterate elements from 'groupsToCompute' to compute only the demanded groups of fields. + for (const auto& group : groupsToCompute) { + switch (group) { + case StorageStatsGroups::kRecordStatsField: + _appendRecordStats(opCtx, + collection, + collNss, + nss.isNamespaceAlwaysUnsharded(), + scale, + isTimeseries, + result); + break; + case StorageStatsGroups::kRecordStoreField: + _appendRecordStore(opCtx, collection, verbose, scale, numericOnly, result); + break; + case StorageStatsGroups::kInProgressIndexesField: + _appendInProgressIndexesStats(opCtx, collection, scale, result); + break; + case StorageStatsGroups::kTotalSizeField: + _appendTotalSize(opCtx, collection, verbose, scale, result); + break; + } + } return Status::OK(); } diff --git a/src/mongo/db/stats/storage_stats.h b/src/mongo/db/stats/storage_stats.h index b2a5e3fee0f..f37b9aba612 100644 --- a/src/mongo/db/stats/storage_stats.h +++ b/src/mongo/db/stats/storage_stats.h @@ -39,20 +39,63 @@ namespace mongo { /** - * Appends to 'builder' storage related statistics for the collection represented by 'nss'. + * Appends to 'builder' storage related statistics for the collection represented by 'nss'. This + * method will have different implementations depending of the existence of 'filterObj'. + * + * If 'filterObj' doesn't exist (filterObj == boost::none), the method will retrieve all the + * storage related statistics. + * + * For the case that 'filterObj' exists, the method will return filtered stats depending on the + * fields specified in the filter: + * 1.- In order to append to 'builder' the RecordStats for the collection, the 'filterObj' must + * contain at least 1 field from the following list: + * - numOrphanDocs + * - size + * - timeseries + * - count + * - avgObjSize + * 2.- In order to append to 'builder' the RecordStore for the collection, the 'filterObj' must + * contain at least 1 field from the following list: + * - storageSize + * - freeStorageSize + * - capped + * - max + * - maxSize + * 3.- In order to append to 'builder' the InProgressIndexesStats for the collection, the + * 'filterObj' must contain at least 1 field from the following list: + * - nindexes + * - indexDetails + * - indexBuilds + * - totalIndexSize + * - indexSizes + * 4.- In order to append to 'builder' the TotalSize for the collection, the 'filterObj' must + * contain at least 1 field from the following list: + * - totalSize + * - scaleFactor * * Params: - * opCtx - * nss Fully qualified namespace. - * spec Includes options such as "scale" (default = 1) and "verbose". - * builder out; object the stats will be appended to. + * - opCtx + * - nss: Fully qualified namespace. + * - spec: Includes options such as "scale" (default = 1) and "verbose". + * - builder out; object the stats will be appended to. + * - filterObj: BSONObj to request specific storage stats. If 'filterObj' is 'boost:none', all + * possible storage stats will be appended to 'builder' parameter. The filterObj must follow this + * pattern: + * filter = { + * storageStats : { + * field1ToShow: <bool>, + * field2ToShow: <bool>, + * ... 
+ * } + * } * * returns Status, (note "NamespaceNotFound" will fill result with 0-ed stats) */ Status appendCollectionStorageStats(OperationContext* opCtx, const NamespaceString& nss, const StorageStatsSpec& spec, - BSONObjBuilder* builder); + BSONObjBuilder* builder, + const boost::optional<BSONObj>& filterObj = boost::none); /** * Appends the collection record count to 'builder' for the collection represented by 'nss'. |
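As documented in the header comment above, `appendCollectionStorageStats()` now takes an optional `filterObj`, which `$_internalAllCollectionStats` derives from an absorbed `$project`. A hedged shell sketch (internal stage, names as used in this patch) of a pipeline whose projection selects a single stats group:

```javascript
// Hypothetical sketch, not part of the commit. The absorbed $project serializes to a
// filter like {storageStats: {storageSize: true}}; per the field-to-group mapping in
// storage_stats.cpp, "storageSize" selects only the record-store group, so the record
// stats, index stats, and total-size groups are not computed for each collection.
const adminDb = db.getSiblingDB("admin");
const res = adminDb
    .aggregate([
        {$_internalAllCollectionStats: {stats: {storageStats: {}}}},
        {$project: {"ns": 1, "storageStats.storageSize": 1}}
    ])
    .toArray();
printjson(res);
```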