diff options
-rw-r--r-- | jstests/sharding/timeseries_cluster_collstats.js | 77 | ||||
-rw-r--r-- | src/mongo/s/commands/cluster_coll_stats_cmd.cpp | 77 |
2 files changed, 154 insertions, 0 deletions
diff --git a/jstests/sharding/timeseries_cluster_collstats.js b/jstests/sharding/timeseries_cluster_collstats.js new file mode 100644 index 00000000000..815277d93f8 --- /dev/null +++ b/jstests/sharding/timeseries_cluster_collstats.js @@ -0,0 +1,77 @@ +/** + * Tests that the cluster collStats command returns timeseries statistics in the expected format. + * + * { + * ...., + * "ns" : ..., + * ...., + * "timeseries" : { + * .... (sums the shards' field values) + * } + * ...., + * "shards" : { + * <shardName> { + * "timeseries" : { + * .... (single shard's field values) + * } + * .... + * } + * } + * .... + * } + * + * @tags: [ + * requires_fcv_50, + * ] + */ + +(function() { +load("jstests/core/timeseries/libs/timeseries.js"); + +// Sharded timeseries collections are not yet supported. Therefore, the cluster will not possess the +// same collections/indexes. +TestData.skipCheckingIndexesConsistentAcrossCluster = true; + +const st = new ShardingTest({shards: 2}); + +if (!TimeseriesTest.timeseriesCollectionsEnabled(st.shard0)) { + jsTestLog("Skipping test because the time-series collection feature flag is disabled"); + st.stop(); + return; +} + +const dbName = 'testDB'; +const mongosDB = st.s.getDB(dbName); +const collName = 'testColl'; +const mongosColl = mongosDB.getCollection(collName); + +// Create a timeseries collection. +assert.commandWorked( + mongosDB.createCollection(collName, {timeseries: {timeField: 'tm', metaField: 'xx'}})); + +// Populate the timeseries collection with some data. More interesting test case, and populates the +// statistics results. +const numberDoc = 20; +for (let i = 0; i < numberDoc; i++) { + assert.commandWorked(mongosColl.insert({'tm': ISODate(), 'xx': i})); +} +assert.eq(mongosColl.count(), numberDoc); + +// The cluster collStats command should pull the shard's 'timeseries' data to the top level of the +// command results. 
+const clusterCollStatsResult = assert.commandWorked(mongosDB.runCommand({collStats: collName})); +jsTestLog("Cluster collStats command result: " + tojson(clusterCollStatsResult)); +assert(clusterCollStatsResult.timeseries, + "Expected a top-level 'timeseries' field but didn't find one: " + + tojson(clusterCollStatsResult)); + +// Check that the top-level 'timeseries' fields match the shard's, that the stats were correctly +// pulled up. +assert( + clusterCollStatsResult.shards["timeseries_cluster_collstats-rs1"].timeseries, + "Expected a shard 'timeseries' field but didn't find one: " + tojson(clusterCollStatsResult)); +assert.docEq(clusterCollStatsResult.timeseries, + clusterCollStatsResult.shards["timeseries_cluster_collstats-rs1"].timeseries); + +st.stop(); +})(); diff --git a/src/mongo/s/commands/cluster_coll_stats_cmd.cpp b/src/mongo/s/commands/cluster_coll_stats_cmd.cpp index fe30992c35f..029b22ae6d8 100644 --- a/src/mongo/s/commands/cluster_coll_stats_cmd.cpp +++ b/src/mongo/s/commands/cluster_coll_stats_cmd.cpp @@ -73,6 +73,72 @@ BSONObj scaleIndividualShardStatistics(const BSONObj& shardStats, int scale) { return builder.obj(); } +/** + * Takes the shard's "shardTimeseriesStats" and adds it to the sum across shards saved in + * "clusterTimeseriesStats". All of the mongod "timeseries" collStats are numbers except for the + * "bucketsNs" field, which we specially track in "timeseriesBucketsNs". + * + * Invariants that "shardTimeseriesStats" is non-empty. + */ +void aggregateTimeseriesStats(const BSONObj& shardTimeseriesStats, + std::map<std::string, long long>* clusterTimeseriesStats, + std::string* timeseriesBucketsNs) { + invariant(!shardTimeseriesStats.isEmpty()); + + // It's currently impossible to have multiple shards with timeseries info because sharded + // timeseries collections are not yet supported. 
+ invariant(clusterTimeseriesStats->empty()); + + for (const auto& shardTimeseriesStat : shardTimeseriesStats) { + // "bucketsNs" is the only timeseries stat that is not a number, so it requires special + // handling. + if (shardTimeseriesStat.type() == BSONType::String) { + invariant(shardTimeseriesStat.fieldNameStringData() == "bucketsNs", + str::stream() << "Found an unexpected field '" + << shardTimeseriesStat.fieldNameStringData() + << "' in a shard's 'timeseries' subobject: " + << shardTimeseriesStats.toString()); + if (timeseriesBucketsNs->empty()) { + *timeseriesBucketsNs = shardTimeseriesStat.String(); + } else { + // All shards should have the same timeseries buckets collection namespace. + invariant(*timeseriesBucketsNs == shardTimeseriesStat.String(), + str::stream() + << "Found different timeseries buckets collection namespaces on " + << "different shards, for the same collection. Previous shard's ns: " + << *timeseriesBucketsNs + << ", current shard's ns: " << shardTimeseriesStat.String()); + } + continue; + } + + // Use 'numberLong' to ensure integers are safely converted to long type. + (*clusterTimeseriesStats)[shardTimeseriesStat.fieldName()] += + shardTimeseriesStat.numberLong(); + } +} + +/** + * Adds a "timeseries" field to "result" that contains the summed timeseries statistics in + * "clusterTimeseriesStats". "timeseriesBucketNs" is specially handled and added to the "timeseries" + * sub-document because it is the only non-number timeseries statistic. + * + * Invariants that "clusterTimeseriesStats" and "timeseriesBucketNs" are set. 
+ */ +void appendTimeseriesInfoToResult(const std::map<std::string, long long>& clusterTimeseriesStats, + const std::string& timeseriesBucketNs, + BSONObjBuilder* result) { + invariant(!clusterTimeseriesStats.empty()); + invariant(!timeseriesBucketNs.empty()); + + BSONObjBuilder timeseriesSubObjBuilder(result->subobjStart("timeseries")); + timeseriesSubObjBuilder.append("bucketsNs", timeseriesBucketNs); + for (const auto& statEntry : clusterTimeseriesStats) { + timeseriesSubObjBuilder.appendNumber(statEntry.first, statEntry.second); + } + timeseriesSubObjBuilder.done(); +} + class CollectionStats : public BasicCommand { public: CollectionStats() : BasicCommand("collStats", "collstats") {} @@ -151,12 +217,14 @@ public: BSONObjBuilder shardStats; std::map<std::string, long long> counts; std::map<std::string, long long> indexSizes; + std::map<std::string, long long> clusterTimeseriesStats; long long maxSize = 0; long long unscaledCollSize = 0; int nindexes = 0; bool warnedAboutIndexes = false; + std::string timeseriesBucketsNs; for (const auto& shardResult : unscaledShardResults) { const auto& shardId = shardResult.shardId; @@ -189,6 +257,9 @@ public: // match across shards if (!result.hasField(e.fieldName())) result.append(e); + } else if (fieldName == "timeseries") { + aggregateTimeseriesStats( + e.Obj(), &clusterTimeseriesStats, &timeseriesBucketsNs); } else if (fieldIsAnyOf( fieldName, {"count", "size", "storageSize", "totalIndexSize", "totalSize"})) { @@ -247,6 +318,12 @@ public: } } + if (!clusterTimeseriesStats.empty() || !timeseriesBucketsNs.empty()) { + // 'clusterTimeseriesStats' and 'timeseriesBucketsNs' should both be set. If only one is + // ever set, the error will be caught in appendTimeseriesInfoToResult(). + appendTimeseriesInfoToResult(clusterTimeseriesStats, timeseriesBucketsNs, &result); + } + { BSONObjBuilder ib(result.subobjStart("indexSizes")); for (const auto& entry : indexSizes) { |