SERVER-57287 Pull the shards' summed 'timeseries' field values to the top-level of the cluster collStats command response

(cherry picked from commit 91ce36557dc90c85544791a47f880509ada6469e)
author: Dianna Hohensee <dianna.hohensee@mongodb.com> 2021-06-08 18:51:19 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-06-17 16:00:45 +0000
commit: 79cc015b4294ef55f4e09d12ab5bc0b95bc3a853 (patch)
tree: 57fd41503aaae0ce049024cf74e0661f64152081
parent: 41ddb7524a860cce66131901094ee6d4685a3dc3 (diff)
download: mongo-79cc015b4294ef55f4e09d12ab5bc0b95bc3a853.tar.gz
2 files changed, 154 insertions, 0 deletions
diff --git a/jstests/sharding/timeseries_cluster_collstats.js b/jstests/sharding/timeseries_cluster_collstats.js
new file mode 100644
index 00000000000..815277d93f8
--- /dev/null
+++ b/jstests/sharding/timeseries_cluster_collstats.js
@@ -0,0 +1,77 @@
+/**
+ * Tests that the cluster collStats command returns timeseries statistics in the expected format.
+ *
+ * {
+ *     ....,
+ *     "ns" : ...,
+ *     ....,
+ *     "timeseries" : {
+ *         .... (sums the shards' field values)
+ *     }
+ *     ....,
+ *     "shards" : {
+ *         <shardName> : {
+ *             "timeseries" : {
+ *                 .... (single shard's field values)
+ *             }
+ *             ....
+ *         }
+ *     }
+ *     ....
+ * }
+ *
+ * @tags: [
+ *   requires_fcv_50,
+ * ]
+ */
+
+(function() {
+load("jstests/core/timeseries/libs/timeseries.js");
+
+// Sharded timeseries collections are not yet supported, so the shards will not all hold the same
+// collections/indexes. Skip the index consistency check across the cluster.
+TestData.skipCheckingIndexesConsistentAcrossCluster = true;
+
+const st = new ShardingTest({shards: 2});
+
+if (!TimeseriesTest.timeseriesCollectionsEnabled(st.shard0)) {
+    jsTestLog("Skipping test because the time-series collection feature flag is disabled");
+    st.stop();
+    return;
+}
+
+const dbName = 'testDB';
+const mongosDB = st.s.getDB(dbName);
+const collName = 'testColl';
+const mongosColl = mongosDB.getCollection(collName);
+
+// Create a timeseries collection through mongos, with time field 'tm' and meta field 'xx'.
+assert.commandWorked(
+    mongosDB.createCollection(collName, {timeseries: {timeField: 'tm', metaField: 'xx'}}));
+
+// Insert some documents so that the test case is more interesting and the 'timeseries'
+// statistics in the collStats results are populated.
+const numberDoc = 20;
+for (let i = 0; i < numberDoc; i++) {
+    assert.commandWorked(mongosColl.insert({'tm': ISODate(), 'xx': i}));
+}
+assert.eq(mongosColl.count(), numberDoc);
+
+// The cluster collStats command should pull the shard's 'timeseries' data up to the top level of
+// the command results.
+const clusterCollStatsResult = assert.commandWorked(mongosDB.runCommand({collStats: collName}));
+jsTestLog("Cluster collStats command result: " + tojson(clusterCollStatsResult));
+assert(clusterCollStatsResult.timeseries,
+       "Expected a top-level 'timeseries' field but didn't find one: " +
+           tojson(clusterCollStatsResult));
+
+// Check that the stats were pulled up correctly: the top-level 'timeseries' fields must match the
+// shard's. NOTE(review): the shard name is hard-coded; confirm that it owns the collection.
+assert(
+    clusterCollStatsResult.shards["timeseries_cluster_collstats-rs1"].timeseries,
+    "Expected a shard 'timeseries' field but didn't find one: " + tojson(clusterCollStatsResult));
+assert.docEq(clusterCollStatsResult.timeseries,
+             clusterCollStatsResult.shards["timeseries_cluster_collstats-rs1"].timeseries);
+
+st.stop();
+})();
diff --git a/src/mongo/s/commands/cluster_coll_stats_cmd.cpp b/src/mongo/s/commands/cluster_coll_stats_cmd.cpp
index fe30992c35f..029b22ae6d8 100644
--- a/src/mongo/s/commands/cluster_coll_stats_cmd.cpp
+++ b/src/mongo/s/commands/cluster_coll_stats_cmd.cpp
@@ -73,6 +73,72 @@ BSONObj scaleIndividualShardStatistics(const BSONObj& shardStats, int scale) {
     return builder.obj();
 }
 
+/**
+ * Adds one shard's "timeseries" collStats sub-object, "shardTimeseriesStats", to the running
+ * cluster-wide sums in "clusterTimeseriesStats". Each non-string field is summed by name via
+ * numberLong(); the only string field allowed is "bucketsNs", which goes to "timeseriesBucketsNs".
+ *
+ * Invariants that "shardTimeseriesStats" is non-empty and "clusterTimeseriesStats" starts empty.
+ */
+void aggregateTimeseriesStats(const BSONObj& shardTimeseriesStats,
+                              std::map<std::string, long long>* clusterTimeseriesStats,
+                              std::string* timeseriesBucketsNs) {
+    invariant(!shardTimeseriesStats.isEmpty());
+
+    // Sharded timeseries collections are not yet supported, so only one shard can report
+    // timeseries info; the cluster-wide sums must therefore still be empty at this point.
+    invariant(clusterTimeseriesStats->empty());
+
+    for (const auto& shardTimeseriesStat : shardTimeseriesStats) {
+        // The only string-typed timeseries stat is "bucketsNs"; it cannot be summed, so it is
+        // recorded separately in "timeseriesBucketsNs".
+        if (shardTimeseriesStat.type() == BSONType::String) {
+            invariant(shardTimeseriesStat.fieldNameStringData() == "bucketsNs",
+                      str::stream() << "Found an unexpected field '"
+                                    << shardTimeseriesStat.fieldNameStringData()
+                                    << "' in a shard's 'timeseries' subobject: "
+                                    << shardTimeseriesStats.toString());
+            if (timeseriesBucketsNs->empty()) {
+                *timeseriesBucketsNs = shardTimeseriesStat.String();
+            } else {
+                // Every shard must report the same timeseries buckets collection namespace.
+                invariant(*timeseriesBucketsNs == shardTimeseriesStat.String(),
+                          str::stream()
+                              << "Found different timeseries buckets collection namespaces on "
+                              << "different shards, for the same collection. Previous shard's ns: "
+                              << *timeseriesBucketsNs
+                              << ", current shard's ns: " << shardTimeseriesStat.String());
+            }
+            continue;
+        }
+
+        // operator[] creates a missing entry as 0; numberLong() yields the value as a long long.
+        (*clusterTimeseriesStats)[shardTimeseriesStat.fieldName()] +=
+            shardTimeseriesStat.numberLong();
+    }
+}
+
+/**
+ * Appends a "timeseries" sub-object to "result". The sub-object holds "bucketsNs", set to
+ * "timeseriesBucketNs", followed by every entry of "clusterTimeseriesStats" appended as a number.
+ * "bucketsNs" is passed separately because it is the only non-number timeseries statistic.
+ *
+ * Invariants that "clusterTimeseriesStats" and "timeseriesBucketNs" are both non-empty.
+ */
+void appendTimeseriesInfoToResult(const std::map<std::string, long long>& clusterTimeseriesStats,
+                                  const std::string& timeseriesBucketNs,
+                                  BSONObjBuilder* result) {
+    invariant(!clusterTimeseriesStats.empty());
+    invariant(!timeseriesBucketNs.empty());
+
+    BSONObjBuilder timeseriesSubObjBuilder(result->subobjStart("timeseries"));
+    timeseriesSubObjBuilder.append("bucketsNs", timeseriesBucketNs);
+    for (const auto& statEntry : clusterTimeseriesStats) {
+        timeseriesSubObjBuilder.appendNumber(statEntry.first, statEntry.second);
+    }
+    timeseriesSubObjBuilder.done();
+}
+
 class CollectionStats : public BasicCommand {
 public:
     CollectionStats() : BasicCommand("collStats", "collstats") {}
@@ -151,12 +217,14 @@ public:
         BSONObjBuilder shardStats;
         std::map<std::string, long long> counts;
         std::map<std::string, long long> indexSizes;
+        std::map<std::string, long long> clusterTimeseriesStats;
 
         long long maxSize = 0;
         long long unscaledCollSize = 0;
 
         int nindexes = 0;
         bool warnedAboutIndexes = false;
+        std::string timeseriesBucketsNs;
 
         for (const auto& shardResult : unscaledShardResults) {
             const auto& shardId = shardResult.shardId;
@@ -189,6 +257,9 @@ public:
                     // match across shards
                     if (!result.hasField(e.fieldName()))
                         result.append(e);
+                } else if (fieldName == "timeseries") {
+                    aggregateTimeseriesStats(
+                        e.Obj(), &clusterTimeseriesStats, &timeseriesBucketsNs);
                 } else if (fieldIsAnyOf(
                                fieldName,
                                {"count", "size", "storageSize", "totalIndexSize", "totalSize"})) {
@@ -247,6 +318,12 @@ public:
             }
         }
 
+        if (!clusterTimeseriesStats.empty() || !timeseriesBucketsNs.empty()) {
+            // 'clusterTimeseriesStats' and 'timeseriesBucketsNs' should both be set. If only one is
+            // ever set, the error will be caught in appendTimeseriesInfoToResult().
+            appendTimeseriesInfoToResult(clusterTimeseriesStats, timeseriesBucketsNs, &result);
+        }
+
         {
             BSONObjBuilder ib(result.subobjStart("indexSizes"));
             for (const auto& entry : indexSizes) {
author	Dianna Hohensee <dianna.hohensee@mongodb.com>	2021-06-08 18:51:19 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-06-17 16:00:45 +0000
commit	79cc015b4294ef55f4e09d12ab5bc0b95bc3a853 (patch)
tree	57fd41503aaae0ce049024cf74e0661f64152081
parent	41ddb7524a860cce66131901094ee6d4685a3dc3 (diff)
download	mongo-79cc015b4294ef55f4e09d12ab5bc0b95bc3a853.tar.gz