From 3548d42acd178b5bfb26d4688062dc7e01d82d70 Mon Sep 17 00:00:00 2001
From: Tommaso Tocci <tommaso.tocci@mongodb.com>
Date: Fri, 13 Mar 2020 22:25:07 +0100
Subject: SERVER-44892 getShardDistribution should use $collStats agg stage
 instead of collStats command

(cherry picked from commit eae89bb9aa53a7b3a7164d971be8e8be991d33a2)
---
 src/mongo/shell/collection.js | 124 ++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 59 deletions(-)
diff --git a/src/mongo/shell/collection.js b/src/mongo/shell/collection.js
index 877e9238dab..4b552d4da3f 100644
--- a/src/mongo/shell/collection.js
+++ b/src/mongo/shell/collection.js
@@ -1171,88 +1171,94 @@ DBCollection.autocomplete = function(obj) {
     return ret;
 };
 
-// Sharding additions
-
-/*
-Usage :
-
-mongo <mongos>
-> load('path-to-file/shardingAdditions.js')
-Loading custom sharding extensions...
-true
-
-> var collection = db.getMongo().getCollection("foo.bar")
-> collection.getShardDistribution() // prints statistics related to the collection's data
-distribution
-
-> collection.getSplitKeysForChunks() // generates split points for all chunks in the collection,
-based on the
-                                     // default maxChunkSize or alternately a specified chunk size
-> collection.getSplitKeysForChunks( 10 ) // Mb
-
-> var splitter = collection.getSplitKeysForChunks() // by default, the chunks are not split, the
-keys are just
-                                                    // found.  A splitter function is returned which
-will actually
-                                                    // do the splits.
-
-> splitter() // ! Actually executes the splits on the cluster !
-
-*/
+/**
+ * Return true if the collection has been sharded, i.e. config.collections has an entry for it.
+ *
+ * @method
+ * @return {boolean} true when the count of documents with _id == this._fullName is non-zero
+ */
+DBCollection.prototype._isSharded = function() {
+    return !!this._db.getSiblingDB("config").collections.countDocuments({_id: this._fullName});
+};
 
+/**
+ * Prints per-shard and total data size, document count and chunk count for this collection.
+ */
 DBCollection.prototype.getShardDistribution = function() {
+    var config = this.getDB().getSiblingDB("config");
 
-    var stats = this.stats();
-
-    if (!stats.sharded) {
+    if (!this._isSharded()) {
         print("Collection " + this + " is not sharded.");
         return;
     }
 
-    var config = this.getDB().getSiblingDB("config");
-
-    var numChunks = 0;
-
-    for (var shard in stats.shards) {
-        var shardDoc = config.shards.findOne({_id: shard});
-
-        print("\nShard " + shard + " at " + shardDoc.host);
+    var collStats = this.aggregate({"$collStats": {storageStats: {}}});
 
-        var shardStats = stats.shards[shard];
-
-        var chunks = config.chunks.find({_id: sh._collRE(this), shard: shard}).toArray();
+    var totals = {numChunks: 0, size: 0, count: 0};  // summed over every shard in the loop below
+    var conciseShardsStats = [];  // per-shard summaries, reused for the percentage lines
 
-        numChunks += chunks.length;
+    collStats.forEach(function(extShardStats) {  // each result is treated as one shard's stats
+        // Keep only the fields printed below; host and numChunks are looked up in the config db
+        const shardStats = {
+            shardId: extShardStats.shard,
+            host: config.shards.findOne({_id: extShardStats.shard}).host,
+            size: extShardStats.storageStats.size,
+            count: extShardStats.storageStats.count,
+            numChunks:
+                config.chunks.countDocuments({ns: extShardStats.ns, shard: extShardStats.shard}),
+            avgObjSize: extShardStats.storageStats.avgObjSize
+        };
 
-        var estChunkData = (chunks.length == 0) ? 0 : shardStats.size / chunks.length;
-        var estChunkCount = (chunks.length == 0) ? 0 : Math.floor(shardStats.count / chunks.length);
+        print("\nShard " + shardStats.shardId + " at " + shardStats.host);
 
+        var estChunkData =
+            (shardStats.numChunks == 0) ? 0 : (shardStats.size / shardStats.numChunks);
+        var estChunkCount =
+            (shardStats.numChunks == 0) ? 0 : Math.floor(shardStats.count / shardStats.numChunks);
         print(" data : " + sh._dataFormat(shardStats.size) + " docs : " + shardStats.count +
-              " chunks : " + chunks.length);
+              " chunks : " + shardStats.numChunks);
         print(" estimated data per chunk : " + sh._dataFormat(estChunkData));
         print(" estimated docs per chunk : " + estChunkCount);
-    }
 
-    print("\nTotals");
-    print(" data : " + sh._dataFormat(stats.size) + " docs : " + stats.count + " chunks : " +
-          numChunks);
-    for (var shard in stats.shards) {
-        var shardStats = stats.shards[shard];
+        totals.size += shardStats.size;
+        totals.count += shardStats.count;
+        totals.numChunks += shardStats.numChunks;
 
+        conciseShardsStats.push(shardStats);
+    });
+
+    print("\nTotals");
+    print(" data : " + sh._dataFormat(totals.size) + " docs : " + totals.count + " chunks : " +
+          totals.numChunks);
+    conciseShardsStats.forEach(function(shardStats) {
         var estDataPercent =
-            (stats.size == 0) ? 0 : (Math.floor(shardStats.size / stats.size * 10000) / 100);
+            (totals.size == 0) ? 0 : (Math.floor(shardStats.size / totals.size * 10000) / 100);
         var estDocPercent =
-            (stats.count == 0) ? 0 : (Math.floor(shardStats.count / stats.count * 10000) / 100);
+            (totals.count == 0) ? 0 : (Math.floor(shardStats.count / totals.count * 10000) / 100);
 
-        print(" Shard " + shard + " contains " + estDataPercent + "% data, " + estDocPercent +
-              "% docs in cluster, " + "avg obj size on shard : " +
-              sh._dataFormat(stats.shards[shard].avgObjSize));
-    }
+        print(" Shard " + shardStats.shardId + " contains " + estDataPercent + "% data, " +
+              estDocPercent + "% docs in cluster, " + "avg obj size on shard : " +
+              sh._dataFormat(shardStats.avgObjSize));
+    });
 
     print("\n");
-
 };
 
+/*
+
+Generates split points for all chunks in the collection, based on the default maxChunkSize
+> collection.getSplitKeysForChunks()
+
+or, alternatively, based on a chunk size specified in Mb.
+> collection.getSplitKeysForChunks( 10 )
+
+By default the chunks are not split; the keys are only found. A splitter function is returned,
+and calling it will actually do the splits.
+> var splitter = collection.getSplitKeysForChunks()
+> splitter()
+
+*/
+
 DBCollection.prototype.getSplitKeysForChunks = function(chunkSize) {
 
     var stats = this.stats();
-- 
cgit v1.2.1