author     Tommaso Tocci <tommaso.tocci@mongodb.com>  2020-03-13 22:25:07 +0100
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-03-30 10:54:33 +0000
commit     0a151a58d8977f6341f53883f2ce5bdcbd545431 (patch)
tree       fc7493a00f139e080bebf6396807c4b171af3519
parent     510f448b5b99aa9951b74958adddf1b9c857dd4d (diff)
download   mongo-0a151a58d8977f6341f53883f2ce5bdcbd545431.tar.gz
SERVER-44892 getShardDistribution should use $collStats agg stage instead of collStats command
(cherry picked from commit 67a6dee604b91e759d30d97d72b0cb9ddbdd06f8)
-rw-r--r--  src/mongo/shell/collection.js  120
1 file changed, 64 insertions, 56 deletions
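
For context on the diff below: the shell previously derived these statistics from the collStats command (via DBCollection.stats()), and now reads them from a $collStats aggregation stage, which on a sharded collection returns one document per shard. A minimal sketch of the two data sources, assuming a hypothetical sharded collection "test.coll" queried through mongos:

    // Before: the collStats command, whose reply nests per-shard
    // statistics under a "shards" sub-document
    var stats = db.getSiblingDB("test").coll.stats();
    printjson(stats.shards);

    // After: the $collStats stage; against a sharded collection each
    // result document carries a "shard" field plus its storageStats
    db.getSiblingDB("test").coll.aggregate([{$collStats: {storageStats: {}}}])
        .forEach(function(doc) {
            print(doc.shard + ": " + doc.storageStats.count + " docs, " +
                  doc.storageStats.size + " bytes");
        });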
diff --git a/src/mongo/shell/collection.js b/src/mongo/shell/collection.js
index 96bbdc9f9a9..5ccc5e28932 100644
--- a/src/mongo/shell/collection.js
+++ b/src/mongo/shell/collection.js
@@ -1110,86 +1110,94 @@ DBCollection.autocomplete = function(obj) {
return ret;
};
-// Sharding additions
-
-/*
-Usage :
-
-mongo <mongos>
-> load('path-to-file/shardingAdditions.js')
-Loading custom sharding extensions...
-true
-
-> var collection = db.getMongo().getCollection("foo.bar")
-> collection.getShardDistribution() // prints statistics related to the collection's data
-distribution
-
-> collection.getSplitKeysForChunks() // generates split points for all chunks in the collection,
-based on the
- // default maxChunkSize or alternately a specified chunk size
-> collection.getSplitKeysForChunks( 10 ) // Mb
-
-> var splitter = collection.getSplitKeysForChunks() // by default, the chunks are not split, the
-keys are just
- // found. A splitter function is returned which
-will actually
- // do the splits.
-
-> splitter() // ! Actually executes the splits on the cluster !
-
-*/
+/**
+ * Return true if the collection has been sharded.
+ *
+ * @method
+ * @return {boolean}
+ */
+DBCollection.prototype._isSharded = function() {
+ return !!this._db.getSiblingDB("config").collections.countDocuments({_id: this._fullName});
+};
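
Note: the helper above keys the sharding check off config.collections, so it no longer needs a collStats round trip. A rough standalone equivalent, with the namespace "test.coll" purely illustrative:

    // A non-zero count of config.collections entries for the full
    // namespace means the collection has been sharded
    var isSharded =
        !!db.getSiblingDB("config").collections.countDocuments({_id: "test.coll"});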
+/**
+ * Prints statistics related to the collection's data distribution
+ */
DBCollection.prototype.getShardDistribution = function() {
- var stats = this.stats();
+ var config = this.getDB().getSiblingDB("config");
- if (!stats.sharded) {
+ if (!this._isSharded()) {
print("Collection " + this + " is not sharded.");
return;
}
- var config = this.getDB().getSiblingDB("config");
-
- var numChunks = 0;
-
- for (var shard in stats.shards) {
- var shardDoc = config.shards.findOne({_id: shard});
+ var collStats = this.aggregate({"$collStats": {storageStats: {}}});
- print("\nShard " + shard + " at " + shardDoc.host);
+ var totals = {numChunks: 0, size: 0, count: 0};
+ var conciseShardsStats = [];
- var shardStats = stats.shards[shard];
-
- var chunks = config.chunks.find({_id: sh._collRE(this), shard: shard}).toArray();
+ collStats.forEach(function(extShardStats) {
+ // Extract and store only the relevant subset of the stats for this shard
+ const shardStats = {
+ shardId: extShardStats.shard,
+ host: config.shards.findOne({_id: extShardStats.shard}).host,
+ size: extShardStats.storageStats.size,
+ count: extShardStats.storageStats.count,
+ numChunks:
+ config.chunks.countDocuments({ns: extShardStats.ns, shard: extShardStats.shard}),
+ avgObjSize: extShardStats.storageStats.avgObjSize
+ };
- numChunks += chunks.length;
-
- var estChunkData = (chunks.length == 0) ? 0 : shardStats.size / chunks.length;
- var estChunkCount = (chunks.length == 0) ? 0 : Math.floor(shardStats.count / chunks.length);
+ print("\nShard " + shardStats.shardId + " at " + shardStats.host);
+ var estChunkData =
+ (shardStats.numChunks == 0) ? 0 : (shardStats.size / shardStats.numChunks);
+ var estChunkCount =
+ (shardStats.numChunks == 0) ? 0 : Math.floor(shardStats.count / shardStats.numChunks);
print(" data : " + sh._dataFormat(shardStats.size) + " docs : " + shardStats.count +
- " chunks : " + chunks.length);
+ " chunks : " + shardStats.numChunks);
print(" estimated data per chunk : " + sh._dataFormat(estChunkData));
print(" estimated docs per chunk : " + estChunkCount);
- }
- print("\nTotals");
- print(" data : " + sh._dataFormat(stats.size) + " docs : " + stats.count +
- " chunks : " + numChunks);
- for (var shard in stats.shards) {
- var shardStats = stats.shards[shard];
+ totals.size += shardStats.size;
+ totals.count += shardStats.count;
+ totals.numChunks += shardStats.numChunks;
+
+ conciseShardsStats.push(shardStats);
+ });
+ print("\nTotals");
+ print(" data : " + sh._dataFormat(totals.size) + " docs : " + totals.count +
+ " chunks : " + totals.numChunks);
+ for (const shardStats of conciseShardsStats) {
var estDataPercent =
- (stats.size == 0) ? 0 : (Math.floor(shardStats.size / stats.size * 10000) / 100);
+ (totals.size == 0) ? 0 : (Math.floor(shardStats.size / totals.size * 10000) / 100);
var estDocPercent =
- (stats.count == 0) ? 0 : (Math.floor(shardStats.count / stats.count * 10000) / 100);
+ (totals.count == 0) ? 0 : (Math.floor(shardStats.count / totals.count * 10000) / 100);
- print(" Shard " + shard + " contains " + estDataPercent + "% data, " + estDocPercent +
- "% docs in cluster, " +
- "avg obj size on shard : " + sh._dataFormat(stats.shards[shard].avgObjSize));
+ print(" Shard " + shardStats.shardId + " contains " + estDataPercent + "% data, " +
+ estDocPercent + "% docs in cluster, " +
+ "avg obj size on shard : " + sh._dataFormat(shardStats.avgObjSize));
}
print("\n");
};
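
A usage sketch of the rewritten function follows; the shard name, host string, and figures are invented for illustration, but the layout mirrors the print() calls above:

    > db.getMongo().getCollection("test.coll").getShardDistribution()

    Shard shard01 at shard01/node1.example.net:27018
     data : 1MiB docs : 5000 chunks : 2
     estimated data per chunk : 512KiB
     estimated docs per chunk : 2500

    Totals
     data : 1MiB docs : 5000 chunks : 2
     Shard shard01 contains 100% data, 100% docs in cluster, avg obj size on shard : 209B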
+/*
+
+Generates split points for all chunks in the collection, based on the default maxChunkSize
+> collection.getSplitKeysForChunks()
+
+or alternately a specified chunk size in Mb.
+> collection.getSplitKeysForChunks( 10 )
+
+By default, the chunks are not split, the keys are just found. A splitter function is returned which
+will actually do the splits.
+> var splitter = collection.getSplitKeysForChunks()
+> splitter()
+
+*/
+
DBCollection.prototype.getSplitKeysForChunks = function(chunkSize) {
var stats = this.stats();