author    Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> 2023-04-28 07:50:21 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-04-28 08:22:02 +0000
commit    26b4851a412cc8b9b4a18cdb6cd0f9f642e06aa7 (patch)
tree      1d78265a333325eaf8a8c8404bec34b5386fbce7
parent    8c9ab2dc711df435840237c90fb04d6ff3cb8b71 (diff)
download  mongo-r6.0.6-rc1.tar.gz
SERVER-76550 Shards undergoing draining must chop big chunks to move them off (tags: r6.0.6-rc1, r6.0.6)
-rw-r--r--  jstests/sharding/enforce_zone_policy.js                        |  3
-rw-r--r--  jstests/sharding/shard_drain_works_with_chunks_of_any_size.js  | 92
-rw-r--r--  src/mongo/db/s/balancer/balancer_policy.cpp                    | 55
3 files changed, 137 insertions(+), 13 deletions(-)
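
In short: before this patch a draining shard always scheduled whole-chunk migrations, so a chunk far bigger than the configured chunk size could stall removeShard; with this patch, whenever collection data-size information is available the balancer emits range-based migrations that carry only the chunk's lower bound (capped at maxChunkSizeBytes), while jumbo chunks, which cannot be split, are still force-moved in one piece. The shell-level equivalent is a moveRange command with the upper bound omitted. The snippet below is a minimal sketch, not part of the patch, assuming it runs inside a ShardingTest with a collection test.collToDrain sharded on {x: 1} (namespace, bound and destination shard are placeholders):

// Move off only a piece of the chunk containing {x: 0}; with `max` omitted the
// server chooses the upper bound itself, bounded by the configured chunk size
// (per the new test's description of moveRange behaviour).
assert.commandWorked(st.s.adminCommand({
    moveRange: 'test.collToDrain',
    min: {x: 0},                   // lower bound of the range to move
    toShard: st.shard0.shardName,  // destination shard
}));
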
diff --git a/jstests/sharding/enforce_zone_policy.js b/jstests/sharding/enforce_zone_policy.js
index f21c313bbd4..2ded590cb09 100644
--- a/jstests/sharding/enforce_zone_policy.js
+++ b/jstests/sharding/enforce_zone_policy.js
@@ -96,7 +96,8 @@ st.addTagRange(ns, {_id: MinKey}, {_id: MaxKey}, 'a');
assertBalanceCompleteAndStable(function() {
var counts = st.chunkCounts(collName);
printjson(counts);
- return counts[st.shard0.shardName] == 11 && counts[st.shard1.shardName] == 0 &&
+ // All chunks must have been moved to shard 0, none left on shards 1 and 2
+ return counts[st.shard0.shardName] > 0 && counts[st.shard1.shardName] == 0 &&
counts[st.shard2.shardName] == 0;
}, 'all chunks to zone a');
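
The relaxed assertion reflects the new behaviour: once draining and zone enforcement may split oversized chunks, the exact number of chunks that land on shard0 is no longer deterministic, so the test only checks that shard0 owns chunks while shards 1 and 2 own none. An equivalent, split-agnostic ownership check could be written directly against config.chunks; this is a hypothetical helper for illustration, not part of the patch:

// Assert that every chunk of `ns` lives on `shardName`, regardless of how many
// pieces the balancer split the original range into.
function assertAllChunksOwnedBy(st, ns, shardName) {
    const configDB = st.s.getDB('config');
    const uuid = configDB.collections.findOne({_id: ns}).uuid;  // chunks are keyed by collection UUID
    const strays = configDB.chunks.countDocuments({uuid: uuid, shard: {$ne: shardName}});
    assert.eq(0, strays, 'found chunks of ' + ns + ' outside ' + shardName);
}
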
diff --git a/jstests/sharding/shard_drain_works_with_chunks_of_any_size.js b/jstests/sharding/shard_drain_works_with_chunks_of_any_size.js
new file mode 100644
index 00000000000..b9f8aff9460
--- /dev/null
+++ b/jstests/sharding/shard_drain_works_with_chunks_of_any_size.js
@@ -0,0 +1,92 @@
+/*
+ * Shard a collection with documents spread on 2 shards and then call `removeShard` checking that:
+ * - Huge non-jumbo chunks are split during draining (moveRange moves off pieces of `chunkSize` MB)
+ * - Jumbo chunks are moved off (without splitting, since it's not possible)
+ *
+ * Regression test for SERVER-76550.
+ *
+ * @tags: [ requires_fcv_60 ]
+ */
+
+(function() {
+"use strict";
+load("jstests/sharding/libs/find_chunks_util.js");
+
+function removeShard(st, shardName, timeout) {
+ if (timeout == undefined) {
+ timeout = 5 * 60 * 1000; // 5 minutes
+ }
+
+ assert.soon(function() {
+ const res = st.s.adminCommand({removeShard: shardName});
+ if (!res.ok && res.code === ErrorCodes.ShardNotFound) {
+ // If the config server primary steps down right after removing the config.shards doc
+ // for the shard but before responding with "state": "completed", the mongos would retry
+ // the _configsvrRemoveShard command against the new config server primary, which would
+ // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload
+ // after the config.shards doc for the shard was removed. This would cause the command
+ // to fail with ShardNotFound.
+ return true;
+ }
+ assert.commandWorked(res);
+ return res.state == 'completed';
+ }, "failed to remove shard " + shardName + " within " + timeout + "ms", timeout);
+}
+
+const st = new ShardingTest({other: {enableBalancer: false, chunkSize: 1}});
+const mongos = st.s0;
+const configDB = st.getDB('config');
+
+st.forEachConfigServer((conn) => {
+ assert.commandWorked(conn.adminCommand({
+ configureFailPoint: 'overrideBalanceRoundInterval',
+ mode: 'alwaysOn',
+ data: {intervalMs: 100}
+ }));
+});
+
+const dbName = 'test';
+const collName = 'collToDrain';
+const ns = dbName + '.' + collName;
+const db = st.getDB(dbName);
+const coll = db.getCollection(collName);
+
+// Shard collection with shard0 as db primary
+assert.commandWorked(
+ mongos.adminCommand({enablesharding: dbName, primaryShard: st.shard0.shardName}));
+assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: {x: 1}}));
+
+// shard0 owns docs with shard key [MinKey, 0), shard1 owns docs with shard key [0, MaxKey)
+assert.commandWorked(st.s.adminCommand(
+ {moveRange: ns, min: {x: 0}, max: {x: MaxKey}, toShard: st.shard1.shardName}));
+
+// Insert ~20MB of docs with different shard keys (10MB on shard0 and 10MB on shard1)
+// and ~10MB of docs with the same shard key (jumbo chunk)
+const big = 'X'.repeat(1024 * 1024); // 1MB
+const jumboKey = 100;
+var bulk = coll.initializeUnorderedBulkOp();
+for (var i = -10; i < 10; i++) {
+ bulk.insert({x: i, big: big});
+ bulk.insert({x: jumboKey, big: big});
+}
+assert.commandWorked(bulk.execute());
+
+// Check that there are only 2 big chunks before starting draining
+const chunksBeforeDrain = findChunksUtil.findChunksByNs(configDB, ns).toArray();
+assert.eq(2, chunksBeforeDrain.length);
+
+st.startBalancer();
+
+// Remove shard 1 and wait for all chunks to be moved off from it
+removeShard(st, st.shard1.shardName);
+
+// Check that after draining there are 12 chunks on shard0:
+// - [MinKey, 0) original chunk on shard 0
+// - [0, 1), [1, 2), ... [8, 9) 1 MB chunks
+// - [9, MaxKey) 10MB jumbo chunk
+const chunksAfterDrain =
+ findChunksUtil.findChunksByNs(configDB, ns, {shard: st.shard0.shardName}).toArray();
+assert.eq(12, chunksAfterDrain.length);
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/balancer/balancer_policy.cpp b/src/mongo/db/s/balancer/balancer_policy.cpp
index 29aa72109ac..6d4c1be900a 100644
--- a/src/mongo/db/s/balancer/balancer_policy.cpp
+++ b/src/mongo/db/s/balancer/balancer_policy.cpp
@@ -504,11 +504,26 @@ MigrateInfosWithReason BalancerPolicy::balance(
return boost::none;
}();
- migrations.emplace_back(to,
- distribution.nss(),
- chunk,
- MoveChunkRequest::ForceJumbo::kForceBalancer,
- maxChunkSizeBytes);
+ if (collDataSizeInfo.has_value()) {
+ migrations.emplace_back(
+ to,
+ chunk.getShard(),
+ distribution.nss(),
+ chunk.getCollectionUUID(),
+ chunk.getMin(),
+ boost::none /* max */,
+ chunk.getVersion(),
+ // Always force jumbo chunks to be migrated off draining shards
+ MoveChunkRequest::ForceJumbo::kForceBalancer,
+ maxChunkSizeBytes);
+ } else {
+ migrations.emplace_back(to,
+ distribution.nss(),
+ chunk,
+ MoveChunkRequest::ForceJumbo::kForceBalancer,
+ maxChunkSizeBytes);
+ }
+
if (firstReason == MigrationReason::none) {
firstReason = MigrationReason::drain;
}
@@ -584,12 +599,28 @@ MigrateInfosWithReason BalancerPolicy::balance(
return boost::none;
}();
- migrations.emplace_back(to,
- distribution.nss(),
- chunk,
- forceJumbo ? MoveChunkRequest::ForceJumbo::kForceBalancer
- : MoveChunkRequest::ForceJumbo::kDoNotForce,
- maxChunkSizeBytes);
+ if (collDataSizeInfo.has_value()) {
+ migrations.emplace_back(to,
+ chunk.getShard(),
+ distribution.nss(),
+ chunk.getCollectionUUID(),
+ chunk.getMin(),
+ boost::none /* max */,
+ chunk.getVersion(),
+ forceJumbo
+ ? MoveChunkRequest::ForceJumbo::kForceBalancer
+ : MoveChunkRequest::ForceJumbo::kDoNotForce,
+ maxChunkSizeBytes);
+ } else {
+ migrations.emplace_back(to,
+ distribution.nss(),
+ chunk,
+ forceJumbo
+ ? MoveChunkRequest::ForceJumbo::kForceBalancer
+ : MoveChunkRequest::ForceJumbo::kDoNotForce,
+ maxChunkSizeBytes);
+ }
+
if (firstReason == MigrationReason::none) {
firstReason = MigrationReason::zoneViolation;
}
@@ -882,7 +913,7 @@ bool BalancerPolicy::_singleZoneBalanceBasedOnDataSize(
distribution.nss(),
chunk.getCollectionUUID(),
chunk.getMin(),
- boost::none /* call moveRange*/,
+ boost::none /* max */,
chunk.getVersion(),
forceJumbo,
collDataSizeInfo.maxChunkSizeBytes);
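
The balancer_policy.cpp change applies, to the draining and zone-violation paths, the same pattern already used by _singleZoneBalanceBasedOnDataSize (visible in the last hunk): when per-collection data-size information is present, the migration is built from the chunk's lower bound with no upper bound, the collection UUID, the chunk version and maxChunkSizeBytes, so the move can be chopped like a moveRange; otherwise the legacy whole-chunk constructor is kept. The firstReason values set here (drain, zoneViolation) appear to be what surfaces through the balancerCollectionStatus command, which can be used from the shell to watch a drain make progress. A sketch, assuming it runs inside a ShardingTest like the new jstest; the report shape is illustrative, not asserted by the patch:

// While shard1 is draining, the collection is reported as non-compliant until
// every one of its chunks has been moved off the draining shard.
const res = assert.commandWorked(st.s.adminCommand({balancerCollectionStatus: ns}));
// Illustrative shape while the drain is in progress:
// { balancerCompliant: false, firstComplianceViolation: 'draining', ok: 1 }
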