SERVER-60652 Backport the autoSplitVector logic to v4.4

author: Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> 2021-10-22 16:55:43 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-10-22 17:08:07 +0000
commit: 6d6a3e814c3a36b180d696eef3a5ae4914c1f306 (patch)
tree: 0f75f4aff8464e07897b82c19115f6f7da596413
parent: 7ea91003d2dcd8d02599dc56a157ffd3f79c9fb6 (diff)
download: mongo-6d6a3e814c3a36b180d696eef3a5ae4914c1f306.tar.gz
24 files changed, 1204 insertions, 161 deletions
diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js
index 1fdaf68dca1..260eaef9b39 100644
--- a/jstests/core/views/views_all_commands.js
+++ b/jstests/core/views/views_all_commands.js
@@ -136,6 +136,14 @@ let viewsCommandTests = {
         skipSharded: true,
     },
     authenticate: {skip: isUnrelated},
+    autoSplitVector: {
+        command: {
+            splitVector: "test.view",
+            keyPattern: {x: 1},
+            maxChunkSize: 1,
+        },
+        expectFailure: true,
+    },
     availableQueryOptions: {skip: isAnInternalCommand},
     balancerCollectionStatus: {
         command: {balancerCollectionStatus: "test.view"},
diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js
index 5bb5d5f6a00..7bcd62f3de3 100644
--- a/jstests/replsets/db_reads_while_recovering_all_commands.js
+++ b/jstests/replsets/db_reads_while_recovering_all_commands.js
@@ -76,6 +76,7 @@ const allCommands = {
     appendOplogNote: {skip: isPrimaryOnly},
     applyOps: {skip: isPrimaryOnly},
     authenticate: {skip: isNotAUserDataRead},
+    autoSplitVector: {skip: isNotAUserDataRead},
     availableQueryOptions: {skip: isNotAUserDataRead},
     buildInfo: {skip: isNotAUserDataRead},
     captrunc: {skip: isPrimaryOnly},
diff --git a/jstests/sharding/autosplit_low_cardinality.js b/jstests/sharding/autosplit_low_cardinality.js
index f9c4d8bf2ae..479c20361a9 100644
--- a/jstests/sharding/autosplit_low_cardinality.js
+++ b/jstests/sharding/autosplit_low_cardinality.js
@@ -42,13 +42,40 @@ insertBigDocsWithKey(-10, 4);
 insertBigDocsWithKey(10, 4);
 waitForOngoingChunkSplits(st);
 
-// At least one split should have been performed
-assert.gte(numChunks(), 2, "Number of chunks is less then 2, no split have been perfomed");
+let expectedNumChunks = 2;
+try {
+    // At least one split should have been performed
+    assert.gte(numChunks(),
+               expectedNumChunks,
+               "Number of chunks is less than 2, no split have been perfomed");
+} catch (e) {
+    // (SERVER-59882) split may not have happened due to commit delay of the inserted documents
+    print("Retrying performing one insert after catching exception " + e);
+    insertBigDocsWithKey(10, 1);
+    waitForOngoingChunkSplits(st);
+    assert.gte(
+        numChunks(),
+        expectedNumChunks,
+        "Number of chunks is less than " + expectedNumChunks + ", no split has been perfomed");
+}
+
+expectedNumChunks++;
 
 insertBigDocsWithKey(20, 4);
 waitForOngoingChunkSplits(st);
 // An additional split should have been performed
-assert.gte(numChunks(), 3, "Number of chunks must be at least 3");
+try {
+    assert.gte(numChunks(), expectedNumChunks, "Number of chunks must be at least 3");
+} catch (e) {
+    // (SERVER-59882) split may not have happened due to commit delay of the inserted documents
+    print("Retrying performing one insert after catching exception " + e);
+    insertBigDocsWithKey(20, 1);
+    waitForOngoingChunkSplits(st);
+    assert.gte(
+        numChunks(),
+        expectedNumChunks,
+        "Number of chunks is less than " + 3 + ", not all expected splits have been perfomed");
+}
 
 st.stop();
 })();
diff --git a/jstests/sharding/presplit.js b/jstests/sharding/presplit.js
index 87d4e81a315..9fbec19f0ec 100644
--- a/jstests/sharding/presplit.js
+++ b/jstests/sharding/presplit.js
@@ -1,3 +1,9 @@
+/*
+ * @tags: [
+ *     requires_fcv_44, # autoSplitVector not present in older v4.2 binaries
+ * ]
+ */
+
 (function() {
 
 var s = new ShardingTest({name: "presplit", shards: 2, mongos: 1, other: {chunkSize: 1}});
@@ -31,7 +37,7 @@ s.adminCommand({shardcollection: "test.foo", key: {_id: 1}});
 
 // Make sure the collection's original chunk got split
 s.printChunks();
-assert.lt(20, s.config.chunks.count({"ns": "test.foo"}), "many chunks assertion");
+assert.lte(20, s.config.chunks.count({"ns": "test.foo"}), "many chunks assertion");
 assert.eq(num, primary.foo.count());
 
 s.printChangeLog();
diff --git a/jstests/sharding/read_write_concern_defaults_application.js b/jstests/sharding/read_write_concern_defaults_application.js
index 43e3abbf3e6..b1afbbadd8b 100644
--- a/jstests/sharding/read_write_concern_defaults_application.js
+++ b/jstests/sharding/read_write_concern_defaults_application.js
@@ -163,6 +163,7 @@ let testCases = {
     },
     applyOps: {skip: "internal command"},
     authenticate: {skip: "does not accept read or write concern"},
+    autoSplitVector: {skip: "internal command"},
     availableQueryOptions: {skip: "internal command"},
     balancerCollectionStatus: {skip: "does not accept read or write concern"},
     balancerStart: {skip: "does not accept read or write concern"},
diff --git a/jstests/sharding/shard_existing.js b/jstests/sharding/shard_existing.js
index 6e3242647f8..8fb475367e3 100644
--- a/jstests/sharding/shard_existing.js
+++ b/jstests/sharding/shard_existing.js
@@ -1,3 +1,9 @@
+/*
+ * @tags: [
+ *   requires_fcv_44, # autoSplitVector not present in older v4.2 binaries
+ * ]
+ */
+
 (function() {
 'use strict';
 
@@ -28,10 +34,10 @@ s.ensurePrimaryShard('test', s.shard1.shardName);
 var res = s.adminCommand({shardcollection: "test.data", key: {_id: 1}});
 printjson(res);
 
-// number of chunks should be approx equal to the total data size / half the chunk size
+// number of chunks should be approx equal to the total data size / chunk size
 var numChunks = s.config.chunks.find({ns: 'test.data'}).itcount();
-var guess = Math.ceil(dataSize / (512 * 1024 + avgObjSize));
-assert(Math.abs(numChunks - guess) < 2, "not right number of chunks");
+var guess = Math.ceil(dataSize / (1024 * 1024 + avgObjSize));
+assert.lte(Math.abs(numChunks - guess), 2, "not right number of chunks");
 
 s.stop();
 })();
diff --git a/jstests/sharding/shard_existing_coll_chunk_count.js b/jstests/sharding/shard_existing_coll_chunk_count.js
index 7ee54444976..51fe2239b9f 100644
--- a/jstests/sharding/shard_existing_coll_chunk_count.js
+++ b/jstests/sharding/shard_existing_coll_chunk_count.js
@@ -2,7 +2,10 @@
  * This test confirms that after sharding a collection with some pre-existing data,
  * the resulting chunks aren't auto-split too aggressively.
  *
- * @tags: [requires_persistence]
+ * @tags: [
+ *    requires_fcv_44, # autoSplitVector not present in older v4.2 binaries
+ *    requires_persistence
+ * ]
  */
 (function() {
 'use strict';
@@ -82,7 +85,7 @@ var runCase = function(opts) {
 
         // Confirm number of chunks for this stage.
         var numChunks = getNumberChunks(coll.getFullName());
-        assert.gte(numChunks,
+        assert.lte(numChunks,
                    stage.expectedNumChunks,
                    'in ' + coll.getFullName() + ' expected ' + stage.expectedNumChunks +
                        ' chunks for stage ' + stageNum + ', but found ' + numChunks + '\nopts: ' +
@@ -157,7 +160,7 @@ runCase({
     docSize: 510 * 1024,
     stages: [
         {numDocsToInsert: 10, expectedNumChunks: 6},
-        {numDocsToInsert: 10, expectedNumChunks: 10},
+        {numDocsToInsert: 10, expectedNumChunks: 12},
     ],
 });
 
@@ -166,7 +169,7 @@ runCase({
     docSize: 514 * 1024,
     stages: [
         {numDocsToInsert: 10, expectedNumChunks: 10},
-        {numDocsToInsert: 10, expectedNumChunks: 18},
+        {numDocsToInsert: 10, expectedNumChunks: 20},
     ],
 });
 
diff --git a/jstests/sharding/sharding_balance1.js b/jstests/sharding/sharding_balance1.js
index d6e0384d7f7..821f6faf58d 100644
--- a/jstests/sharding/sharding_balance1.js
+++ b/jstests/sharding/sharding_balance1.js
@@ -1,3 +1,9 @@
+/*
+ * @tags: [
+ *     requires_fcv_44, # autoSplitVector not present in older v4.2 binaries
+ * ]
+ */
+
 (function() {
 'use strict';
 
@@ -23,7 +29,7 @@ while (inserted < (20 * 1024 * 1024)) {
 assert.commandWorked(bulk.execute());
 
 assert.commandWorked(s.s0.adminCommand({shardcollection: "test.foo", key: {_id: 1}}));
-assert.lt(20, s.config.chunks.count({"ns": "test.foo"}), "setup2");
+assert.lte(20, s.config.chunks.count({"ns": "test.foo"}), "setup2");
 
 function diff1() {
     var x = s.chunkCounts("foo");
@@ -37,7 +43,7 @@ function sum() {
     return x[s.shard0.shardName] + x[s.shard1.shardName];
 }
 
-assert.lt(20, diff1(), "big differential here");
+assert.lte(20, diff1(), "big differential here");
 print(diff1());
 
 assert.soon(function() {
diff --git a/jstests/sharding/sharding_rs1.js b/jstests/sharding/sharding_rs1.js
index d15f97bafd6..5536505ef07 100644
--- a/jstests/sharding/sharding_rs1.js
+++ b/jstests/sharding/sharding_rs1.js
@@ -4,7 +4,7 @@
 (function() {
 'use strict';
 
-var s = new ShardingTest({shards: 3, other: {rs: true, chunkSize: 1, enableBalancer: true}});
+var s = new ShardingTest({shards: 3, other: {rs: true, chunkSize: 2, enableBalancer: true}});
 
 s.adminCommand({enablesharding: "test"});
 s.ensurePrimaryShard('test', s.shard0.shardName);
@@ -12,14 +12,14 @@ s.config.settings.update({_id: "balancer"}, {$set: {_waitForDelete: true}}, true
 
 var db = s.getDB("test");
 
-var bigString = "X".repeat(256 * 1024);
+var bigString = "X".repeat(256 * 1024);  // 250 KB
 
 var insertedBytes = 0;
 var num = 0;
 
-// Insert 10 MB of data to result in 10+ chunks
+// Insert 20 MB of data to result in 20 chunks
 var bulk = db.foo.initializeUnorderedBulkOp();
-while (insertedBytes < (10 * 1024 * 1024)) {
+while (insertedBytes < (20 * 1024 * 1024)) {
     bulk.insert({_id: num++, s: bigString, x: Math.random()});
     insertedBytes += bigString.length;
 }
diff --git a/jstests/sharding/write_cmd_auto_split.js b/jstests/sharding/write_cmd_auto_split.js
index 197d29ccc90..55796aa1c30 100644
--- a/jstests/sharding/write_cmd_auto_split.js
+++ b/jstests/sharding/write_cmd_auto_split.js
@@ -14,148 +14,186 @@ assert.commandWorked(configDB.adminCommand({shardCollection: 'test.insert', key:
 var doc1k = (new Array(1024)).join('x');
 var testDB = st.s.getDB('test');
 
-jsTest.log('Test single batch insert should auto-split');
+function testSingleBatchInsertShouldAutoSplit() {
+    jsTest.log('Test single batch insert should auto-split');
+
+    assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
+    assert.commandWorked(configDB.adminCommand({shardCollection: 'test.insert', key: {x: 1}}));
+
+    assert.eq(1, configDB.chunks.find({"ns": "test.insert"}).itcount());
+
+    // This should result in a little over 3MB inserted into the chunk, so with
+    // a max chunk size of 1MB we'd expect the autosplitter to split this into
+    // at least 3 chunks
+    for (var x = 0; x < 3100; x++) {
+        assert.commandWorked(testDB.runCommand({
+            insert: 'insert',
+            documents: [{x: x, v: doc1k}],
+            ordered: false,
+            writeConcern: {w: 1}
+        }));
+    }
 
-assert.eq(1, configDB.chunks.find({"ns": "test.insert"}).itcount());
+    waitForOngoingChunkSplits(st);
 
-// This should result in a little over 3MB inserted into the chunk, so with
-// a max chunk size of 1MB we'd expect the autosplitter to split this into
-// at least 3 chunks
-for (var x = 0; x < 3100; x++) {
-    assert.commandWorked(testDB.runCommand(
-        {insert: 'insert', documents: [{x: x, v: doc1k}], ordered: false, writeConcern: {w: 1}}));
-}
+    // Inserted batch is a multiple of the chunkSize, expect the chunks to split into
+    // more than 2.
+    assert.gt(configDB.chunks.find({"ns": "test.insert"}).itcount(), 2);
+    testDB.dropDatabase();
 
-waitForOngoingChunkSplits(st);
+    jsTest.log('Test single batch update should auto-split');
 
-// Inserted batch is a multiple of the chunkSize, expect the chunks to split into
-// more than 2.
-assert.gt(configDB.chunks.find({"ns": "test.insert"}).itcount(), 2);
-testDB.dropDatabase();
+    assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
+    assert.commandWorked(configDB.adminCommand({shardCollection: 'test.update', key: {x: 1}}));
 
-jsTest.log('Test single batch update should auto-split');
+    assert.eq(1, configDB.chunks.find({"ns": "test.update"}).itcount());
 
-assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
-assert.commandWorked(configDB.adminCommand({shardCollection: 'test.update', key: {x: 1}}));
+    for (var x = 0; x < 2100; x++) {
+        assert.commandWorked(testDB.runCommand({
+            update: 'update',
+            updates: [{q: {x: x}, u: {x: x, v: doc1k}, upsert: true}],
+            ordered: false,
+            writeConcern: {w: 1}
+        }));
+    }
 
-assert.eq(1, configDB.chunks.find({"ns": "test.update"}).itcount());
+    waitForOngoingChunkSplits(st);
 
-for (var x = 0; x < 2100; x++) {
-    assert.commandWorked(testDB.runCommand({
-        update: 'update',
-        updates: [{q: {x: x}, u: {x: x, v: doc1k}, upsert: true}],
-        ordered: false,
-        writeConcern: {w: 1}
-    }));
+    assert.gt(configDB.chunks.find({"ns": "test.update"}).itcount(), 1);
+    testDB.dropDatabase();
 }
 
-waitForOngoingChunkSplits(st);
+function testSingleDeleteShouldNotAutoSplit() {
+    jsTest.log('Test single delete should not auto-split');
 
-assert.gt(configDB.chunks.find({"ns": "test.update"}).itcount(), 1);
-testDB.dropDatabase();
+    assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
+    assert.commandWorked(configDB.adminCommand({shardCollection: 'test.delete', key: {x: 1}}));
 
-jsTest.log('Test single delete should not auto-split');
+    assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
 
-assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
-assert.commandWorked(configDB.adminCommand({shardCollection: 'test.delete', key: {x: 1}}));
+    for (var x = 0; x < 1100; x++) {
+        assert.commandWorked(testDB.runCommand({
+            delete: 'delete',
+            deletes: [{q: {x: x, v: doc1k}, limit: NumberInt(0)}],
+            ordered: false,
+            writeConcern: {w: 1}
+        }));
+    }
 
-assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
+    // If we are autosplitting (which we shouldn't be), we want to wait until
+    // it's finished, otherwise we could falsely think no autosplitting was
+    // done when really it was just in progress.
+    waitForOngoingChunkSplits(st);
 
-for (var x = 0; x < 1100; x++) {
-    assert.commandWorked(testDB.runCommand({
-        delete: 'delete',
-        deletes: [{q: {x: x, v: doc1k}, limit: NumberInt(0)}],
-        ordered: false,
-        writeConcern: {w: 1}
-    }));
+    assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
+    testDB.dropDatabase();
 }
 
-// If we are autosplitting (which we shouldn't be), we want to wait until
-// it's finished, otherwise we could falsely think no autosplitting was
-// done when really it was just in progress.
-waitForOngoingChunkSplits(st);
+function testBatchedInsertShouldAutoSplit() {
+    jsTest.log('Test batched insert should auto-split');
 
-assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
-testDB.dropDatabase();
+    assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
+    assert.commandWorked(configDB.adminCommand({shardCollection: 'test.insert', key: {x: 1}}));
 
-jsTest.log('Test batched insert should auto-split');
+    assert.eq(1, configDB.chunks.find({"ns": "test.insert"}).itcount());
 
-assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
-assert.commandWorked(configDB.adminCommand({shardCollection: 'test.insert', key: {x: 1}}));
+    // Note: Estimated 'chunk size' tracked by mongos is initialized with a random value so
+    // we are going to be conservative.
+    for (var x = 0; x < 2100; x += 400) {
+        var docs = [];
 
-assert.eq(1, configDB.chunks.find({"ns": "test.insert"}).itcount());
+        for (var y = 0; y < 400; y++) {
+            docs.push({x: (x + y), v: doc1k});
+        }
 
-// Note: Estimated 'chunk size' tracked by mongos is initialized with a random value so
-// we are going to be conservative.
-for (var x = 0; x < 2100; x += 400) {
-    var docs = [];
-
-    for (var y = 0; y < 400; y++) {
-        docs.push({x: (x + y), v: doc1k});
+        assert.commandWorked(testDB.runCommand(
+            {insert: 'insert', documents: docs, ordered: false, writeConcern: {w: 1}}));
     }
 
-    assert.commandWorked(testDB.runCommand(
-        {insert: 'insert', documents: docs, ordered: false, writeConcern: {w: 1}}));
-}
+    waitForOngoingChunkSplits(st);
 
-waitForOngoingChunkSplits(st);
+    assert.gt(configDB.chunks.find({"ns": "test.insert"}).itcount(), 1);
+    testDB.dropDatabase();
+}
 
-assert.gt(configDB.chunks.find({"ns": "test.insert"}).itcount(), 1);
-testDB.dropDatabase();
+function testBatchedUpdateShouldAutoSplit() {
+    jsTest.log('Test batched update should auto-split');
 
-jsTest.log('Test batched update should auto-split');
+    assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
+    assert.commandWorked(configDB.adminCommand({shardCollection: 'test.update', key: {x: 1}}));
 
-assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
-assert.commandWorked(configDB.adminCommand({shardCollection: 'test.update', key: {x: 1}}));
+    assert.eq(1, configDB.chunks.find({"ns": "test.update"}).itcount());
 
-assert.eq(1, configDB.chunks.find({"ns": "test.update"}).itcount());
+    for (var x = 0; x < 2100; x += 400) {
+        var docs = [];
 
-for (var x = 0; x < 2100; x += 400) {
-    var docs = [];
+        for (var y = 0; y < 400; y++) {
+            var id = x + y;
+            docs.push({q: {x: id}, u: {x: id, v: doc1k}, upsert: true});
+        }
 
-    for (var y = 0; y < 400; y++) {
-        var id = x + y;
-        docs.push({q: {x: id}, u: {x: id, v: doc1k}, upsert: true});
+        assert.commandWorked(testDB.runCommand(
+            {update: 'update', updates: docs, ordered: false, writeConcern: {w: 1}}));
     }
 
-    assert.commandWorked(
-        testDB.runCommand({update: 'update', updates: docs, ordered: false, writeConcern: {w: 1}}));
-}
+    waitForOngoingChunkSplits(st);
 
-waitForOngoingChunkSplits(st);
+    assert.gt(configDB.chunks.find({"ns": "test.update"}).itcount(), 1);
+    testDB.dropDatabase();
+}
 
-assert.gt(configDB.chunks.find({"ns": "test.update"}).itcount(), 1);
-testDB.dropDatabase();
+function testBatchedDeleteShouldNotAutoSplit() {
+    jsTest.log('Test batched delete should not auto-split');
 
-jsTest.log('Test batched delete should not auto-split');
+    assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
+    assert.commandWorked(configDB.adminCommand({shardCollection: 'test.delete', key: {x: 1}}));
 
-assert.commandWorked(configDB.adminCommand({enableSharding: 'test'}));
-assert.commandWorked(configDB.adminCommand({shardCollection: 'test.delete', key: {x: 1}}));
+    assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
 
-assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
+    for (var x = 0; x < 2100; x += 400) {
+        var docs = [];
 
-for (var x = 0; x < 2100; x += 400) {
-    var docs = [];
+        for (var y = 0; y < 400; y++) {
+            var id = x + y;
+            docs.push({q: {x: id, v: doc1k}, top: 0});
+        }
 
-    for (var y = 0; y < 400; y++) {
-        var id = x + y;
-        docs.push({q: {x: id, v: doc1k}, top: 0});
+        assert.commandWorked(testDB.runCommand({
+            delete: 'delete',
+            deletes: [{q: {x: x, v: doc1k}, limit: NumberInt(0)}],
+            ordered: false,
+            writeConcern: {w: 1}
+        }));
     }
 
-    assert.commandWorked(testDB.runCommand({
-        delete: 'delete',
-        deletes: [{q: {x: x, v: doc1k}, limit: NumberInt(0)}],
-        ordered: false,
-        writeConcern: {w: 1}
-    }));
-}
+    // If we are autosplitting (which we shouldn't be), we want to wait until
+    // it's finished, otherwise we could falsely think no autosplitting was
+    // done when really it was just in progress.
+    waitForOngoingChunkSplits(st);
 
-// If we are autosplitting (which we shouldn't be), we want to wait until
-// it's finished, otherwise we could falsely think no autosplitting was
-// done when really it was just in progress.
-waitForOngoingChunkSplits(st);
+    assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
+}
 
-assert.eq(1, configDB.chunks.find({"ns": "test.delete"}).itcount());
+var testCases = [
+    testSingleBatchInsertShouldAutoSplit,
+    testSingleDeleteShouldNotAutoSplit,
+    testBatchedInsertShouldAutoSplit,
+    testBatchedUpdateShouldAutoSplit,
+    testBatchedDeleteShouldNotAutoSplit
+];
+
+for (let testCase of testCases) {
+    try {
+        testDB.dropDatabase();
+        testCase();
+    } catch (e) {
+        print("Retrying test case failed due to " + e);
+        // (SERVER-59882) The split may not have happened due to write-unit-of-work commit delay
+        // Give it another best-effort try, given the low probability it would happen again
+        testDB.dropDatabase();
+        testCase();
+    }
+}
 
 st.stop();
 })();
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 14741dcafa7..6c55a3c8e02 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -46,6 +46,7 @@ env.Library(
         'active_migrations_registry.cpp',
         'active_move_primaries_registry.cpp',
         'active_shard_collection_registry.cpp',
+        'auto_split_vector.cpp',
         'chunk_move_write_concern_options.cpp',
         'chunk_splitter.cpp',
         'collection_sharding_runtime.cpp',
@@ -263,6 +264,7 @@ env.Library(
     target='sharding_commands_d',
     source=[
         'add_shard_cmd.cpp',
+        'auto_split_vector_command.cpp',
         'check_sharding_index_command.cpp',
         'cleanup_orphaned_cmd.cpp',
         'clone_catalog_data_command.cpp',
@@ -405,6 +407,7 @@ env.CppUnitTest(
         'active_migrations_registry_test.cpp',
         'active_move_primaries_registry_test.cpp',
         'active_shard_collection_registry_test.cpp',
+        'auto_split_vector_test.cpp',
         'catalog_cache_loader_mock.cpp',
         'implicit_create_collection_test.cpp',
         'migration_chunk_cloner_source_legacy_test.cpp',
diff --git a/src/mongo/db/s/auto_split_vector.cpp b/src/mongo/db/s/auto_split_vector.cpp
new file mode 100644
index 00000000000..7569142fde0
--- /dev/null
+++ b/src/mongo/db/s/auto_split_vector.cpp
@@ -0,0 +1,368 @@
+/**
+ *    Copyright (C) 2021-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/auto_split_vector.h"
+
+#include "mongo/base/status_with.h"
+#include "mongo/db/bson/dotted_path_support.h"
+#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/catalog_raii.h"
+#include "mongo/db/dbhelpers.h"
+#include "mongo/db/exec/working_set_common.h"
+#include "mongo/db/index/index_descriptor.h"
+#include "mongo/db/keypattern.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/internal_plans.h"
+#include "mongo/db/query/plan_executor.h"
+#include "mongo/logv2/log.h"
+
+namespace mongo {
+namespace {
+
+constexpr int estimatedAdditionalBytesPerItemInBSONArray{2};
+
+constexpr int kMaxSplitPointsToReposition{3};
+
+BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) {
+    return key.replaceFieldNames(keyPattern).clientReadable();
+}
+
+/*
+ * Takes the given min/max BSON objects that are a prefix of the shardKey and return two new BSON
+ * object extended to cover the entire shardKey. See KeyPattern::extendRangeBound documentation for
+ * some examples.
+ */
+const std::tuple<BSONObj, BSONObj> getMinMaxExtendedBounds(const IndexDescriptor* shardKeyIdx,
+                                                           const BSONObj& min,
+                                                           const BSONObj& max) {
+    KeyPattern kp(shardKeyIdx->keyPattern());
+
+    // Extend min to get (min, MinKey, MinKey, ....)
+    BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false /* upperInclusive */));
+    BSONObj maxKey;
+    if (max.isEmpty()) {
+        // if max not specified, make it (MaxKey, Maxkey, MaxKey...)
+        maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, true /* upperInclusive */));
+    } else {
+        // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive
+        maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, false /* upperInclusive*/));
+    }
+
+    return {minKey, maxKey};
+}
+
+/*
+ * Returns true if the final key in the range is the same as the first key, false otherwise.
+ */
+bool maxKeyEqualToMinKey(OperationContext* opCtx,
+                         const Collection* collection,
+                         const IndexDescriptor* shardKeyIdx,
+                         const BSONObj& minBound,
+                         const BSONObj& maxBound,
+                         const BSONObj& minKeyInChunk) {
+    BSONObj maxKeyInChunk;
+    {
+        auto backwardIdxScanner = InternalPlanner::indexScan(opCtx,
+                                                             collection,
+                                                             shardKeyIdx,
+                                                             maxBound,
+                                                             minBound,
+                                                             BoundInclusion::kIncludeEndKeyOnly,
+                                                             PlanExecutor::YIELD_AUTO,
+                                                             InternalPlanner::BACKWARD);
+
+        PlanExecutor::ExecState state = backwardIdxScanner->getNext(&maxKeyInChunk, nullptr);
+        uassert(ErrorCodes::OperationFailed,
+                "can't open a cursor to find final key in range (desired range is possibly empty)",
+                state == PlanExecutor::ADVANCED);
+    }
+
+    if (minKeyInChunk.woCompare(maxKeyInChunk) == 0) {
+        // Range contains only documents with a single key value.  So we cannot possibly find a
+        // split point, and there is no need to scan any further.
+        LOGV2_WARNING(
+            5865001,
+            "Possible low cardinality key detected in range. Range contains only a single key.",
+            "namespace"_attr = collection->ns(),
+            "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minBound)),
+            "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxBound)),
+            "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKeyInChunk)));
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Reshuffle fields according to the shard key pattern.
+ */
+auto orderShardKeyFields(const BSONObj& keyPattern, BSONObj& key) {
+    return dotted_path_support::extractElementsBasedOnTemplate(
+        prettyKey(keyPattern, key.getOwned()), keyPattern);
+}
+
+}  // namespace
+
+std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
+                                     const NamespaceString& nss,
+                                     const BSONObj& keyPattern,
+                                     const BSONObj& min,
+                                     const BSONObj& max,
+                                     long long maxChunkSizeBytes) {
+    std::vector<BSONObj> splitKeys;
+
+    int elapsedMillisToFindSplitPoints;
+
+    // Contains each key appearing multiple times and estimated to be able to fill-in a chunk alone
+    auto tooFrequentKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+
+    {
+        AutoGetCollection autoGetColl(opCtx, nss, MODE_IS);
+        auto* collection = autoGetColl.getCollection();
+
+        uassert(ErrorCodes::NamespaceNotFound, "ns not found", collection);
+
+        // Get the size estimate for this namespace
+        const long long totalLocalCollDocuments = collection->numRecords(opCtx);
+        const long long dataSize = collection->dataSize(opCtx);
+
+        // Return empty vector if current estimated data size is less than max chunk size
+        if (dataSize < maxChunkSizeBytes || totalLocalCollDocuments == 0) {
+            return {};
+        }
+
+        // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore,
+        // any multi-key index prefixed by shard key cannot be multikey over the shard key fields.
+        auto catalog = collection->getIndexCatalog();
+        auto shardKeyIdx =
+            catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, /*requireSingleKey=*/false);
+        uassert(ErrorCodes::IndexNotFound,
+                str::stream() << "couldn't find index over splitting key "
+                              << keyPattern.clientReadable().toString(),
+                shardKeyIdx);
+
+        const auto [minKey, maxKey] = getMinMaxExtendedBounds(shardKeyIdx, min, max);
+
+        // Setup the index scanner that will be used to find the split points
+        auto forwardIdxScanner = InternalPlanner::indexScan(opCtx,
+                                                            collection,
+                                                            shardKeyIdx,
+                                                            minKey,
+                                                            maxKey,
+                                                            BoundInclusion::kIncludeStartKeyOnly,
+                                                            PlanExecutor::YIELD_AUTO,
+                                                            InternalPlanner::FORWARD);
+
+        // Get minimum key belonging to the chunk
+        BSONObj minKeyInOriginalChunk;
+        {
+            PlanExecutor::ExecState state =
+                forwardIdxScanner->getNext(&minKeyInOriginalChunk, nullptr);
+            uassert(ErrorCodes::OperationFailed,
+                    "can't open a cursor to scan the range (desired range is possibly empty)",
+                    state == PlanExecutor::ADVANCED);
+        }
+
+        // Return empty vector if chunk's min and max keys are the same.
+        if (maxKeyEqualToMinKey(
+                opCtx, collection, shardKeyIdx, minKey, maxKey, minKeyInOriginalChunk)) {
+            return {};
+        }
+
+        LOGV2(5865000,
+              "Requested split points lookup for chunk",
+              "namespace"_attr = nss,
+              "minKey"_attr = redact(prettyKey(keyPattern, minKey)),
+              "maxKey"_attr = redact(prettyKey(keyPattern, maxKey)));
+
+        // Use the average document size and number of documents to find the approximate number of
+        // keys each chunk should contain
+        const long long avgDocSize = dataSize / totalLocalCollDocuments;
+
+        // Split at max chunk size
+        long long maxDocsPerChunk = maxChunkSizeBytes / avgDocSize;
+
+        BSONObj currentKey;               // Last key seen during the index scan
+        long long numScannedKeys = 1;     // minKeyInOriginalChunk has already been scanned
+        std::size_t resultArraySize = 0;  // Approximate size in bytes of the split points array
+        bool reachedMaxBSONSize = false;  // True if the split points vector becomes too big
+
+        // Lambda to check whether the split points vector would exceed BSONObjMaxUserSize in case
+        // of additional split key of the specified size.
+        auto checkMaxBSONSize = [&resultArraySize](const int additionalKeySize) {
+            return resultArraySize + additionalKeySize > BSONObjMaxUserSize;
+        };
+
+        // Reference to last split point that needs to be checked in order to avoid adding duplicate
+        // split points. Initialized to the min of the first chunk being split.
+        auto minKeyElement = orderShardKeyFields(keyPattern, minKeyInOriginalChunk);
+        auto lastSplitPoint = minKeyElement;
+
+        Timer timer;  // To measure time elapsed while searching split points
+
+        // Traverse the index and add the maxDocsPerChunk-th key to the result vector
+        while (forwardIdxScanner->getNext(&currentKey, nullptr) == PlanExecutor::ADVANCED) {
+            if (++numScannedKeys >= maxDocsPerChunk) {
+                currentKey = orderShardKeyFields(keyPattern, currentKey);
+
+                if (currentKey.woCompare(lastSplitPoint) == 0) {
+                    // Do not add again the same split point in case of frequent shard key.
+                    tooFrequentKeys.insert(currentKey.getOwned());
+                    continue;
+                }
+
+                const auto additionalKeySize =
+                    currentKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray;
+                if (checkMaxBSONSize(additionalKeySize)) {
+                    if (splitKeys.empty()) {
+                        // Keep trying until finding at least one split point that isn't above
+                        // the max object user size. Very improbable corner case: the shard key
+                        // size for the chosen split point is exactly 16MB.
+                        continue;
+                    }
+                    reachedMaxBSONSize = true;
+                    break;
+                }
+
+                resultArraySize += additionalKeySize;
+                splitKeys.push_back(currentKey.getOwned());
+                lastSplitPoint = splitKeys.back();
+                numScannedKeys = 0;
+
+                LOGV2_DEBUG(5865003, 4, "Picked a split key", "key"_attr = redact(currentKey));
+            }
+        }
+
+        // Avoid creating small chunks by fairly recalculating the last split points if the last
+        // chunk would be too small (containing less than `80% maxDocsPerChunk` documents).
+        bool lastChunk80PercentFull = numScannedKeys >= maxDocsPerChunk * 0.8;
+        if (!lastChunk80PercentFull && !splitKeys.empty() && !reachedMaxBSONSize) {
+            // Eventually recalculate the last split points (at most `kMaxSplitPointsToReposition`).
+            int nSplitPointsToReposition = splitKeys.size() > kMaxSplitPointsToReposition
+                ? kMaxSplitPointsToReposition
+                : splitKeys.size();
+
+            // Equivalent to: (nSplitPointsToReposition * maxDocsPerChunk + numScannedKeys) divided
+            // by the number of reshuffled chunks (nSplitPointsToReposition + 1).
+            const auto maxDocsPerNewChunk = maxDocsPerChunk -
+                ((maxDocsPerChunk - numScannedKeys) / (nSplitPointsToReposition + 1));
+
+            if (numScannedKeys < maxDocsPerChunk - maxDocsPerNewChunk) {
+                // If the surplus is not too much, simply keep a bigger last chunk.
+                // The surplus is considered enough if repositioning the split points would imply
+                // generating chunks with a number of documents lower than `67% maxDocsPerChunk`.
+                splitKeys.pop_back();
+            } else {
+                // Fairly recalculate the last `nSplitPointsToReposition` split points.
+                splitKeys.erase(splitKeys.end() - nSplitPointsToReposition, splitKeys.end());
+
+                auto forwardIdxScanner =
+                    InternalPlanner::indexScan(opCtx,
+                                               collection,
+                                               shardKeyIdx,
+                                               splitKeys.empty() ? minKeyElement : splitKeys.back(),
+                                               maxKey,
+                                               BoundInclusion::kIncludeStartKeyOnly,
+                                               PlanExecutor::YIELD_AUTO,
+                                               InternalPlanner::FORWARD);
+
+                numScannedKeys = 0;
+
+                auto previousSplitPoint = splitKeys.empty() ? minKeyElement : splitKeys.back();
+                while (forwardIdxScanner->getNext(&currentKey, nullptr) == PlanExecutor::ADVANCED) {
+                    if (++numScannedKeys >= maxDocsPerNewChunk) {
+                        currentKey = orderShardKeyFields(keyPattern, currentKey);
+
+                        const auto compareWithPreviousSplitPoint =
+                            currentKey.woCompare(previousSplitPoint);
+                        if (compareWithPreviousSplitPoint > 0) {
+                            const auto additionalKeySize =
+                                currentKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray;
+                            if (checkMaxBSONSize(additionalKeySize)) {
+                                reachedMaxBSONSize = true;
+                                break;
+                            }
+
+                            splitKeys.push_back(currentKey.getOwned());
+                            previousSplitPoint = splitKeys.back();
+                            numScannedKeys = 0;
+
+                            if (--nSplitPointsToReposition == 0) {
+                                break;
+                            }
+                        } else if (compareWithPreviousSplitPoint == 0) {
+                            // Don't add again the same split point in case of frequent shard key.
+                            tooFrequentKeys.insert(currentKey.getOwned());
+                        }
+                    }
+                }
+            }
+        }
+
+        elapsedMillisToFindSplitPoints = timer.millis();
+
+        if (reachedMaxBSONSize) {
+            LOGV2(5865002,
+                  "Max BSON response size reached for split vector before the end of chunk",
+                  "namespace"_attr = nss,
+                  "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)),
+                  "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey)));
+        }
+    }
+
+    // Emit a warning for each frequent key
+    for (const auto& frequentKey : tooFrequentKeys) {
+        LOGV2_WARNING(5865004,
+                      "Possible low cardinality key detected",
+                      "namespace"_attr = nss,
+                      "key"_attr = redact(prettyKey(keyPattern, frequentKey)));
+    }
+
+    if (elapsedMillisToFindSplitPoints > serverGlobalParams.slowMS) {
+        LOGV2_WARNING(5865005,
+                      "Finding the auto split vector completed",
+                      "namespace"_attr = nss,
+                      "keyPattern"_attr = redact(keyPattern),
+                      "numSplits"_attr = splitKeys.size(),
+                      "duration"_attr = Milliseconds(elapsedMillisToFindSplitPoints));
+    }
+
+    // TODO SERVER-58750: investigate if it is really needed to sort the vector
+    // Make sure splitKeys is in ascending order
+    std::sort(
+        splitKeys.begin(), splitKeys.end(), SimpleBSONObjComparator::kInstance.makeLessThan());
+
+    return splitKeys;
+}
+
+}  // namespace mongo
diff --git a/src/mongo/db/s/auto_split_vector.h b/src/mongo/db/s/auto_split_vector.h
new file mode 100644
index 00000000000..559c1f814d6
--- /dev/null
+++ b/src/mongo/db/s/auto_split_vector.h
@@ -0,0 +1,134 @@
+/**
+ *    Copyright (C) 2021-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+
+namespace mongo {
+
+/**
+ * Given a chunk, determines whether it satisfies the requisites to be auto-splitted and - if so -
+ * returns the split points (shard keys representing the lower bounds of the new chunks to create).
+ *
+ * The logic implemented can be summarized as follows: given a `maxChunkSize` of `x` MB, the
+ * algorithm aims to choose the split points so that the resulting chunks' size would be around
+ * `maxChunkSize`. As it is too expensive to precisely determine the dimension of a chunk, it is
+ * assumed a uniform distribution of document sizes, hence the aim is to balance the number of
+ * documents per chunk.
+ *
+ * ======= ALGORITHM DESCRIPTION =======
+ *
+ * The split points for a chunk `C` belonging to a collection `coll` are calculated as follows:
+ * - `averageDocumentSize` = `totalCollSizeOnShard / numberOfCollDocs`
+ * - `maxNumberOfDocsPerChunk` = `maxChunkSize / averageDocumentSize`
+ * - Scan forward the shard key index entries for `coll` that are belonging to chunk `C`:
+ * - (1) Choose a split point every `maxNumberOfDocsPerChunk` scanned keys.
+ * - (2) As it needs to be avoided the creation of small chunks, consider the number of documents
+ * `S` that the right-most chunk would contain given the calculated split points:
+ * --- (2.1) IF `S >= 80% maxNumberOfDocsPerChunk`, return the list of calculated split points.
+ * --- (2.2) ELSE IF `S` documents could be fairly redistributed in the last chunks so that their
+ * size would be at least `67% maxNumberOfDocsPerChunk`: recalculate the last split points (max 3).
+ * --- (2.3) ELSE simply remove the last split point and keep a bigger last chunk.
+ *
+ *
+ * ============== EXAMPLES =============
+ *
+ * ========= EXAMPLE (CASE 2.1) ========
+ * `maxChunkSize` = 100MB
+ * `averageDocumentSize` = 1MB
+ * `maxNumberOfDocsPerChunk` = 100
+ *
+ * Shard key type: integer
+ * Chunk `C` bounds: `[0, maxKey)` . Chunk `C` contains 190 documents with shard keys [0...189].
+ *
+ * (1) Initially calculated split points: [99].
+ * (2) The last chunk would contain the interval `[99-189]` so `S = 90`
+ * (2.1) `S >= 80% maxChunkSize`, so keep the current split points.
+ *
+ * Returned split points: [99].
+ *
+ * ========= EXAMPLE (CASE 2.2) ========
+ * `maxChunkSize` = 100MB
+ * `averageDocumentSize` = 1MB
+ * `maxNumberOfDocsPerChunk` = 100
+ *
+ * Shard key type: integer
+ * Chunk `C` bounds: `[0, maxKey)` . Chunk `C` contains 140 documents with shard keys [0...139].
+ *
+ * (1) Initially calculated split points: [99].
+ * (2) The last chunk would contain the interval `[99-139]` so `S = 40`
+ * (2.2) `S` documents can be redistributed on the last split point by generating chunks of size >=
+ * 67% maxChunkSize. Recalculate.
+ *
+ * Returned split points: [69].
+ *
+ * ========= EXAMPLE (CASE 2.3) ========
+ * `maxChunkSize` = 100MB
+ * `averageDocumentSize` = 1MB
+ * `maxNumberOfDocsPerChunk` = 100
+ *
+ * Shard key type: integer
+ * Chunk `C` bounds: `[0, maxKey)` . Chunk `C` contains 120 documents with shard keys [0...119].
+ *
+ * (1) Initially calculated split points: [99].
+ * (2) The last chunk would contain the interval `[99-119]` so `S = 20`
+ * (2.3) `S` documents can't be redistributed on the last split point by generating chunks of size
+ * >= 67% maxChunkSize. So remove the last split point.
+ *
+ * Returned split points: [].
+ */
+std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
+                                     const NamespaceString& nss,
+                                     const BSONObj& keyPattern,
+                                     const BSONObj& min,
+                                     const BSONObj& max,
+                                     long long maxChunkSizeBytes);
+
+/*
+ * Utility function for deserializing autoSplitVector/splitVector responses.
+ */
+static std::vector<BSONObj> parseSplitKeys(const BSONElement& splitKeysArray) {
+    uassert(ErrorCodes::TypeMismatch,
+            "The split keys vector must be represented as a BSON array",
+            !splitKeysArray.eoo() && splitKeysArray.type() == BSONType::Array);
+
+    std::vector<BSONObj> splitKeys;
+    for (const auto& elem : splitKeysArray.Obj()) {
+        uassert(ErrorCodes::TypeMismatch,
+                "Each element of the split keys array must be an object",
+                elem.type() == BSONType::Object);
+        splitKeys.push_back(elem.embeddedObject().getOwned());
+    }
+
+    return splitKeys;
+}
+
+}  // namespace mongo
diff --git a/src/mongo/db/s/auto_split_vector_command.cpp b/src/mongo/db/s/auto_split_vector_command.cpp
new file mode 100644
index 00000000000..32275403eff
--- /dev/null
+++ b/src/mongo/db/s/auto_split_vector_command.cpp
@@ -0,0 +1,100 @@
+/**
+ *    Copyright (C) 2021-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/s/auto_split_vector.h"
+#include "mongo/db/s/sharding_state.h"
+#include "mongo/logv2/log.h"
+#include "mongo/s/request_types/auto_split_vector_gen.h"
+
+namespace mongo {
+namespace {
+
+class AutoSplitVectorCommand final : public TypedCommand<AutoSplitVectorCommand> {
+public:
+    AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
+        return Command::AllowedOnSecondary::kNever;
+    }
+
+    std::string help() const override {
+        return "Internal command returning the split points for a chunk, given the maximum chunk "
+               "size.";
+    }
+
+    using Request = AutoSplitVectorRequest;
+    using Response = AutoSplitVectorResponse;
+
+    class Invocation final : public InvocationBase {
+    public:
+        using InvocationBase::InvocationBase;
+
+        Response typedRun(OperationContext* opCtx) {
+            uassert(ErrorCodes::IllegalOperation,
+                    "The autoSplitVector command can only be invoked on shards (no CSRS).",
+                    serverGlobalParams.clusterRole == ClusterRole::ShardServer);
+
+            uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands());
+            opCtx->setAlwaysInterruptAtStepDownOrUp();
+
+            const auto& req = request();
+
+            auto splitKeys = autoSplitVector(opCtx,
+                                             ns(),
+                                             req.getKeyPattern(),
+                                             req.getMin(),
+                                             req.getMax(),
+                                             req.getMaxChunkSizeBytes());
+
+            return splitKeys;
+        }
+
+    private:
+        NamespaceString ns() const override {
+            return request().getNamespace();
+        }
+
+        bool supportsWriteConcern() const override {
+            return false;
+        }
+
+        void doCheckAuthorization(OperationContext* opCtx) const override {
+            uassert(ErrorCodes::Unauthorized,
+                    "Unauthorized",
+                    AuthorizationSession::get(opCtx->getClient())
+                        ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+                                                           ActionType::splitVector));
+        }
+    };
+} autoSplitVectorCommand;
+
+}  // namespace
+}  // namespace mongo
diff --git a/src/mongo/db/s/auto_split_vector_test.cpp b/src/mongo/db/s/auto_split_vector_test.cpp
new file mode 100644
index 00000000000..18c2cceab38
--- /dev/null
+++ b/src/mongo/db/s/auto_split_vector_test.cpp
@@ -0,0 +1,252 @@
+/**
+ *    Copyright (C) 2021-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
+#include "mongo/db/catalog/create_collection.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/s/auto_split_vector.h"
+#include "mongo/db/s/collection_sharding_runtime.h"
+#include "mongo/db/s/operation_sharding_state.h"
+#include "mongo/db/s/split_vector.h"
+#include "mongo/logv2/log.h"
+#include "mongo/platform/random.h"
+#include "mongo/s/shard_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
+const NamespaceString kNss = NamespaceString("autosplitDB", "coll");
+const std::string kPattern = "_id";
+
+/*
+ * Call the autoSplitVector function of the test collection on a chunk with bounds [0, 100) and with
+ * the specified `maxChunkSizeMB`.
+ */
+std::vector<BSONObj> autoSplit(OperationContext* opCtx, int maxChunkSizeMB) {
+    return autoSplitVector(opCtx,
+                           kNss,
+                           BSON(kPattern << 1) /* shard key pattern */,
+                           BSON(kPattern << 0) /* min */,
+                           BSON(kPattern << 1000) /* max */,
+                           maxChunkSizeMB * 1024 * 1024 /* max chunk size in bytes*/);
+}
+
+class AutoSplitVectorTest : public ShardServerTestFixture {
+public:
+    /*
+     * Before each test case:
+     * - Creates a sharded collection with shard key `_id`
+     */
+    void setUp() {
+        ShardServerTestFixture::setUp();
+
+        auto opCtx = operationContext();
+
+        uassertStatusOK(createCollection(
+            operationContext(), kNss.db().toString(), BSON("create" << kNss.coll())));
+
+        DBDirectClient client(opCtx);
+        client.createIndex(kNss.ns(), BSON(kPattern << 1));
+    }
+
+    /*
+     * Insert the specified number of documents in the test collection, with incremental shard key
+     * `_id` starting from `_nextShardKey`.
+     */
+    void insertNDocsOf1MB(OperationContext* opCtx, int nDocs) {
+        DBDirectClient client(opCtx);
+
+        std::string s(1024 * 1024 - 24, 'a');  // To get a 1MB document
+        for (int i = 0; i < nDocs; i++) {
+            BSONObjBuilder builder;
+            builder.append(kPattern, _nextShardKey++);
+            builder.append("str", s);
+            BSONObj obj = builder.obj();
+            ASSERT(obj.objsize() == 1024 * 1024);  // 1 MB document
+            client.insert(kNss.toString(), obj);
+        }
+    }
+
+    /*
+     * Get the number of documents inserted until now.
+     */
+    int getInsertedSize() {
+        return _nextShardKey;
+    }
+
+private:
+    int _nextShardKey = 0;
+};
+
+class AutoSplitVectorTest10MB : public AutoSplitVectorTest {
+    /*
+     * Before each test case:
+     * - Creates a sharded collection with shard key `_id`
+     * - Inserts `10` documents of ~1MB size (shard keys [0...9])
+     */
+    void setUp() {
+        AutoSplitVectorTest::setUp();
+
+        auto opCtx = operationContext();
+
+        DBDirectClient client(opCtx);
+        client.createIndex(kNss.ns(), BSON(kPattern << 1));
+
+        insertNDocsOf1MB(opCtx, 10 /* nDocs */);
+        ASSERT_EQUALS(10, client.count(kNss));
+    }
+};
+
+// Throw exception upon calling autoSplitVector on dropped/unexisting collection
+TEST_F(AutoSplitVectorTest10MB, NoCollection) {
+    ASSERT_THROWS_CODE(autoSplitVector(operationContext(),
+                                       NamespaceString("dummy", "collection"),
+                                       BSON(kPattern << 1) /* shard key pattern */,
+                                       BSON(kPattern << 0) /* min */,
+                                       BSON(kPattern << 100) /* max */,
+                                       1 * 1024 * 1024 /* max chunk size in bytes*/),
+                       DBException,
+                       ErrorCodes::NamespaceNotFound);
+}
+
+// No split points if estimated `data size < max chunk size`
+TEST_F(AutoSplitVectorTest10MB, NoSplitIfDataLessThanMaxChunkSize) {
+    std::vector<BSONObj> splitKeys = autoSplit(operationContext(), 11 /* maxChunkSizeMB */);
+    ASSERT_EQ(splitKeys.size(), 0);
+}
+
+// Do not split in case of `chunk size == maxChunkSize`
+TEST_F(AutoSplitVectorTest10MB, NoSplitIfDataEqualMaxChunkSize) {
+    std::vector<BSONObj> splitKeys = autoSplit(operationContext(), 10 /* maxChunkSizeMB */);
+    ASSERT_EQ(splitKeys.size(), 0);
+}
+
+// No split points if `chunk size > max chunk size` but threshold not reached
+TEST_F(AutoSplitVectorTest10MB, NoSplitIfDataLessThanThreshold) {
+    const auto surplus = 2;
+    {
+        // Increase collection size so that the auto splitter can actually be triggered. Use a
+        // different range to don't interfere with the chunk getting splitted.
+        insertNDocsOf1MB(operationContext(), surplus /* nDocs */);
+    }
+    std::vector<BSONObj> splitKeys = autoSplit(operationContext(), 10 /* maxChunkSizeMB */);
+    ASSERT_EQ(splitKeys.size(), 0);
+}
+
+// One split point if `chunk size > max chunk size` and threshold reached
+TEST_F(AutoSplitVectorTest10MB, SplitIfDataSlightlyMoreThanThreshold) {
+    const auto surplus = 4;
+    insertNDocsOf1MB(operationContext(), surplus /* nDocs */);
+    std::vector<BSONObj> splitKeys = autoSplit(operationContext(), 10 /* maxChunkSizeMB */);
+    ASSERT_EQ(splitKeys.size(), 1);
+    ASSERT_EQ(6, splitKeys.front().getIntField(kPattern));
+}
+
+// Split points if `data size > max chunk size * 2` and threshold reached
+TEST_F(AutoSplitVectorTest10MB, SplitIfDataMoreThanThreshold) {
+    const auto surplus = 14;
+    insertNDocsOf1MB(operationContext(), surplus /* nDocs */);
+    std::vector<BSONObj> splitKeys = autoSplit(operationContext(), 10 /* maxChunkSizeMB */);
+    ASSERT_EQ(splitKeys.size(), 2);
+    ASSERT_EQ(7, splitKeys.front().getIntField(kPattern));
+    ASSERT_EQ(15, splitKeys.back().getIntField(kPattern));
+}
+
+// Split points are not recalculated if the right-most chunk is at least `80% maxChunkSize`
+TEST_F(AutoSplitVectorTest10MB, NoRecalculateIfBigLastChunk) {
+    const auto surplus = 8;
+    insertNDocsOf1MB(operationContext(), surplus /* nDocs */);
+    std::vector<BSONObj> splitKeys = autoSplit(operationContext(), 10 /* maxChunkSizeMB */);
+    ASSERT_EQ(splitKeys.size(), 1);
+    ASSERT_EQ(9, splitKeys.front().getIntField(kPattern));
+}
+
+class RepositionLastSplitPointsTest : public AutoSplitVectorTest {
+public:
+    /*
+     * Tests that last split points are properly repositioned in case the surplus allows so or not
+     * repositioned otherwise.
+     */
+    void checkRepositioning(int maxDocsPerChunk, int surplus, int nSplitPoints) {
+        ASSERT(surplus >= 0 && surplus < maxDocsPerChunk);
+
+        const auto maxDocsPerNewChunk =
+            maxDocsPerChunk - ((maxDocsPerChunk - surplus) / (nSplitPoints + 1));
+        bool mustReposition =
+            surplus >= maxDocsPerChunk - maxDocsPerNewChunk && surplus < maxDocsPerChunk * 0.8;
+
+        int toInsert = (maxDocsPerChunk * nSplitPoints) - getInsertedSize() + surplus;
+        insertNDocsOf1MB(operationContext(), toInsert);
+
+        int expectedChunkSize =
+            mustReposition ? getInsertedSize() / (nSplitPoints + 1) : maxDocsPerChunk;
+        std::vector<BSONObj> splitKeys =
+            autoSplit(operationContext(), maxDocsPerChunk /* maxChunkSizeMB */);
+
+        int approximateNextMin = expectedChunkSize;
+        for (const auto& splitKey : splitKeys) {
+            int _id = splitKey.getIntField(kPattern);
+            // Expect an approximate match due to integers rounding in the split points algorithm.
+            ASSERT(_id >= approximateNextMin - 2 && _id <= approximateNextMin + 2) << BSON(
+                "approximateNextMin"
+                << approximateNextMin << "splitKeys" << splitKeys << "maxDocsPerChunk"
+                << maxDocsPerChunk << "surplus" << surplus << "nSplitPoints" << nSplitPoints
+                << "maxDocsPerNewChunk" << maxDocsPerNewChunk << "mustReposition" << mustReposition
+                << "toInsert" << toInsert << "expectedChunkSize" << expectedChunkSize);
+            approximateNextMin = _id + expectedChunkSize;
+        }
+    }
+};
+
+
+// Test that last split points are recalculated fairly (if the surplus allows so)
+TEST_F(RepositionLastSplitPointsTest, RandomRepositioningTest) {
+    PseudoRandom random(SecureRandom().nextInt64());
+    // Avoid small sizes already checked in other test cases.
+    // Random maxDocsPerChunk in interval: [10, 110).
+    int maxDocsPerChunk = random.nextInt32(100) + 10;
+    // Random surplus in interval: [0, maxDocsPerChunk).
+    int surplus = random.nextInt32(maxDocsPerChunk);
+
+    LOGV2(6000900,
+          "RandomRepositioningTest parameters",
+          "maxDocsPerChunk"_attr = maxDocsPerChunk,
+          "surplus"_attr = surplus);
+
+    for (int nSplitPointsToReposition = 1; nSplitPointsToReposition < 4;
+         nSplitPointsToReposition++) {
+        checkRepositioning(maxDocsPerChunk, surplus, nSplitPointsToReposition);
+    }
+}
+
+
+}  // namespace
+}  // namespace mongo
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index 298c6181568..8bab73b7047 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -38,11 +38,11 @@
 #include "mongo/db/client.h"
 #include "mongo/db/dbdirectclient.h"
 #include "mongo/db/namespace_string.h"
+#include "mongo/db/s/auto_split_vector.h"
 #include "mongo/db/s/chunk_split_state_driver.h"
 #include "mongo/db/s/shard_filtering_metadata_refresh.h"
 #include "mongo/db/s/sharding_state.h"
 #include "mongo/db/s/split_chunk.h"
-#include "mongo/db/s/split_vector.h"
 #include "mongo/db/service_context.h"
 #include "mongo/logv2/log.h"
 #include "mongo/s/balancer_configuration.h"
@@ -338,15 +338,13 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr<ChunkSplitStateDriver> chunkSp
                     "maxChunkSizeBytes"_attr = maxChunkSizeBytes);
 
         chunkSplitStateDriver->prepareSplit();
-        auto splitPoints = uassertStatusOK(splitVector(opCtx.get(),
-                                                       nss,
-                                                       shardKeyPattern.toBSON(),
-                                                       chunk.getMin(),
-                                                       chunk.getMax(),
-                                                       false,
-                                                       boost::none,
-                                                       boost::none,
-                                                       maxChunkSizeBytes));
+
+        auto splitPoints = autoSplitVector(opCtx.get(),
+                                           nss,
+                                           shardKeyPattern.toBSON(),
+                                           chunk.getMin(),
+                                           chunk.getMax(),
+                                           maxChunkSizeBytes);
 
         if (splitPoints.empty()) {
             LOGV2_DEBUG(21907,
@@ -356,8 +354,8 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr<ChunkSplitStateDriver> chunkSp
                         "ChunkSplitter attempted split but not enough split points were found for "
                         "chunk",
                         "chunk"_attr = redact(chunk.toString()));
-            // Reset our size estimate that we had prior to splitVector to 0, while still counting
-            // the bytes that have been written in parallel to this split task
+            // Reset our size estimate that we had prior to autoSplitVector to 0, while still
+            // counting the bytes that have been written in parallel to this split task
             chunkSplitStateDriver->abandonPrepare();
             return;
         }
diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp
index 1af9ceb57fd..3633e2f741e 100644
--- a/src/mongo/db/s/config/initial_split_policy.cpp
+++ b/src/mongo/db/s/config/initial_split_policy.cpp
@@ -314,7 +314,7 @@ InitialSplitPolicy::ShardCollectionConfig UnoptimizedSplitPolicy::createFirstChu
     std::vector<ShardId> shardIds{params.primaryShardId};
 
     // Refresh the balancer settings to ensure the chunk size setting, which is sent as part of
-    // the splitVector command and affects the number of chunks returned, has been loaded.
+    // the autoSplitVector command and affects the number of chunks returned, has been loaded.
     const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration();
     uassertStatusOK(balancerConfig->refreshAndCheck(opCtx));
     const auto shardSelectedSplitPoints = uassertStatusOK(
diff --git a/src/mongo/db/s/config/initial_split_policy.h b/src/mongo/db/s/config/initial_split_policy.h
index 3b5a5d9ea12..ade39dcf179 100644
--- a/src/mongo/db/s/config/initial_split_policy.h
+++ b/src/mongo/db/s/config/initial_split_policy.h
@@ -152,7 +152,7 @@ public:
 
 /**
  * Split point building strategy to be used when no optimizations are available. We send a
- * splitVector command to the primary shard in order to calculate the appropriate split points.
+ * autoSplitVector command to the primary shard in order to calculate the appropriate split points.
  */
 class UnoptimizedSplitPolicy : public InitialSplitPolicy {
 public:
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
index 6a6bc2c7e66..11cbd8ce065 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
@@ -66,13 +66,15 @@ class ShardCollectionTestBase : public ConfigServerTestFixture {
 protected:
     void expectSplitVector(const HostAndPort& shardHost,
                            const ShardKeyPattern& keyPattern,
-                           const BSONObj& splitPoints) {
+                           const BSONArray& splitPoints) {
         onCommand([&](const RemoteCommandRequest& request) {
             ASSERT_EQUALS(shardHost, request.target);
             string cmdName = request.cmdObj.firstElement().fieldName();
-            ASSERT_EQUALS("splitVector", cmdName);
-            ASSERT_EQUALS(kNamespace.ns(),
-                          request.cmdObj["splitVector"].String());  // splitVector uses full ns
+            ASSERT_EQUALS("autoSplitVector", cmdName);
+            // autoSplitVector concatenates the collection name to the command's db
+            const auto receivedNs =
+                request.dbname + '.' + request.cmdObj["autoSplitVector"].String();
+            ASSERT_EQUALS(kNamespace.ns(), receivedNs);
 
             ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj());
             ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj());
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index d1fb2ca5fcd..d04c4fcf584 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -961,7 +961,7 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
             avgRecSize = BSONObj::kMinBSONLength;
         }
         maxRecsWhenFull = _args.getMaxChunkSizeBytes() / avgRecSize;
-        maxRecsWhenFull = 130 * maxRecsWhenFull / 100;  // pad some slack
+        maxRecsWhenFull = 2 * maxRecsWhenFull;  // pad some slack
     } else {
         avgRecSize = 0;
         maxRecsWhenFull = kMaxObjectPerChunk + 1;
diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript
index f54aad9499b..8b448c1736f 100644
--- a/src/mongo/s/SConscript
+++ b/src/mongo/s/SConscript
@@ -161,6 +161,7 @@ env.Library(
         'catalog/type_tags.cpp',
         'request_types/add_shard_request_type.cpp',
         'request_types/add_shard_to_zone_request_type.cpp',
+        'request_types/auto_split_vector.idl',
         'request_types/balance_chunk_request_type.cpp',
         'request_types/commit_chunk_migration_request_type.cpp',
         'request_types/merge_chunk_request_type.cpp',
diff --git a/src/mongo/s/request_types/auto_split_vector.idl b/src/mongo/s/request_types/auto_split_vector.idl
new file mode 100644
index 00000000000..d8eb082d087
--- /dev/null
+++ b/src/mongo/s/request_types/auto_split_vector.idl
@@ -0,0 +1,71 @@
+# Copyright(C) 2021 - present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+# This IDL file describes the BSON format for the autoSplitVector command.
+
+global:
+  cpp_namespace: "mongo"
+  cpp_includes:
+    - "mongo/db/s/auto_split_vector.h"
+
+imports:
+    - "mongo/idl/basic_types.idl"
+
+types:
+    bson_vector:
+        bson_serialization_type: any
+        description: "An array of objects representing the split keys."
+        cpp_type: "std::vector<mongo::BSONObj>"
+        deserializer: ::mongo::parseSplitKeys
+
+structs:
+    AutoSplitVectorResponse:
+        description: "The reply of an autoSplitVector command."
+        strict: false
+        fields:
+            splitKeys: bson_vector
+
+commands:
+  autoSplitVector:
+    cpp_name: AutoSplitVectorRequest
+    description: "Internal autoSplitVector command"
+    strict: false
+    namespace: concatenate_with_db
+    fields:
+        keyPattern:
+            type: object_owned
+            description: "Shard key pattern of the collection"
+        min:
+            type: object_owned
+            description: "Min key of the chunk"
+        max:
+            type: object_owned
+            description: "Max key of the chunk"
+        maxChunkSizeBytes:
+            type: safeInt64
+            description: "Max chunk size of the collection expressed in bytes"
diff --git a/src/mongo/s/shard_util.cpp b/src/mongo/s/shard_util.cpp
index 9419cfdd687..869750458f1 100644
--- a/src/mongo/s/shard_util.cpp
+++ b/src/mongo/s/shard_util.cpp
@@ -41,6 +41,7 @@
 #include "mongo/logv2/log.h"
 #include "mongo/s/client/shard_registry.h"
 #include "mongo/s/grid.h"
+#include "mongo/s/request_types/auto_split_vector_gen.h"
 #include "mongo/s/shard_key_pattern.h"
 #include "mongo/util/str.h"
 
@@ -95,43 +96,60 @@ StatusWith<std::vector<BSONObj>> selectChunkSplitPoints(OperationContext* opCtx,
                                                         const ChunkRange& chunkRange,
                                                         long long chunkSizeBytes,
                                                         boost::optional<int> maxObjs) {
-    BSONObjBuilder cmd;
-    cmd.append("splitVector", nss.ns());
-    cmd.append("keyPattern", shardKeyPattern.toBSON());
-    chunkRange.append(&cmd);
-    cmd.append("maxChunkSizeBytes", chunkSizeBytes);
-    if (maxObjs) {
-        cmd.append("maxChunkObjects", *maxObjs);
-    }
-
     auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId);
     if (!shardStatus.isOK()) {
         return shardStatus.getStatus();
     }
 
-    auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
-        opCtx,
-        ReadPreferenceSetting{ReadPreference::PrimaryPreferred},
-        "admin",
-        cmd.obj(),
-        Shard::RetryPolicy::kIdempotent);
-    if (!cmdStatus.isOK()) {
-        return std::move(cmdStatus.getStatus());
-    }
-    if (!cmdStatus.getValue().commandStatus.isOK()) {
-        return std::move(cmdStatus.getValue().commandStatus);
+    auto invokeSplitCommand = [&](const BSONObj& command, const StringData db) {
+        return shardStatus.getValue()->runCommandWithFixedRetryAttempts(
+            opCtx,
+            ReadPreferenceSetting{ReadPreference::PrimaryPreferred},
+            db.toString(),
+            command,
+            Shard::RetryPolicy::kIdempotent);
+    };
+
+    const AutoSplitVectorRequest req(
+        nss, shardKeyPattern.toBSON(), chunkRange.getMin(), chunkRange.getMax(), chunkSizeBytes);
+
+    auto cmdStatus = invokeSplitCommand(req.toBSON({}), nss.db());
+
+    // Fallback to splitVector command in case of mixed binaries not supporting autoSplitVector
+    bool fallback = [&]() {
+        auto status = Shard::CommandResponse::getEffectiveStatus(cmdStatus);
+        return !status.isOK() && status.code() == ErrorCodes::CommandNotFound;
+    }();
+
+    if (fallback) {
+        BSONObjBuilder cmd;
+        cmd.append("splitVector", nss.ns());
+        cmd.append("keyPattern", shardKeyPattern.toBSON());
+        chunkRange.append(&cmd);
+        cmd.append("maxChunkSizeBytes", chunkSizeBytes);
+        cmdStatus = invokeSplitCommand(cmd.obj(), NamespaceString::kAdminDb);
     }
 
-    const auto response = std::move(cmdStatus.getValue().response);
 
-    std::vector<BSONObj> splitPoints;
+    auto status = Shard::CommandResponse::getEffectiveStatus(cmdStatus);
+    if (!status.isOK()) {
+        return status;
+    }
+
+    if (fallback) {
+        const auto response = std::move(cmdStatus.getValue().response);
+        std::vector<BSONObj> splitPoints;
 
-    BSONObjIterator it(response.getObjectField("splitKeys"));
-    while (it.more()) {
-        splitPoints.push_back(it.next().Obj().getOwned());
+        BSONObjIterator it(response.getObjectField("splitKeys"));
+        while (it.more()) {
+            splitPoints.push_back(it.next().Obj().getOwned());
+        }
+        return std::move(splitPoints);
     }
 
-    return std::move(splitPoints);
+    const auto response = AutoSplitVectorResponse::parse(
+        IDLParserErrorContext("AutoSplitVectorResponse"), std::move(cmdStatus.getValue().response));
+    return response.getSplitKeys();
 }
 
 StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
diff --git a/src/mongo/s/sharding_mongod_test_fixture.cpp b/src/mongo/s/sharding_mongod_test_fixture.cpp
index dbd20638bee..04fc5084396 100644
--- a/src/mongo/s/sharding_mongod_test_fixture.cpp
+++ b/src/mongo/s/sharding_mongod_test_fixture.cpp
@@ -106,7 +106,7 @@ void ShardingMongodTestFixture::setUp() {
     // Set up this node as part of a replica set.
 
     repl::ReplSettings replSettings;
-    replSettings.setOplogSizeBytes(512'000);
+    replSettings.setOplogSizeBytes(1024 * 1024 * 2);  // 2MB
     replSettings.setReplSetString(ConnectionString::forReplicaSet(_setName, _servers).toString());
     auto replCoordPtr = makeReplicationCoordinator(replSettings);
     _replCoord = replCoordPtr.get();
author	Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com>	2021-10-22 16:55:43 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-10-22 17:08:07 +0000
commit	6d6a3e814c3a36b180d696eef3a5ae4914c1f306 (patch)
tree	0f75f4aff8464e07897b82c19115f6f7da596413
parent	7ea91003d2dcd8d02599dc56a157ffd3f79c9fb6 (diff)
download	mongo-6d6a3e814c3a36b180d696eef3a5ae4914c1f306.tar.gz