author     Tommaso Tocci <tommaso.tocci@10gen.com>  2020-01-02 11:36:39 +0000
committer  evergreen <evergreen@mongodb.com>        2020-01-02 11:36:39 +0000
commit     c92e20479618b22355b9fb7efa935ff6db5883a9 (patch)
tree       5affe6d5e21e9f5a6dc57a520f73ee105db2e821
parent     aa7260c8f699c3c691f836bf2286606b2a8eac93 (diff)
download   mongo-c92e20479618b22355b9fb7efa935ff6db5883a9.tar.gz
SERVER-14052 Make the autosplitter work with fewer than three splitpoints

SplitVector always removes the first splitpoint from the split vector before returning it, so the autosplitter should attempt a split even when the returned vector contains just one splitpoint.
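In other words (a minimal sketch of the corrected guard, simplified from the chunk_splitter.cpp hunk below; the surrounding driver plumbing is omitted):

    // splitVector has already dropped the first candidate point, so a single
    // returned point means the chunk originally had two candidates (roughly
    // twice the target size) and is still worth splitting.
    if (splitPoints.empty()) {
        // Only an empty vector means there is not enough data to split on.
        chunkSplitStateDriver->abandonPrepare();
        return;
    }
    // Otherwise proceed and split at every returned point.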
-rw-r--r--  jstests/sharding/autosplit_low_cardinality.js | 53
-rw-r--r--  src/mongo/db/s/chunk_splitter.cpp             |  4
2 files changed, 54 insertions, 3 deletions
diff --git a/jstests/sharding/autosplit_low_cardinality.js b/jstests/sharding/autosplit_low_cardinality.js
new file mode 100644
index 00000000000..c3b91a6a7c7
--- /dev/null
+++ b/jstests/sharding/autosplit_low_cardinality.js
@@ -0,0 +1,53 @@
+/**
+ * Test the autosplitter when a collection has very low cardinality
+ *
+ * @tags: [requires_fcv_44]
+ */
+
+(function() {
+'use strict';
+load('jstests/sharding/autosplit_include.js');
+
+var st = new ShardingTest({
+ name: "low_cardinality",
+ other: {enableAutoSplit: true, chunkSize: 1},
+});
+
+assert.commandWorked(st.s.adminCommand({enablesharding: "test"}));
+assert.commandWorked(st.s.adminCommand({shardcollection: "test.foo", key: {sk: 1}}));
+
+const bigString = "X".repeat(1024 * 1024 / 4); // 256 KB
+
+var coll = st.getDB("test").getCollection("foo");
+
+// Insert numDocs documents under the given shard key value.
+// Each document contains a ~256KB string, so four documents add up to
+// ~1MB, the configured maximum chunk size.
+function insertBigDocsWithKey(key, numDocs) {
+ var bulk = coll.initializeUnorderedBulkOp();
+ for (var i = 0; i < numDocs; i++) {
+ bulk.insert({sk: key, sub: i, bs: bigString});
+ }
+ assert.commandWorked(bulk.execute());
+}
+
+function numChunks() {
+ return st.config.chunks.count({"ns": "test.foo"});
+}
+
+// Accumulate ~1MB of documents under each of the keys -10 and +10
+insertBigDocsWithKey(-10, 4);
+insertBigDocsWithKey(10, 4);
+waitForOngoingChunkSplits(st);
+
+// At least one split should have been performed
+assert.gte(numChunks(), 2, "Number of chunks is less than 2; no split has been performed");
+
+insertBigDocsWithKey(20, 4);
+waitForOngoingChunkSplits(st);
+// An additional split should have been performed
+assert.gte(numChunks(), 3, "Number of chunks must be at least 3");
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index c7dd1e22250..33235a3be68 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -328,15 +328,13 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr<ChunkSplitStateDriver> chunkSp
boost::none,
maxChunkSizeBytes));
- if (splitPoints.size() <= 1) {
+ if (splitPoints.empty()) {
LOG(1)
<< "ChunkSplitter attempted split but not enough split points were found for chunk "
<< redact(chunk.toString());
// Reset our size estimate that we had prior to splitVector to 0, while still counting
// the bytes that have been written in parallel to this split task
chunkSplitStateDriver->abandonPrepare();
- // No split points means there isn't enough data to split on; 1 split point means we
- // have between half the chunk size to full chunk size so there is no need to split yet
return;
}
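For context, a hedged sketch of the splitVector contract the commit message relies on (a hypothetical simplification; the real implementation lives elsewhere in the server and operates on a chunk's key range, not a plain vector):

    #include <string>
    #include <vector>

    // Per the commit message, splitVector always discards the first candidate
    // split point before handing the vector back to the caller.
    std::vector<std::string> splitVector(std::vector<std::string> candidates) {
        if (!candidates.empty())
            candidates.erase(candidates.begin());  // first point is dropped
        return candidates;
    }

Under this contract, a low-cardinality collection that produces exactly two candidate points comes back as a single-element vector, which the old guard (splitPoints.size() <= 1) wrongly treated as "nothing to split"; the new jstest above reproduces exactly that case.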