author     Haley Connelly <haley.connelly@mongodb.com>  2021-12-20 23:51:23 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-12-21 00:32:35 +0000
commit     3130c8146113e2c2efb4615cf93630f42fd9f1bf (patch)
tree       bba701f19253e4ba2c32e8309db1a4b702338a57
parent     60c56d04a121194e16884eed520eb0676cbf0261 (diff)
download   mongo-3130c8146113e2c2efb4615cf93630f42fd9f1bf.tar.gz
SERVER-60123 Support sharding by cluster key _id on explicitly created clustered collection
-rw-r--r--  jstests/concurrency/fsm_workloads/CRUD_clustered_collection.js  2
-rw-r--r--  jstests/replsets/tenant_migration_on_clustered_collection.js  104
-rw-r--r--  jstests/sharding/clustered_cleanup_orphaned_with_resumable_range_deleter.js  89
-rw-r--r--  jstests/sharding/clustered_collection_balancer.js  78
-rw-r--r--  jstests/sharding/clustered_merge_chunks_test.js  170
-rw-r--r--  jstests/sharding/clustered_tag_auto_split.js  52
-rw-r--r--  jstests/sharding/clustered_top_chunk_split.js  155
-rw-r--r--  src/mongo/db/catalog/clustered_collection_util.cpp  7
-rw-r--r--  src/mongo/db/catalog/index_catalog.cpp  20
-rw-r--r--  src/mongo/db/catalog/index_catalog.h  35
-rw-r--r--  src/mongo/db/catalog/index_catalog_impl.cpp  26
-rw-r--r--  src/mongo/db/catalog/index_catalog_impl.h  9
-rw-r--r--  src/mongo/db/commands/dbcommands.cpp  16
-rw-r--r--  src/mongo/db/query/internal_plans.cpp  217
-rw-r--r--  src/mongo/db/query/internal_plans.h  39
-rw-r--r--  src/mongo/db/s/auto_split_vector.cpp  69
-rw-r--r--  src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp  22
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp  14
-rw-r--r--  src/mongo/db/s/range_deletion_util.cpp  17
-rw-r--r--  src/mongo/db/s/split_chunk.cpp  22
-rw-r--r--  src/mongo/db/s/split_vector.cpp  48
21 files changed, 1044 insertions, 167 deletions
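
In short, the change lets a collection that was explicitly created as clustered be sharded by its cluster key {_id: 1} without building a separate _id index. A minimal mongo shell sketch of the flow the new tests exercise (database and collection names are illustrative; run against a mongos):

    // Explicitly create a collection clustered by _id.
    assert.commandWorked(db.getSiblingDB("testDB").createCollection(
        "testColl", {clusteredIndex: {key: {_id: 1}, unique: true}}));

    // Shard it by the cluster key; no additional {_id: 1} index is required.
    assert.commandWorked(db.adminCommand({enableSharding: "testDB"}));
    assert.commandWorked(db.adminCommand({shardCollection: "testDB.testColl", key: {_id: 1}}));
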
diff --git a/jstests/concurrency/fsm_workloads/CRUD_clustered_collection.js b/jstests/concurrency/fsm_workloads/CRUD_clustered_collection.js
index 0923f173185..1d5295d15fb 100644
--- a/jstests/concurrency/fsm_workloads/CRUD_clustered_collection.js
+++ b/jstests/concurrency/fsm_workloads/CRUD_clustered_collection.js
@@ -4,7 +4,7 @@
* Perform CRUD operations in parallel on a clustered collection. Disallows dropping the collection
* to prevent implicit creation of a non-clustered collection.
*
- * TODO SERVER-60123: Enable sharding for clustered collections.
+ * TODO SERVER-61449: Support implicit clustered collection creation via shardCollection().
* @tags: [
* requires_fcv_51,
* featureFlagClusteredIndexes,
diff --git a/jstests/replsets/tenant_migration_on_clustered_collection.js b/jstests/replsets/tenant_migration_on_clustered_collection.js
new file mode 100644
index 00000000000..ca253f1b736
--- /dev/null
+++ b/jstests/replsets/tenant_migration_on_clustered_collection.js
@@ -0,0 +1,104 @@
+/**
+ * Tests that clustered collections (both empty and non-empty) are successfully migrated in a basic
+ * tenant migration.
+ *
+ * @tags: [
+ * # Clustered collection specific.
+ * featureFlagClusteredIndexes,
+ * requires_fcv_52,
+ * # TODO SERVER-61449 - enable sharding.
+ * assumes_against_mongod_not_mongos,
+ * assumes_unsharded_collection,
+ * # Basic tags for tenant migration tests.
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/clustered_collections/clustered_collection_util.js"); // ClusteredCollectionUtil
+load("jstests/libs/parallelTester.js"); // Thread()
+load("jstests/libs/uuid_util.js"); // extractUUIDFromObject()
+load("jstests/replsets/libs/tenant_migration_test.js"); // TenantMigrationTest
+load("jstests/replsets/libs/tenant_migration_util.js"); // TenantMigrationUtil
+
+const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});
+
+const kTenantId = "testTenantId1";
+const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
+const kEmptyCollName = "testEmptyColl";
+const kNonEmptyCollName = "testNonEmptyColl";
+
+// The documents used to populate the non-empty collection.
+const documents = [{_id: 1, a: 1, b: 1}, {_id: 2, a: 2, b: 2}, {_id: 3, a: 3, b: 3}];
+
+const clusteredCreateOptions = {
+ clusteredIndex: {key: {_id: 1}, name: "index_on_id", unique: true}
+};
+
+// Generates the clustered collections and populates the non-empty collection.
+const createClusteredCollections = () => {
+ const donorPrimary = tenantMigrationTest.getDonorPrimary();
+ const donorDB = donorPrimary.getDB(kDbName);
+
+    // Create a non-empty clustered collection and store its original contents.
+ assert.commandWorked(donorDB.createCollection(kNonEmptyCollName, clusteredCreateOptions));
+ assert.commandWorked(donorDB[kNonEmptyCollName].insert(documents));
+
+ // Create an empty clustered collection.
+ assert.commandWorked(donorDB.createCollection(kEmptyCollName, clusteredCreateOptions));
+
+ // Account for test environments that may change default write concern.
+ tenantMigrationTest.getDonorRst().awaitReplication();
+};
+
+// Runs the entire tenant migration start to finish.
+const runTenantMigration = () => {
+ const migrationId = UUID();
+ const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ recipientConnString: tenantMigrationTest.getRecipientConnString(),
+ tenantId: kTenantId,
+ };
+ const donorRstArgs = TenantMigrationUtil.createRstArgs(tenantMigrationTest.getDonorRst());
+ const migrationThread =
+ new Thread(TenantMigrationUtil.runMigrationAsync, migrationOpts, donorRstArgs);
+ migrationThread.start();
+
+ TenantMigrationTest.assertCommitted(migrationThread.returnData());
+};
+
+// Validates the clustered collections migrated to the recipient.
+const validateMigrationResults = () => {
+ const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+ const recipientDB = recipientPrimary.getDB(kDbName);
+
+ // Confirm the data was transferred correctly.
+ const nonEmptyCollDocs = recipientDB[kNonEmptyCollName].find().toArray();
+ assert.sameMembers(nonEmptyCollDocs, documents);
+ assert.eq(0,
+ recipientDB[kEmptyCollName].find().itcount(),
+ tojson(recipientDB[kEmptyCollName].find().toArray()));
+
+ ClusteredCollectionUtil.validateListCollections(
+ recipientDB, kNonEmptyCollName, clusteredCreateOptions);
+ ClusteredCollectionUtil.validateListCollections(
+ recipientDB, kEmptyCollName, clusteredCreateOptions);
+
+ ClusteredCollectionUtil.validateListIndexes(
+ recipientDB, kNonEmptyCollName, clusteredCreateOptions);
+ ClusteredCollectionUtil.validateListIndexes(
+ recipientDB, kEmptyCollName, clusteredCreateOptions);
+};
+
+createClusteredCollections();
+runTenantMigration();
+validateMigrationResults();
+
+tenantMigrationTest.stop();
+})();
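
The ClusteredCollectionUtil helpers used above are assumed to compare the recipient's listCollections/listIndexes output against the original create options; an equivalent hand-rolled spot check (helper-free, reusing the test's names purely for illustration) could look like:

    const recipientDB = tenantMigrationTest.getRecipientPrimary().getDB(kDbName);
    const collInfo = recipientDB.getCollectionInfos({name: kNonEmptyCollName})[0];
    assert.docEq({_id: 1}, collInfo.options.clusteredIndex.key);
    assert(collInfo.options.clusteredIndex.unique);

    // The cluster key is also reported by listIndexes, marked with 'clustered: true'.
    const indexes = recipientDB[kNonEmptyCollName].getIndexes();
    assert(indexes.some(ix => ix.clustered && bsonWoCompare(ix.key, {_id: 1}) === 0),
           tojson(indexes));
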
diff --git a/jstests/sharding/clustered_cleanup_orphaned_with_resumable_range_deleter.js b/jstests/sharding/clustered_cleanup_orphaned_with_resumable_range_deleter.js
new file mode 100644
index 00000000000..c175b5e0bf6
--- /dev/null
+++ b/jstests/sharding/clustered_cleanup_orphaned_with_resumable_range_deleter.js
@@ -0,0 +1,89 @@
+/**
+ * Tests the behavior of cleanupOrphaned on a clustered collection when the resumable range
+ * deleter is enabled. That is, tests that cleanupOrphaned:
+ *
+ * 1) Ignores the 'startingFromKey' parameter
+ * 2) Waits for all possibly orphaned ranges to be deleted before returning
+ * 3) Does not return 'stoppedAtKey', to preserve the former API of not returning 'stoppedAtKey'
+ * once all orphans have been deleted.
+ *
+ * TODO SERVER-61501: Evaluate whether this test should be removed.
+ *
+ * @tags: [
+ * requires_fcv_52,
+ * featureFlagClusteredIndexes,
+ * ]
+ */
+
+(function() {
+
+load("jstests/libs/fail_point_util.js");
+
+const dbName = "test";
+const collName = "foo";
+const ns = dbName + "." + collName;
+
+var st = new ShardingTest({shards: 2});
+
+jsTest.log("Shard and split a collection");
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName}));
+
+assert.commandWorked(st.s.getDB(dbName).createCollection(
+    collName, {clusteredIndex: {key: {_id: 1}, unique: true}}));
+
+assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
+assert.commandWorked(st.s.adminCommand({split: ns, middle: {_id: 0}}));
+assert.commandWorked(st.s.adminCommand({split: ns, middle: {_id: 1}}));
+
+jsTest.log("Insert some documents");
+const numDocs = 100;
+var bulk = st.s.getCollection(ns).initializeUnorderedBulkOp();
+for (var i = 0; i < numDocs; i++) {
+ bulk.insert({_id: i});
+}
+assert.commandWorked(bulk.execute());
+
+jsTest.log("Assert that there are no range deletion tasks");
+assert.eq(0, st.shard0.getDB("config").getCollection("rangeDeletions").count());
+assert.eq(0, st.shard1.getDB("config").getCollection("rangeDeletions").count());
+
+let suspendRangeDeletionShard0 = configureFailPoint(st.shard0, "suspendRangeDeletion");
+
+jsTest.log("Move two chunks to create two orphaned ranges on the donor");
+assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: 0}, to: st.shard1.shardName}));
+assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: 1}, to: st.shard1.shardName}));
+
+jsTest.log("Since the recipient does not have orphaned ranges, cleanupOrphaned should return");
+assert.eq(0, st.shard1.getDB("config").getCollection("rangeDeletions").count());
+assert.commandWorked(st.shard1.adminCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: {_id: 50} /* The startingFromKey parameter should be ignored */
+}));
+
+jsTest.log("Since the donor has two orphaned ranges, cleanupOrphaned should block");
+assert.eq(2, st.shard0.getDB("config").getCollection("rangeDeletions").count());
+assert.commandFailedWithCode(st.shard0.adminCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: {_id: 50} /* The startingFromKey parameter should be ignored */,
+ maxTimeMS: 10 * 1000
+}),
+ ErrorCodes.MaxTimeMSExpired);
+assert.eq(numDocs, st.shard0.getDB(dbName).getCollection(collName).count());
+
+jsTest.log("Once the donor can cleans up the ranges, cleanupOrphaned should eventually return.");
+suspendRangeDeletionShard0.off();
+const res = st.shard0.adminCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: {_id: 50} /* The startingFromKey parameter should be ignored */
+});
+assert.commandWorked(res);
+assert.eq(0, st.shard0.getDB("config").getCollection("rangeDeletions").count());
+assert.eq(0, st.shard0.getDB(dbName).getCollection(collName).count());
+
+// The result should not have contained 'stoppedAtKey', since to match the API of the original
+// cleanupOrphaned command, no 'stoppedAtKey' is returned once all orphans have been deleted.
+assert.eq(null, res.stoppedAtKey);
+
+st.stop();
+})();
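
Because each migration enqueues a task document in config.rangeDeletions, the same draining behavior can also be observed directly; a small illustrative alternative to relying on cleanupOrphaned blocking (reusing the test's names):

    // Wait for the donor's range deletion tasks to drain instead of letting cleanupOrphaned block.
    assert.soon(() => st.shard0.getDB("config").getCollection("rangeDeletions").count() === 0,
                "range deletion tasks on the donor never drained");
    assert.eq(0, st.shard0.getDB(dbName).getCollection(collName).count());
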
diff --git a/jstests/sharding/clustered_collection_balancer.js b/jstests/sharding/clustered_collection_balancer.js
new file mode 100644
index 00000000000..ef2af470400
--- /dev/null
+++ b/jstests/sharding/clustered_collection_balancer.js
@@ -0,0 +1,78 @@
+/**
+ * Test inserts into a sharded clustered collection with the balancer on.
+ *
+ * @tags: [
+ * requires_fcv_52,
+ * featureFlagClusteredIndexes,
+ * # The inMemory variants may run out of memory while inserting large input objects.
+ * requires_persistence,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+Random.setRandomSeed();
+
+const dbName = 'testDB';
+const collName = 'testColl';
+
+// Connections.
+const st = new ShardingTest({shards: 2, rs: {nodes: 2}, other: {chunkSize: 1}});
+const mongos = st.s0;
+
+// Enable sharding on the database.
+assert.commandWorked(mongos.adminCommand({enableSharding: dbName}));
+const mainDB = mongos.getDB(dbName);
+
+const largeStr = "a".repeat(10000);
+const clusteredCreateOpts = {
+ clusteredIndex: {key: {_id: 1}, unique: true, name: "nameOnId"}
+};
+
+st.startBalancer();
+
+function runTest(shardKey) {
+ assert.commandWorked(mainDB.createCollection(collName, clusteredCreateOpts));
+ let coll = mainDB.getCollection(collName);
+
+ if (shardKey != "_id") {
+ // Require creating an index when the shard key is not the cluster key.
+ assert.commandWorked(coll.createIndex({[shardKey]: 1}));
+ }
+
+    // Insert a large dataset so that the balancer is guaranteed to split the chunks.
+ let bulk = coll.initializeUnorderedBulkOp();
+ const numDocs = 1000;
+ for (let i = 0; i < numDocs; i++) {
+ bulk.insert({[shardKey]: i * 100, largeField: largeStr});
+ }
+ assert.commandWorked(bulk.execute());
+
+ assert.commandWorked(mongos.adminCommand({
+ shardCollection: `${dbName}.${collName}`,
+ key: {[shardKey]: 1},
+ }));
+ st.awaitBalancerRound();
+
+ // Ensure that each shard has at least one chunk after the split.
+ const primaryShard = st.getPrimaryShard(dbName);
+ const otherShard = st.getOther(primaryShard);
+ assert.soon(
+ () => {
+ const counts = st.chunkCounts(collName, dbName);
+ return counts[primaryShard.shardName] >= 1 && counts[otherShard.shardName] >= 1;
+ },
+ () => {
+ return tojson(mongos.getDB("config").getCollection("chunks").find().toArray());
+ });
+
+ // Verify that all the documents still exist in the collection.
+ assert.eq(coll.find().itcount(), numDocs);
+ assert(coll.drop());
+}
+
+runTest("_id");
+runTest("b");
+st.stop();
+})();
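
One detail worth noting from this test: once a clustered collection contains data, sharding it by anything other than the cluster key still requires a supporting index, whereas sharding by the cluster key _id does not. A condensed sketch (reusing the test's names; collection names are illustrative):

    // Shard by the cluster key: no extra index is needed even when data already exists.
    assert.commandWorked(mainDB.createCollection("byId", clusteredCreateOpts));
    assert.commandWorked(mainDB.byId.insert({_id: 1, b: 1}));
    assert.commandWorked(mongos.adminCommand({shardCollection: `${dbName}.byId`, key: {_id: 1}}));

    // Shard by a non-cluster key: a supporting index must exist once the collection is non-empty.
    assert.commandWorked(mainDB.createCollection("byB", clusteredCreateOpts));
    assert.commandWorked(mainDB.byB.insert({_id: 1, b: 1}));
    assert.commandWorked(mainDB.byB.createIndex({b: 1}));
    assert.commandWorked(mongos.adminCommand({shardCollection: `${dbName}.byB`, key: {b: 1}}));
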
diff --git a/jstests/sharding/clustered_merge_chunks_test.js b/jstests/sharding/clustered_merge_chunks_test.js
new file mode 100644
index 00000000000..17d391ec61f
--- /dev/null
+++ b/jstests/sharding/clustered_merge_chunks_test.js
@@ -0,0 +1,170 @@
+/*
+ * Tests that merging chunks via mongos works/doesn't work with different chunk configurations on a
+ * clustered collection.
+ *
+ * TODO SERVER-61501: Evaluate whether this test should be removed.
+ *
+ * @tags: [
+ * requires_fcv_52,
+ * featureFlagClusteredIndexes,
+ * ]
+ */
+(function() {
+'use strict';
+
+load("jstests/sharding/libs/find_chunks_util.js");
+
+var st = new ShardingTest({shards: 2, mongos: 2});
+
+var mongos = st.s0;
+var staleMongos = st.s1;
+var admin = mongos.getDB("admin");
+
+assert.commandWorked(
+ mongos.getDB("foo").createCollection("bar", {clusteredIndex: {key: {_id: 1}, unique: true}}));
+
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+st.ensurePrimaryShard('foo', st.shard0.shardName);
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
+
+// Create ranges MIN->0,0->10,(hole),20->40,40->50,50->90,(hole),100->110,110->MAX on first
+// shard
+jsTest.log("Creating ranges...");
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 10}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 20}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 40}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 50}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 90}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 100}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 110}}));
+
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 10}, to: st.shard1.shardName}));
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 90}, to: st.shard1.shardName}));
+
+st.printShardingStatus();
+
+// Insert some data into each of the consolidated ranges
+assert.commandWorked(coll.insert({_id: 0}));
+assert.commandWorked(coll.insert({_id: 10}));
+assert.commandWorked(coll.insert({_id: 40}));
+assert.commandWorked(coll.insert({_id: 110}));
+
+var staleCollection = staleMongos.getCollection(coll + "");
+
+jsTest.log("Trying merges that should fail...");
+
+// S0: min->0, 0->10, 20->40, 40->50, 50->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure merging non-exact chunks is invalid
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 5}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 5}, {_id: 10}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 15}, {_id: 50}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 55}]}));
+assert.commandFailed(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 105}, {_id: MaxKey}]}));
+
+// Make sure merging over holes is invalid
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 0}, {_id: 40}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 40}, {_id: 110}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 50}, {_id: 110}]}));
+
+// Make sure merging between shards is invalid
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 0}, {_id: 20}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 10}, {_id: 40}]}));
+assert.commandFailed(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 40}, {_id: 100}]}));
+assert.eq(4, staleCollection.find().itcount());
+
+jsTest.log("Trying merges that should succeed...");
+
+// Merging single chunks should be treated as a no-op
+// (or fail because 'the range specifies one single chunk' in multiversion test environments)
+try {
+ assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 0}]}));
+} catch (e) {
+ if (!e.message.match(/could not merge chunks, collection .* already contains chunk for/)) {
+ throw e;
+ }
+}
+try {
+ assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 40}]}));
+} catch (e) {
+ if (!e.message.match(/could not merge chunks, collection .* already contains chunk for/)) {
+ throw e;
+ }
+}
+try {
+ assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 110}, {_id: MaxKey}]}));
+} catch (e) {
+ if (!e.message.match(/could not merge chunks, collection .* already contains chunk for/)) {
+ throw e;
+ }
+}
+// Make sure merge including the MinKey works
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 10}]}));
+assert.eq(4, staleCollection.find().itcount());
+// S0: min->10, 20->40, 40->50, 50->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure merging three chunks in the middle works
+assert.commandWorked(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 90}]}));
+assert.eq(4, staleCollection.find().itcount());
+// S0: min->10, 20->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure merge including the MaxKey works
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 100}, {_id: MaxKey}]}));
+assert.eq(4, staleCollection.find().itcount());
+// S0: min->10, 20->90, 100->max
+// S1: 10->20, 90->100
+
+// Make sure merging chunks after a chunk has been moved out of a shard succeeds
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 110}, to: st.shard1.shardName}));
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 10}, to: st.shard0.shardName}));
+assert.eq(4, staleCollection.find().itcount());
+// S0: min->10, 10->20, 20->90
+// S1: 90->100, 100->max
+
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 90}, {_id: MaxKey}]}));
+assert.eq(4, staleCollection.find().itcount());
+// S0: min->10, 10->20, 20->90
+// S1: 90->max
+
+// Make sure merge on the other shard after a chunk has been merged succeeds
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 90}]}));
+// S0: min->90
+// S1: 90->max
+
+st.printShardingStatus(true);
+
+assert.eq(2, findChunksUtil.findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(1,
+ findChunksUtil
+ .findChunksByNs(st.s0.getDB('config'),
+ 'foo.bar',
+ {'min._id': MinKey, 'max._id': 90, shard: st.shard0.shardName})
+ .itcount());
+assert.eq(1,
+ findChunksUtil
+ .findChunksByNs(st.s0.getDB('config'),
+ 'foo.bar',
+ {'min._id': 90, 'max._id': MaxKey, shard: st.shard1.shardName})
+ .itcount());
+
+st.stop();
+})();
diff --git a/jstests/sharding/clustered_tag_auto_split.js b/jstests/sharding/clustered_tag_auto_split.js
new file mode 100644
index 00000000000..2c866889d4b
--- /dev/null
+++ b/jstests/sharding/clustered_tag_auto_split.js
@@ -0,0 +1,52 @@
+/*
+ * Test to make sure that tag ranges get split when full keys are used for the tag ranges and the
+ * collection is clustered by _id.
+ *
+ * TODO SERVER-61501: Evaluate whether this test should be removed.
+ *
+ * @tags: [
+ * requires_fcv_52,
+ * featureFlagClusteredIndexes,
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/sharding/libs/find_chunks_util.js");
+
+var s = new ShardingTest({shards: 2, mongos: 1});
+
+assert.commandWorked(s.s0.adminCommand({enablesharding: "test"}));
+s.ensurePrimaryShard('test', s.shard1.shardName);
+
+assert.commandWorked(
+ s.s0.getDB("test").createCollection("foo", {clusteredIndex: {key: {_id: 1}, unique: true}}));
+
+assert.commandWorked(s.s0.adminCommand({shardcollection: "test.foo", key: {_id: 1}}));
+
+assert.eq(1, findChunksUtil.findChunksByNs(s.config, "test.foo").itcount());
+
+s.addShardTag(s.shard0.shardName, "a");
+s.addShardTag(s.shard0.shardName, "b");
+
+s.addTagRange("test.foo", {_id: 5}, {_id: 10}, "a");
+s.addTagRange("test.foo", {_id: 10}, {_id: 15}, "b");
+
+s.startBalancer();
+
+assert.soon(function() {
+ return findChunksUtil.findChunksByNs(s.config, "test.foo").itcount() == 4;
+}, 'Split did not occur', 3 * 60 * 1000);
+
+s.awaitBalancerRound();
+s.printShardingStatus(true);
+assert.eq(4, findChunksUtil.findChunksByNs(s.config, "test.foo").itcount(), 'Split points changed');
+
+assert.eq(1, findChunksUtil.findChunksByNs(s.config, "test.foo", {min: {_id: MinKey}}).itcount());
+assert.eq(1, findChunksUtil.findChunksByNs(s.config, "test.foo", {min: {_id: 5}}).itcount());
+assert.eq(1, findChunksUtil.findChunksByNs(s.config, "test.foo", {min: {_id: 10}}).itcount());
+assert.eq(1, findChunksUtil.findChunksByNs(s.config, "test.foo", {min: {_id: 15}}).itcount());
+
+s.stop();
+})();
diff --git a/jstests/sharding/clustered_top_chunk_split.js b/jstests/sharding/clustered_top_chunk_split.js
new file mode 100644
index 00000000000..a4747bd5456
--- /dev/null
+++ b/jstests/sharding/clustered_top_chunk_split.js
@@ -0,0 +1,155 @@
+/**
+ * This file tests the top-chunk optimization logic in splitChunk command. Whenever a chunk is
+ * split, the shouldMigrate field should be set if the extreme chunk has only a single document,
+ * where extreme chunk is defined as the chunk containing either the upper or lower bound of the
+ * entire shard key space.
+ *
+ * This test mimics the existing top_chunk_split.js but on a clustered collection.
+ *
+ * TODO SERVER-61557: evaluate usefulness of this test.
+ *
+ * @tags: [
+ * requires_fcv_52,
+ * featureFlagClusteredIndexes,
+ * ]
+ */
+(function() {
+'use strict';
+
+var st = new ShardingTest({shards: 1});
+var testDB = st.s.getDB('test');
+assert.commandWorked(testDB.adminCommand({enableSharding: 'test'}));
+
+var callSplit = function(db, minKey, maxKey, splitPoints) {
+ jsTestLog(`callSplit minKey ${tojson(minKey)}, ${tojson(maxKey)}, ${tojson(splitPoints)}`);
+ var res = st.s.adminCommand({getShardVersion: "test.user"});
+ assert.commandWorked(res);
+ var shardVersion = [res.version, res.versionEpoch];
+ return db.runCommand({
+ splitChunk: 'test.user',
+ from: st.shard0.shardName,
+ min: minKey,
+ max: maxKey,
+ keyPattern: {_id: 1},
+ splitKeys: splitPoints,
+ epoch: res.versionEpoch,
+ });
+};
+
+var tests = [
+ //
+ // Lower extreme chunk tests.
+ //
+
+ // All chunks have 1 doc.
+ //
+ // Expected doc counts for new chunks:
+ // [ MinKey, -2 ): 1
+ // [ -2, -1 ): 1
+ // [ -1, 0): 1
+ //
+ function(db) {
+ var res = callSplit(db, {_id: MinKey}, {_id: 0}, [{_id: -2}, {_id: -1}]);
+ assert.commandWorked(res);
+ assert.neq(res.shouldMigrate, null, tojson(res));
+ assert(bsonWoCompare(res.shouldMigrate.min, {_id: MinKey}) == 0,
+ tojson(res.shouldMigrate.min));
+ assert(bsonWoCompare(res.shouldMigrate.max, {_id: -2}) == 0, tojson(res.shouldMigrate.max));
+ },
+
+ // One chunk has single doc, extreme doesn't.
+ //
+ // Expected doc counts for new chunks:
+ // [ MinKey, -1 ): 2
+ // [ -1, 0): 1
+ //
+ function(db) {
+ var res = callSplit(db, {_id: MinKey}, {_id: 0}, [{_id: -1}]);
+ assert.commandWorked(res);
+ assert.eq(res.shouldMigrate, null, tojson(res));
+ },
+
+ // Only extreme has single doc.
+ //
+ // Expected doc counts for new chunks:
+ // [ MinKey, -2 ): 1
+ // [ -2, 0): 2
+ //
+ function(db) {
+ var res = callSplit(db, {_id: MinKey}, {_id: 0}, [{_id: -2}]);
+ assert.commandWorked(res);
+ assert.neq(res.shouldMigrate, null, tojson(res));
+ assert(bsonWoCompare(res.shouldMigrate.min, {_id: MinKey}) == 0,
+ tojson(res.shouldMigrate.min));
+ assert(bsonWoCompare(res.shouldMigrate.max, {_id: -2}) == 0, tojson(res.shouldMigrate.max));
+ },
+
+ //
+ // Upper extreme chunk tests.
+ //
+
+ // All chunks have 1 doc.
+ //
+ // Expected doc counts for new chunks:
+ // [ 0, 1 ): 1
+ // [ 1, 2 ): 1
+ // [ 2, MaxKey): 1
+ //
+ function(db) {
+ var res = callSplit(db, {_id: 0}, {_id: MaxKey}, [{_id: 1}, {_id: 2}]);
+ assert.commandWorked(res);
+ assert.neq(res.shouldMigrate, null, tojson(res));
+ assert(bsonWoCompare(res.shouldMigrate.min, {_id: 2}) == 0, tojson(res.shouldMigrate.min));
+ assert(bsonWoCompare(res.shouldMigrate.max, {_id: MaxKey}) == 0,
+ tojson(res.shouldMigrate.max));
+ },
+
+ // One chunk has single doc, extreme doesn't.
+ //
+ // Expected doc counts for new chunks:
+ // [ 0, 1 ): 1
+ // [ 1, MaxKey): 2
+ //
+ function(db) {
+ var res = callSplit(db, {_id: 0}, {_id: MaxKey}, [{_id: 1}]);
+ assert.commandWorked(res);
+ assert.eq(res.shouldMigrate, null, tojson(res));
+ },
+
+ // Only extreme has single doc.
+ //
+ // Expected doc counts for new chunks:
+ // [ 0, 2 ): 2
+ // [ 2, MaxKey): 1
+ //
+ function(db) {
+ var res = callSplit(db, {_id: 0}, {_id: MaxKey}, [{_id: 2}]);
+ assert.commandWorked(res);
+ assert.neq(res.shouldMigrate, null, tojson(res));
+ assert(bsonWoCompare(res.shouldMigrate.min, {_id: 2}) == 0, tojson(res.shouldMigrate.min));
+ assert(bsonWoCompare(res.shouldMigrate.max, {_id: MaxKey}) == 0,
+ tojson(res.shouldMigrate.max));
+ },
+];
+
+tests.forEach(function(test) {
+ // setup
+ assert.commandWorked(
+ testDB.createCollection("user", {clusteredIndex: {key: {_id: 1}, unique: true}}));
+
+ assert.commandWorked(testDB.adminCommand({shardCollection: 'test.user', key: {_id: 1}}));
+ assert.commandWorked(testDB.adminCommand({split: 'test.user', middle: {_id: 0}}));
+
+ for (var _id = -3; _id < 3; _id++) {
+ testDB.user.insert({_id: _id});
+ }
+
+ // run test
+ test(st.rs0.getPrimary().getDB('admin'));
+
+ // teardown
+ testDB.user.drop();
+});
+
+st.stop();
+})();
diff --git a/src/mongo/db/catalog/clustered_collection_util.cpp b/src/mongo/db/catalog/clustered_collection_util.cpp
index 65a589c38ba..88c1ed92f42 100644
--- a/src/mongo/db/catalog/clustered_collection_util.cpp
+++ b/src/mongo/db/catalog/clustered_collection_util.cpp
@@ -118,14 +118,15 @@ BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo)
bool isClusteredOnId(const boost::optional<ClusteredCollectionInfo>& collInfo) {
- if (!collInfo) {
- return false;
- }
return clustered_util::matchesClusterKey(BSON("_id" << 1), collInfo);
}
bool matchesClusterKey(const BSONObj& obj,
const boost::optional<ClusteredCollectionInfo>& collInfo) {
+ if (!collInfo) {
+ return false;
+ }
+
const auto nFields = obj.nFields();
invariant(nFields > 0);
if (nFields > 1) {
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp
index 5d0a88b0a9b..90209b6bdc1 100644
--- a/src/mongo/db/catalog/index_catalog.cpp
+++ b/src/mongo/db/catalog/index_catalog.cpp
@@ -39,6 +39,7 @@ namespace mongo {
using IndexIterator = IndexCatalog::IndexIterator;
using ReadyIndexesIterator = IndexCatalog::ReadyIndexesIterator;
using AllIndexesIterator = IndexCatalog::AllIndexesIterator;
+using ShardKeyIndex = IndexCatalog::ShardKeyIndex;
bool IndexIterator::more() {
if (_start) {
@@ -101,6 +102,25 @@ const IndexCatalogEntry* AllIndexesIterator::_advance() {
return entry;
}
+ShardKeyIndex::ShardKeyIndex(const IndexDescriptor* indexDescriptor)
+ : _indexDescriptor(indexDescriptor) {
+ tassert(6012300,
+ "The indexDescriptor for ShardKeyIndex(const IndexDescriptor* indexDescripto) must not "
+ "be a nullptr",
+ indexDescriptor != nullptr);
+}
+
+ShardKeyIndex::ShardKeyIndex(const ClusteredIndexSpec& clusteredIndexSpec)
+ : _indexDescriptor(nullptr),
+ _clusteredIndexKeyPattern(clusteredIndexSpec.getKey().getOwned()) {}
+
+const BSONObj& ShardKeyIndex::keyPattern() const {
+ if (_indexDescriptor != nullptr) {
+ return _indexDescriptor->keyPattern();
+ }
+ return _clusteredIndexKeyPattern;
+}
+
StringData toString(IndexBuildMethod method) {
switch (method) {
case IndexBuildMethod::kHybrid:
diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h
index 1dfcc641869..88773b33266 100644
--- a/src/mongo/db/catalog/index_catalog.h
+++ b/src/mongo/db/catalog/index_catalog.h
@@ -33,6 +33,7 @@
#include <vector>
#include "mongo/base/clonable_ptr.h"
+#include "mongo/db/catalog/clustered_collection_options_gen.h"
#include "mongo/db/catalog/index_catalog_entry.h"
#include "mongo/db/index/multikey_paths.h"
#include "mongo/db/jsobj.h"
@@ -181,6 +182,31 @@ public:
std::unique_ptr<std::vector<IndexCatalogEntry*>> _ownedContainer;
};
+ class ShardKeyIndex {
+ public:
+ /**
+ * Wraps information pertaining to the 'index' used as the shard key.
+ *
+ * A clustered index is not tied to an IndexDescriptor whereas all other types of indexes
+ * are. Either the 'index' is a clustered index and '_clusteredIndexKeyPattern' is
+ * non-empty, or '_indexDescriptor' is non-null and a standard index exists.
+ */
+ ShardKeyIndex(const IndexDescriptor* indexDescriptor);
+ ShardKeyIndex(const ClusteredIndexSpec& clusteredIndexSpec);
+
+ const BSONObj& keyPattern() const;
+ const IndexDescriptor* descriptor() const {
+ return _indexDescriptor;
+ }
+
+ private:
+ const IndexDescriptor* _indexDescriptor;
+
+ // Stores the keyPattern when the index is a clustered index and there is no
+ // IndexDescriptor. Empty otherwise.
+ BSONObj _clusteredIndexKeyPattern;
+ };
+
IndexCatalog() = default;
virtual ~IndexCatalog() = default;
@@ -261,10 +287,11 @@ public:
*
* If no such index exists, returns NULL.
*/
- virtual const IndexDescriptor* findShardKeyPrefixedIndex(OperationContext* opCtx,
- const CollectionPtr& collection,
- const BSONObj& shardKey,
- bool requireSingleKey) const = 0;
+ virtual const boost::optional<ShardKeyIndex> findShardKeyPrefixedIndex(
+ OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const BSONObj& shardKey,
+ bool requireSingleKey) const = 0;
virtual void findIndexByType(OperationContext* opCtx,
const std::string& type,
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index d09fe5ebe59..cf99ee1b45e 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -1291,10 +1291,17 @@ void IndexCatalogImpl::findIndexesByKeyPattern(OperationContext* opCtx,
}
}
-const IndexDescriptor* IndexCatalogImpl::findShardKeyPrefixedIndex(OperationContext* opCtx,
- const CollectionPtr& collection,
- const BSONObj& shardKey,
- bool requireSingleKey) const {
+const boost::optional<IndexCatalog::ShardKeyIndex> IndexCatalogImpl::findShardKeyPrefixedIndex(
+ OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const BSONObj& shardKey,
+ bool requireSingleKey) const {
+ if (collection->isClustered() &&
+ clustered_util::matchesClusterKey(shardKey, collection->getClusteredInfo())) {
+ auto clusteredIndexSpec = collection->getClusteredInfo()->getIndexSpec();
+ return IndexCatalog::ShardKeyIndex(clusteredIndexSpec);
+ }
+
const IndexDescriptor* best = nullptr;
std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, false);
@@ -1309,14 +1316,19 @@ const IndexDescriptor* IndexCatalogImpl::findShardKeyPrefixedIndex(OperationCont
if (!shardKey.isPrefixOf(desc->keyPattern(), SimpleBSONElementComparator::kInstance))
continue;
- if (!entry->isMultikey(opCtx, collection) && hasSimpleCollation)
- return desc;
+ if (!entry->isMultikey(opCtx, collection) && hasSimpleCollation) {
+ return IndexCatalog::ShardKeyIndex(desc);
+ }
if (!requireSingleKey && hasSimpleCollation)
best = desc;
}
- return best;
+ if (best != nullptr) {
+ return IndexCatalog::ShardKeyIndex(best);
+ }
+
+ return boost::none;
}
void IndexCatalogImpl::findIndexByType(OperationContext* opCtx,
diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h
index 1e766198fce..ec15da563b8 100644
--- a/src/mongo/db/catalog/index_catalog_impl.h
+++ b/src/mongo/db/catalog/index_catalog_impl.h
@@ -136,10 +136,11 @@ public:
*
* If no such index exists, returns NULL.
*/
- const IndexDescriptor* findShardKeyPrefixedIndex(OperationContext* opCtx,
- const CollectionPtr& collection,
- const BSONObj& shardKey,
- bool requireSingleKey) const override;
+ const boost::optional<ShardKeyIndex> findShardKeyPrefixedIndex(
+ OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const BSONObj& shardKey,
+ bool requireSingleKey) const override;
void findIndexByType(OperationContext* opCtx,
const std::string& type,
diff --git a/src/mongo/db/commands/dbcommands.cpp b/src/mongo/db/commands/dbcommands.cpp
index 72720f43f44..9167dc0d18e 100644
--- a/src/mongo/db/commands/dbcommands.cpp
+++ b/src/mongo/db/commands/dbcommands.cpp
@@ -376,7 +376,7 @@ public:
keyPattern,
/*requireSingleKey=*/true);
- if (shardKeyIdx == nullptr) {
+ if (!shardKeyIdx) {
errmsg = "couldn't find valid index containing key pattern";
return false;
}
@@ -385,13 +385,13 @@ public:
min = Helpers::toKeyFormat(kp.extendRangeBound(min, false));
max = Helpers::toKeyFormat(kp.extendRangeBound(max, false));
- exec = InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- min,
- max,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::INTERRUPT_ONLY);
+ exec = InternalPlanner::shardKeyIndexScan(opCtx,
+ &collection.getCollection(),
+ *shardKeyIdx,
+ min,
+ max,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::INTERRUPT_ONLY);
}
CurOpFailpointHelpers::waitWhileFailPointEnabled(
diff --git a/src/mongo/db/query/internal_plans.cpp b/src/mongo/db/query/internal_plans.cpp
index 6ad3fa5efcc..649962c31cb 100644
--- a/src/mongo/db/query/internal_plans.cpp
+++ b/src/mongo/db/query/internal_plans.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/query/internal_plans.h"
+#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/client.h"
#include "mongo/db/exec/collection_scan.h"
@@ -42,9 +43,102 @@
#include "mongo/db/exec/upsert_stage.h"
#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/plan_executor_factory.h"
+#include "mongo/db/record_id_helpers.h"
namespace mongo {
+namespace {
+CollectionScanParams::ScanBoundInclusion getScanBoundInclusion(BoundInclusion indexBoundInclusion) {
+ switch (indexBoundInclusion) {
+ case BoundInclusion::kExcludeBothStartAndEndKeys:
+ return CollectionScanParams::ScanBoundInclusion::kExcludeBothStartAndEndRecords;
+ case BoundInclusion::kIncludeStartKeyOnly:
+ return CollectionScanParams::ScanBoundInclusion::kIncludeStartRecordOnly;
+ case BoundInclusion::kIncludeEndKeyOnly:
+ return CollectionScanParams::ScanBoundInclusion::kIncludeEndRecordOnly;
+ case BoundInclusion::kIncludeBothStartAndEndKeys:
+ return CollectionScanParams::ScanBoundInclusion::kIncludeBothStartAndEndRecords;
+ default:
+ MONGO_UNREACHABLE;
+ }
+}
+
+// Construct collection scan params for a scan over the collection's cluster key. Callers must
+// confirm the collection's cluster key matches the keyPattern.
+CollectionScanParams convertIndexScanParamsToCollScanParams(
+ OperationContext* opCtx,
+ const CollectionPtr* coll,
+ const BSONObj& keyPattern,
+ const BSONObj& startKey,
+ const BSONObj& endKey,
+ BoundInclusion boundInclusion,
+ const InternalPlanner::Direction direction) {
+ const auto& collection = *coll;
+
+ dassert(collection->isClustered() &&
+ clustered_util::matchesClusterKey(keyPattern, collection->getClusteredInfo()));
+
+ boost::optional<RecordId> startRecord, endRecord;
+ if (!startKey.isEmpty()) {
+ startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+ }
+ if (!endKey.isEmpty()) {
+ endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+ }
+
+ // For a forward scan, the startKey is the minRecord. For a backward scan, it is the maxRecord.
+ auto minRecord = (direction == InternalPlanner::FORWARD) ? startRecord : endRecord;
+ auto maxRecord = (direction == InternalPlanner::FORWARD) ? endRecord : startRecord;
+
+ if (minRecord && maxRecord) {
+ // Regardless of direction, the minRecord should always be less than the maxRecord
+ dassert(minRecord < maxRecord,
+ str::stream() << "Expected the minRecord " << minRecord
+ << " to be less than the maxRecord " << maxRecord
+ << " on a bounded collection scan. Original startKey and endKey for "
+ "index scan ["
+ << startKey << ", " << endKey << "]. Is FORWARD? "
+ << (direction == InternalPlanner::FORWARD));
+ }
+
+ CollectionScanParams params;
+ params.minRecord = minRecord;
+ params.maxRecord = maxRecord;
+ if (InternalPlanner::FORWARD == direction) {
+ params.direction = CollectionScanParams::FORWARD;
+ } else {
+ params.direction = CollectionScanParams::BACKWARD;
+ }
+ params.boundInclusion = getScanBoundInclusion(boundInclusion);
+ return params;
+}
+
+CollectionScanParams createCollectionScanParams(
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ WorkingSet* ws,
+ const CollectionPtr* coll,
+ InternalPlanner::Direction direction,
+ boost::optional<RecordId> resumeAfterRecordId,
+ boost::optional<RecordId> minRecord,
+ boost::optional<RecordId> maxRecord) {
+ const auto& collection = *coll;
+ invariant(collection);
+
+ CollectionScanParams params;
+ params.shouldWaitForOplogVisibility =
+ shouldWaitForOplogVisibility(expCtx->opCtx, collection, false);
+ params.resumeAfterRecordId = resumeAfterRecordId;
+ params.minRecord = minRecord;
+ params.maxRecord = maxRecord;
+ if (InternalPlanner::FORWARD == direction) {
+ params.direction = CollectionScanParams::FORWARD;
+ } else {
+ params.direction = CollectionScanParams::BACKWARD;
+ }
+ return params;
+}
+} // namespace
+
std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::collectionScan(
OperationContext* opCtx,
const CollectionPtr* coll,
@@ -60,8 +154,11 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::collection
auto expCtx = make_intrusive<ExpressionContext>(
opCtx, std::unique_ptr<CollatorInterface>(nullptr), collection->ns());
- auto cs = _collectionScan(
- expCtx, ws.get(), &collection, direction, resumeAfterRecordId, minRecord, maxRecord);
+
+ auto collScanParams = createCollectionScanParams(
+ expCtx, ws.get(), coll, direction, resumeAfterRecordId, minRecord, maxRecord);
+
+ auto cs = _collectionScan(expCtx, ws.get(), &collection, collScanParams);
// Takes ownership of 'ws' and 'cs'.
auto statusWithPlanExecutor =
@@ -116,14 +213,11 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::deleteWith
auto expCtx = make_intrusive<ExpressionContext>(
opCtx, std::unique_ptr<CollatorInterface>(nullptr), collection->ns());
- auto root = _collectionScan(expCtx,
- ws.get(),
- &collection,
- direction,
- boost::none,
- minRecord,
- maxRecord,
- true /* relaxCappedConstraints */);
+ auto collScanParams = createCollectionScanParams(
+ expCtx, ws.get(), coll, direction, boost::none /* resumeAfterId */, minRecord, maxRecord);
+
+ auto root = _collectionScan(
+ expCtx, ws.get(), &collection, collScanParams, true /* relaxCappedConstraints */);
root = std::make_unique<DeleteStage>(
expCtx.get(), std::move(params), ws.get(), collection, root.release());
@@ -217,6 +311,80 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::deleteWith
return std::move(executor.getValue());
}
+std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::shardKeyIndexScan(
+ OperationContext* opCtx,
+ const CollectionPtr* collection,
+ const IndexCatalog::ShardKeyIndex& shardKeyIdx,
+ const BSONObj& startKey,
+ const BSONObj& endKey,
+ BoundInclusion boundInclusion,
+ PlanYieldPolicy::YieldPolicy yieldPolicy,
+ Direction direction,
+ int options) {
+ if (shardKeyIdx.descriptor() != nullptr) {
+ return indexScan(opCtx,
+ collection,
+ shardKeyIdx.descriptor(),
+ startKey,
+ endKey,
+ boundInclusion,
+ yieldPolicy,
+ direction,
+ options);
+ }
+ // Do a clustered collection scan.
+ auto params = convertIndexScanParamsToCollScanParams(
+ opCtx, collection, shardKeyIdx.keyPattern(), startKey, endKey, boundInclusion, direction);
+ return collectionScan(opCtx, collection, params, yieldPolicy);
+}
+
+std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::deleteWithShardKeyIndexScan(
+ OperationContext* opCtx,
+ const CollectionPtr* coll,
+ std::unique_ptr<DeleteStageParams> params,
+ const IndexCatalog::ShardKeyIndex& shardKeyIdx,
+ const BSONObj& startKey,
+ const BSONObj& endKey,
+ BoundInclusion boundInclusion,
+ PlanYieldPolicy::YieldPolicy yieldPolicy,
+ Direction direction) {
+ if (shardKeyIdx.descriptor()) {
+ return deleteWithIndexScan(opCtx,
+ coll,
+ std::move(params),
+ shardKeyIdx.descriptor(),
+ startKey,
+ endKey,
+ boundInclusion,
+ yieldPolicy,
+ direction);
+ }
+ auto collectionScanParams = convertIndexScanParamsToCollScanParams(
+ opCtx, coll, shardKeyIdx.keyPattern(), startKey, endKey, boundInclusion, direction);
+
+ const auto& collection = *coll;
+ invariant(collection);
+
+ std::unique_ptr<WorkingSet> ws = std::make_unique<WorkingSet>();
+
+ auto expCtx = make_intrusive<ExpressionContext>(
+ opCtx, std::unique_ptr<CollatorInterface>(nullptr), collection->ns());
+
+ auto root = _collectionScan(expCtx, ws.get(), &collection, collectionScanParams);
+ root = std::make_unique<DeleteStage>(
+ expCtx.get(), std::move(params), ws.get(), collection, root.release());
+
+ auto executor = plan_executor_factory::make(expCtx,
+ std::move(ws),
+ std::move(root),
+ &collection,
+ yieldPolicy,
+ false /* whether owned BSON must be returned */
+ );
+ invariant(executor.getStatus());
+ return std::move(executor.getValue());
+}
+
std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::updateWithIdHack(
OperationContext* opCtx,
const CollectionPtr* coll,
@@ -255,43 +423,16 @@ std::unique_ptr<PlanStage> InternalPlanner::_collectionScan(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
WorkingSet* ws,
const CollectionPtr* coll,
- Direction direction,
- boost::optional<RecordId> resumeAfterRecordId,
- boost::optional<RecordId> minRecord,
- boost::optional<RecordId> maxRecord,
+ const CollectionScanParams& params,
bool relaxCappedConstraints) {
const auto& collection = *coll;
invariant(collection);
- CollectionScanParams params;
- params.shouldWaitForOplogVisibility =
- shouldWaitForOplogVisibility(expCtx->opCtx, collection, false);
- params.resumeAfterRecordId = resumeAfterRecordId;
- params.minRecord = minRecord;
- params.maxRecord = maxRecord;
- if (FORWARD == direction) {
- params.direction = CollectionScanParams::FORWARD;
- } else {
- params.direction = CollectionScanParams::BACKWARD;
- }
-
return std::make_unique<CollectionScan>(
expCtx.get(), collection, params, ws, nullptr, relaxCappedConstraints);
}
-std::unique_ptr<PlanStage> InternalPlanner::_collectionScan(
- const boost::intrusive_ptr<ExpressionContext>& expCtx,
- WorkingSet* ws,
- const CollectionPtr* coll,
- const CollectionScanParams& params) {
-
- const auto& collection = *coll;
- invariant(collection);
-
- return std::make_unique<CollectionScan>(expCtx.get(), collection, params, ws, nullptr);
-}
-
std::unique_ptr<PlanStage> InternalPlanner::_indexScan(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
WorkingSet* ws,
diff --git a/src/mongo/db/query/internal_plans.h b/src/mongo/db/query/internal_plans.h
index 6c2694720bf..387fb8f9d80 100644
--- a/src/mongo/db/query/internal_plans.h
+++ b/src/mongo/db/query/internal_plans.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/base/string_data.h"
+#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/exec/delete_stage.h"
#include "mongo/db/query/index_bounds.h"
#include "mongo/db/query/plan_executor.h"
@@ -92,7 +93,7 @@ public:
static std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> deleteWithCollectionScan(
OperationContext* opCtx,
const CollectionPtr* collection,
- std::unique_ptr<DeleteStageParams> params,
+ std::unique_ptr<DeleteStageParams> deleteStageParams,
PlanYieldPolicy::YieldPolicy yieldPolicy,
Direction direction = FORWARD,
boost::optional<RecordId> minRecord = boost::none,
@@ -127,6 +128,39 @@ public:
Direction direction = FORWARD);
/**
+ * Returns a scan over the 'shardKeyIdx'. If the 'shardKeyIdx' is a non-clustered index, returns
+     * an index scan. If the 'shardKeyIdx' is a clustered index, returns a bounded collection scan
+ * since the clustered index does not require a separate index lookup table.
+ */
+ static std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> shardKeyIndexScan(
+ OperationContext* opCtx,
+ const CollectionPtr* collection,
+ const IndexCatalog::ShardKeyIndex& shardKeyIdx,
+ const BSONObj& startKey,
+ const BSONObj& endKey,
+ BoundInclusion boundInclusion,
+ PlanYieldPolicy::YieldPolicy yieldPolicy,
+ Direction direction = FORWARD,
+ int options = IXSCAN_DEFAULT);
+
+
+ /**
+     * Returns an IXSCAN => FETCH => DELETE plan when 'shardKeyIdx' indicates a standard index,
+     * or a COLLSCAN => DELETE plan when 'shardKeyIdx' indicates a clustered index.
+ */
+ static std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> deleteWithShardKeyIndexScan(
+ OperationContext* opCtx,
+ const CollectionPtr* collection,
+ std::unique_ptr<DeleteStageParams> params,
+ const IndexCatalog::ShardKeyIndex& shardKeyIdx,
+ const BSONObj& startKey,
+ const BSONObj& endKey,
+ BoundInclusion boundInclusion,
+ PlanYieldPolicy::YieldPolicy yieldPolicy,
+ Direction direction = FORWARD);
+
+ /**
* Returns an IDHACK => UPDATE plan.
*/
static std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> updateWithIdHack(
@@ -157,7 +191,8 @@ private:
const boost::intrusive_ptr<ExpressionContext>& expCtx,
WorkingSet* ws,
const CollectionPtr* collection,
- const CollectionScanParams& params);
+ const CollectionScanParams& params,
+ bool relaxCappedConstraints = false);
/**
* Returns a plan stage that is either an index scan or an index scan with a fetch stage.
diff --git a/src/mongo/db/s/auto_split_vector.cpp b/src/mongo/db/s/auto_split_vector.cpp
index 062fb26cef8..bdf3220dea5 100644
--- a/src/mongo/db/s/auto_split_vector.cpp
+++ b/src/mongo/db/s/auto_split_vector.cpp
@@ -62,10 +62,9 @@ BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) {
* object extended to cover the entire shardKey. See KeyPattern::extendRangeBound documentation for
* some examples.
*/
-const std::tuple<BSONObj, BSONObj> getMinMaxExtendedBounds(const IndexDescriptor* shardKeyIdx,
- const BSONObj& min,
- const BSONObj& max) {
- KeyPattern kp(shardKeyIdx->keyPattern());
+const std::tuple<BSONObj, BSONObj> getMinMaxExtendedBounds(
+ const IndexCatalog::ShardKeyIndex& shardKeyIdx, const BSONObj& min, const BSONObj& max) {
+ KeyPattern kp(shardKeyIdx.keyPattern());
// Extend min to get (min, MinKey, MinKey, ....)
BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false /* upperInclusive */));
@@ -86,21 +85,21 @@ const std::tuple<BSONObj, BSONObj> getMinMaxExtendedBounds(const IndexDescriptor
*/
bool maxKeyEqualToMinKey(OperationContext* opCtx,
const CollectionPtr* collection,
- const IndexDescriptor* shardKeyIdx,
+ const IndexCatalog::ShardKeyIndex& shardKeyIdx,
const BSONObj& minBound,
const BSONObj& maxBound,
const BSONObj& minKeyInChunk) {
BSONObj maxKeyInChunk;
{
auto backwardIdxScanner =
- InternalPlanner::indexScan(opCtx,
- collection,
- shardKeyIdx,
- maxBound,
- minBound,
- BoundInclusion::kIncludeEndKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::BACKWARD);
+ InternalPlanner::shardKeyIndexScan(opCtx,
+ collection,
+ shardKeyIdx,
+ maxBound,
+ minBound,
+ BoundInclusion::kIncludeEndKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::BACKWARD);
PlanExecutor::ExecState state = backwardIdxScanner->getNext(&maxKeyInChunk, nullptr);
uassert(ErrorCodes::OperationFailed,
@@ -115,9 +114,9 @@ bool maxKeyEqualToMinKey(OperationContext* opCtx,
5865001,
"Possible low cardinality key detected in range. Range contains only a single key.",
"namespace"_attr = collection->get()->ns(),
- "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minBound)),
- "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxBound)),
- "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKeyInChunk)));
+ "minKey"_attr = redact(prettyKey(shardKeyIdx.keyPattern(), minBound)),
+ "maxKey"_attr = redact(prettyKey(shardKeyIdx.keyPattern(), maxBound)),
+ "key"_attr = redact(prettyKey(shardKeyIdx.keyPattern(), minKeyInChunk)));
return true;
}
@@ -171,18 +170,18 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
<< keyPattern.clientReadable().toString(),
shardKeyIdx);
- const auto [minKey, maxKey] = getMinMaxExtendedBounds(shardKeyIdx, min, max);
+ const auto [minKey, maxKey] = getMinMaxExtendedBounds(*shardKeyIdx, min, max);
// Setup the index scanner that will be used to find the split points
auto forwardIdxScanner =
- InternalPlanner::indexScan(opCtx,
- &(*collection),
- shardKeyIdx,
- minKey,
- maxKey,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::FORWARD);
+ InternalPlanner::shardKeyIndexScan(opCtx,
+ &(*collection),
+ *shardKeyIdx,
+ minKey,
+ maxKey,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::FORWARD);
// Get minimum key belonging to the chunk
BSONObj minKeyInOriginalChunk;
@@ -197,7 +196,7 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
// Return empty vector if chunk's min and max keys are the same.
if (maxKeyEqualToMinKey(opCtx,
&collection.getCollection(),
- shardKeyIdx,
+ *shardKeyIdx,
minKey,
maxKey,
minKeyInOriginalChunk)) {
@@ -295,15 +294,15 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
// Fairly recalculate the last `nSplitPointsToReposition` split points.
splitKeys.erase(splitKeys.end() - nSplitPointsToReposition, splitKeys.end());
- auto forwardIdxScanner =
- InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- splitKeys.empty() ? minKeyElement : splitKeys.back(),
- maxKey,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::FORWARD);
+ auto forwardIdxScanner = InternalPlanner::shardKeyIndexScan(
+ opCtx,
+ &collection.getCollection(),
+ *shardKeyIdx,
+ splitKeys.empty() ? minKeyElement : splitKeys.back(),
+ maxKey,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::FORWARD);
numScannedKeys = 0;
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index bb8eb56e260..6654e34db36 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -857,15 +857,15 @@ MigrationChunkClonerSourceLegacy::_getIndexScanExecutor(
// We can afford to yield here because any change to the base data that we might miss is already
// being queued and will migrate in the 'transferMods' stage.
- return InternalPlanner::indexScan(opCtx,
- &collection,
- shardKeyIdx,
- min,
- max,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::Direction::FORWARD,
- scanOption);
+ return InternalPlanner::shardKeyIndexScan(opCtx,
+ &collection,
+ *shardKeyIdx,
+ min,
+ max,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::Direction::FORWARD,
+ scanOption);
}
Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opCtx) {
@@ -942,8 +942,8 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
uint64_t averageObjectIdSize = 0;
const uint64_t defaultObjectIdSize = OID::kOIDSize;
- // For a time series collection, an index on '_id' is not required.
- if (totalRecs > 0 && !collection->getTimeseriesOptions()) {
+    // For a clustered collection, an index on '_id' is not required.
+ if (totalRecs > 0 && !collection->isClustered()) {
const auto idIdx = collection->getIndexCatalog()->findIdIndex(opCtx)->getEntry();
if (!idIdx) {
return {ErrorCodes::IndexNotFound,
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 9c47e67e3b1..1b0f631eee5 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -791,11 +791,15 @@ MigrationDestinationManager::IndexesAndIdIndex MigrationDestinationManager::getC
Milliseconds(-1)));
for (auto&& spec : indexes.docs) {
- donorIndexSpecs.push_back(spec);
- if (auto indexNameElem = spec[IndexDescriptor::kIndexNameFieldName]) {
- if (indexNameElem.type() == BSONType::String &&
- indexNameElem.valueStringData() == "_id_"_sd) {
- donorIdIndexSpec = spec;
+ if (spec["clustered"]) {
+ // The 'clustered' index is implicitly created upon clustered collection creation.
+ } else {
+ donorIndexSpecs.push_back(spec);
+ if (auto indexNameElem = spec[IndexDescriptor::kIndexNameFieldName]) {
+ if (indexNameElem.type() == BSONType::String &&
+ indexNameElem.valueStringData() == "_id_"_sd) {
+ donorIdIndexSpec = spec;
+ }
}
}
}
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp
index 94e859d913e..501d22cb3d0 100644
--- a/src/mongo/db/s/range_deletion_util.cpp
+++ b/src/mongo/db/s/range_deletion_util.cpp
@@ -160,18 +160,6 @@ StatusWith<int> deleteNextBatch(OperationContext* opCtx,
"max"_attr = max,
"namespace"_attr = nss.ns());
- const auto indexName = shardKeyIdx->indexName();
- const IndexDescriptor* descriptor =
- collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
- if (!descriptor) {
- LOGV2_ERROR_OPTIONS(23767,
- {logv2::UserAssertAfterLog(ErrorCodes::InternalError)},
- "Shard key index with name {indexName} on {namespace} was dropped",
- "Shard key index was dropped",
- "indexName"_attr = indexName,
- "namespace"_attr = nss.ns());
- }
-
auto deleteStageParams = std::make_unique<DeleteStageParams>();
deleteStageParams->fromMigrate = true;
deleteStageParams->isMulti = true;
@@ -182,10 +170,11 @@ StatusWith<int> deleteNextBatch(OperationContext* opCtx,
std::make_unique<RemoveSaver>("moveChunk", nss.ns(), "cleaning");
}
- auto exec = InternalPlanner::deleteWithIndexScan(opCtx,
+ auto exec =
+ InternalPlanner::deleteWithShardKeyIndexScan(opCtx,
&collection,
std::move(deleteStageParams),
- descriptor,
+ *shardKeyIdx,
min,
max,
BoundInclusion::kIncludeStartKeyOnly,
diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp
index 5f4d1156a64..cc66c2672cf 100644
--- a/src/mongo/db/s/split_chunk.cpp
+++ b/src/mongo/db/s/split_chunk.cpp
@@ -60,19 +60,19 @@ const ReadPreferenceSetting kPrimaryOnlyReadPreference{ReadPreference::PrimaryOn
bool checkIfSingleDoc(OperationContext* opCtx,
const CollectionPtr& collection,
- const IndexDescriptor* idx,
+ const IndexCatalog::ShardKeyIndex& idx,
const ChunkType* chunk) {
- KeyPattern kp(idx->keyPattern());
+ KeyPattern kp(idx.keyPattern());
BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false));
BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true));
- auto exec = InternalPlanner::indexScan(opCtx,
- &collection,
- idx,
- newmin,
- newmax,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::NO_YIELD);
+ auto exec = InternalPlanner::shardKeyIndexScan(opCtx,
+ &collection,
+ idx,
+ newmin,
+ newmax,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::NO_YIELD);
// check if exactly one document found
PlanExecutor::ExecState state;
BSONObj obj;
@@ -241,10 +241,10 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
KeyPattern shardKeyPattern(keyPatternObj);
if (shardKeyPattern.globalMax().woCompare(backChunk.getMax()) == 0 &&
- checkIfSingleDoc(opCtx, collection.getCollection(), shardKeyIdx, &backChunk)) {
+ checkIfSingleDoc(opCtx, collection.getCollection(), *shardKeyIdx, &backChunk)) {
return boost::optional<ChunkRange>(ChunkRange(backChunk.getMin(), backChunk.getMax()));
} else if (shardKeyPattern.globalMin().woCompare(frontChunk.getMin()) == 0 &&
- checkIfSingleDoc(opCtx, collection.getCollection(), shardKeyIdx, &frontChunk)) {
+ checkIfSingleDoc(opCtx, collection.getCollection(), *shardKeyIdx, &frontChunk)) {
return boost::optional<ChunkRange>(ChunkRange(frontChunk.getMin(), frontChunk.getMax()));
}
return boost::optional<ChunkRange>(boost::none);
diff --git a/src/mongo/db/s/split_vector.cpp b/src/mongo/db/s/split_vector.cpp
index 54a358e8901..78bc2aa9d8e 100644
--- a/src/mongo/db/s/split_vector.cpp
+++ b/src/mongo/db/s/split_vector.cpp
@@ -166,14 +166,14 @@ std::vector<BSONObj> splitVector(OperationContext* opCtx,
long long currCount = 0;
long long numChunks = 0;
- auto exec = InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- minKey,
- maxKey,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::FORWARD);
+ auto exec = InternalPlanner::shardKeyIndexScan(opCtx,
+ &collection.getCollection(),
+ *shardKeyIdx,
+ minKey,
+ maxKey,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::FORWARD);
BSONObj currKey;
PlanExecutor::ExecState state = exec->getNext(&currKey, nullptr);
@@ -184,14 +184,14 @@ std::vector<BSONObj> splitVector(OperationContext* opCtx,
// Get the final key in the range, and see if it's the same as the first key.
BSONObj maxKeyInChunk;
{
- auto exec = InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- maxKey,
- minKey,
- BoundInclusion::kIncludeEndKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::BACKWARD);
+ auto exec = InternalPlanner::shardKeyIndexScan(opCtx,
+ &collection.getCollection(),
+ *shardKeyIdx,
+ maxKey,
+ minKey,
+ BoundInclusion::kIncludeEndKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::BACKWARD);
PlanExecutor::ExecState state = exec->getNext(&maxKeyInChunk, nullptr);
uassert(
@@ -307,14 +307,14 @@ std::vector<BSONObj> splitVector(OperationContext* opCtx,
"splitVector doing another cycle because of force",
"keyCount"_attr = keyCount);
- exec = InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- minKey,
- maxKey,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::FORWARD);
+ exec = InternalPlanner::shardKeyIndexScan(opCtx,
+ &collection.getCollection(),
+ *shardKeyIdx,
+ minKey,
+ maxKey,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::FORWARD);
state = exec->getNext(&currKey, nullptr);
}