summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--buildscripts/resmokeconfig/suites/clustered_collection_passthrough.yml4
-rw-r--r--jstests/core/clustered_collection_collation.js275
-rw-r--r--jstests/core/collation.js179
-rw-r--r--jstests/libs/clustered_collections/clustered_collection_hint_common.js89
-rw-r--r--jstests/libs/clustered_collections/clustered_collection_util.js9
-rw-r--r--src/mongo/db/SConscript1
-rw-r--r--src/mongo/db/catalog/clustered_collection_util.cpp7
-rw-r--r--src/mongo/db/catalog/clustered_collection_util.h6
-rw-r--r--src/mongo/db/catalog/collection_impl.cpp8
-rw-r--r--src/mongo/db/catalog/list_indexes.cpp6
-rw-r--r--src/mongo/db/catalog/validate_adaptor.cpp11
-rw-r--r--src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp34
-rw-r--r--src/mongo/db/query/get_executor.cpp1
-rw-r--r--src/mongo/db/query/internal_plans.cpp5
-rw-r--r--src/mongo/db/query/planner_access.cpp90
-rw-r--r--src/mongo/db/query/query_planner.cpp29
-rw-r--r--src/mongo/db/query/query_planner_params.h5
-rw-r--r--src/mongo/db/record_id_helpers.cpp17
-rw-r--r--src/mongo/db/record_id_helpers.h7
-rw-r--r--src/mongo/db/repl/collection_cloner.cpp2
-rw-r--r--src/mongo/db/repl/dbcheck.cpp12
-rw-r--r--src/mongo/db/repl/storage_interface_impl.cpp7
-rw-r--r--src/mongo/db/storage/record_store_test_harness.cpp2
-rw-r--r--src/mongo/dbtests/query_stage_collscan.cpp6
24 files changed, 597 insertions, 215 deletions
diff --git a/buildscripts/resmokeconfig/suites/clustered_collection_passthrough.yml b/buildscripts/resmokeconfig/suites/clustered_collection_passthrough.yml
index 3556574cd28..6f4f3dc6083 100644
--- a/buildscripts/resmokeconfig/suites/clustered_collection_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/clustered_collection_passthrough.yml
@@ -46,10 +46,6 @@ selector:
# TODO (SERVER-61259): $text not supported: "No query solutions"
- jstests/core/fts6.js
- # TODO (SERVER-61260): _id collations not supported
- - jstests/core/collation.js
- - jstests/core/collation_convert_to_capped.js
- - jstests/core/collation_update.js
exclude_with_any_tags:
- assumes_standalone_mongod
diff --git a/jstests/core/clustered_collection_collation.js b/jstests/core/clustered_collection_collation.js
new file mode 100644
index 00000000000..23fcb82277d
--- /dev/null
+++ b/jstests/core/clustered_collection_collation.js
@@ -0,0 +1,275 @@
+/**
+ * Tests that clustered collections respect collation for the _id field and any other fields.
+ *
+ * @tags: [
+ * assumes_against_mongod_not_mongos,
+ * assumes_no_implicit_collection_creation_after_drop,
+ * does_not_support_stepdowns,
+ * requires_fcv_53,
+ * requires_wiredtiger,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/collection_drop_recreate.js"); // For assertDropCollection.
+load("jstests/libs/clustered_collections/clustered_collection_util.js");
+load("jstests/libs/clustered_collections/clustered_collection_hint_common.js");
+
+if (ClusteredCollectionUtil.areClusteredIndexesEnabled(db.getMongo()) == false) {
+ jsTestLog('Skipping test because the clustered indexes feature flag is disabled');
+ return;
+}
+
+const collatedName = 'clustered_collection_with_collation';
+const collated = db[collatedName];
+
+assertDropCollection(db, collatedName);
+
+const noncollatedName = 'clustered_collection_without_collation';
+const noncollated = db[noncollatedName];
+
+assertDropCollection(db, noncollatedName);
+
+const defaultCollation = {
+ locale: "en",
+ strength: 2
+};
+const incompatibleCollation = {
+ locale: "fr_CA",
+ strength: 2
+};
+
+assert.commandWorked(db.createCollection(
+ collatedName, {clusteredIndex: {key: {_id: 1}, unique: true}, collation: defaultCollation}));
+assert.commandWorked(
+ db.createCollection(noncollatedName, {clusteredIndex: {key: {_id: 1}, unique: true}}));
+
+const expectedCollation = {
+ locale: "en",
+ caseLevel: false,
+ caseFirst: "off",
+ strength: 2,
+ numericOrdering: false,
+ alternate: "non-ignorable",
+ maxVariable: "punct",
+ normalization: false,
+ backwards: false,
+ version: "57.1"
+};
+
+// Verify clustered collection collation is reflected on the index spec.
+const indexes = collated.getIndexes();
+assert.eq(0,
+ bsonWoCompare(indexes[0].collation, expectedCollation),
+ "Default index doesn't match expected collation");
+
+// No collation spec when it's set to "simple".
+assertDropCollection(db, "simpleCollation");
+assert.commandWorked(db.createCollection(
+ "simpleCollation",
+ {clusteredIndex: {key: {_id: 1}, unique: true}, collation: {locale: "simple"}}));
+const indexSpec = db.simpleCollation.getIndexes()[0];
+assert(!indexSpec.hasOwnProperty("collation"), "Default index has collation for \"simple\" locale");
+
+const insertDocuments = function(coll) {
+ assert.commandWorked(coll.insert({_id: 5}));
+ assert.commandWorked(coll.insert({_id: 10}));
+
+ assert.commandWorked(coll.insert({_id: {int: 5}}));
+ assert.commandWorked(coll.insert({_id: {int: 10}}));
+
+ assert.commandWorked(coll.insert({_id: {ints: [5, 10]}}));
+ assert.commandWorked(coll.insert({_id: {ints: [15, 20]}}));
+
+ assert.commandWorked(coll.insert({_id: "a"}));
+ assert.commandWorked(coll.insert({_id: "b"}));
+
+ assert.commandWorked(coll.insert({_id: {str: "a"}}));
+ assert.commandWorked(coll.insert({_id: {str: "b"}}));
+
+ assert.commandWorked(coll.insert({_id: {strs: ["a", "b"]}}));
+ assert.commandWorked(coll.insert({_id: {strs: ["c", "d"]}}));
+
+ assert.commandWorked(coll.insert({data: ["a", "b"]}));
+ assert.commandWorked(coll.insert({data: ["c", "d"]}));
+ // Non _id duplicates are always fine
+ assert.commandWorked(coll.insert({data: ["C", "d"]}));
+ assert.commandWorked(coll.insert({data: ["C", "D"]}));
+};
+
+const testCollatedDuplicates = function(coll, collatedShouldFail) {
+ const checkCollated = function(res) {
+ if (collatedShouldFail) {
+ assert.commandFailedWithCode(res, ErrorCodes.DuplicateKey);
+ } else {
+ assert.commandWorked(res);
+ }
+ };
+ // Non string types should always fail
+ assert.commandFailedWithCode(coll.insert({_id: 10}), ErrorCodes.DuplicateKey);
+ assert.commandFailedWithCode(coll.insert({_id: {int: 10}}), ErrorCodes.DuplicateKey);
+ assert.commandFailedWithCode(coll.insert({_id: {ints: [15, 20]}}), ErrorCodes.DuplicateKey);
+
+ // These should only fail when there's a collation
+ checkCollated(coll.insert({_id: "B"}));
+ checkCollated(coll.insert({_id: {str: "B"}}));
+ checkCollated(coll.insert({_id: {strs: ["C", "d"]}}));
+ checkCollated(coll.insert({_id: {strs: ["C", "D"]}}));
+};
+
+const verifyHasBoundsAndFindsN = function(coll, expected, predicate, queryCollation) {
+ const res = queryCollation === undefined
+ ? assert.commandWorked(coll.find(predicate).explain())
+ : assert.commandWorked(coll.find(predicate).collation(queryCollation).explain());
+ const min = assert(res.queryPlanner.winningPlan.minRecord, "No min bound");
+ const max = assert(res.queryPlanner.winningPlan.maxRecord, "No max bound");
+ assert.eq(min, max, "COLLSCAN bounds are not equal");
+ assert.eq(expected, coll.find(predicate).count(), "Didn't find the expected records");
+};
+
+const verifyNoBoundsAndFindsN = function(coll, expected, predicate, queryCollation) {
+ const res = queryCollation === undefined
+ ? assert.commandWorked(coll.find(predicate).explain())
+ : assert.commandWorked(coll.find(predicate).collation(queryCollation).explain());
+ assert.eq(null, res.queryPlanner.winningPlan.minRecord, "There's a min bound");
+ assert.eq(null, res.queryPlanner.winningPlan.maxRecord, "There's a max bound");
+ assert.eq(expected, coll.find(predicate).count(), "Didn't find the expected records");
+};
+
+const testBounds = function(coll, expected, defaultCollation) {
+ // Test non string types.
+ verifyHasBoundsAndFindsN(coll, 1, {_id: 5});
+ verifyHasBoundsAndFindsN(coll, 1, {_id: {int: 5}});
+ verifyHasBoundsAndFindsN(coll, 1, {_id: {ints: [5, 10]}});
+
+ // Test non string types with incompatible collations.
+ verifyHasBoundsAndFindsN(coll, 1, {_id: 5}, incompatibleCollation);
+ verifyHasBoundsAndFindsN(coll, 1, {_id: {int: 5}}, incompatibleCollation);
+ verifyHasBoundsAndFindsN(coll, 1, {_id: {ints: [5, 10]}}, incompatibleCollation);
+
+ // Test strings respect the collation.
+ verifyHasBoundsAndFindsN(coll, expected, {_id: "A"});
+ verifyHasBoundsAndFindsN(coll, expected, {_id: {str: "A"}});
+ verifyHasBoundsAndFindsN(coll, expected, {_id: {strs: ["A", "b"]}});
+ verifyHasBoundsAndFindsN(coll, expected, {_id: {strs: ["a", "B"]}});
+
+ // Test strings not in the _id field
+ verifyNoBoundsAndFindsN(coll, expected, {data: ["A", "b"]});
+ verifyNoBoundsAndFindsN(coll, expected, {data: ["a", "B"]});
+
+ // Test non compatible query collations don't generate bounds
+ verifyNoBoundsAndFindsN(coll, expected, {_id: "A"}, incompatibleCollation);
+ verifyNoBoundsAndFindsN(coll, expected, {_id: {str: "A"}}, incompatibleCollation);
+ verifyNoBoundsAndFindsN(coll, expected, {_id: {strs: ["A", "b"]}}, incompatibleCollation);
+ verifyNoBoundsAndFindsN(coll, expected, {_id: {strs: ["a", "B"]}}, incompatibleCollation);
+
+ // Test compatible query collations generate bounds
+ verifyHasBoundsAndFindsN(coll, expected, {_id: "A"}, defaultCollation);
+ verifyHasBoundsAndFindsN(coll, expected, {_id: {str: "A"}}, defaultCollation);
+ verifyHasBoundsAndFindsN(coll, expected, {_id: {strs: ["A", "b"]}}, defaultCollation);
+ verifyHasBoundsAndFindsN(coll, expected, {_id: {strs: ["a", "B"]}}, defaultCollation);
+};
+
+insertDocuments(collated);
+insertDocuments(noncollated);
+
+testCollatedDuplicates(collated, true /* should fail */);
+testCollatedDuplicates(noncollated, false /* shouldn't fail */);
+
+testBounds(collated, 1 /* expected records */, defaultCollation);
+testBounds(noncollated, 0 /* expected records; defaultCollation is undefined */);
+
+/*
+ * Test min/max hints.
+ */
+
+const collatedEncodings = {
+ "a": ")\u0001\u0005",
+ "C": "-\u0001\u0005"
+};
+
+// Strings with default collation.
+validateClusteredCollectionHint(collated, {
+ expectedNReturned: 2,
+ cmd: {find: collatedName, hint: {_id: 1}, min: {_id: "a"}, max: {_id: "C"}},
+ expectedWinningPlanStats: {
+ stage: "COLLSCAN",
+ direction: "forward",
+ minRecord: collatedEncodings["a"],
+ maxRecord: collatedEncodings["C"]
+ }
+});
+assert.commandFailedWithCode(
+ db.runCommand(
+ {explain: {find: noncollatedName, hint: {_id: 1}, min: {_id: "a"}, max: {_id: "C"}}}),
+ 6137401); // max() must be greater than min().
+validateClusteredCollectionHint(noncollated, {
+ expectedNReturned: 3, // "a", "b" and "B"
+ cmd: {find: noncollatedName, hint: {_id: 1}, min: {_id: "A"}, max: {_id: "c"}},
+ expectedWinningPlanStats:
+ {stage: "COLLSCAN", direction: "forward", minRecord: "A", maxRecord: "c"}
+});
+
+// Strings with incompatible collation.
+assert.commandFailedWithCode(
+ db.runCommand({
+ explain: {
+ find: collatedName,
+ hint: {_id: 1},
+ min: {_id: "a"},
+ max: {_id: "C"},
+ collation: incompatibleCollation
+ }
+ }),
+ 6137400); // The clustered index is not compatible with the values provided for min/max
+assert.commandFailedWithCode(
+ db.runCommand({
+ explain: {
+ find: collatedName,
+ hint: {_id: 1},
+ min: {_id: "a"},
+ max: {_id: "C"},
+ collation: incompatibleCollation
+ },
+
+ }),
+ 6137400); // The clustered index is not compatible with the values provided for min/max
+
+// Numeric with default collation.
+validateClusteredCollectionHint(collated, {
+ expectedNReturned: 2,
+ cmd: {find: collatedName, hint: {_id: 1}, min: {_id: 5}, max: {_id: 11}},
+ expectedWinningPlanStats: {stage: "COLLSCAN", direction: "forward", minRecord: 5, maxRecord: 11}
+});
+validateClusteredCollectionHint(noncollated, {
+ expectedNReturned: 2,
+ cmd: {find: noncollatedName, hint: {_id: 1}, min: {_id: 5}, max: {_id: 11}},
+ expectedWinningPlanStats: {stage: "COLLSCAN", direction: "forward", minRecord: 5, maxRecord: 11}
+});
+
+// Numeric with incompatible collation.
+validateClusteredCollectionHint(collated, {
+ expectedNReturned: 2,
+ cmd: {
+ find: collatedName,
+ hint: {_id: 1},
+ min: {_id: 5},
+ max: {_id: 11},
+ collation: incompatibleCollation
+ },
+ expectedWinningPlanStats: {stage: "COLLSCAN", direction: "forward", minRecord: 5, maxRecord: 11}
+});
+validateClusteredCollectionHint(noncollated, {
+ expectedNReturned: 2,
+ cmd: {
+ find: noncollatedName,
+ hint: {_id: 1},
+ min: {_id: 5},
+ max: {_id: 11},
+ collation: incompatibleCollation
+ },
+ expectedWinningPlanStats: {stage: "COLLSCAN", direction: "forward", minRecord: 5, maxRecord: 11}
+});
+})();
diff --git a/jstests/core/collation.js b/jstests/core/collation.js
index 061e3c7a6fb..006dcc751b2 100644
--- a/jstests/core/collation.js
+++ b/jstests/core/collation.js
@@ -19,6 +19,8 @@ load("jstests/concurrency/fsm_workload_helpers/server_types.js");
// For isReplSet
load("jstests/libs/fixture_helpers.js");
load("jstests/libs/sbe_explain_helpers.js"); // For engineSpecificAssertion.
+// For areAllCollectionsClustered.
+load("jstests/libs/clustered_collections/clustered_collection_util.js");
var coll = db.collation;
coll.drop();
@@ -31,6 +33,7 @@ var hello = db.runCommand("hello");
assert.commandWorked(hello);
var isMongos = (hello.msg === "isdbgrid");
var isStandalone = !isMongos && !hello.hasOwnProperty('setName');
+var isClustered = ClusteredCollectionUtil.areAllCollectionsClustered(db);
var assertIndexHasCollation = function(keyPattern, collation) {
var indexSpecs = coll.getIndexes();
@@ -669,15 +672,6 @@ assert.commandWorked(
assert.commandWorked(coll.insert({_id: "foo"}));
assert.eq(1, coll.find({_id: "FOO"}).itcount());
-// Find on _id should use idhack stage when query inherits collection default collation.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").find({_id: "foo"}).finish();
-assert.commandWorked(explainRes);
-let classicAssert = null !== getPlanStage(getWinningPlan(explainRes.queryPlanner), "IDHACK");
-let sbeAssert = null !== getPlanStage(getWinningPlan(explainRes.queryPlanner), "IXSCAN");
-engineSpecificAssertion(classicAssert, sbeAssert, db, explainRes);
-
// Find should return correct results for query containing $expr when no collation specified and
// collection has a default collation.
coll.drop();
@@ -708,28 +702,39 @@ assert.commandWorked(coll.insert({_id: "foo"}));
assert.commandWorked(coll.insert({_id: "FOO"}));
assert.eq(2, coll.find({_id: "foo"}).collation({locale: "en_US", strength: 2}).itcount());
-// Find on _id should use idhack stage when explicitly given query collation matches
-// collection default.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes =
- coll.explain("executionStats").find({_id: "foo"}).collation({locale: "en_US"}).finish();
-assert.commandWorked(explainRes);
-classicAssert = null !== getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-sbeAssert = null !== getPlanStage(getWinningPlan(explainRes.queryPlanner), "IXSCAN");
-engineSpecificAssertion(classicAssert, sbeAssert, db, explainRes);
-
-// Find on _id should not use idhack stage when query collation does not match collection
-// default.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes =
- coll.explain("executionStats").find({_id: "foo"}).collation({locale: "fr_CA"}).finish();
-assert.commandWorked(explainRes);
-
-classicAssert = null === getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-sbeAssert = null === getPlanStage(getWinningPlan(explainRes.queryPlanner), "IXSCAN");
-engineSpecificAssertion(classicAssert, sbeAssert, db, explainRes);
+if (!isClustered) {
+ // Find on _id should use idhack stage when query inherits collection default collation.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes = coll.explain("executionStats").find({_id: "foo"}).finish();
+ assert.commandWorked(explainRes);
+ let classicAssert = null !== getPlanStage(getWinningPlan(explainRes.queryPlanner), "IDHACK");
+ let sbeAssert = null !== getPlanStage(getWinningPlan(explainRes.queryPlanner), "IXSCAN");
+ engineSpecificAssertion(classicAssert, sbeAssert, db, explainRes);
+
+ // Find on _id should use idhack stage when explicitly given query collation matches
+ // collection default.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes =
+ coll.explain("executionStats").find({_id: "foo"}).collation({locale: "en_US"}).finish();
+ assert.commandWorked(explainRes);
+ classicAssert = null !== getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ sbeAssert = null !== getPlanStage(getWinningPlan(explainRes.queryPlanner), "IXSCAN");
+ engineSpecificAssertion(classicAssert, sbeAssert, db, explainRes);
+
+ // Find on _id should not use idhack stage when query collation does not match collection
+ // default.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes =
+ coll.explain("executionStats").find({_id: "foo"}).collation({locale: "fr_CA"}).finish();
+ assert.commandWorked(explainRes);
+
+ classicAssert = null === getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ sbeAssert = null === getPlanStage(getWinningPlan(explainRes.queryPlanner), "IXSCAN");
+ engineSpecificAssertion(classicAssert, sbeAssert, db, explainRes);
+}
// Find should select compatible index when no collation specified and collection has a default
// collation.
@@ -1053,13 +1058,15 @@ writeRes = coll.remove({_id: "FOO"}, {justOne: true, hint: {a: 1}});
assert.commandWorked(writeRes);
assert.eq(1, writeRes.nRemoved);
-// Remove on _id should use idhack stage when query inherits collection default collation.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").remove({_id: "foo"});
-assert.commandWorked(explainRes);
-planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-assert.neq(null, planStage);
+if (!isClustered) {
+ // Remove on _id should use idhack stage when query inherits collection default collation.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes = coll.explain("executionStats").remove({_id: "foo"});
+ assert.commandWorked(explainRes);
+ planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ assert.neq(null, planStage);
+}
// Remove should return correct results when "simple" collation specified and collection has
// a default collation.
@@ -1081,23 +1088,27 @@ writeRes = coll.remove({_id: "FOO"}, {justOne: true, collation: {locale: "simple
assert.commandWorked(writeRes);
assert.eq(0, writeRes.nRemoved);
-// Remove on _id should use idhack stage when explicit query collation matches collection
-// default.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").remove({_id: "foo"}, {collation: {locale: "en_US"}});
-assert.commandWorked(explainRes);
-planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-assert.neq(null, planStage);
-
-// Remove on _id should not use idhack stage when query collation does not match collection
-// default.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").remove({_id: "foo"}, {collation: {locale: "fr_CA"}});
-assert.commandWorked(explainRes);
-planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-assert.eq(null, planStage);
+if (!isClustered) {
+ // Remove on _id should use idhack stage when explicit query collation matches collection
+ // default.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes =
+ coll.explain("executionStats").remove({_id: "foo"}, {collation: {locale: "en_US"}});
+ assert.commandWorked(explainRes);
+ planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ assert.neq(null, planStage);
+
+ // Remove on _id should not use idhack stage when query collation does not match collection
+ // default.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes =
+ coll.explain("executionStats").remove({_id: "foo"}, {collation: {locale: "fr_CA"}});
+ assert.commandWorked(explainRes);
+ planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ assert.eq(null, planStage);
+}
//
// Collation tests for update.
@@ -1149,13 +1160,15 @@ writeRes = coll.update({_id: "FOO"}, {$set: {other: 99}});
assert.commandWorked(writeRes);
assert.eq(1, writeRes.nMatched);
-// Update on _id should use idhack stage when query inherits collection default collation.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").update({_id: "foo"}, {$set: {other: 99}});
-assert.commandWorked(explainRes);
-planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-assert.neq(null, planStage);
+if (!isClustered) {
+ // Update on _id should use idhack stage when query inherits collection default collation.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes = coll.explain("executionStats").update({_id: "foo"}, {$set: {other: 99}});
+ assert.commandWorked(explainRes);
+ planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ assert.neq(null, planStage);
+}
// Update should return correct results when "simple" collation specified and collection has
// a default collation.
@@ -1177,27 +1190,29 @@ writeRes = coll.update({_id: "FOO"}, {$set: {other: 99}}, {collation: {locale: "
assert.commandWorked(writeRes);
assert.eq(0, writeRes.nModified);
-// Update on _id should use idhack stage when explicitly given query collation matches
-// collection default.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").update({_id: "foo"}, {$set: {other: 99}}, {
- collation: {locale: "en_US"}
-});
-assert.commandWorked(explainRes);
-planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-assert.neq(null, planStage);
+if (!isClustered) {
+ // Update on _id should use idhack stage when explicitly given query collation matches
+ // collection default.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes = coll.explain("executionStats").update({_id: "foo"}, {$set: {other: 99}}, {
+ collation: {locale: "en_US"}
+ });
+ assert.commandWorked(explainRes);
+ planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ assert.neq(null, planStage);
-// Update on _id should not use idhack stage when query collation does not match collection
-// default.
-coll.drop();
-assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
-explainRes = coll.explain("executionStats").update({_id: "foo"}, {$set: {other: 99}}, {
- collation: {locale: "fr_CA"}
-});
-assert.commandWorked(explainRes);
-planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
-assert.eq(null, planStage);
+ // Update on _id should not use idhack stage when query collation does not match collection
+ // default.
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "en_US"}}));
+ explainRes = coll.explain("executionStats").update({_id: "foo"}, {$set: {other: 99}}, {
+ collation: {locale: "fr_CA"}
+ });
+ assert.commandWorked(explainRes);
+ planStage = getPlanStage(explainRes.executionStats.executionStages, "IDHACK");
+ assert.eq(null, planStage);
+}
//
// Collation tests for the $geoNear aggregation stage.
diff --git a/jstests/libs/clustered_collections/clustered_collection_hint_common.js b/jstests/libs/clustered_collections/clustered_collection_hint_common.js
index a844afc730b..6be6a4f4314 100644
--- a/jstests/libs/clustered_collections/clustered_collection_hint_common.js
+++ b/jstests/libs/clustered_collections/clustered_collection_hint_common.js
@@ -2,7 +2,7 @@
* Validate $hint on a clustered collection.
*/
-const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
+function testClusteredCollectionHint(coll, clusterKey, clusterKeyName) {
"use strict";
load("jstests/libs/analyze_plan.js");
load("jstests/libs/collection_drop_recreate.js");
@@ -10,28 +10,6 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
const clusterKeyFieldName = Object.keys(clusterKey)[0];
const batchSize = 100;
- function validateHint(coll, {expectedNReturned, cmd, expectedWinningPlanStats = {}}) {
- const explain = assert.commandWorked(coll.runCommand({explain: cmd}));
- assert.eq(explain.executionStats.nReturned, expectedNReturned, tojson(explain));
-
- const actualWinningPlan = getWinningPlan(explain.queryPlanner);
- const stageOfInterest = getPlanStage(actualWinningPlan, expectedWinningPlanStats.stage);
- assert.neq(null, stageOfInterest);
-
- for (const [key, value] of Object.entries(expectedWinningPlanStats)) {
- assert(stageOfInterest[key], tojson(explain));
- assert.eq(stageOfInterest[key], value, tojson(explain));
- }
-
- // Explicitly check that the plan is not bounded by default.
- if (!expectedWinningPlanStats.hasOwnProperty("minRecord")) {
- assert(!actualWinningPlan["minRecord"], tojson(explain));
- }
- if (!expectedWinningPlanStats.hasOwnProperty("maxRecord")) {
- assert(!actualWinningPlan["maxRecord"], tojson(explain));
- }
- }
-
function testHint(coll, clusterKey, clusterKeyName) {
// Create clustered collection.
assertDropCollection(coll.getDB(), coll.getName());
@@ -54,7 +32,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
const collName = coll.getName();
// Basic find with hints on cluster key.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: batchSize,
cmd: {
find: collName,
@@ -65,7 +43,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
direction: "forward",
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: batchSize,
cmd: {
find: collName,
@@ -76,7 +54,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
direction: "forward",
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 1,
cmd: {
find: collName,
@@ -88,7 +66,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
direction: "forward",
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 1,
cmd: {
find: collName,
@@ -103,7 +81,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
// Find with hints on cluster key that generate bounded collection scans.
const arbitraryDocId = 12;
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 1,
cmd: {
find: collName,
@@ -117,7 +95,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
maxRecord: arbitraryDocId
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {
find: collName,
@@ -128,7 +106,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
expectedWinningPlanStats:
{stage: "COLLSCAN", direction: "forward", minRecord: 101, maxRecord: MaxKey}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {
find: collName,
@@ -139,7 +117,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
expectedWinningPlanStats:
{stage: "COLLSCAN", direction: "forward", minRecord: MinKey, maxRecord: -2}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 1,
cmd: {
find: collName,
@@ -153,7 +131,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
maxRecord: arbitraryDocId
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: arbitraryDocId,
cmd: {
find: collName,
@@ -163,7 +141,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
expectedWinningPlanStats:
{stage: "COLLSCAN", direction: "forward", maxRecord: arbitraryDocId}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: batchSize - arbitraryDocId,
cmd: {
find: collName,
@@ -175,7 +153,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Find with $natural hints.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: batchSize,
cmd: {
find: collName,
@@ -186,7 +164,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
direction: "backward",
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: batchSize,
cmd: {
find: collName,
@@ -197,7 +175,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
direction: "forward",
}
});
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 1,
cmd: {
find: collName,
@@ -211,7 +189,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Find on a standard index.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: batchSize,
cmd: {find: collName, hint: idxA},
expectedWinningPlanStats: {
@@ -221,7 +199,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Update with hint on cluster key.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {
update: collName,
@@ -232,7 +210,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Update with reverse $natural hint.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {
update: collName,
@@ -244,7 +222,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Update with hint on secondary index.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {update: collName, updates: [{q: {a: -2}, u: {$set: {a: 2}}, hint: idxA}]},
expectedWinningPlanStats: {
@@ -254,7 +232,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Delete with hint on cluster key.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {
delete: collName,
@@ -265,7 +243,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Delete reverse $natural hint.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {
delete: collName,
@@ -276,7 +254,7 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
});
// Delete with hint on standard index.
- validateHint(coll, {
+ validateClusteredCollectionHint(coll, {
expectedNReturned: 0,
cmd: {delete: collName, deletes: [{q: {a: -5}, limit: 0, hint: idxA}]},
expectedWinningPlanStats: {
@@ -292,4 +270,27 @@ const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
}
return testHint(coll, clusterKey, clusterKeyName);
-};
+}
+
+function validateClusteredCollectionHint(coll,
+ {expectedNReturned, cmd, expectedWinningPlanStats = {}}) {
+ const explain = assert.commandWorked(coll.runCommand({explain: cmd}));
+ assert.eq(explain.executionStats.nReturned, expectedNReturned, tojson(explain));
+
+ const actualWinningPlan = getWinningPlan(explain.queryPlanner);
+ const stageOfInterest = getPlanStage(actualWinningPlan, expectedWinningPlanStats.stage);
+ assert.neq(null, stageOfInterest);
+
+ for (const [key, value] of Object.entries(expectedWinningPlanStats)) {
+ assert(stageOfInterest[key], tojson(explain));
+ assert.eq(stageOfInterest[key], value, tojson(explain));
+ }
+
+ // Explicitly check that the plan is not bounded by default.
+ if (!expectedWinningPlanStats.hasOwnProperty("minRecord")) {
+ assert(!actualWinningPlan["minRecord"], tojson(explain));
+ }
+ if (!expectedWinningPlanStats.hasOwnProperty("maxRecord")) {
+ assert(!actualWinningPlan["maxRecord"], tojson(explain));
+ }
+}
diff --git a/jstests/libs/clustered_collections/clustered_collection_util.js b/jstests/libs/clustered_collections/clustered_collection_util.js
index d6f710b2607..8422e895f40 100644
--- a/jstests/libs/clustered_collections/clustered_collection_util.js
+++ b/jstests/libs/clustered_collections/clustered_collection_util.js
@@ -6,6 +6,15 @@ load("jstests/libs/analyze_plan.js");
load("jstests/libs/collection_drop_recreate.js");
var ClusteredCollectionUtil = class {
+ static areAllCollectionsClustered(conn) {
+ const res =
+ conn.adminCommand({getParameter: 1, "failpoint.clusterAllCollectionsByDefault": 1});
+ if (res.ok)
+ return res["failpoint.clusterAllCollectionsByDefault"].mode;
+ else
+ return false;
+ }
+
static areClusteredIndexesEnabled(conn) {
const clusteredIndexesEnabled =
assert
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 2c6e794531f..dd869d95e19 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1279,6 +1279,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/catalog/clustered_collection_options',
+ '$BUILD_DIR/mongo/db/query/collation/collator_interface',
'$BUILD_DIR/mongo/db/storage/key_string',
],)
diff --git a/src/mongo/db/catalog/clustered_collection_util.cpp b/src/mongo/db/catalog/clustered_collection_util.cpp
index d71c6360050..7a0222bfa6b 100644
--- a/src/mongo/db/catalog/clustered_collection_util.cpp
+++ b/src/mongo/db/catalog/clustered_collection_util.cpp
@@ -109,14 +109,17 @@ bool requiresLegacyFormat(const NamespaceString& nss) {
return nss.isTimeseriesBucketsCollection() || nss.isChangeStreamPreImagesCollection();
}
-BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo) {
+BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo,
+ const BSONObj& collation) {
BSONObjBuilder bob;
collInfo.getIndexSpec().serialize(&bob);
+ if (!collation.isEmpty()) {
+ bob.append("collation", collation);
+ }
bob.append("clustered", true);
return bob.obj();
}
-
bool isClusteredOnId(const boost::optional<ClusteredCollectionInfo>& collInfo) {
return clustered_util::matchesClusterKey(BSON("_id" << 1), collInfo);
}
diff --git a/src/mongo/db/catalog/clustered_collection_util.h b/src/mongo/db/catalog/clustered_collection_util.h
index 2dd603430de..c20418b3db8 100644
--- a/src/mongo/db/catalog/clustered_collection_util.h
+++ b/src/mongo/db/catalog/clustered_collection_util.h
@@ -69,9 +69,11 @@ bool requiresLegacyFormat(const NamespaceString& nss);
/**
* listIndexes requires the ClusteredIndexSpec be formatted with an additional field 'clustered:
- * true' to indicate it is a clustered index.
+ * true' to indicate it is a clustered index, along with the collection's default collation. If
+ * the collection has the 'simple' collation, callers should pass an empty BSONObj.
*/
-BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo);
+BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo,
+ const BSONObj& collation);
/**
* Returns true if the BSON object matches the collection's cluster key. Caller's should ensure
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index eb8212cd85b..8cf7e7aa320 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -824,8 +824,8 @@ Status CollectionImpl::insertDocumentForBulkLoader(
RecordId recordId;
if (isClustered()) {
invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
- recordId =
- uassertStatusOK(record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
+ recordId = uassertStatusOK(record_id_helpers::keyForDoc(
+ doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator()));
}
// Using timestamp 0 for these inserts, which are non-oplog so we don't have an appropriate
@@ -908,8 +908,8 @@ Status CollectionImpl::_insertDocuments(OperationContext* opCtx,
RecordId recordId;
if (isClustered()) {
invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
- recordId = uassertStatusOK(
- record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
+ recordId = uassertStatusOK(record_id_helpers::keyForDoc(
+ doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator()));
}
if (MONGO_unlikely(corruptDocumentOnInsert.shouldFail())) {
diff --git a/src/mongo/db/catalog/list_indexes.cpp b/src/mongo/db/catalog/list_indexes.cpp
index 780cc3c0b12..ce1515a4c13 100644
--- a/src/mongo/db/catalog/list_indexes.cpp
+++ b/src/mongo/db/catalog/list_indexes.cpp
@@ -79,8 +79,12 @@ std::list<BSONObj> listIndexesInLock(OperationContext* opCtx,
collection->getAllIndexes(&indexNames);
if (collection->isClustered() && !collection->ns().isTimeseriesBucketsCollection()) {
+ BSONObj collation;
+ if (auto collator = collection->getDefaultCollator()) {
+ collation = collator->getSpec().toBSON();
+ }
auto clusteredSpec = clustered_util::formatClusterKeyForListIndexes(
- collection->getClusteredInfo().get());
+ collection->getClusteredInfo().get(), collation);
if (additionalInclude == ListIndexesInclude::IndexBuildInfo) {
indexSpecs.push_back(BSON("spec"_sd << clusteredSpec));
} else {
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 4c715746119..46b0439b29d 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -75,8 +75,9 @@ void _validateClusteredCollectionRecordId(OperationContext* opCtx,
const RecordId& rid,
const BSONObj& doc,
const ClusteredIndexSpec& indexSpec,
+ const CollatorInterface* collator,
ValidateResults* results) {
- const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec);
+ const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec, collator);
if (!ridFromDoc.isOK()) {
results->valid = false;
results->errors.push_back(str::stream() << rid << " " << ridFromDoc.getStatus().reason());
@@ -119,8 +120,12 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
const CollectionPtr& coll = _validateState->getCollection();
if (coll->isClustered()) {
- _validateClusteredCollectionRecordId(
- opCtx, recordId, recordBson, coll->getClusteredInfo()->getIndexSpec(), results);
+ _validateClusteredCollectionRecordId(opCtx,
+ recordId,
+ recordBson,
+ coll->getClusteredInfo()->getIndexSpec(),
+ coll->getDefaultCollator(),
+ results);
}
auto& executionCtx = StorageExecutionContext::get(opCtx);
diff --git a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
index 75fea08ded7..c7eb0d6e33f 100644
--- a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
+++ b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
@@ -159,9 +159,10 @@ public:
// Set up the new collection scan to start from the 'minPreImageId'.
void setupPlanExecutor(boost::optional<ChangeStreamPreImageId> minPreImageId) {
const auto minRecordId =
- (minPreImageId ? boost::optional<RecordId>(record_id_helpers::keyForElem(
- BSON("_id" << minPreImageId->toBSON()).firstElement()))
- : boost::none);
+ (minPreImageId
+ ? boost::optional<RecordId>(record_id_helpers::keyForElem(
+ BSON("_id" << minPreImageId->toBSON()).firstElement(), nullptr))
+ : boost::none);
_planExecutor =
InternalPlanner::collectionScan(_opCtx,
_preImagesCollPtr,
@@ -237,19 +238,20 @@ void deleteExpiredChangeStreamPreImages(Client* client) {
for (auto it = expiredPreImages.begin(); it != expiredPreImages.end(); ++it) {
it.saveState();
- writeConflictRetry(
- opCtx.get(),
- "ChangeStreamExpiredPreImagesRemover",
- NamespaceString::kChangeStreamPreImagesNamespace.ns(),
- [&] {
- WriteUnitOfWork wuow(opCtx.get());
- const auto recordId =
- record_id_helpers::keyForElem(it->getField(ChangeStreamPreImage::kIdFieldName));
- preImagesColl->deleteDocument(
- opCtx.get(), kUninitializedStmtId, recordId, &CurOp::get(*opCtx)->debug());
- wuow.commit();
- numberOfRemovals++;
- });
+ writeConflictRetry(opCtx.get(),
+ "ChangeStreamExpiredPreImagesRemover",
+ NamespaceString::kChangeStreamPreImagesNamespace.ns(),
+ [&] {
+ WriteUnitOfWork wuow(opCtx.get());
+ const auto recordId = record_id_helpers::keyForElem(
+ it->getField(ChangeStreamPreImage::kIdFieldName), nullptr);
+ preImagesColl->deleteDocument(opCtx.get(),
+ kUninitializedStmtId,
+ recordId,
+ &CurOp::get(*opCtx)->debug());
+ wuow.commit();
+ numberOfRemovals++;
+ });
it.restoreState();
}
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 91f17d08a30..6898ff287a5 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -359,6 +359,7 @@ void fillOutPlannerParams(OperationContext* opCtx,
if (collection->isClustered()) {
plannerParams->clusteredInfo = collection->getClusteredInfo();
+ plannerParams->clusteredCollectionCollator = collection->getDefaultCollator();
}
}
diff --git a/src/mongo/db/query/internal_plans.cpp b/src/mongo/db/query/internal_plans.cpp
index 649962c31cb..0f45d3b4649 100644
--- a/src/mongo/db/query/internal_plans.cpp
+++ b/src/mongo/db/query/internal_plans.cpp
@@ -77,13 +77,14 @@ CollectionScanParams convertIndexScanParamsToCollScanParams(
dassert(collection->isClustered() &&
clustered_util::matchesClusterKey(keyPattern, collection->getClusteredInfo()));
+ invariant(collection->getDefaultCollator() == nullptr);
boost::optional<RecordId> startRecord, endRecord;
if (!startKey.isEmpty()) {
- startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+ startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement(), nullptr));
}
if (!endKey.isEmpty()) {
- endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+ endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement(), nullptr));
}
// For a forward scan, the startKey is the minRecord. For a backward scan, it is the maxRecord.
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 77f0ff9d90c..58652f73279 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -211,11 +211,41 @@ bool isOplogTsLowerBoundPred(const mongo::MatchExpression* me) {
return me->path() == repl::OpTime::kTimestampFieldName;
}
+// True if the element type is affected by a collator (i.e. it is or contains a String).
+bool affectedByCollator(const BSONElement& element) {
+ switch (element.type()) {
+ case BSONType::String:
+ return true;
+ case BSONType::Array:
+ case BSONType::Object:
+ for (const auto& sub : element.Obj()) {
+ if (affectedByCollator(sub))
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+}
+
+// Returns true if the element is unaffected by collation, or if the query and collection
+// collators are compatible with each other.
+bool compatibleCollator(const QueryPlannerParams& params,
+ const CollatorInterface* queryCollator,
+ const BSONElement& element) {
+ auto const collCollator = params.clusteredCollectionCollator;
+ bool compatible = !queryCollator || (collCollator && *queryCollator == *collCollator);
+ return compatible || !affectedByCollator(element);
+}
+
/**
* Helper function that checks to see if min() or max() were provided along with the query. If so,
* adjusts the collection scan bounds to fit the constraints.
*/
-void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collScan) {
+void handleRIDRangeMinMax(const CanonicalQuery& query,
+ CollectionScanNode* collScan,
+ const QueryPlannerParams& params,
+ const CollatorInterface* collator) {
BSONObj minObj = query.getFindCommandRequest().getMin();
BSONObj maxObj = query.getFindCommandRequest().getMax();
if (minObj.isEmpty() && maxObj.isEmpty()) {
@@ -232,17 +262,17 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS
collScan->direction == 1);
boost::optional<RecordId> newMinRecord, newMaxRecord;
- if (!maxObj.isEmpty()) {
+ if (!maxObj.isEmpty() && compatibleCollator(params, collator, maxObj.firstElement())) {
// max() is exclusive.
// Assumes clustered collection scans are only supported with the forward direction.
collScan->boundInclusion =
CollectionScanParams::ScanBoundInclusion::kIncludeStartRecordOnly;
- newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement());
+ newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement(), collator);
}
- if (!minObj.isEmpty()) {
+ if (!minObj.isEmpty() && compatibleCollator(params, collator, minObj.firstElement())) {
// The min() is inclusive as are bounded collection scans by default.
- newMinRecord = record_id_helpers::keyForElem(minObj.firstElement());
+ newMinRecord = record_id_helpers::keyForElem(minObj.firstElement(), collator);
}
if (!collScan->minRecord) {
@@ -271,7 +301,8 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS
*/
void handleRIDRangeScan(const MatchExpression* conjunct,
CollectionScanNode* collScan,
- const QueryPlannerParams& params) {
+ const QueryPlannerParams& params,
+ const CollatorInterface* collator) {
invariant(params.clusteredInfo);
if (conjunct == nullptr) {
@@ -281,7 +312,7 @@ void handleRIDRangeScan(const MatchExpression* conjunct,
auto* andMatchPtr = dynamic_cast<const AndMatchExpression*>(conjunct);
if (andMatchPtr != nullptr) {
for (size_t index = 0; index < andMatchPtr->numChildren(); index++) {
- handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params);
+ handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params, collator);
}
return;
}
@@ -292,31 +323,30 @@ void handleRIDRangeScan(const MatchExpression* conjunct,
return;
}
- const bool hasMaxRecord = collScan->maxRecord.has_value();
- const bool hasMinRecord = collScan->minRecord.has_value();
-
- if (!hasMinRecord && !hasMaxRecord) {
- if (auto eq = dynamic_cast<const EqualityMatchExpression*>(conjunct)) {
- collScan->minRecord = record_id_helpers::keyForElem(eq->getData());
- collScan->maxRecord = collScan->minRecord;
- return;
- }
+ auto match = dynamic_cast<const ComparisonMatchExpression*>(conjunct);
+ if (match == nullptr) {
+ return; // Not a comparison match expression.
}
- if (!hasMaxRecord) {
- if (auto ltConjunct = dynamic_cast<const LTMatchExpression*>(conjunct)) {
- collScan->maxRecord = record_id_helpers::keyForElem(ltConjunct->getData());
- } else if (auto lteConjunct = dynamic_cast<const LTEMatchExpression*>(conjunct)) {
- collScan->maxRecord = record_id_helpers::keyForElem(lteConjunct->getData());
- }
+ const auto& element = match->getData();
+ bool compatible = compatibleCollator(params, collator, element);
+ if (!compatible) {
+ return; // Collator affects probe and it's not compatible with collection's collator.
}
- if (!hasMinRecord) {
- if (auto gtConjunct = dynamic_cast<const GTMatchExpression*>(conjunct)) {
- collScan->minRecord = record_id_helpers::keyForElem(gtConjunct->getData());
- } else if (auto gteConjunct = dynamic_cast<const GTEMatchExpression*>(conjunct)) {
- collScan->minRecord = record_id_helpers::keyForElem(gteConjunct->getData());
- }
+ auto& maxRecord = collScan->maxRecord;
+ auto& minRecord = collScan->minRecord;
+ if (dynamic_cast<const EqualityMatchExpression*>(match)) {
+ minRecord = record_id_helpers::keyForElem(element, collator);
+ maxRecord = minRecord;
+ } else if (!maxRecord &&
+ (dynamic_cast<const LTMatchExpression*>(match) ||
+ dynamic_cast<const LTEMatchExpression*>(match))) {
+ maxRecord = record_id_helpers::keyForElem(element, collator);
+ } else if (!minRecord &&
+ (dynamic_cast<const GTMatchExpression*>(match) ||
+ dynamic_cast<const GTEMatchExpression*>(match))) {
+ minRecord = record_id_helpers::keyForElem(element, collator);
}
}
@@ -404,8 +434,8 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
if (params.clusteredInfo && !csn->resumeAfterRecordId) {
// This is a clustered collection. Attempt to perform an efficient, bounded collection scan
// via minRecord and maxRecord if applicable.
- handleRIDRangeScan(csn->filter.get(), csn.get(), params);
- handleRIDRangeMinMax(query, csn.get());
+ handleRIDRangeScan(csn->filter.get(), csn.get(), params, query.getCollator());
+ handleRIDRangeMinMax(query, csn.get(), params, query.getCollator());
}
return csn;
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index c6c670d607f..2651bd5f547 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -715,20 +715,33 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey();
- // Since the clusteredIndex doesn't have a specific collator, check if it is
- // compatible with the max and min using the same collator as the query.
+ // Check if the query collator is compatible with the collection collator for the
+ // provided min and max values.
if ((!minObj.isEmpty() &&
- !indexCompatibleMaxMin(
- minObj, query.getCollator(), query.getCollator(), clusterKey)) ||
+ !indexCompatibleMaxMin(minObj,
+ query.getCollator(),
+ params.clusteredCollectionCollator,
+ clusterKey)) ||
(!maxObj.isEmpty() &&
- !indexCompatibleMaxMin(
- maxObj, query.getCollator(), query.getCollator(), clusterKey))) {
+ !indexCompatibleMaxMin(maxObj,
+ query.getCollator(),
+ params.clusteredCollectionCollator,
+ clusterKey))) {
return Status(ErrorCodes::Error(6137400),
"The clustered index is not compatible with the values provided "
- "for min/max");
+ "for min/max due to the query collation");
}
- if (!minObj.isEmpty() && !maxObj.isEmpty() && minObj.woCompare(maxObj) >= 0) {
+ auto wellSorted = [&minObj, &maxObj, collator = query.getCollator()]() {
+ if (collator) {
+ auto min = stripFieldNamesAndApplyCollation(minObj, collator);
+ auto max = stripFieldNamesAndApplyCollation(maxObj, collator);
+ return min.woCompare(max) < 0;
+ } else {
+ return minObj.woCompare(maxObj) < 0;
+ }
+ };
+ if (!minObj.isEmpty() && !maxObj.isEmpty() && !wellSorted()) {
return Status(ErrorCodes::Error(6137401), "max() must be greater than min()");
}
}
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 8a9f14dcf7d..ad34bdd1ee6 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -33,6 +33,7 @@
#include "mongo/db/catalog/clustered_collection_options_gen.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/query/index_entry.h"
#include "mongo/db/query/query_knobs_gen.h"
@@ -142,6 +143,10 @@ struct QueryPlannerParams {
// Specifies the clusteredIndex information necessary to utilize the cluster key in bounded
// collection scans and other query operations.
boost::optional<ClusteredCollectionInfo> clusteredInfo;
+
+ // Specifies the collator information necessary to utilize the cluster key in bounded
+ // collection scans and other query operations.
+ const CollatorInterface* clusteredCollectionCollator;
};
} // namespace mongo
diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp
index 75663948cf6..35925a75019 100644
--- a/src/mongo/db/record_id_helpers.cpp
+++ b/src/mongo/db/record_id_helpers.cpp
@@ -37,6 +37,7 @@
#include "mongo/bson/timestamp.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/record_id.h"
#include "mongo/db/storage/key_string.h"
#include "mongo/logv2/redaction.h"
@@ -83,7 +84,9 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) {
return keyForOptime(elem.timestamp());
}
-StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec) {
+StatusWith<RecordId> keyForDoc(const BSONObj& doc,
+ const ClusteredIndexSpec& indexSpec,
+ const CollatorInterface* collator) {
// Get the collection's cluster key field name
const auto clusterKeyField = clustered_util::getClusterKeyFieldName(indexSpec);
// Build a RecordId using the cluster key.
@@ -94,15 +97,21 @@ StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& ind
<< clusterKeyField << "' field"};
}
- return keyForElem(keyElement);
+ return keyForElem(keyElement, collator);
}
-RecordId keyForElem(const BSONElement& elem) {
+RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator) {
// Intentionally discard the TypeBits since the type information will be stored in the cluster
// key of the original document. The consequence of this behavior is that cluster key values
// that compare similarly, but are of different types may not be used concurrently.
KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion);
- keyBuilder.appendBSONElement(elem);
+ if (collator) {
+ BSONObjBuilder out;
+ CollationIndexKey::collationAwareIndexKeyAppend(elem, collator, &out);
+ keyBuilder.appendBSONElement(out.done().firstElement());
+ } else {
+ keyBuilder.appendBSONElement(elem);
+ }
return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize());
}
diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h
index 1d984c94a1e..4627d134f9b 100644
--- a/src/mongo/db/record_id_helpers.h
+++ b/src/mongo/db/record_id_helpers.h
@@ -33,6 +33,7 @@
#include "mongo/base/status_with.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/catalog/clustered_collection_options_gen.h"
+#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/storage/key_format.h"
namespace mongo {
@@ -49,8 +50,10 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
/**
* For clustered collections, converts various values into a RecordId.
*/
-StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec);
-RecordId keyForElem(const BSONElement& elem);
+StatusWith<RecordId> keyForDoc(const BSONObj& doc,
+ const ClusteredIndexSpec& indexSpec,
+ const CollatorInterface* collator);
+RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator);
RecordId keyForOID(OID oid);
RecordId keyForDate(Date_t date);
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index 773210d4cf3..0066240cc2f 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -212,7 +212,7 @@ BaseCloner::AfterStageBehavior CollectionCloner::listIndexesStage() {
invariant(_collectionOptions.clusteredIndex);
invariant(spec.getBoolField("clustered") == true);
invariant(clustered_util::formatClusterKeyForListIndexes(
- _collectionOptions.clusteredIndex.get())
+ _collectionOptions.clusteredIndex.get(), _collectionOptions.collation)
.woCompare(spec) == 0);
// Skip if the spec is for the collection's clusteredIndex.
} else if (spec.hasField("buildUUID")) {
diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp
index 79a98205972..c24c170e988 100644
--- a/src/mongo/db/repl/dbcheck.cpp
+++ b/src/mongo/db/repl/dbcheck.cpp
@@ -247,10 +247,14 @@ DbCheckHasher::DbCheckHasher(OperationContext* opCtx,
InternalPlanner::IXSCAN_FETCH);
} else {
CollectionScanParams params;
- params.minRecord = uassertStatusOK(record_id_helpers::keyForDoc(
- start.obj(), collection->getClusteredInfo()->getIndexSpec()));
- params.maxRecord = uassertStatusOK(record_id_helpers::keyForDoc(
- end.obj(), collection->getClusteredInfo()->getIndexSpec()));
+ params.minRecord = uassertStatusOK(
+ record_id_helpers::keyForDoc(start.obj(),
+ collection->getClusteredInfo()->getIndexSpec(),
+ collection->getDefaultCollator()));
+ params.maxRecord = uassertStatusOK(
+ record_id_helpers::keyForDoc(end.obj(),
+ collection->getClusteredInfo()->getIndexSpec(),
+ collection->getDefaultCollator()));
params.boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeEndRecordOnly;
_exec = InternalPlanner::collectionScan(
opCtx, &collection, params, PlanYieldPolicy::YieldPolicy::NO_YIELD);
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 5dd66d101d2..3c61a2de652 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -732,13 +732,16 @@ StatusWith<std::vector<BSONObj>> _findOrDeleteDocuments(
"bounded collection scans only support forward scans");
}
+ auto collator = collection->getDefaultCollator();
boost::optional<RecordId> minRecord, maxRecord;
if (!startKey.isEmpty()) {
- minRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+ minRecord =
+ RecordId(record_id_helpers::keyForElem(startKey.firstElement(), collator));
}
if (!endKey.isEmpty()) {
- maxRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+ maxRecord =
+ RecordId(record_id_helpers::keyForElem(endKey.firstElement(), collator));
}
planExecutor = isFind
diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp
index dd6a586c1a0..eec2f336fc7 100644
--- a/src/mongo/db/storage/record_store_test_harness.cpp
+++ b/src/mongo/db/storage/record_store_test_harness.cpp
@@ -428,7 +428,7 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) {
recordData.makeOwned();
RecordId id = uassertStatusOK(
- record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec()));
+ record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec(), nullptr));
records.push_back({id, recordData});
}
diff --git a/src/mongo/dbtests/query_stage_collscan.cpp b/src/mongo/dbtests/query_stage_collscan.cpp
index cd041e97f4a..c1aaff74825 100644
--- a/src/mongo/dbtests/query_stage_collscan.cpp
+++ b/src/mongo/dbtests/query_stage_collscan.cpp
@@ -209,10 +209,10 @@ public:
_client.insert(ns.ns(), docs, ordered);
}
- // Returns the recordId generated by doc, assuming doc takes the shape of {<cluster key> :
- // <value>};
+    // Returns the recordId generated by doc, assuming there is no collation and doc takes the
+    // shape of {<cluster key> : <value>}.
RecordId getRecordIdForClusteredDoc(const BSONObj& doc) {
- return RecordId(record_id_helpers::keyForElem(doc.firstElement()));
+ return RecordId(record_id_helpers::keyForElem(doc.firstElement(), nullptr));
}
// Performs a bounded collection scan from 'minRecord' to 'maxRecord' in the specified