summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Zhang <jason.zhang@mongodb.com>2021-12-14 14:49:05 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-12-14 15:08:31 +0000
commit491a47d35f6ded74274324f3fca2c147c1c124ac (patch)
tree6da7caa1a9ac9c8fec51c3068a37e6ba5ff573ef
parented7cca61938ee12f5a9cbe870af096987c662f5c (diff)
downloadmongo-491a47d35f6ded74274324f3fca2c147c1c124ac.tar.gz
SERVER-53335 Queries, updates, and deletes with non-"simple" collations may miss documents when using hashed sharding
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml1
-rw-r--r--jstests/sharding/collation_shard_targeting_hashed_shard_key.js444
-rw-r--r--src/mongo/s/chunk_manager.cpp12
-rw-r--r--src/mongo/s/chunk_manager.h4
-rw-r--r--src/mongo/s/commands/cluster_find_and_modify_cmd.cpp7
5 files changed, 464 insertions, 4 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 07f796429df..dc358309b0e 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -136,6 +136,7 @@ selector:
- jstests/sharding/retryable_mongos_write_errors.js
# Requires the fix for SERVER-58104 which was not backported to 3.6.
- jstests/sharding/balancing_sessions_collection_reshard.js
+ - jstests/sharding/collation_shard_targeting_hashed_shard_key.js
exclude_with_any_tags:
- multiversion_incompatible
diff --git a/jstests/sharding/collation_shard_targeting_hashed_shard_key.js b/jstests/sharding/collation_shard_targeting_hashed_shard_key.js
new file mode 100644
index 00000000000..81893f2a225
--- /dev/null
+++ b/jstests/sharding/collation_shard_targeting_hashed_shard_key.js
@@ -0,0 +1,444 @@
+
+/**
+ * Test shard targeting for queries on a collection with a non-simple collation and a hashed shard
+ * key.
+ * @tags: [
+ * requires_find_command
+ * ]
+ */
+(function() {
+ const st = new ShardingTest({mongos: 1, config: 1, shards: 2, rs: {nodes: 1}});
+
+ function shardCollectionWithSplitsAndMoves(
+ ns, shardKeyPattern, collation, splitPoints, chunksToMove) {
+ const collection = st.s.getCollection(ns);
+ const db = collection.getDB();
+
+ assert.commandWorked(db.runCommand({create: collection.getName(), collation: collation}));
+
+ st.ensurePrimaryShard(db.getName(), st.shard0.shardName);
+ assert.commandWorked(st.s.adminCommand({enableSharding: db.getName()}));
+
+ assert.commandWorked(st.s.adminCommand({
+ shardCollection: collection.getFullName(),
+ key: shardKeyPattern,
+ collation: {locale: "simple"}
+ }));
+
+ for (let splitPoint of splitPoints) {
+ assert.commandWorked(
+ st.s.adminCommand({split: collection.getFullName(), middle: splitPoint}));
+ }
+
+ for (let {
+ query, shard
+ } of chunksToMove) {
+ assert.commandWorked(st.s.adminCommand({
+ moveChunk: collection.getFullName(),
+ find: query,
+ to: shard,
+ }));
+ }
+
+ return collection;
+ }
+
+ function findQueryWithCollation(collection, query, collation) {
+ let cursor = collection.find(query);
+ if (collation) {
+ cursor = cursor.collation(collation);
+ }
+ return cursor.toArray();
+ }
+
+ {
+ jsTestLog(
+ "Test find command in an _id:hashed sharded collection with simple default collation.");
+
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.id_hashed_sharding_with_simple_collation",
+ {_id: "hashed"},
+ {locale: "simple"},
+ [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}],
+ [
+ {query: {_id: "A"}, shard: st.shard0.shardName},
+ {query: {_id: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: "A"}];
+ assert.commandWorked(collection.insert(docs));
+
+ // Check default collation, simple collation, non-simple collation.
+ assert.eq([],
+ findQueryWithCollation(
+ st.s.getCollection(collection.getFullName()), {_id: "a"}, undefined));
+ assert.eq(
+ [],
+ findQueryWithCollation(
+ st.s.getCollection(collection.getFullName()), {_id: "a"}, {locale: "simple"}));
+ assert.eq(docs,
+ findQueryWithCollation(st.s.getCollection(collection.getFullName()),
+ {_id: "a"},
+ {locale: "en", strength: 2}));
+ }
+
+ {
+ jsTestLog(
+ "Test find command in an _id:hashed sharded collection with non-simple default collation.");
+
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.id_hashed_sharding_with_default_collation",
+ {_id: "hashed"},
+ {locale: "en", strength: 2},
+ [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}],
+ [
+ {query: {_id: "A"}, shard: st.shard0.shardName},
+ {query: {_id: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: "A"}];
+ assert.commandWorked(collection.insert(docs));
+
+ // Check default collation, simple collation, non-simple collation.
+ assert.eq(docs,
+ findQueryWithCollation(
+ st.s.getCollection(collection.getFullName()), {_id: "a"}, undefined));
+ assert.eq(
+ [],
+ findQueryWithCollation(
+ st.s.getCollection(collection.getFullName()), {_id: "a"}, {locale: "simple"}));
+ assert.eq(docs,
+ findQueryWithCollation(st.s.getCollection(collection.getFullName()),
+ {_id: "a"},
+ {locale: "en", strength: 2}));
+ }
+
+ {
+ jsTestLog("Test an _id:1 sharded collection with non-simple default collation.");
+
+ const collection = st.s.getCollection("test.id_range_sharding_with_default_collation");
+ const db = collection.getDB();
+ assert.commandWorked(
+ db.runCommand({create: collection.getName(), collation: {locale: "en", strength: 2}}));
+
+ st.ensurePrimaryShard(db.getName(), st.shard0.shardName);
+ assert.commandWorked(st.s.adminCommand({enableSharding: db.getName()}));
+
+ const res = assert.commandFailedWithCode(st.s.adminCommand({
+ shardCollection: collection.getFullName(),
+ key: {_id: 1},
+ collation: {locale: "simple"}
+ }),
+ ErrorCodes.BadValue);
+ assert(/The _id index must have the same collation as the collection/.test(res.errmsg),
+ `expected shardCollection command to fail due to required collation for _id index: ${
+ tojson(res)}`);
+ }
+
+ {
+ jsTestLog(
+ "Test find command in a hashed sharded collection with simple default collation.");
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.non_id_hashed_sharding_with_simple_collation",
+ {notUnderscoreId: "hashed"},
+ {locale: "simple"},
+ [
+ {notUnderscoreId: convertShardKeyToHashed("A")},
+ {notUnderscoreId: convertShardKeyToHashed("a")}
+ ],
+ [
+ {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName},
+ {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: 0, notUnderscoreId: "A"}];
+ assert.commandWorked(collection.insert(docs));
+
+ // Check default collation, simple collation, non-simple collation.
+ assert.eq(
+ [],
+ findQueryWithCollation(
+ st.s.getCollection(collection.getFullName()), {notUnderscoreId: "a"}, undefined));
+ assert.eq([],
+ findQueryWithCollation(st.s.getCollection(collection.getFullName()),
+ {notUnderscoreId: "a"},
+ {locale: "simple"}));
+ assert.eq(docs,
+ findQueryWithCollation(st.s.getCollection(collection.getFullName()),
+ {notUnderscoreId: "a"},
+ {locale: "en", strength: 2}));
+ }
+
+ {
+ jsTestLog(
+ "Test find command in a hashed sharded collection with non-simple default collation.");
+
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.non_id_hashed_sharding_with_non_simple_collation",
+ {notUnderscoreId: "hashed"},
+ {locale: "en", strength: 2},
+ [
+ {notUnderscoreId: convertShardKeyToHashed("A")},
+ {notUnderscoreId: convertShardKeyToHashed("a")}
+ ],
+ [
+ {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName},
+ {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: 0, notUnderscoreId: "A"}];
+ assert.commandWorked(collection.insert(docs));
+
+ // Check default collation, simple collation, non-simple collation.
+ assert.eq(
+ docs,
+ findQueryWithCollation(
+ st.s.getCollection(collection.getFullName()), {notUnderscoreId: "a"}, undefined));
+ assert.eq([],
+ findQueryWithCollation(st.s.getCollection(collection.getFullName()),
+ {notUnderscoreId: "a"},
+ {locale: "simple"}));
+ assert.eq(docs,
+ findQueryWithCollation(st.s.getCollection(collection.getFullName()),
+ {notUnderscoreId: "a"},
+ {locale: "en", strength: 2}));
+ }
+
+ {
+ jsTestLog(
+ "Test findAndModify command in an _id:hashed sharded collection with simple default collation.");
+
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.id_hashed_sharding_find_and_modify_simple_collation",
+ {_id: "hashed"},
+ {locale: "simple"},
+ [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}],
+ [
+ {query: {_id: "A"}, shard: st.shard0.shardName},
+ {query: {_id: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: "A", count: 0}];
+ assert.commandWorked(collection.insert(docs));
+
+ const mongosCollection = st.s.getCollection(collection.getFullName());
+
+ // Check findAndModify results with the default, simple, and non-simple collation.
+ // Currently, due to findAndModify's assumption that _id is uniquely targetable, we do not do a
+ // scatter-gather to check every shard for a match. findAndModify's current behavior will
+ // target the first shard in which the max key of a chunk is greater than the query's shard
+ // key. In this case, because we're using hashed sharding, hash('a') is less than hash('A'),
+ // which means that when we query for {_id: "a"} we will target the shard containing the chunk
+ // for "a"; likewise, if we query for {_id: "A"} we will only target the shard containing the
+ // chunk for "A".
+ assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A"));
+ assert.eq(null,
+ mongosCollection.findAndModify({query: {_id: "a"}, update: {$inc: {count: 1}}}));
+ assert.eq(
+ null,
+ mongosCollection.findAndModify(
+ {query: {_id: "a"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}}));
+ assert.eq(null, mongosCollection.findAndModify({
+ query: {_id: "a"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ assert.eq({_id: "A", count: 0},
+ mongosCollection.findAndModify({query: {_id: "A"}, update: {$inc: {count: 1}}}));
+ assert.eq(
+ {_id: "A", count: 1},
+ mongosCollection.findAndModify(
+ {query: {_id: "A"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}}));
+ assert.eq({_id: "A", count: 2}, mongosCollection.findAndModify({
+ query: {_id: "A"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ }
+
+ {
+ jsTestLog(
+ "Test findAndModify command in an _id:hashed sharded collection with non-simple default collation.");
+
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.id_hashed_sharding_find_and_modify_with_non_simple_collation",
+ {_id: "hashed"},
+ {locale: "en", strength: 2},
+ [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}],
+ [
+ {query: {_id: "A"}, shard: st.shard0.shardName},
+ {query: {_id: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: "A", count: 0}];
+ assert.commandWorked(collection.insert(docs));
+
+ const mongosCollection = st.s.getCollection(collection.getFullName());
+
+ // Check findAndModify results with the default, simple, and non-simple collation.
+ // Currently, due to findAndModify's assumption that _id is uniquely targetable, we do not do a
+ // scatter-gather to check every shard for a match. findAndModify's current behavior will
+ // target the first shard in which the max key of a chunk is greater than the query's shard
+ // key. In this case, because we're using hashed sharding, hash('a') is less than hash('A'),
+ // which means that when we query for {_id: "a"} we will target the shard containing the chunk
+ // for "a"; likewise, if we query for {_id: "A"} we will only target the shard containing the
+ // chunk for "A".
+ assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A"));
+ assert.eq(null,
+ mongosCollection.findAndModify({query: {_id: "a"}, update: {$inc: {count: 1}}}));
+ assert.eq(
+ null,
+ mongosCollection.findAndModify(
+ {query: {_id: "a"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}}));
+ assert.eq(null, mongosCollection.findAndModify({
+ query: {_id: "a"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ assert.eq({_id: "A", count: 0},
+ mongosCollection.findAndModify({query: {_id: "A"}, update: {$inc: {count: 1}}}));
+ assert.eq(
+ {_id: "A", count: 1},
+ mongosCollection.findAndModify(
+ {query: {_id: "A"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}}));
+ assert.eq({_id: "A", count: 2}, mongosCollection.findAndModify({
+ query: {_id: "A"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ }
+
+ {
+ jsTestLog(
+ "Test findAndModify command in a hashed sharded collection with simple default collation.");
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.non_id_hashed_sharding_find_and_modify_with_simple_collation",
+ {notUnderscoreId: "hashed"},
+ {locale: "simple"},
+ [
+ {notUnderscoreId: convertShardKeyToHashed("A")},
+ {notUnderscoreId: convertShardKeyToHashed("a")}
+ ],
+ [
+ {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName},
+ {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: 0, notUnderscoreId: "A", count: 0}];
+ assert.commandWorked(collection.insert(docs));
+
+ const mongosCollection = st.s.getCollection(collection.getFullName());
+
+ // Check findAndModify results with the default, simple, and non-simple collation.
+ // Currently, due to findAndModify's assumption that _id is uniquely targetable, we do not do a
+ // scatter-gather to check every shard for a match. findAndModify's current behavior will
+ // target the first shard in which the max key of a chunk is greater than the query's shard
+ // key. In this case, because we're using hashed sharding, hash('a') is less than hash('A'),
+ // which means that when we query for {notUnderscoreId: "a"} we will target the shard
+ // containing the chunk for "a"; likewise, if we query for {notUnderscoreId: "A"} we will only
+ // target the shard containing the chunk for "A".
+ assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A"));
+ assert.eq(null,
+ mongosCollection.findAndModify(
+ {query: {notUnderscoreId: "a"}, update: {$inc: {count: 1}}}));
+ assert.eq(null, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "a"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "simple"}
+ }));
+ assert.eq(null, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "a"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ assert.eq({_id: 0, notUnderscoreId: "A", count: 0},
+ mongosCollection.findAndModify(
+ {query: {notUnderscoreId: "A"}, update: {$inc: {count: 1}}}));
+ assert.eq({_id: 0, notUnderscoreId: "A", count: 1}, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "A"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "simple"}
+ }));
+ assert.eq({_id: 0, notUnderscoreId: "A", count: 2}, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "A"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ }
+
+ {
+ jsTestLog(
+ "Test findAndModify command in a hashed sharded collection with non-simple default collation.");
+
+ const collection = shardCollectionWithSplitsAndMoves(
+ "test.non_id_hashed_sharding_find_and_modify_with_non_simple_collation",
+ {notUnderscoreId: "hashed"},
+ {locale: "en", strength: 2},
+ [
+ {notUnderscoreId: convertShardKeyToHashed("A")},
+ {notUnderscoreId: convertShardKeyToHashed("a")}
+ ],
+ [
+ {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName},
+ {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName}
+ ]);
+
+ const docs = [{_id: 0, notUnderscoreId: "A", count: 0}];
+ assert.commandWorked(collection.insert(docs));
+
+ const mongosCollection = st.s.getCollection(collection.getFullName());
+
+ // Check findAndModify results with the default, simple, and non-simple collation.
+ // Currently, due to findAndModify's assumption that _id is uniquely targetable, we do not do a
+ // scatter-gather to check every shard for a match. findAndModify's current behavior will
+ // target the first shard in which the max key of a chunk is greater than the query's shard
+ // key. In this case, because we're using hashed sharding, hash('a') is less than hash('A'),
+ // which means that when we query for {notUnderscoreId: "a"} we will target the shard
+ // containing the chunk for "a"; likewise, if we query for {notUnderscoreId: "A"} we will only
+ // target the shard containing the chunk for "A".
+ assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A"));
+ assert.eq(null,
+ mongosCollection.findAndModify(
+ {query: {notUnderscoreId: "a"}, update: {$inc: {count: 1}}}));
+ assert.eq(null, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "a"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "simple"}
+ }));
+ assert.eq(null, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "a"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ assert.eq({_id: 0, notUnderscoreId: "A", count: 0},
+ mongosCollection.findAndModify(
+ {query: {notUnderscoreId: "A"}, update: {$inc: {count: 1}}}));
+ assert.eq({_id: 0, notUnderscoreId: "A", count: 1}, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "A"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "simple"}
+ }));
+ assert.eq({_id: 0, notUnderscoreId: "A", count: 2}, mongosCollection.findAndModify({
+ query: {notUnderscoreId: "A"},
+ update: {$inc: {count: 1}},
+ collation: {locale: "en", strength: 2}
+ }));
+ }
+
+ st.stop();
+})(); \ No newline at end of file
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index f9bd130f8eb..fbca54ad10a 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -95,17 +95,25 @@ RoutingTableHistory::RoutingTableHistory(NamespaceString nss,
_collectionVersion(collectionVersion),
_shardVersions(_constructShardVersionMap()) {}
-Chunk ChunkManager::findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const {
+Chunk ChunkManager::findIntersectingChunk(const BSONObj& shardKey,
+ const BSONObj& collation,
+ bool bypassIsFieldHashedCheck) const {
const bool hasSimpleCollation = (collation.isEmpty() && !_rt->getDefaultCollator()) ||
SimpleBSONObjComparator::kInstance.evaluate(collation == CollationSpec::kSimpleSpec);
if (!hasSimpleCollation) {
for (BSONElement elt : shardKey) {
+ // We must assume that if the field is specified as "hashed" in the shard key pattern,
+ // then the hash value could have come from a collatable type. If we want to skip the
+ // check in the special case where the _id field is hashed and used as the shard key,
+ // set bypassIsFieldHashedCheck. This assumes that a request with a query that contains
+ // an _id field can target a specific shard.
uassert(ErrorCodes::ShardKeyNotFound,
str::stream() << "Cannot target single shard due to collation of key "
<< elt.fieldNameStringData()
<< " for namespace "
<< getns().toString(),
- !CollationIndexKey::isCollatableType(elt.type()));
+ !CollationIndexKey::isCollatableType(elt.type()) &&
+ (!_rt->getShardKeyPattern().isHashedPattern() || bypassIsFieldHashedCheck));
}
}
diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h
index 331dc569cd2..e976f4ba7e3 100644
--- a/src/mongo/s/chunk_manager.h
+++ b/src/mongo/s/chunk_manager.h
@@ -350,7 +350,9 @@ public:
* Throws a DBException with the ShardKeyNotFound code if unable to target a single shard due to
* collation or due to the key not matching the shard key pattern.
*/
- Chunk findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const;
+ Chunk findIntersectingChunk(const BSONObj& shardKey,
+ const BSONObj& collation,
+ bool bypassIsFieldHashedCheck = false) const;
/**
* Same as findIntersectingChunk, but assumes the simple collation.
diff --git a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp
index 5b637931b2d..1fa55445ff7 100644
--- a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp
+++ b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp
@@ -178,7 +178,12 @@ public:
const BSONObj query = cmdObj.getObjectField("query");
const BSONObj collation = getCollation(cmdObj);
const BSONObj shardKey = getShardKey(opCtx, *chunkMgr, query);
- auto chunk = chunkMgr->findIntersectingChunk(shardKey, collation);
+
+ // For now, set bypassIsFieldHashedCheck to be true in order to skip the
+ // isFieldHashedCheck in the special case where _id is hashed and used as the shard key.
+ // This means that we always assume that a findAndModify request using _id is targetable
+ // to a single shard.
+ auto chunk = chunkMgr->findIntersectingChunk(shardKey, collation, true);
_runCommand(opCtx,
chunk.getShardId(),