diff options
author | Jason Zhang <jason.zhang@mongodb.com> | 2021-12-10 17:48:55 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-10 18:10:20 +0000 |
commit | d3a93ec67a3cf149434bc5c112ecd7d71bdd89b5 (patch) | |
tree | ce117bc410f43651eade62e2dc90bc58c94d7f99 | |
parent | 0ff3674221b15c471ee5c5dae8f81c4365cb16e0 (diff) | |
download | mongo-d3a93ec67a3cf149434bc5c112ecd7d71bdd89b5.tar.gz |
SERVER-53335 Queries, updates, and deletes with non-"simple" collations may miss documents when using hashed sharding
6 files changed, 438 insertions, 4 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index a41dd01e6ee..c69832ebf6a 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -182,6 +182,7 @@ selector: - jstests/sharding/pipeline_length_limit.js - jstests/sharding/mongos_precache_routing_info.js - jstests/sharding/warm_up_connection_pool.js + - jstests/sharding/collation_shard_targeting_hashed_shard_key.js exclude_with_any_tags: - multiversion_incompatible diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 36553dae3a8..7d42eabfe83 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -120,6 +120,8 @@ all: test_file: jstests/replsets/cluster_chaining_override.js - ticket: SERVER-59613 test_file: jstests/aggregation/range.js + - ticket: SERVER-53335 + test_file: jstests/sharding/collation_shard_targeting_hashed_shard_key.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: diff --git a/jstests/sharding/collation_shard_targeting_hashed_shard_key.js b/jstests/sharding/collation_shard_targeting_hashed_shard_key.js new file mode 100644 index 00000000000..9ae58763ab3 --- /dev/null +++ b/jstests/sharding/collation_shard_targeting_hashed_shard_key.js @@ -0,0 +1,416 @@ + +/** + * Test shard targeting for queries on a collection with a non-simple collation and a hashed shard + * key. 
+ * @tags: [ + * requires_find_command + * ] + */ +(function() { +const st = new ShardingTest({mongos: 1, config: 1, shards: 2, rs: {nodes: 1}}); + +function shardCollectionWithSplitsAndMoves( + ns, shardKeyPattern, collation, splitPoints, chunksToMove) { + const collection = st.s.getCollection(ns); + const db = collection.getDB(); + + assert.commandWorked(db.runCommand({create: collection.getName(), collation: collation})); + + st.ensurePrimaryShard(db.getName(), st.shard0.shardName); + assert.commandWorked(st.s.adminCommand({enableSharding: db.getName()})); + + assert.commandWorked(st.s.adminCommand({ + shardCollection: collection.getFullName(), + key: shardKeyPattern, + collation: {locale: "simple"} + })); + + for (let splitPoint of splitPoints) { + assert.commandWorked( + st.s.adminCommand({split: collection.getFullName(), middle: splitPoint})); + } + + for (let {query, shard} of chunksToMove) { + assert.commandWorked(st.s.adminCommand({ + moveChunk: collection.getFullName(), + find: query, + to: shard, + })); + } + + return collection; +} + +function findQueryWithCollation(collection, query, collation) { + let cursor = collection.find(query); + if (collation) { + cursor = cursor.collation(collation); + } + return cursor.toArray(); +} + +{ + jsTestLog( + "Test find command in an _id:hashed sharded collection with simple default collation."); + + const collection = shardCollectionWithSplitsAndMoves( + "test.id_hashed_sharding_with_simple_collation", + {_id: "hashed"}, + {locale: "simple"}, + [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}], + [ + {query: {_id: "A"}, shard: st.shard0.shardName}, + {query: {_id: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: "A"}]; + assert.commandWorked(collection.insert(docs)); + + // Check default collation, simple collation, non-simple collation. 
+ assert.eq([], + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {_id: "a"}, undefined)); + assert.eq([], + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {_id: "a"}, {locale: "simple"})); + assert.eq( + docs, + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {_id: "a"}, {locale: "en", strength: 2})); +} + +{ + jsTestLog( + "Test find command in an _id:hashed sharded collection with non-simple default collation."); + + const collection = shardCollectionWithSplitsAndMoves( + "test.id_hashed_sharding_with_default_collation", + {_id: "hashed"}, + {locale: "en", strength: 2}, + [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}], + [ + {query: {_id: "A"}, shard: st.shard0.shardName}, + {query: {_id: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: "A"}]; + assert.commandWorked(collection.insert(docs)); + + // Check default collation, simple collation, non-simple collation. + assert.eq(docs, + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {_id: "a"}, undefined)); + assert.eq([], + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {_id: "a"}, {locale: "simple"})); + assert.eq( + docs, + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {_id: "a"}, {locale: "en", strength: 2})); +} + +{ + jsTestLog("Test an _id:1 sharded collection with non-simple default collation."); + + const collection = st.s.getCollection("test.id_range_sharding_with_default_collation"); + const db = collection.getDB(); + assert.commandWorked( + db.runCommand({create: collection.getName(), collation: {locale: "en", strength: 2}})); + + st.ensurePrimaryShard(db.getName(), st.shard0.shardName); + assert.commandWorked(st.s.adminCommand({enableSharding: db.getName()})); + + const res = assert.commandFailedWithCode(st.s.adminCommand({ + shardCollection: collection.getFullName(), + key: {_id: 1}, + collation: {locale: "simple"} 
+ }), + ErrorCodes.BadValue); + assert(/The _id index must have the same collation as the collection/.test(res.errmsg), + `expected shardCollection command to fail due to required collation for _id index: ${ + tojson(res)}`); +} + +{ + jsTestLog("Test find command in a hashed sharded collection with simple default collation."); + const collection = shardCollectionWithSplitsAndMoves( + "test.non_id_hashed_sharding_with_simple_collation", + {notUnderscoreId: "hashed"}, + {locale: "simple"}, + [ + {notUnderscoreId: convertShardKeyToHashed("A")}, + {notUnderscoreId: convertShardKeyToHashed("a")} + ], + [ + {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName}, + {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: 0, notUnderscoreId: "A"}]; + assert.commandWorked(collection.insert(docs)); + + // Check default collation, simple collation, non-simple collation. + assert.eq([], + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {notUnderscoreId: "a"}, undefined)); + assert.eq([], + findQueryWithCollation(st.s.getCollection(collection.getFullName()), + {notUnderscoreId: "a"}, + {locale: "simple"})); + assert.eq(docs, + findQueryWithCollation(st.s.getCollection(collection.getFullName()), + {notUnderscoreId: "a"}, + {locale: "en", strength: 2})); +} + +{ + jsTestLog( + "Test find command in a hashed sharded collection with non-simple default collation."); + + const collection = shardCollectionWithSplitsAndMoves( + "test.non_id_hashed_sharding_with_non_simple_collation", + {notUnderscoreId: "hashed"}, + {locale: "en", strength: 2}, + [ + {notUnderscoreId: convertShardKeyToHashed("A")}, + {notUnderscoreId: convertShardKeyToHashed("a")} + ], + [ + {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName}, + {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: 0, notUnderscoreId: "A"}]; + assert.commandWorked(collection.insert(docs)); + + // Check default collation, simple 
collation, non-simple collation. + assert.eq(docs, + findQueryWithCollation( + st.s.getCollection(collection.getFullName()), {notUnderscoreId: "a"}, undefined)); + assert.eq([], + findQueryWithCollation(st.s.getCollection(collection.getFullName()), + {notUnderscoreId: "a"}, + {locale: "simple"})); + assert.eq(docs, + findQueryWithCollation(st.s.getCollection(collection.getFullName()), + {notUnderscoreId: "a"}, + {locale: "en", strength: 2})); +} + +{ + jsTestLog( + "Test findAndModify command in an _id:hashed sharded collection with simple default collation."); + + const collection = shardCollectionWithSplitsAndMoves( + "test.id_hashed_sharding_find_and_modify_simple_collation", + {_id: "hashed"}, + {locale: "simple"}, + [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}], + [ + {query: {_id: "A"}, shard: st.shard0.shardName}, + {query: {_id: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: "A", count: 0}]; + assert.commandWorked(collection.insert(docs)); + + const mongosCollection = st.s.getCollection(collection.getFullName()); + + // Check findAndModify results with the default, simple, and non-simple collation. Currently, + // due to findAndModify's assumption that _id is uniquely targetable, we do not do a scatter + // gather to check every shard for a match. findAndModify's current behavior will target the + // first shard in which the max key of a chunk is greater than the query's shard key. In this + // case, because we're using hashed sharding, hash('a') is less than hash('A'), which means when + // we query for {_id: "a"} we will target the shard containing the chunk for "a", likewise if we + // query for {_id: "A"} we will only target the shard containing the chunk for "A". 
+ assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A")); + assert.eq(null, + mongosCollection.findAndModify({query: {_id: "a"}, update: {$inc: {count: 1}}})); + assert.eq(null, + mongosCollection.findAndModify( + {query: {_id: "a"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}})); + assert.eq(null, mongosCollection.findAndModify({ + query: {_id: "a"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); + assert.eq({_id: "A", count: 0}, + mongosCollection.findAndModify({query: {_id: "A"}, update: {$inc: {count: 1}}})); + assert.eq({_id: "A", count: 1}, + mongosCollection.findAndModify( + {query: {_id: "A"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}})); + assert.eq({_id: "A", count: 2}, mongosCollection.findAndModify({ + query: {_id: "A"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); +} + +{ + jsTestLog( + "Test findAndModify command in an _id:hashed sharded collection with non-simple default collation."); + + const collection = shardCollectionWithSplitsAndMoves( + "test.id_hashed_sharding_find_and_modify_with_non_simple_collation", + {_id: "hashed"}, + {locale: "en", strength: 2}, + [{_id: convertShardKeyToHashed("A")}, {_id: convertShardKeyToHashed("a")}], + [ + {query: {_id: "A"}, shard: st.shard0.shardName}, + {query: {_id: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: "A", count: 0}]; + assert.commandWorked(collection.insert(docs)); + + const mongosCollection = st.s.getCollection(collection.getFullName()); + + // Check findAndModify results with the default, simple, and non-simple collation. Currently, + // due to findAndModify's assumption that _id is uniquely targetable, we do not do a scatter + // gather to check every shard for a match. findAndModify's current behavior will target the + // first shard in which the max key of a chunk is greater than the query's shard key. 
In this + // case, because we're using hashed sharding, hash('a') is less than hash('A'), which means when + // we query for {_id: "a"} we will target the shard containing the chunk for "a", likewise if we + // query for {_id: "A"} we will only target the shard containing the chunk for "A". + assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A")); + assert.eq(null, + mongosCollection.findAndModify({query: {_id: "a"}, update: {$inc: {count: 1}}})); + assert.eq(null, + mongosCollection.findAndModify( + {query: {_id: "a"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}})); + assert.eq(null, mongosCollection.findAndModify({ + query: {_id: "a"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); + assert.eq({_id: "A", count: 0}, + mongosCollection.findAndModify({query: {_id: "A"}, update: {$inc: {count: 1}}})); + assert.eq({_id: "A", count: 1}, + mongosCollection.findAndModify( + {query: {_id: "A"}, update: {$inc: {count: 1}}, collation: {locale: "simple"}})); + assert.eq({_id: "A", count: 2}, mongosCollection.findAndModify({ + query: {_id: "A"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); +} + +{ + jsTestLog( + "Test findAndModify command in a hashed sharded collection with simple default collation."); + const collection = shardCollectionWithSplitsAndMoves( + "test.non_id_hashed_sharding_find_and_modify_with_simple_collation", + {notUnderscoreId: "hashed"}, + {locale: "simple"}, + [ + {notUnderscoreId: convertShardKeyToHashed("A")}, + {notUnderscoreId: convertShardKeyToHashed("a")} + ], + [ + {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName}, + {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: 0, notUnderscoreId: "A", count: 0}]; + assert.commandWorked(collection.insert(docs)); + + const mongosCollection = st.s.getCollection(collection.getFullName()); + + // Check findAndModify results with the default, simple, and non-simple 
collation. Currently, + // due to findAndModify's assumption that _id is uniquely targetable, we do not do a scatter + // gather to check every shard for a match. findAndModify's current behavior will target the + // first shard in which the max key of a chunk is greater than the query's shard key. In this + // case, because we're using hashed sharding, hash('a') is less than hash('A'), which means when + // we query for {notUnderscoreId: "a"} we will target the shard containing the chunk for "a", + // likewise if we query for {notUnderscoreId: "A"} we will only target the shard containing the + // chunk for "A". + assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A")); + assert.eq(null, + mongosCollection.findAndModify( + {query: {notUnderscoreId: "a"}, update: {$inc: {count: 1}}})); + assert.eq(null, mongosCollection.findAndModify({ + query: {notUnderscoreId: "a"}, + update: {$inc: {count: 1}}, + collation: {locale: "simple"} + })); + assert.eq(null, mongosCollection.findAndModify({ + query: {notUnderscoreId: "a"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); + assert.eq({_id: 0, notUnderscoreId: "A", count: 0}, + mongosCollection.findAndModify( + {query: {notUnderscoreId: "A"}, update: {$inc: {count: 1}}})); + assert.eq({_id: 0, notUnderscoreId: "A", count: 1}, mongosCollection.findAndModify({ + query: {notUnderscoreId: "A"}, + update: {$inc: {count: 1}}, + collation: {locale: "simple"} + })); + assert.eq({_id: 0, notUnderscoreId: "A", count: 2}, mongosCollection.findAndModify({ + query: {notUnderscoreId: "A"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); +} + +{ + jsTestLog( + "Test findAndModify command in a hashed sharded collection with non-simple default collation."); + + const collection = shardCollectionWithSplitsAndMoves( + "test.non_id_hashed_sharding_find_and_modify_with_non_simple_collation", + {notUnderscoreId: "hashed"}, + {locale: "en", strength: 2}, + [ + 
{notUnderscoreId: convertShardKeyToHashed("A")}, + {notUnderscoreId: convertShardKeyToHashed("a")} + ], + [ + {query: {notUnderscoreId: "A"}, shard: st.shard0.shardName}, + {query: {notUnderscoreId: "a"}, shard: st.shard1.shardName} + ]); + + const docs = [{_id: 0, notUnderscoreId: "A", count: 0}]; + assert.commandWorked(collection.insert(docs)); + + const mongosCollection = st.s.getCollection(collection.getFullName()); + + // Check findAndModify results with the default, simple, and non-simple collation. Currently, + // due to findAndModify's assumption that _id is uniquely targetable, we do not do a scatter + // gather to check every shard for a match. findAndModify's current behavior will target the + // first shard in which the max key of a chunk is greater than the query's shard key. In this + // case, because we're using hashed sharding, hash('a') is less than hash('A'), which means when + // we query for {notUnderscoreId: "a"} we will target the shard containing the chunk for "a", + // likewise if we query for {notUnderscoreId: "A"} we will only target the shard containing the + // chunk for "A". 
+ assert.lt(convertShardKeyToHashed("a"), convertShardKeyToHashed("A")); + assert.eq(null, + mongosCollection.findAndModify( + {query: {notUnderscoreId: "a"}, update: {$inc: {count: 1}}})); + assert.eq(null, mongosCollection.findAndModify({ + query: {notUnderscoreId: "a"}, + update: {$inc: {count: 1}}, + collation: {locale: "simple"} + })); + assert.eq(null, mongosCollection.findAndModify({ + query: {notUnderscoreId: "a"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); + assert.eq({_id: 0, notUnderscoreId: "A", count: 0}, + mongosCollection.findAndModify( + {query: {notUnderscoreId: "A"}, update: {$inc: {count: 1}}})); + assert.eq({_id: 0, notUnderscoreId: "A", count: 1}, mongosCollection.findAndModify({ + query: {notUnderscoreId: "A"}, + update: {$inc: {count: 1}}, + collation: {locale: "simple"} + })); + assert.eq({_id: 0, notUnderscoreId: "A", count: 2}, mongosCollection.findAndModify({ + query: {notUnderscoreId: "A"}, + update: {$inc: {count: 1}}, + collation: {locale: "en", strength: 2} + })); +} + +st.stop(); +})();
\ No newline at end of file diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index 2c79b51be75..23de84a9e4d 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -95,15 +95,23 @@ RoutingTableHistory::RoutingTableHistory(NamespaceString nss, _collectionVersion(collectionVersion), _shardVersions(_constructShardVersionMap()) {} -Chunk ChunkManager::findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const { +Chunk ChunkManager::findIntersectingChunk(const BSONObj& shardKey, + const BSONObj& collation, + bool bypassIsFieldHashedCheck) const { const bool hasSimpleCollation = (collation.isEmpty() && !_rt->getDefaultCollator()) || SimpleBSONObjComparator::kInstance.evaluate(collation == CollationSpec::kSimpleSpec); if (!hasSimpleCollation) { for (BSONElement elt : shardKey) { + // We must assume that if the field is specified as "hashed" in the shard key pattern, + // then the hash value could have come from a collatable type. If we want to skip the + // check in the special case where the _id field is hashed and used as the shard key, + // set bypassIsFieldHashedCheck. This assumes that a request with a query that contains + // an _id field can target a specific shard. uassert(ErrorCodes::ShardKeyNotFound, str::stream() << "Cannot target single shard due to collation of key " << elt.fieldNameStringData() << " for namespace " << getns(), - !CollationIndexKey::isCollatableType(elt.type())); + !CollationIndexKey::isCollatableType(elt.type()) && + (!_rt->getShardKeyPattern().isHashedPattern() || bypassIsFieldHashedCheck)); } } diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h index c6ece9146e8..2ef4c6d1479 100644 --- a/src/mongo/s/chunk_manager.h +++ b/src/mongo/s/chunk_manager.h @@ -333,7 +333,9 @@ public: * Throws a DBException with the ShardKeyNotFound code if unable to target a single shard due to * collation or due to the key not matching the shard key pattern. 
*/ - Chunk findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const; + Chunk findIntersectingChunk(const BSONObj& shardKey, + const BSONObj& collation, + bool bypassIsFieldHashedCheck = false) const; /** * Same as findIntersectingChunk, but assumes the simple collation. diff --git a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp index e5d86c59f8a..b760b7800d5 100644 --- a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp +++ b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp @@ -254,7 +254,12 @@ public: const BSONObj query = cmdObjForShard.getObjectField("query"); const BSONObj collation = getCollation(cmdObjForShard); const BSONObj shardKey = getShardKey(opCtx, *chunkMgr, query); - auto chunk = chunkMgr->findIntersectingChunk(shardKey, collation); + + // For now, set bypassIsFieldHashedCheck to be true in order to skip the + // isFieldHashedCheck in the special case where _id is hashed and used as the shard key. + // This means that we always assume that a findAndModify request using _id is targetable + // to a single shard. + auto chunk = chunkMgr->findIntersectingChunk(shardKey, collation, true); _runCommand(opCtx, chunk.getShardId(), |