diff options
author | Jason Zhang <jason.zhang@mongodb.com> | 2022-12-08 21:15:29 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-08 22:37:26 +0000 |
commit | ccdf183d91c830f53a5e98c7ad43c7d651129ac2 (patch) | |
tree | c305b098da43dda0645ca6c891621e67a4677105 | |
parent | c2d50870e6e2a25ac40a1760a4acbdb855bd1873 (diff) | |
download | mongo-ccdf183d91c830f53a5e98c7ad43c7d651129ac2.tar.gz |
SERVER-71896 Validate if a query with _id or shard key is directly targetable to a shard
-rw-r--r-- | jstests/sharding/query/collation_targeting.js | 35 | ||||
-rw-r--r-- | jstests/sharding/query/collation_targeting_inherited.js | 30 | ||||
-rw-r--r-- | src/mongo/s/commands/cluster_find_and_modify_cmd.cpp | 7 | ||||
-rw-r--r-- | src/mongo/s/write_ops/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/s/write_ops/batch_write_op.cpp | 15 | ||||
-rw-r--r-- | src/mongo/s/write_ops/write_without_shard_key_util.cpp | 104 | ||||
-rw-r--r-- | src/mongo/s/write_ops/write_without_shard_key_util.h | 3 | ||||
-rw-r--r-- | src/mongo/s/write_ops/write_without_shard_key_util_test.cpp | 106 |
8 files changed, 254 insertions, 48 deletions
diff --git a/jstests/sharding/query/collation_targeting.js b/jstests/sharding/query/collation_targeting.js index 9b7268518a1..3e945e83ee8 100644 --- a/jstests/sharding/query/collation_targeting.js +++ b/jstests/sharding/query/collation_targeting.js @@ -2,6 +2,8 @@ (function() { "use strict"; +load("jstests/sharding/updateOne_without_shard_key/libs/write_without_shard_key_test_util.js"); + const caseInsensitive = { locale: "en_US", strength: 2 @@ -181,16 +183,29 @@ assert.eq(1, explain.queryPlanner.winningPlan.shards.length); // FindAndModify. -// Sharded findAndModify on strings with non-simple collation should fail, because findAndModify -// must target a single shard. -assert.throws(function() { - coll.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}, collation: caseInsensitive}); -}); -assert.throws(function() { - coll.explain().findAndModify( - {query: {a: "foo"}, update: {$set: {b: 1}}, collation: caseInsensitive}); -}); - +// Sharded findAndModify that does not target a single shard can now be executed with a two phase +// write protocol that will target at most 1 matching document. +if (WriteWithoutShardKeyTestUtil.isWriteWithoutShardKeyFeatureEnabled(testDB)) { + let res = + coll.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}, collation: caseInsensitive}); + assert(res.a === "foo" || res.a === "FOO"); + + // TODO: SERVER-69925 Implement explain for findAndModify. + // assert.throws(function() { + // coll.explain().findAndModify( + // {query: {a: "foo"}, update: {$set: {b: 1}}, collation: caseInsensitive}); + // }); +} else { + // Sharded findAndModify on strings with non-simple collation should fail, because findAndModify + // must target a single shard. + assert.throws(function() { + coll.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}, collation: caseInsensitive}); + }); + assert.throws(function() { + coll.explain().findAndModify( + {query: {a: "foo"}, update: {$set: {b: 1}}, collation: caseInsensitive}); + }); +} // Sharded findAndModify on strings with simple collation should succeed. This should be // single-shard. assert.eq("foo", coll.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}).a); diff --git a/jstests/sharding/query/collation_targeting_inherited.js b/jstests/sharding/query/collation_targeting_inherited.js index e971bf51629..502d89c99b4 100644 --- a/jstests/sharding/query/collation_targeting_inherited.js +++ b/jstests/sharding/query/collation_targeting_inherited.js @@ -2,6 +2,8 @@ (function() { "use strict"; +load("jstests/sharding/updateOne_without_shard_key/libs/write_without_shard_key_test_util.js"); + const caseInsensitive = { locale: "en_US", strength: 2 @@ -201,14 +203,26 @@ assert.eq(1, explain.queryPlanner.winningPlan.shards.length); // FindAndModify. -// Sharded findAndModify on strings with non-simple collation inherited from the collection -// default should fail, because findAndModify must target a single shard. -assert.throws(function() { - collCaseInsensitive.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}); -}); -assert.throws(function() { - collCaseInsensitive.explain().findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}); -}); +// Sharded findAndModify that does not target a single shard can now be executed with a two phase +// write protocol that will target at most 1 matching document. +if (WriteWithoutShardKeyTestUtil.isWriteWithoutShardKeyFeatureEnabled(testDB)) { + let res = collCaseInsensitive.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}); + assert(res.a === "foo" || res.a === "FOO"); + + // TODO: SERVER-69925 Implement explain for findAndModify. + // assert.throws(function() { + // collCaseInsensitive.explain().findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}); + // }); +} else { + // Sharded findAndModify on strings with non-simple collation inherited from the collection + // default should fail, because findAndModify must target a single shard. + assert.throws(function() { + collCaseInsensitive.findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}); + }); + assert.throws(function() { + collCaseInsensitive.explain().findAndModify({query: {a: "foo"}, update: {$set: {b: 1}}}); + }); +} // Sharded findAndModify on strings with simple collation should succeed. This should be // single-shard. diff --git a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp index 4a108edb575..b7135ddeec3 100644 --- a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp +++ b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp @@ -503,8 +503,11 @@ public: if (cm.isSharded()) { const BSONObj query = cmdObjForShard.getObjectField("query"); - if (write_without_shard_key::useTwoPhaseProtocol( - opCtx, nss, false /* isUpdateOrDelete */, query)) { + if (write_without_shard_key::useTwoPhaseProtocol(opCtx, + nss, + false /* isUpdateOrDelete */, + query, + getCollation(cmdObjForShard))) { _runCommandWithoutShardKey(opCtx, boost::none /* dbVersion */, nss, diff --git a/src/mongo/s/write_ops/SConscript b/src/mongo/s/write_ops/SConscript index 4974289c61a..4d6f897e771 100644 --- a/src/mongo/s/write_ops/SConscript +++ b/src/mongo/s/write_ops/SConscript @@ -52,6 +52,6 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/catalog/collection_uuid_mismatch_info', - '$BUILD_DIR/mongo/s/write_ops/write_without_shard_key_util', + 'write_without_shard_key_util', ], ) diff --git a/src/mongo/s/write_ops/batch_write_op.cpp b/src/mongo/s/write_ops/batch_write_op.cpp index b89b6f9d487..0d784787ffa 100644 --- a/src/mongo/s/write_ops/batch_write_op.cpp +++ b/src/mongo/s/write_ops/batch_write_op.cpp @@ -415,18 +415,23 @@ StatusWith<bool> BatchWriteOp::targetBatch( bool isMultiWrite = false; BSONObj query; + BSONObj collation; if (writeItem.getOpType() == BatchedCommandRequest::BatchType_Update) { - isMultiWrite = writeItem.getUpdate().getMulti(); - query = writeItem.getUpdate().getQ(); + auto updateReq = writeItem.getUpdate(); + isMultiWrite = updateReq.getMulti(); + query = updateReq.getQ(); + collation = updateReq.getCollation().value_or(BSONObj()); } else { - isMultiWrite = writeItem.getDelete().getMulti(); - query = writeItem.getDelete().getQ(); + auto deleteReq = writeItem.getDelete(); + isMultiWrite = deleteReq.getMulti(); + query = deleteReq.getQ(); + collation = deleteReq.getCollation().value_or(BSONObj()); } if (!isMultiWrite && write_without_shard_key::useTwoPhaseProtocol( - _opCtx, targeter.getNS(), true /* isUpdateOrDelete */, query)) { + _opCtx, targeter.getNS(), true /* isUpdateOrDelete */, query, collation)) { // Writes without shard key should be in their own batch. if (!batchMap.empty()) { diff --git a/src/mongo/s/write_ops/write_without_shard_key_util.cpp b/src/mongo/s/write_ops/write_without_shard_key_util.cpp index 96776e1ecfa..7448ebac4d2 100644 --- a/src/mongo/s/write_ops/write_without_shard_key_util.cpp +++ b/src/mongo/s/write_ops/write_without_shard_key_util.cpp @@ -29,6 +29,8 @@ #include "mongo/bson/bsonobj.h" #include "mongo/db/internal_transactions_feature_flag_gen.h" +#include "mongo/db/query/collation/collation_index_key.h" +#include "mongo/db/query/collation/collator_factory_interface.h" #include "mongo/logv2/log.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/grid.h" @@ -37,22 +39,75 @@ namespace mongo { namespace write_without_shard_key { +namespace { + +constexpr auto kIdFieldName = "_id"_sd; + +// Used to do query validation for the _id field. +const ShardKeyPattern kVirtualIdShardKey(BSON(kIdFieldName << 1)); + +/** + * This returns "does the query have an _id field" and "is the _id field querying for a direct + * value" e.g. _id : 3 and not _id : { $gt : 3 }. + */ +bool isExactIdQuery(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj query, + const BSONObj collation, + bool hasDefaultCollation) { + auto findCommand = std::make_unique<FindCommandRequest>(nss); + findCommand->setFilter(query); + if (!collation.isEmpty()) { + findCommand->setCollation(collation); + } + const auto cq = CanonicalQuery::canonicalize(opCtx, + std::move(findCommand), + false, /* isExplain */ + nullptr, + ExtensionsCallbackNoop(), + MatchExpressionParser::kAllowAllSpecialFeatures); + if (cq.isOK()) { + // Only returns a shard key iff a query has a full shard key with direct/equality matches on + // all shard key fields. + auto shardKey = kVirtualIdShardKey.extractShardKeyFromQuery(*cq.getValue()); + BSONElement idElt = shardKey["_id"]; + + if (!idElt) { + return false; + } + + if (CollationIndexKey::isCollatableType(idElt.type()) && !collation.isEmpty() && + !hasDefaultCollation) { + // The collation applies to the _id field, but the user specified a collation which + // doesn't match the collection default. + return false; + } + return true; + } + + return false; +} + +bool shardKeyHasCollatableType(const BSONObj& shardKey) { + for (BSONElement elt : shardKey) { + if (CollationIndexKey::isCollatableType(elt.type())) { + return true; + } + } + return false; +} +} // namespace bool useTwoPhaseProtocol(OperationContext* opCtx, NamespaceString nss, bool isUpdateOrDelete, - const BSONObj& query) { + const BSONObj& query, + const BSONObj& collation) { if (!feature_flags::gFeatureFlagUpdateOneWithoutShardKey.isEnabled( serverGlobalParams.featureCompatibility)) { return false; } - // updateOne and deleteOne do not use the two phase protocol for single writes that specify _id - // in their queries. - if (isUpdateOrDelete && query.hasField("_id")) { - return false; - } - auto cm = uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionPlacementInfo(opCtx, nss)); @@ -60,9 +115,42 @@ bool useTwoPhaseProtocol(OperationContext* opCtx, if (!cm.isSharded()) { return false; } + + // Check if the query has specified a different collation than the default collation. + auto collator = collation.isEmpty() + ? nullptr // If no collation is specified we return a nullptr signifying the simple + // collation. + : uassertStatusOK( + CollatorFactoryInterface::get(opCtx->getServiceContext())->makeFromBSON(collation)); + auto hasDefaultCollation = + CollatorInterface::collatorsMatch(collator.get(), cm.getDefaultCollator()); + + // updateOne and deleteOne do not use the two phase protocol for single writes that specify + // _id in their queries. An exact _id match requires default collation if the _id value is a + // collatable type. + if (isUpdateOrDelete && query.hasField("_id") && + isExactIdQuery(opCtx, nss, query, collation, hasDefaultCollation)) { + return false; + } + auto shardKey = uassertStatusOK(cm.getShardKeyPattern().extractShardKeyFromQuery(opCtx, nss, query)); - return shardKey.isEmpty(); + + // 'shardKey' will only be populated only if a full equality shard key is extracted. + if (shardKey.isEmpty()) { + return true; + } else { + // If the default collection collation is not used and any field of the shard key is a + // collatable type, then we will use the two phase write protocol since we cannot target + // directly to a shard. + if (!hasDefaultCollation && shardKeyHasCollatableType(shardKey)) { + return true; + } else { + return false; + } + } + + return true; } } // namespace write_without_shard_key } // namespace mongo diff --git a/src/mongo/s/write_ops/write_without_shard_key_util.h b/src/mongo/s/write_ops/write_without_shard_key_util.h index adcc604ca2b..74c9eab8394 100644 --- a/src/mongo/s/write_ops/write_without_shard_key_util.h +++ b/src/mongo/s/write_ops/write_without_shard_key_util.h @@ -43,7 +43,8 @@ namespace write_without_shard_key { bool useTwoPhaseProtocol(OperationContext* opCtx, NamespaceString ns, bool isUpdateOrDelete, - const BSONObj& query); + const BSONObj& query, + const BSONObj& collation); } // namespace write_without_shard_key } // namespace mongo diff --git a/src/mongo/s/write_ops/write_without_shard_key_util_test.cpp b/src/mongo/s/write_ops/write_without_shard_key_util_test.cpp index 943746151d5..ffd06abc44e 100644 --- a/src/mongo/s/write_ops/write_without_shard_key_util_test.cpp +++ b/src/mongo/s/write_ops/write_without_shard_key_util_test.cpp @@ -80,12 +80,19 @@ protected: TEST_F(WriteWithoutShardKeyUtilTest, WriteQueryContainingFullShardKeyCanTargetSingleDocument) { RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", true); - auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("a" << 1 << "b" << 1)); + auto useTwoPhaseProtocol = + write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + true /* isUpdateOrDelete */, + BSON("a" << 1 << "b" << 1), + {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, false); - useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("a" << 1 << "b" << 1)); + useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + false /* isUpdateOrDelete */, + BSON("a" << 1 << "b" << 1), + {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, false); } @@ -94,11 +101,11 @@ TEST_F(WriteWithoutShardKeyUtilTest, RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", true); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("a" << 1)); + getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("a" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, true); useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("a" << 1)); + getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("a" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, true); } @@ -107,7 +114,7 @@ TEST_F(WriteWithoutShardKeyUtilTest, RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", true); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("_id" << 1)); + getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("_id" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, false); } @@ -116,11 +123,11 @@ TEST_F(WriteWithoutShardKeyUtilTest, RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", true); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("x" << 1)); + getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("x" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, true); useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("x" << 1)); + getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("x" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, true); } @@ -128,7 +135,7 @@ TEST_F(WriteWithoutShardKeyUtilTest, FindAndModifyQueryWithOnlyIdMustUseTwoPhase RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", true); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("_id" << 1)); + getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("_id" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, true); } @@ -136,7 +143,7 @@ TEST_F(WriteWithoutShardKeyUtilTest, FindAndModifyQueryWithoutShardKeyMustUseTwo RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", true); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("x" << 1)); + getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("x" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, true); } @@ -144,7 +151,7 @@ TEST_F(WriteWithoutShardKeyUtilTest, QueryWithFeatureFlagDisabledDoesNotUseTwoPh RAIIServerParameterControllerForTest featureFlagController( "featureFlagUpdateOneWithoutShardKey", false); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("x" << 1)); + getOpCtx(), kNss, false /* isUpdateOrDelete */, BSON("x" << 1), {} /* collation */); ASSERT_EQ(useTwoPhaseProtocol, false); } @@ -170,10 +177,83 @@ TEST_F(UnshardedCollectionTest, UnshardedCollectionDoesNotUseTwoPhaseProtocol) { ASSERT(!cm.isSharded()); auto useTwoPhaseProtocol = write_without_shard_key::useTwoPhaseProtocol( - getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("x" << 1)); + getOpCtx(), kNss, true /* isUpdateOrDelete */, BSON("x" << 1), {} /* collation */); + ASSERT_EQ(useTwoPhaseProtocol, false); +} + +TEST_F(WriteWithoutShardKeyUtilTest, + WriteQueryWithFullShardKeyAndCollationWithCollatableTypesUsesTwoPhaseProtocol) { + RAIIServerParameterControllerForTest featureFlagController( + "featureFlagUpdateOneWithoutShardKey", true); + auto useTwoPhaseProtocol = + write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + true /* isUpdateOrDelete */, + BSON("a" + << "a" + << "b" + << "b"), + BSON("collation" + << "lowercase") /* collation */); + ASSERT_EQ(useTwoPhaseProtocol, true); +} + +TEST_F(WriteWithoutShardKeyUtilTest, + WriteQueryWithFullShardKeyAndCollationWithoutCollatableTypesDoesNotUseTwoPhaseProtocol) { + RAIIServerParameterControllerForTest featureFlagController( + "featureFlagUpdateOneWithoutShardKey", true); + auto useTwoPhaseProtocol = + write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + true /* isUpdateOrDelete */, + BSON("a" << 1 << "b" << 1), + BSON("collation" + << "lowercase") /* collation */); + ASSERT_EQ(useTwoPhaseProtocol, false); +} + +TEST_F(WriteWithoutShardKeyUtilTest, + WriteQueryWithOnlyIdAndCollationWithCollatableTypeUsesTwoPhaseProtocol) { + RAIIServerParameterControllerForTest featureFlagController( + "featureFlagUpdateOneWithoutShardKey", true); + auto useTwoPhaseProtocol = + write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + true /* isUpdateOrDelete */, + BSON("_id" + << "hello"), + BSON("collation" + << "lowercase") /* collation */); + ASSERT_EQ(useTwoPhaseProtocol, true); +} + +TEST_F(WriteWithoutShardKeyUtilTest, + WriteQueryWithOnlyIdAndCollationWithoutCollatableTypeDoesNotUseTwoPhaseProtocol) { + RAIIServerParameterControllerForTest featureFlagController( + "featureFlagUpdateOneWithoutShardKey", true); + auto useTwoPhaseProtocol = + write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + true /* isUpdateOrDelete */, + BSON("_id" << 1), + BSON("collation" + << "lowercase") /* collation */); ASSERT_EQ(useTwoPhaseProtocol, false); } +TEST_F(WriteWithoutShardKeyUtilTest, + WriteQueryWithOnlyIdThatIsNotADirectEqualityUsesTwoPhaseProtocol) { + RAIIServerParameterControllerForTest featureFlagController( + "featureFlagUpdateOneWithoutShardKey", true); + auto useTwoPhaseProtocol = + write_without_shard_key::useTwoPhaseProtocol(getOpCtx(), + kNss, + true /* isUpdateOrDelete */, + BSON("_id" << BSON("$gt" << 1)), + {} /* collation */); + ASSERT_EQ(useTwoPhaseProtocol, true); +} + } // namespace } // namespace write_without_shard_key } // namespace mongo |