diff options
author | Sanika Phanse <sanika.phanse@mongodb.com> | 2022-08-24 18:00:56 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-24 18:35:51 +0000 |
commit | d6ed8415e2499f972dacf479b4760db7e8ee2959 (patch) | |
tree | 668accd64aa576fb84ce59328e42a6176edc44d7 /src | |
parent | 40a2da19ffc71fc9b22ea9c828ef9de6a79fa35a (diff) | |
download | mongo-d6ed8415e2499f972dacf479b4760db7e8ee2959.tar.gz |
SERVER-56127 Retryable update may execute more than once if chunk is migrated and shard key pattern uses nested fields
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/repl/oplog_entry.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_entry.h | 5 | ||||
-rw-r--r-- | src/mongo/db/s/session_catalog_migration_source.cpp | 3 | ||||
-rw-r--r-- | src/mongo/s/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/s/shard_key_pattern.cpp | 57 | ||||
-rw-r--r-- | src/mongo/s/shard_key_pattern.h | 45 | ||||
-rw-r--r-- | src/mongo/s/shard_key_pattern_test.cpp | 103 |
7 files changed, 224 insertions, 4 deletions
diff --git a/src/mongo/db/repl/oplog_entry.cpp b/src/mongo/db/repl/oplog_entry.cpp index 0ee07fb03ee..3c6b0568413 100644 --- a/src/mongo/db/repl/oplog_entry.cpp +++ b/src/mongo/db/repl/oplog_entry.cpp @@ -277,6 +277,20 @@ bool OplogEntry::isCrudOpType() const { return isCrudOpType(getOpType()); } +bool OplogEntry::isUpdateOrDelete() const { + auto opType = getOpType(); + switch (opType) { + case OpTypeEnum::kDelete: + case OpTypeEnum::kUpdate: + return true; + case OpTypeEnum::kInsert: + case OpTypeEnum::kCommand: + case OpTypeEnum::kNoop: + return false; + } + MONGO_UNREACHABLE; +} + bool OplogEntry::shouldPrepare() const { return getCommandType() == CommandType::kApplyOps && getObject()[ApplyOpsCommandInfoBase::kPrepareFieldName].booleanSafe(); diff --git a/src/mongo/db/repl/oplog_entry.h b/src/mongo/db/repl/oplog_entry.h index 7462d70694e..a557b25ee35 100644 --- a/src/mongo/db/repl/oplog_entry.h +++ b/src/mongo/db/repl/oplog_entry.h @@ -231,6 +231,11 @@ public: bool isCrudOpType() const; /** + * Returns true if the oplog entry is for an Update or Delete operation. + */ + bool isUpdateOrDelete() const; + + /** * Returns if the operation should be prepared. Must be called on an 'applyOps' entry. */ bool shouldPrepare() const; diff --git a/src/mongo/db/s/session_catalog_migration_source.cpp b/src/mongo/db/s/session_catalog_migration_source.cpp index 7c3ac3b2fd3..ba7c3338719 100644 --- a/src/mongo/db/s/session_catalog_migration_source.cpp +++ b/src/mongo/db/s/session_catalog_migration_source.cpp @@ -314,8 +314,7 @@ bool SessionCatalogMigrationSource::_handleWriteHistory(WithLock, OperationConte } if (nextOplog->isCrudOpType()) { - auto shardKey = - _keyPattern.extractShardKeyFromDoc(nextOplog->getObjectContainingDocumentKey()); + auto shardKey = _keyPattern.extractShardKeyFromOplogEntry(*nextOplog); if (!_chunkRange.containsKey(shardKey)) { continue; } diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript index 3ba193d8a1f..e1be6ac9065 100644 --- a/src/mongo/s/SConscript +++ b/src/mongo/s/SConscript @@ -90,6 +90,7 @@ env.Library( LIBDEPS=[ '$BUILD_DIR/mongo/db/matcher/expressions', '$BUILD_DIR/mongo/db/query/query_planner', + '$BUILD_DIR/mongo/db/repl/oplog_entry', '$BUILD_DIR/mongo/db/storage/key_string', '$BUILD_DIR/mongo/db/update/update_common', '$BUILD_DIR/mongo/util/concurrency/ticketholder', diff --git a/src/mongo/s/shard_key_pattern.cpp b/src/mongo/s/shard_key_pattern.cpp index 5d59c25653c..8b56f945fe3 100644 --- a/src/mongo/s/shard_key_pattern.cpp +++ b/src/mongo/s/shard_key_pattern.cpp @@ -54,6 +54,8 @@ constexpr size_t kMaxFlattenedInCombinations = 4000000; constexpr auto kIdField = "_id"_sd; +const BSONObj kNullObj = BSON("" << BSONNULL); + /** * Currently the allowable shard keys are either: * i) a hashed single field, e.g. { a : "hashed" }, or @@ -102,6 +104,10 @@ std::vector<std::unique_ptr<FieldRef>> parseShardKeyPattern(const BSONObj& keyPa return parsedPaths; } +bool isValidShardKeyElementForExtractionFromDocument(const BSONElement& element) { + return element.type() != Array; +} + bool isValidShardKeyElement(const BSONElement& element) { return !element.eoo() && element.type() != Array; } @@ -152,6 +158,15 @@ BSONElement findEqualityElement(const EqualityMatches& equalities, const FieldRe return extractKeyElementFromMatchable(matchable, suffixStr); } +BSONElement extractFieldFromDocumentKey(const BSONObj& documentKey, StringData fieldName) { + BSONElement output; + for (auto&& documentKeyElt : documentKey) { + if (fieldName == documentKeyElt.fieldNameStringData()) { + return documentKeyElt; + } + } + return output; +} } // namespace constexpr int ShardKeyPattern::kMaxShardKeySizeBytes; @@ -271,11 +286,53 @@ BSONObj ShardKeyPattern::extractShardKeyFromMatchable(const MatchableDocument& m return keyBuilder.obj(); } +BSONObj ShardKeyPattern::extractShardKeyFromDocumentKey(const BSONObj& documentKey) const { + BSONObjBuilder keyBuilder; + for (auto&& shardKeyField : _keyPattern.toBSON()) { + auto matchEl = + extractFieldFromDocumentKey(documentKey, shardKeyField.fieldNameStringData()); + + if (matchEl.eoo()) { + matchEl = kNullObj.firstElement(); + } + + // A shard key field cannot have array values. If we encounter array values return + // immediately. + if (!isValidShardKeyElementForExtractionFromDocument(matchEl)) { + return BSONObj(); + } + + if (isHashedPatternEl(shardKeyField)) { + keyBuilder.append( + shardKeyField.fieldNameStringData(), + BSONElementHasher::hash64(matchEl, BSONElementHasher::DEFAULT_HASH_SEED)); + } else { + keyBuilder.appendAs(matchEl, shardKeyField.fieldNameStringData()); + } + } + dassert(isShardKey(keyBuilder.asTempObj())); + return keyBuilder.obj(); +} + BSONObj ShardKeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const { BSONMatchableDocument matchable(doc); return extractShardKeyFromMatchable(matchable); } +BSONObj ShardKeyPattern::extractShardKeyFromOplogEntry(const repl::OplogEntry& entry) const { + if (!entry.isCrudOpType()) { + return BSONObj(); + } + + auto objWithDocumentKey = entry.getObjectContainingDocumentKey(); + + if (!entry.isUpdateOrDelete()) { + return extractShardKeyFromDoc(objWithDocumentKey); + } + + return extractShardKeyFromDocumentKey(objWithDocumentKey); +} + std::vector<StringData> ShardKeyPattern::findMissingShardKeyFieldsFromDoc(const BSONObj doc) const { std::vector<StringData> missingFields; BSONMatchableDocument matchable(doc); diff --git a/src/mongo/s/shard_key_pattern.h b/src/mongo/s/shard_key_pattern.h index 42557730e0c..ccc570fe4d7 100644 --- a/src/mongo/s/shard_key_pattern.h +++ b/src/mongo/s/shard_key_pattern.h @@ -38,6 +38,7 @@ #include "mongo/db/keypattern.h" #include "mongo/db/matcher/matchable.h" #include "mongo/db/query/index_bounds.h" +#include "mongo/db/repl/oplog_entry.h" namespace mongo { @@ -121,6 +122,30 @@ public: BSONObj normalizeShardKey(const BSONObj& shardKey) const; /** + * Given a document key expressed in dotted notation, extracts its shard key, applying hashing + * if necessary. + * Note: For a shardKeyPattern {a.b: 1, c: 1} + * The documentKey for the document {a: {b: 10}, c: 20} is {a.b: 10, c: 20} + * The documentKey for the document {a: {b: 10, d: 20}, c: 30} is {a.b: 10, c: 30} + * The documentKey for the document {a: {b: {d: 10}}, c: 30} is {a.b: {d: 10}, c: 30} + * + * Examples: + * If 'this' KeyPattern is {a: 1} + * {a: 10, b: 20} --> returns {a: 10} + * {b: 20} --> returns {a: null} + * {a: {b: 10}} --> returns {a: {b: 10}} + * {a: [1,2]} --> returns {} + * If 'this' KeyPattern is {a.b: 1, c: 1} + * {a.b: 10, c: 20} --> returns {a.b: 10, c: 20} + * {a.b: 10} --> returns {a.b: 10, c: null} + * {a.b: {z: 10}, c: 20} --> returns {a.b: {z: 10}, c: 20} + * If 'this' KeyPattern is {a : "hashed"} + * {a: 10, b: 20} --> returns {a: NumberLong("7766103514953448109")} + * {b: 20} --> returns {a: NumberLong("2338878944348059895")} + */ + BSONObj extractShardKeyFromDocumentKey(const BSONObj& documentKey) const; + + /** * Given a MatchableDocument, extracts the shard key corresponding to the key pattern. * For each path in the shard key pattern, extracts a value from the matchable document. * @@ -151,6 +176,26 @@ public: BSONObj extractShardKeyFromDoc(const BSONObj& doc) const; /** + * Given an Oplog entry, extracts the shard key corresponding to the key pattern for insert, + * update, and delete op types. If the op type is not a CRUD operation, an empty BSONObj() + * will be returned. + * + * For update and delete operations, the Oplog entry will contain an object with the document + * key. + * + * For insert operations, the Oplog entry will contain the original document from which the + * document key must be extracted + * + * Examples: + * For KeyPattern {'a.b': 1} + * If the oplog entries contains field op='i' + * oplog contains: { a : { b : "1" } } + * If the oplog entries contains field op='u' or op='d' + * oplog contains: { 'a.b': "1" } + */ + BSONObj extractShardKeyFromOplogEntry(const repl::OplogEntry& entry) const; + + /** * Returns the set of shard key fields which are absent from the given document. Note that the * vector returned by this method contains StringData elements pointing into ShardKeyPattern's * underlying BSONObj. If the fieldnames are required to survive beyond the lifetime of this diff --git a/src/mongo/s/shard_key_pattern_test.cpp b/src/mongo/s/shard_key_pattern_test.cpp index ecde034896b..15e1edf03fe 100644 --- a/src/mongo/s/shard_key_pattern_test.cpp +++ b/src/mongo/s/shard_key_pattern_test.cpp @@ -29,11 +29,10 @@ #include "mongo/platform/basic.h" -#include "mongo/s/shard_key_pattern.h" - #include "mongo/db/hasher.h" #include "mongo/db/json.h" #include "mongo/db/query/query_test_service_context.h" +#include "mongo/s/shard_key_pattern.h" #include "mongo/unittest/unittest.h" namespace mongo { @@ -41,6 +40,32 @@ namespace { using std::string; +/** + * Creates OplogEntry with given field values. + */ +repl::OplogEntry makeOplogEntry(repl::OpTime opTime, + repl::OpTypeEnum opType, + NamespaceString nss, + BSONObj oField, + boost::optional<BSONObj> o2Field = boost::none) { + return {repl::OplogEntry(opTime, // optime + boost::none, // hash + opType, // opType + nss, // namespace + boost::none, // uuid + boost::none, // fromMigrate + repl::OplogEntry::kOplogVersion, // version + oField, // o + o2Field, // o2 + {}, // sessionInfo + boost::none, // upsert + Date_t(), // wall clock time + {}, // statement ids + boost::none, // optime of previous write within same transaction + boost::none, // pre-image optime + boost::none)}; // post-image optime +} + TEST(ShardKeyPattern, SingleFieldShardKeyPatternsValidityCheck) { ShardKeyPattern(BSON("a" << 1)); ShardKeyPattern(BSON("a" << 1.0f)); @@ -124,6 +149,10 @@ static BSONObj docKey(const ShardKeyPattern& pattern, const BSONObj& doc) { return pattern.extractShardKeyFromDoc(doc); } +static BSONObj docKeyFromOplog(const ShardKeyPattern& pattern, const repl::OplogEntry& entry) { + return pattern.extractShardKeyFromOplogEntry(entry); +} + TEST(ShardKeyPattern, ExtractDocShardKeySingle) { // // Single field ShardKeyPatterns @@ -207,6 +236,76 @@ TEST(ShardKeyPattern, ExtractDocShardKeyNested) { ASSERT_BSONOBJ_EQ(docKey(pattern, fromjson("{a:{b:[10, 20]}, c:30}")), BSONObj()); } +TEST(ShardKeyPattern, ExtractShardKeyFromOplogUnnested) { + // + // Unnested ShardKeyPatterns from oplog entries with CRUD operation + // + + ShardKeyPattern pattern(BSON("a" << 1)); + auto deleteOplog = makeOplogEntry(repl::OpTime(Timestamp(50, 10), 1), // optime + repl::OpTypeEnum::kDelete, // op type + NamespaceString("a"), // namespace + BSON("_id" << 1 << "a" << 5)); // o + auto insertOplog = makeOplogEntry(repl::OpTime(Timestamp(60, 10), 1), // optime + repl::OpTypeEnum::kInsert, // op type + NamespaceString("a"), // namespace + BSON("_id" << 2 << "a" << 6)); // o + auto updateOplog = makeOplogEntry(repl::OpTime(Timestamp(70, 10), 1), // optime + repl::OpTypeEnum::kUpdate, // op type + NamespaceString("a"), // namespace + BSON("_id" << 3), // o + BSON("_id" << 3 << "a" << 7)); // o2 + + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, deleteOplog), fromjson("{a: 5}")); + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, insertOplog), fromjson("{a: 6}")); + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, updateOplog), fromjson("{a: 7}")); +} + +TEST(ShardKeyPattern, ExtractShardKeyFromOplogNested) { + // + // Nested ShardKeyPatterns from oplog entries with CRUD operation + // + + ShardKeyPattern pattern(BSON("a.b" << 1)); + auto deleteOplog = makeOplogEntry(repl::OpTime(Timestamp(50, 10), 1), // optime + repl::OpTypeEnum::kDelete, // op type + NamespaceString("a.b"), // namespace + BSON("_id" << 1 << "a.b" << 5)); // o + auto insertOplog = makeOplogEntry(repl::OpTime(Timestamp(60, 10), 1), // optime + repl::OpTypeEnum::kInsert, // op type + NamespaceString("a.b"), // namespace + BSON("_id" << 2 << "a" << BSON("b" << 6))); // o + auto updateOplog = makeOplogEntry(repl::OpTime(Timestamp(70, 10), 1), // optime + repl::OpTypeEnum::kUpdate, // op type + NamespaceString("a.b"), // namespace + BSON("_id" << 3), // o + BSON("_id" << 3 << "a.b" << 7)); // o2 + + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, deleteOplog), fromjson("{'a.b': 5}")); + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, insertOplog), fromjson("{'a.b': 6}")); + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, updateOplog), fromjson("{'a.b': 7}")); +} + +TEST(ShardKeyPattern, ExtractShardKeyFromOplogNonCRUD) { + // + // Oplogs with non-CRUD op types + // + + ShardKeyPattern pattern(BSON("a.b" << 1)); + auto noopOplog = makeOplogEntry(repl::OpTime(Timestamp(50, 10), 1), // optime + repl::OpTypeEnum::kNoop, // op type + NamespaceString("a.b"), // namespace + BSON("_id" << 1 << "a.b" << 5)); // o + auto commandOplog = makeOplogEntry(repl::OpTime(Timestamp(60, 10), 1), // optime + repl::OpTypeEnum::kCommand, // op type + NamespaceString("a.b"), // namespace + BSON("create" + << "c")); // o + + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, noopOplog), BSONObj()); + ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, commandOplog), BSONObj()); +} + TEST(ShardKeyPattern, ExtractDocShardKeyDeepNested) { // // Deeply nested ShardKeyPatterns |