summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSanika Phanse <sanika.phanse@mongodb.com>2022-08-24 18:00:56 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-24 18:35:51 +0000
commitd6ed8415e2499f972dacf479b4760db7e8ee2959 (patch)
tree668accd64aa576fb84ce59328e42a6176edc44d7 /src
parent40a2da19ffc71fc9b22ea9c828ef9de6a79fa35a (diff)
downloadmongo-d6ed8415e2499f972dacf479b4760db7e8ee2959.tar.gz
SERVER-56127 Retryable update may execute more than once if chunk is migrated and shard key pattern uses nested fields
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/repl/oplog_entry.cpp14
-rw-r--r--src/mongo/db/repl/oplog_entry.h5
-rw-r--r--src/mongo/db/s/session_catalog_migration_source.cpp3
-rw-r--r--src/mongo/s/SConscript1
-rw-r--r--src/mongo/s/shard_key_pattern.cpp57
-rw-r--r--src/mongo/s/shard_key_pattern.h45
-rw-r--r--src/mongo/s/shard_key_pattern_test.cpp103
7 files changed, 224 insertions, 4 deletions
diff --git a/src/mongo/db/repl/oplog_entry.cpp b/src/mongo/db/repl/oplog_entry.cpp
index 0ee07fb03ee..3c6b0568413 100644
--- a/src/mongo/db/repl/oplog_entry.cpp
+++ b/src/mongo/db/repl/oplog_entry.cpp
@@ -277,6 +277,20 @@ bool OplogEntry::isCrudOpType() const {
return isCrudOpType(getOpType());
}
+bool OplogEntry::isUpdateOrDelete() const {
+ auto opType = getOpType();
+ switch (opType) {
+ case OpTypeEnum::kDelete:
+ case OpTypeEnum::kUpdate:
+ return true;
+ case OpTypeEnum::kInsert:
+ case OpTypeEnum::kCommand:
+ case OpTypeEnum::kNoop:
+ return false;
+ }
+ MONGO_UNREACHABLE;
+}
+
bool OplogEntry::shouldPrepare() const {
return getCommandType() == CommandType::kApplyOps &&
getObject()[ApplyOpsCommandInfoBase::kPrepareFieldName].booleanSafe();
diff --git a/src/mongo/db/repl/oplog_entry.h b/src/mongo/db/repl/oplog_entry.h
index 7462d70694e..a557b25ee35 100644
--- a/src/mongo/db/repl/oplog_entry.h
+++ b/src/mongo/db/repl/oplog_entry.h
@@ -231,6 +231,11 @@ public:
bool isCrudOpType() const;
/**
+ * Returns true if the oplog entry is for an Update or Delete operation.
+ */
+ bool isUpdateOrDelete() const;
+
+ /**
* Returns if the operation should be prepared. Must be called on an 'applyOps' entry.
*/
bool shouldPrepare() const;
diff --git a/src/mongo/db/s/session_catalog_migration_source.cpp b/src/mongo/db/s/session_catalog_migration_source.cpp
index 7c3ac3b2fd3..ba7c3338719 100644
--- a/src/mongo/db/s/session_catalog_migration_source.cpp
+++ b/src/mongo/db/s/session_catalog_migration_source.cpp
@@ -314,8 +314,7 @@ bool SessionCatalogMigrationSource::_handleWriteHistory(WithLock, OperationConte
}
if (nextOplog->isCrudOpType()) {
- auto shardKey =
- _keyPattern.extractShardKeyFromDoc(nextOplog->getObjectContainingDocumentKey());
+ auto shardKey = _keyPattern.extractShardKeyFromOplogEntry(*nextOplog);
if (!_chunkRange.containsKey(shardKey)) {
continue;
}
diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript
index 3ba193d8a1f..e1be6ac9065 100644
--- a/src/mongo/s/SConscript
+++ b/src/mongo/s/SConscript
@@ -90,6 +90,7 @@ env.Library(
LIBDEPS=[
'$BUILD_DIR/mongo/db/matcher/expressions',
'$BUILD_DIR/mongo/db/query/query_planner',
+ '$BUILD_DIR/mongo/db/repl/oplog_entry',
'$BUILD_DIR/mongo/db/storage/key_string',
'$BUILD_DIR/mongo/db/update/update_common',
'$BUILD_DIR/mongo/util/concurrency/ticketholder',
diff --git a/src/mongo/s/shard_key_pattern.cpp b/src/mongo/s/shard_key_pattern.cpp
index 5d59c25653c..8b56f945fe3 100644
--- a/src/mongo/s/shard_key_pattern.cpp
+++ b/src/mongo/s/shard_key_pattern.cpp
@@ -54,6 +54,8 @@ constexpr size_t kMaxFlattenedInCombinations = 4000000;
constexpr auto kIdField = "_id"_sd;
+const BSONObj kNullObj = BSON("" << BSONNULL);
+
/**
* Currently the allowable shard keys are either:
* i) a hashed single field, e.g. { a : "hashed" }, or
@@ -102,6 +104,10 @@ std::vector<std::unique_ptr<FieldRef>> parseShardKeyPattern(const BSONObj& keyPa
return parsedPaths;
}
+bool isValidShardKeyElementForExtractionFromDocument(const BSONElement& element) {
+ return element.type() != Array;
+}
+
bool isValidShardKeyElement(const BSONElement& element) {
return !element.eoo() && element.type() != Array;
}
@@ -152,6 +158,15 @@ BSONElement findEqualityElement(const EqualityMatches& equalities, const FieldRe
return extractKeyElementFromMatchable(matchable, suffixStr);
}
+BSONElement extractFieldFromDocumentKey(const BSONObj& documentKey, StringData fieldName) {
+ BSONElement output;
+ for (auto&& documentKeyElt : documentKey) {
+ if (fieldName == documentKeyElt.fieldNameStringData()) {
+ return documentKeyElt;
+ }
+ }
+ return output;
+}
} // namespace
constexpr int ShardKeyPattern::kMaxShardKeySizeBytes;
@@ -271,11 +286,53 @@ BSONObj ShardKeyPattern::extractShardKeyFromMatchable(const MatchableDocument& m
return keyBuilder.obj();
}
+BSONObj ShardKeyPattern::extractShardKeyFromDocumentKey(const BSONObj& documentKey) const {
+ BSONObjBuilder keyBuilder;
+ for (auto&& shardKeyField : _keyPattern.toBSON()) {
+ auto matchEl =
+ extractFieldFromDocumentKey(documentKey, shardKeyField.fieldNameStringData());
+
+ if (matchEl.eoo()) {
+ matchEl = kNullObj.firstElement();
+ }
+
+ // A shard key field cannot have array values. If we encounter array values return
+ // immediately.
+ if (!isValidShardKeyElementForExtractionFromDocument(matchEl)) {
+ return BSONObj();
+ }
+
+ if (isHashedPatternEl(shardKeyField)) {
+ keyBuilder.append(
+ shardKeyField.fieldNameStringData(),
+ BSONElementHasher::hash64(matchEl, BSONElementHasher::DEFAULT_HASH_SEED));
+ } else {
+ keyBuilder.appendAs(matchEl, shardKeyField.fieldNameStringData());
+ }
+ }
+ dassert(isShardKey(keyBuilder.asTempObj()));
+ return keyBuilder.obj();
+}
+
BSONObj ShardKeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const {
BSONMatchableDocument matchable(doc);
return extractShardKeyFromMatchable(matchable);
}
+BSONObj ShardKeyPattern::extractShardKeyFromOplogEntry(const repl::OplogEntry& entry) const {
+ if (!entry.isCrudOpType()) {
+ return BSONObj();
+ }
+
+ auto objWithDocumentKey = entry.getObjectContainingDocumentKey();
+
+ if (!entry.isUpdateOrDelete()) {
+ return extractShardKeyFromDoc(objWithDocumentKey);
+ }
+
+ return extractShardKeyFromDocumentKey(objWithDocumentKey);
+}
+
std::vector<StringData> ShardKeyPattern::findMissingShardKeyFieldsFromDoc(const BSONObj doc) const {
std::vector<StringData> missingFields;
BSONMatchableDocument matchable(doc);
diff --git a/src/mongo/s/shard_key_pattern.h b/src/mongo/s/shard_key_pattern.h
index 42557730e0c..ccc570fe4d7 100644
--- a/src/mongo/s/shard_key_pattern.h
+++ b/src/mongo/s/shard_key_pattern.h
@@ -38,6 +38,7 @@
#include "mongo/db/keypattern.h"
#include "mongo/db/matcher/matchable.h"
#include "mongo/db/query/index_bounds.h"
+#include "mongo/db/repl/oplog_entry.h"
namespace mongo {
@@ -121,6 +122,30 @@ public:
BSONObj normalizeShardKey(const BSONObj& shardKey) const;
/**
+ * Given a document key expressed in dotted notation, extracts its shard key, applying hashing
+ * if necessary.
+ * Note: For a shardKeyPattern {a.b: 1, c: 1}
+ * The documentKey for the document {a: {b: 10}, c: 20} is {a.b: 10, c: 20}
+ * The documentKey for the document {a: {b: 10, d: 20}, c: 30} is {a.b: 10, c: 30}
+ * The documentKey for the document {a: {b: {d: 10}}, c: 30} is {a.b: {d: 10}, c: 30}
+ *
+ * Examples:
+ * If 'this' KeyPattern is {a: 1}
+ * {a: 10, b: 20} --> returns {a: 10}
+ * {b: 20} --> returns {a: null}
+ * {a: {b: 10}} --> returns {a: {b: 10}}
+ * {a: [1,2]} --> returns {}
+ * If 'this' KeyPattern is {a.b: 1, c: 1}
+ * {a.b: 10, c: 20} --> returns {a.b: 10, c: 20}
+ * {a.b: 10} --> returns {a.b: 10, c: null}
+ * {a.b: {z: 10}, c: 20} --> returns {a.b: {z: 10}, c: 20}
+ * If 'this' KeyPattern is {a : "hashed"}
+ * {a: 10, b: 20} --> returns {a: NumberLong("7766103514953448109")}
+ * {b: 20} --> returns {a: NumberLong("2338878944348059895")}
+ */
+ BSONObj extractShardKeyFromDocumentKey(const BSONObj& documentKey) const;
+
+ /**
* Given a MatchableDocument, extracts the shard key corresponding to the key pattern.
* For each path in the shard key pattern, extracts a value from the matchable document.
*
@@ -151,6 +176,26 @@ public:
BSONObj extractShardKeyFromDoc(const BSONObj& doc) const;
/**
+ * Given an Oplog entry, extracts the shard key corresponding to the key pattern for insert,
+ * update, and delete op types. If the op type is not a CRUD operation, an empty BSONObj()
+ * will be returned.
+ *
+ * For update and delete operations, the Oplog entry will contain an object with the document
+ * key.
+ *
+ * For insert operations, the Oplog entry will contain the original document from which the
+ * document key must be extracted
+ *
+ * Examples:
+ * For KeyPattern {'a.b': 1}
+ * If the oplog entries contains field op='i'
+ * oplog contains: { a : { b : "1" } }
+ * If the oplog entries contains field op='u' or op='d'
+ * oplog contains: { 'a.b': "1" }
+ */
+ BSONObj extractShardKeyFromOplogEntry(const repl::OplogEntry& entry) const;
+
+ /**
* Returns the set of shard key fields which are absent from the given document. Note that the
* vector returned by this method contains StringData elements pointing into ShardKeyPattern's
* underlying BSONObj. If the fieldnames are required to survive beyond the lifetime of this
diff --git a/src/mongo/s/shard_key_pattern_test.cpp b/src/mongo/s/shard_key_pattern_test.cpp
index ecde034896b..15e1edf03fe 100644
--- a/src/mongo/s/shard_key_pattern_test.cpp
+++ b/src/mongo/s/shard_key_pattern_test.cpp
@@ -29,11 +29,10 @@
#include "mongo/platform/basic.h"
-#include "mongo/s/shard_key_pattern.h"
-
#include "mongo/db/hasher.h"
#include "mongo/db/json.h"
#include "mongo/db/query/query_test_service_context.h"
+#include "mongo/s/shard_key_pattern.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -41,6 +40,32 @@ namespace {
using std::string;
+/**
+ * Creates OplogEntry with given field values.
+ */
+repl::OplogEntry makeOplogEntry(repl::OpTime opTime,
+ repl::OpTypeEnum opType,
+ NamespaceString nss,
+ BSONObj oField,
+ boost::optional<BSONObj> o2Field = boost::none) {
+ return {repl::OplogEntry(opTime, // optime
+ boost::none, // hash
+ opType, // opType
+ nss, // namespace
+ boost::none, // uuid
+ boost::none, // fromMigrate
+ repl::OplogEntry::kOplogVersion, // version
+ oField, // o
+ o2Field, // o2
+ {}, // sessionInfo
+ boost::none, // upsert
+ Date_t(), // wall clock time
+ {}, // statement ids
+ boost::none, // optime of previous write within same transaction
+ boost::none, // pre-image optime
+ boost::none)}; // post-image optime
+}
+
TEST(ShardKeyPattern, SingleFieldShardKeyPatternsValidityCheck) {
ShardKeyPattern(BSON("a" << 1));
ShardKeyPattern(BSON("a" << 1.0f));
@@ -124,6 +149,10 @@ static BSONObj docKey(const ShardKeyPattern& pattern, const BSONObj& doc) {
return pattern.extractShardKeyFromDoc(doc);
}
+static BSONObj docKeyFromOplog(const ShardKeyPattern& pattern, const repl::OplogEntry& entry) {
+ return pattern.extractShardKeyFromOplogEntry(entry);
+}
+
TEST(ShardKeyPattern, ExtractDocShardKeySingle) {
//
// Single field ShardKeyPatterns
@@ -207,6 +236,76 @@ TEST(ShardKeyPattern, ExtractDocShardKeyNested) {
ASSERT_BSONOBJ_EQ(docKey(pattern, fromjson("{a:{b:[10, 20]}, c:30}")), BSONObj());
}
+TEST(ShardKeyPattern, ExtractShardKeyFromOplogUnnested) {
+ //
+ // Unnested ShardKeyPatterns from oplog entries with CRUD operation
+ //
+
+ ShardKeyPattern pattern(BSON("a" << 1));
+ auto deleteOplog = makeOplogEntry(repl::OpTime(Timestamp(50, 10), 1), // optime
+ repl::OpTypeEnum::kDelete, // op type
+ NamespaceString("a"), // namespace
+ BSON("_id" << 1 << "a" << 5)); // o
+ auto insertOplog = makeOplogEntry(repl::OpTime(Timestamp(60, 10), 1), // optime
+ repl::OpTypeEnum::kInsert, // op type
+ NamespaceString("a"), // namespace
+ BSON("_id" << 2 << "a" << 6)); // o
+ auto updateOplog = makeOplogEntry(repl::OpTime(Timestamp(70, 10), 1), // optime
+ repl::OpTypeEnum::kUpdate, // op type
+ NamespaceString("a"), // namespace
+ BSON("_id" << 3), // o
+ BSON("_id" << 3 << "a" << 7)); // o2
+
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, deleteOplog), fromjson("{a: 5}"));
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, insertOplog), fromjson("{a: 6}"));
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, updateOplog), fromjson("{a: 7}"));
+}
+
+TEST(ShardKeyPattern, ExtractShardKeyFromOplogNested) {
+ //
+ // Nested ShardKeyPatterns from oplog entries with CRUD operation
+ //
+
+ ShardKeyPattern pattern(BSON("a.b" << 1));
+ auto deleteOplog = makeOplogEntry(repl::OpTime(Timestamp(50, 10), 1), // optime
+ repl::OpTypeEnum::kDelete, // op type
+ NamespaceString("a.b"), // namespace
+ BSON("_id" << 1 << "a.b" << 5)); // o
+ auto insertOplog = makeOplogEntry(repl::OpTime(Timestamp(60, 10), 1), // optime
+ repl::OpTypeEnum::kInsert, // op type
+ NamespaceString("a.b"), // namespace
+ BSON("_id" << 2 << "a" << BSON("b" << 6))); // o
+ auto updateOplog = makeOplogEntry(repl::OpTime(Timestamp(70, 10), 1), // optime
+ repl::OpTypeEnum::kUpdate, // op type
+ NamespaceString("a.b"), // namespace
+ BSON("_id" << 3), // o
+ BSON("_id" << 3 << "a.b" << 7)); // o2
+
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, deleteOplog), fromjson("{'a.b': 5}"));
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, insertOplog), fromjson("{'a.b': 6}"));
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, updateOplog), fromjson("{'a.b': 7}"));
+}
+
+TEST(ShardKeyPattern, ExtractShardKeyFromOplogNonCRUD) {
+ //
+ // Oplogs with non-CRUD op types
+ //
+
+ ShardKeyPattern pattern(BSON("a.b" << 1));
+ auto noopOplog = makeOplogEntry(repl::OpTime(Timestamp(50, 10), 1), // optime
+ repl::OpTypeEnum::kNoop, // op type
+ NamespaceString("a.b"), // namespace
+ BSON("_id" << 1 << "a.b" << 5)); // o
+ auto commandOplog = makeOplogEntry(repl::OpTime(Timestamp(60, 10), 1), // optime
+ repl::OpTypeEnum::kCommand, // op type
+ NamespaceString("a.b"), // namespace
+ BSON("create"
+ << "c")); // o
+
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, noopOplog), BSONObj());
+ ASSERT_BSONOBJ_EQ(docKeyFromOplog(pattern, commandOplog), BSONObj());
+}
+
TEST(ShardKeyPattern, ExtractDocShardKeyDeepNested) {
//
// Deeply nested ShardKeyPatterns