summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRandolph Tan <randolph@10gen.com>2022-11-01 19:52:33 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-21 16:53:19 +0000
commit73bb4a80add2bd900f89025b231e54ed18167f85 (patch)
treebf71b45a693749cb56f8ee73a5ea707124112434 /src
parentc8a222bd7cddb738dac52b83bed55d2bddf9363b (diff)
downloadmongo-73bb4a80add2bd900f89025b231e54ed18167f85.tar.gz
SERVER-68361 Make migration properly handle cases when shard key value modification also results to changes in chunk membership
(cherry picked from commit e9858528d75b8b56211799c982f836b21912e780)
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/op_observer_impl.cpp9
-rw-r--r--src/mongo/db/repl/oplog_entry.h13
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp96
3 files changed, 85 insertions, 33 deletions
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index 4ba2ccf847b..162b5bdbc51 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -607,11 +607,18 @@ void OpObserverImpl::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArg
if (inMultiDocumentTransaction) {
auto operation = OplogEntry::makeUpdateOperation(
args.nss, args.uuid, args.updateArgs.update, args.updateArgs.criteria);
+
+ auto collectionDescription =
+ CollectionShardingState::get(opCtx, args.nss)->getCurrentMetadata();
+ if (collectionDescription->isSharded()) {
+ operation.setPostImageDocumentKey(
+ collectionDescription->extractDocumentKey(args.updateArgs.updatedDoc).getOwned());
+ }
+
txnParticipant.addTransactionOperation(opCtx, operation);
} else {
opTime = replLogUpdate(opCtx, args, storeImagesInSideCollection);
// Check if we're in a retryable write that should save the image to
- // `config.image_collection`.
if (storeImagesInSideCollection && opCtx->getTxnNumber() &&
args.updateArgs.storeDocOption != CollectionUpdateArgs::StoreDocOption::None) {
BSONObj imageDoc;
diff --git a/src/mongo/db/repl/oplog_entry.h b/src/mongo/db/repl/oplog_entry.h
index a557b25ee35..69910d0ba48 100644
--- a/src/mongo/db/repl/oplog_entry.h
+++ b/src/mongo/db/repl/oplog_entry.h
@@ -59,15 +59,18 @@ public:
o.parseProtected(ctxt, bsonObject);
return o;
}
- const BSONObj& getPreImageDocumentKey() const {
- return _preImageDocumentKey;
+
+ const BSONObj& getPostImageDocumentKey() const {
+ return _postImageDocumentKey;
}
- void setPreImageDocumentKey(BSONObj value) {
- _preImageDocumentKey = std::move(value);
+
+ void setPostImageDocumentKey(BSONObj value) {
+ _postImageDocumentKey = std::move(value);
}
private:
- BSONObj _preImageDocumentKey;
+ // Stores the post image _id + shard key values.
+ BSONObj _postImageDocumentKey;
};
/**
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index 051357d57cf..ff23d01ae25 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -72,12 +72,22 @@ const char kRecvChunkAbort[] = "_recvChunkAbort";
const int kMaxObjectPerChunk{250000};
-bool isInRange(const BSONObj& obj,
- const BSONObj& min,
- const BSONObj& max,
- const ShardKeyPattern& shardKeyPattern) {
- BSONObj k = shardKeyPattern.extractShardKeyFromDoc(obj);
- return k.woCompare(min) >= 0 && k.woCompare(max) < 0;
+/**
+ * Returns true if the given BSON object in the shard key value pair format is within the given
+ * range.
+ */
+bool isShardKeyValueInRange(const BSONObj& shardKeyValue, const BSONObj& min, const BSONObj& max) {
+ return shardKeyValue.woCompare(min) >= 0 && shardKeyValue.woCompare(max) < 0;
+}
+
+/**
+ * Returns true if the given BSON document is within the given chunk range.
+ */
+bool isDocInRange(const BSONObj& obj,
+ const BSONObj& min,
+ const BSONObj& max,
+ const ShardKeyPattern& shardKeyPattern) {
+ return isShardKeyValueInRange(shardKeyPattern.extractShardKeyFromDoc(obj), min, max);
}
BSONObj createRequestWithSessionId(StringData commandName,
@@ -177,11 +187,11 @@ void LogTransactionOperationsForShardingHandler::commit(boost::optional<Timestam
auto cloner = dynamic_cast<MigrationChunkClonerSourceLegacy*>(msm->getCloner().get());
auto opType = stmt.getOpType();
- auto documentKey = getDocumentKeyFromReplOperation(stmt, opType);
+ auto preImageDocKey = getDocumentKeyFromReplOperation(stmt, opType);
- auto idElement = documentKey["_id"];
+ auto idElement = preImageDocKey["_id"];
if (idElement.eoo()) {
- warning() << "Received a document with no id, ignoring: " << redact(documentKey);
+ warning() << "Received a document with no id, ignoring: " << redact(preImageDocKey);
continue;
}
@@ -189,18 +199,41 @@ void LogTransactionOperationsForShardingHandler::commit(boost::optional<Timestam
auto const& maxKey = cloner->_args.getMaxKey();
auto const& shardKeyPattern = cloner->_shardKeyPattern;
- if (!isInRange(documentKey, minKey, maxKey, shardKeyPattern)) {
- // If the preImageDoc is not in range but the postImageDoc was, we know that the
- // document has changed shard keys and no longer belongs in the chunk being cloned.
- // We will model the deletion of the preImage document so that the destination chunk
- // does not receive an outdated version of this document.
- if (opType == repl::OpTypeEnum::kUpdate &&
- isInRange(stmt.getPreImageDocumentKey(), minKey, maxKey, shardKeyPattern) &&
- !stmt.getPreImageDocumentKey()["_id"].eoo()) {
- opType = repl::OpTypeEnum::kDelete;
- idElement = stmt.getPreImageDocumentKey()["id"];
- } else {
- continue;
+ // Note: This assumes that prepared transactions will always have post document key
+ // set. There is a small window where create collection coordinator releases the critical
+ // section and before it writes down the chunks for non-empty collections. So in theory,
+ // it is possible to have a prepared transaction while collection is unsharded
+ // and becomes sharded midway. This doesn't happen in practice because the only way to
+ // have a prepared transactions without being sharded is by directly connecting to the
+ // shards and manually preparing the transaction. Another exception is when transaction
+ // is prepared on an older version that doesn't set the post image document key.
+ auto postImageDocKey = stmt.getPostImageDocumentKey();
+ if (postImageDocKey.isEmpty()) {
+ warning() << "Migration encountered a transaction operation without a post image "
+ "document key. "
+ << "preImageDocKey:" << preImageDocKey;
+ } else {
+ auto postShardKeyValues =
+ shardKeyPattern.extractShardKeyFromDocumentKey(postImageDocKey);
+ fassert(6836100, !postShardKeyValues.isEmpty());
+
+ if (!isShardKeyValueInRange(postShardKeyValues, minKey, maxKey)) {
+ // If the preImageDoc is not in range but the postImageDoc was, we know that the
+ // document has changed shard keys and no longer belongs in the chunk being cloned.
+ // We will model the deletion of the preImage document so that the destination chunk
+ // does not receive an outdated version of this document.
+
+ auto preImageShardKeyValues =
+ shardKeyPattern.extractShardKeyFromDocumentKey(preImageDocKey);
+ fassert(6836101, !preImageShardKeyValues.isEmpty());
+
+ if (opType == repl::OpTypeEnum::kUpdate &&
+ isShardKeyValueInRange(preImageShardKeyValues, minKey, maxKey)) {
+ opType = repl::OpTypeEnum::kDelete;
+ idElement = postImageDocKey["_id"];
+ } else {
+ continue;
+ }
}
}
@@ -466,7 +499,7 @@ void MigrationChunkClonerSourceLegacy::cancelClone(OperationContext* opCtx) {
}
bool MigrationChunkClonerSourceLegacy::isDocumentInMigratingChunk(const BSONObj& doc) {
- return isInRange(doc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern);
+ return isDocInRange(doc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern);
}
void MigrationChunkClonerSourceLegacy::onInsertOp(OperationContext* opCtx,
@@ -481,7 +514,7 @@ void MigrationChunkClonerSourceLegacy::onInsertOp(OperationContext* opCtx,
return;
}
- if (!isInRange(insertedDoc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
+ if (!isDocInRange(insertedDoc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
return;
}
@@ -512,13 +545,13 @@ void MigrationChunkClonerSourceLegacy::onUpdateOp(OperationContext* opCtx,
return;
}
- if (!isInRange(postImageDoc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
+ if (!isDocInRange(postImageDoc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
// If the preImageDoc is not in range but the postImageDoc was, we know that the document
// has changed shard keys and no longer belongs in the chunk being cloned. We will model
// the deletion of the preImage document so that the destination chunk does not receive an
// outdated version of this document.
if (preImageDoc &&
- isInRange(*preImageDoc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
+ isDocInRange(*preImageDoc, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
onDeleteOp(opCtx, *preImageDoc, opTime, prePostImageOpTime);
}
return;
@@ -665,9 +698,20 @@ Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
auto nextRecordId = *iter;
lk.unlock();
+ ON_BLOCK_EXIT([&lk] { lk.lock(); });
Snapshotted<BSONObj> doc;
if (collection->findDoc(opCtx, nextRecordId, &doc)) {
+ // Do not send documents that are no longer in the chunk range being moved. This can
+ // happen when document shard key value of the document changed after the initial
+ // index scan during cloning. This is needed because the destination is very
+ // conservative in processing xferMod deletes and won't delete docs that are not in
+ // the range of the chunk being migrated.
+ if (!isDocInRange(
+ doc.value(), _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern)) {
+ continue;
+ }
+
// Use the builder size instead of accumulating the document sizes directly so
// that we take into consideration the overhead of BSONArray indices.
if (arrBuilder->arrSize() &&
@@ -678,8 +722,6 @@ Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
arrBuilder->append(doc.value());
ShardingStatistics::get(opCtx).countDocsClonedOnDonor.addAndFetch(1);
}
-
- lk.lock();
}
_cloneLocs.erase(_cloneLocs.begin(), iter);