diff options
author | Jason Chan <jason.chan@10gen.com> | 2021-05-05 10:43:30 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-05-05 20:01:41 +0000 |
commit | 8447dea32f0afdc0cfb8bf9bd1ca040ac6850c48 (patch) | |
tree | 93bb1b0bda2f2173524e8cb76108e786a1c7aed4 | |
parent | 778dcc77d9a5c3f2e8463c5b8f9ceafeeeb302b8 (diff) | |
download | mongo-8447dea32f0afdc0cfb8bf9bd1ca040ac6850c48.tar.gz |
SERVER-56373 Add storeFindAndModifyImagesInOplog parameter and introduce 'needsRetryImage' field to oplog entries.
-rw-r--r-- | src/mongo/db/commands/dbcheck.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/op_observer_impl.cpp | 99 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_entry.idl | 13 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/s/session_catalog_migration_destination.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/session_test.cpp | 9 |
8 files changed, 109 insertions, 44 deletions
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp index 6f867aa710a..c6ae019e61c 100644 --- a/src/mongo/db/commands/dbcheck.cpp +++ b/src/mongo/db/commands/dbcheck.cpp @@ -481,7 +481,8 @@ private: {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); uow.commit(); return result; }); diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index b44a8194378..171427154d1 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -50,6 +50,7 @@ #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/collection_sharding_state.h" #include "mongo/db/server_options.h" +#include "mongo/db/server_parameters.h" #include "mongo/db/session_catalog.h" #include "mongo/db/views/durable_view_catalog.h" #include "mongo/s/client/shard_registry.h" @@ -63,6 +64,7 @@ using repl::OplogEntry; namespace { MONGO_FAIL_POINT_DEFINE(failCollectionUpdates); +MONGO_EXPORT_SERVER_PARAMETER(storeFindAndModifyImagesInSideCollection, bool, false); const auto documentKeyDecoration = OperationContext::declareDecoration<BSONObj>(); @@ -77,7 +79,8 @@ repl::OpTime logOperation(OperationContext* opCtx, const OperationSessionInfo& sessionInfo, StmtId stmtId, const repl::OplogLink& oplogLink, - const OplogSlot& oplogSlot) { + const OplogSlot& oplogSlot, + boost::optional<repl::RetryImageEnum> needsRetryImage) { auto& times = OpObserver::Times::get(opCtx).reservedOpTimes; auto opTime = repl::logOp(opCtx, opstr, @@ -90,7 +93,8 @@ repl::OpTime logOperation(OperationContext* opCtx, sessionInfo, stmtId, oplogLink, - oplogSlot); + oplogSlot, + needsRetryImage); times.push_back(opTime); return opTime; @@ -169,11 +173,21 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx, Session* session, const OplogUpdateEntryArgs& args) { BSONObj storeObj; + boost::optional<repl::RetryImageEnum> needsRetryImage; + const auto storeImagesInSideCollection = storeFindAndModifyImagesInSideCollection.load() && + serverGlobalParams.featureCompatibility.getVersion() >= + ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40; if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PreImage) { invariant(args.preImageDoc); storeObj = *args.preImageDoc; + if (storeImagesInSideCollection && opCtx->getTxnNumber()) { + needsRetryImage = repl::RetryImageEnum::kPreImage; + } } else if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PostImage) { storeObj = args.updatedDoc; + if (storeImagesInSideCollection && opCtx->getTxnNumber()) { + needsRetryImage = repl::RetryImageEnum::kPostImage; + } } OperationSessionInfo sessionInfo; @@ -188,7 +202,7 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx, OpTimeBundle opTimes; opTimes.wallClockTime = getWallClockTimeForOpLog(opCtx); - if (!storeObj.isEmpty() && opCtx->getTxnNumber()) { + if (!storeImagesInSideCollection && !storeObj.isEmpty() && opCtx->getTxnNumber()) { auto noteUpdateOpTime = logOperation(opCtx, "n", args.nss, @@ -200,10 +214,10 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx, sessionInfo, args.stmtId, {}, - OplogSlot()); + OplogSlot(), + {}); opTimes.prePostImageOpTime = noteUpdateOpTime; - if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PreImage) { oplogLink.preImageOpTime = noteUpdateOpTime; } else if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PostImage) { @@ -222,7 +236,8 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx, sessionInfo, args.stmtId, oplogLink, - OplogSlot()); + OplogSlot(), + needsRetryImage); return opTimes; } @@ -249,21 +264,29 @@ OpTimeBundle replLogDelete(OperationContext* opCtx, OpTimeBundle opTimes; opTimes.wallClockTime = getWallClockTimeForOpLog(opCtx); + boost::optional<repl::RetryImageEnum> needsRetryImage; if (deletedDoc && opCtx->getTxnNumber()) { - auto noteOplog = logOperation(opCtx, - "n", - nss, - uuid, - deletedDoc.get(), - nullptr, - false, - opTimes.wallClockTime, - sessionInfo, - stmtId, - {}, - OplogSlot()); - opTimes.prePostImageOpTime = noteOplog; - oplogLink.preImageOpTime = noteOplog; + if (storeFindAndModifyImagesInSideCollection.load() && + serverGlobalParams.featureCompatibility.getVersion() >= + ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40) { + needsRetryImage = repl::RetryImageEnum::kPreImage; + } else { + auto noteOplog = logOperation(opCtx, + "n", + nss, + uuid, + deletedDoc.get(), + nullptr, + false, + opTimes.wallClockTime, + sessionInfo, + stmtId, + {}, + OplogSlot(), + {}); + opTimes.prePostImageOpTime = noteOplog; + oplogLink.preImageOpTime = noteOplog; + } } auto& documentKey = documentKeyDecoration(opCtx); @@ -278,7 +301,8 @@ OpTimeBundle replLogDelete(OperationContext* opCtx, sessionInfo, stmtId, oplogLink, - OplogSlot()); + OplogSlot(), + needsRetryImage); return opTimes; } @@ -304,7 +328,8 @@ OpTimeBundle replLogApplyOps(OperationContext* opCtx, sessionInfo, stmtId, oplogLink, - OplogSlot()); + OplogSlot(), + {}); return times; } @@ -344,7 +369,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); } else { logOperation(opCtx, "i", @@ -357,7 +383,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); } AuthorizationManager::get(opCtx->getServiceContext()) @@ -574,7 +601,8 @@ void OpObserverImpl::onInternalOpMessage(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); } void OpObserverImpl::onCreateCollection(OperationContext* opCtx, @@ -619,7 +647,8 @@ void OpObserverImpl::onCreateCollection(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - createOpTime); + createOpTime, + {}); } AuthorizationManager::get(opCtx->getServiceContext()) @@ -666,7 +695,8 @@ void OpObserverImpl::onCollMod(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); } AuthorizationManager::get(opCtx->getServiceContext()) @@ -704,7 +734,8 @@ void OpObserverImpl::onDropDatabase(OperationContext* opCtx, const std::string& {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); uassert( 50714, "dropping the admin database is not allowed.", dbName != NamespaceString::kAdminDb); @@ -738,7 +769,8 @@ repl::OpTime OpObserverImpl::onDropCollection(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); } uassert(50715, @@ -779,7 +811,8 @@ void OpObserverImpl::onDropIndex(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); AuthorizationManager::get(opCtx->getServiceContext()) ->logOp(opCtx, "c", cmdNss, cmdObj, &indexInfo); @@ -815,7 +848,8 @@ repl::OpTime OpObserverImpl::preRenameCollection(OperationContext* const opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); return {}; } @@ -893,7 +927,8 @@ void OpObserverImpl::onEmptyCapped(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); } AuthorizationManager::get(opCtx->getServiceContext()) diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 8b1bf69fed5..ce2cf2ed50b 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -360,7 +360,8 @@ OplogDocWriter _logOpWriter(OperationContext* opCtx, Date_t wallTime, const OperationSessionInfo& sessionInfo, StmtId statementId, - const OplogLink& oplogLink) { + const OplogLink& oplogLink, + boost::optional<repl::RetryImageEnum> needsRetryImage) { BSONObjBuilder b(256); b.append("ts", optime.getTimestamp()); @@ -379,6 +380,10 @@ OplogDocWriter _logOpWriter(OperationContext* opCtx, if (o2) b.append("o2", *o2); + if (needsRetryImage) { + b.append("needsRetryImage", repl::RetryImage_serializer(*needsRetryImage)); + } + invariant(wallTime != Date_t{}); b.appendDate("wall", wallTime); @@ -458,7 +463,8 @@ OpTime logOp(OperationContext* opCtx, const OperationSessionInfo& sessionInfo, StmtId statementId, const OplogLink& oplogLink, - const OplogSlot& oplogSlot) { + const OplogSlot& oplogSlot, + boost::optional<repl::RetryImageEnum> needsRetryImage) { auto replCoord = ReplicationCoordinator::get(opCtx); // For commands, the test below is on the command ns and therefore does not check for // specific namespaces such as system.profile. This is the caller's responsibility. @@ -499,7 +505,8 @@ OpTime logOp(OperationContext* opCtx, wallClockTime, sessionInfo, statementId, - oplogLink); + oplogLink, + needsRetryImage); const DocWriter* basePtr = &writer; auto timestamp = slot.opTime.getTimestamp(); _logOpsInner(opCtx, nss, &basePtr, ×tamp, 1, oplog, slot.opTime); @@ -571,7 +578,8 @@ std::vector<OpTime> logInsertOps(OperationContext* opCtx, wallClockTime, sessionInfo, begin[i].stmtId, - oplogLink)); + oplogLink, + {})); oplogLink.prevOpTime = insertStatementOplogSlot.opTime; timestamps[i] = oplogLink.prevOpTime.getTimestamp(); opTimes.push_back(insertStatementOplogSlot.opTime); diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h index bda617909c4..40cd7fea97e 100644 --- a/src/mongo/db/repl/oplog.h +++ b/src/mongo/db/repl/oplog.h @@ -38,6 +38,7 @@ #include "mongo/bson/timestamp.h" #include "mongo/db/catalog/collection_options.h" #include "mongo/db/logical_session_id.h" +#include "mongo/db/repl/oplog_entry_gen.h" #include "mongo/db/repl/optime.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/stdx/functional.h" @@ -144,7 +145,8 @@ OpTime logOp(OperationContext* opCtx, const OperationSessionInfo& sessionInfo, StmtId stmtId, const OplogLink& oplogLink, - const OplogSlot& oplogSlot); + const OplogSlot& oplogSlot, + boost::optional<repl::RetryImageEnum> needsRetryImage); // Flush out the cached pointer to the oplog. // Used by the closeDatabase command to ensure we don't cache closed things. diff --git a/src/mongo/db/repl/oplog_entry.idl b/src/mongo/db/repl/oplog_entry.idl index f7a7fa6f949..0a30146c53f 100644 --- a/src/mongo/db/repl/oplog_entry.idl +++ b/src/mongo/db/repl/oplog_entry.idl @@ -46,6 +46,13 @@ enums: kUpdate: "u" kDelete: "d" kNoop: "n" + RetryImage: + description: "Dictates whether a pre-image or post-image is to be stored on behalf of this + retryable write." + type: string + values: + kPreImage: "preImage" + kPostImage: "postImage" structs: ReplOperation: @@ -141,3 +148,9 @@ structs: optional: true description: "The optime of another oplog entry that contains the document after an update was applied." + + needsRetryImage: + type: RetryImage + optional: true + description: "Identifies whether a secondary should store a pre-image or post-image + associated with this oplog entry." diff --git a/src/mongo/db/repl/oplog_test.cpp b/src/mongo/db/repl/oplog_test.cpp index d98220eebb9..dbc8b434885 100644 --- a/src/mongo/db/repl/oplog_test.cpp +++ b/src/mongo/db/repl/oplog_test.cpp @@ -113,7 +113,8 @@ TEST_F(OplogTest, LogOpReturnsOpTimeOnSuccessfulInsertIntoOplogCollection) { {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); ASSERT_FALSE(opTime.isNull()); wunit.commit(); } @@ -236,7 +237,8 @@ OpTime _logOpNoopWithMsg(OperationContext* opCtx, {}, kUninitializedStmtId, {}, - OplogSlot()); + OplogSlot(), + {}); ASSERT_FALSE(opTime.isNull()); ASSERT(opTimeNssMap->find(opTime) == opTimeNssMap->end()) diff --git a/src/mongo/db/s/session_catalog_migration_destination.cpp b/src/mongo/db/s/session_catalog_migration_destination.cpp index bcea0d02b51..c35928016d8 100644 --- a/src/mongo/db/s/session_catalog_migration_destination.cpp +++ b/src/mongo/db/s/session_catalog_migration_destination.cpp @@ -291,7 +291,8 @@ ProcessOplogResult processSessionOplog(const BSONObj& oplogBSON, sessionInfo, stmtId, oplogLink, - OplogSlot()); + OplogSlot(), + {}); const auto& oplogOpTime = result.oplogTime; uassert(40633, diff --git a/src/mongo/db/session_test.cpp b/src/mongo/db/session_test.cpp index 60eb1134448..59e2a5ee057 100644 --- a/src/mongo/db/session_test.cpp +++ b/src/mongo/db/session_test.cpp @@ -131,7 +131,8 @@ protected: osi, stmtId, link, - OplogSlot()); + OplogSlot(), + {}); } void bumpTxnNumberFromDifferentOpCtx(Session* session, TxnNumber newTxnNum) { @@ -529,7 +530,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) { osi, 1, {}, - OplogSlot()); + OplogSlot(), + {}); session.onWriteOpCompletedOnPrimary(opCtx(), txnNum, {1}, opTime, wallClockTime); wuow.commit(); @@ -556,7 +558,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) { osi, kIncompleteHistoryStmtId, link, - OplogSlot()); + OplogSlot(), + {}); session.onWriteOpCompletedOnPrimary( opCtx(), txnNum, {kIncompleteHistoryStmtId}, opTime, wallClockTime); |