summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Chan <jason.chan@10gen.com>2021-05-05 10:43:30 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-05 20:01:41 +0000
commit8447dea32f0afdc0cfb8bf9bd1ca040ac6850c48 (patch)
tree93bb1b0bda2f2173524e8cb76108e786a1c7aed4
parent778dcc77d9a5c3f2e8463c5b8f9ceafeeeb302b8 (diff)
downloadmongo-8447dea32f0afdc0cfb8bf9bd1ca040ac6850c48.tar.gz
SERVER-56373 Add storeFindAndModifyImagesInOplog parameter and introduce 'needsRetryImage' field to oplog entries.
-rw-r--r--src/mongo/db/commands/dbcheck.cpp3
-rw-r--r--src/mongo/db/op_observer_impl.cpp99
-rw-r--r--src/mongo/db/repl/oplog.cpp16
-rw-r--r--src/mongo/db/repl/oplog.h4
-rw-r--r--src/mongo/db/repl/oplog_entry.idl13
-rw-r--r--src/mongo/db/repl/oplog_test.cpp6
-rw-r--r--src/mongo/db/s/session_catalog_migration_destination.cpp3
-rw-r--r--src/mongo/db/session_test.cpp9
8 files changed, 109 insertions, 44 deletions
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index 6f867aa710a..c6ae019e61c 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -481,7 +481,8 @@ private:
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
uow.commit();
return result;
});
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index b44a8194378..171427154d1 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/server_options.h"
+#include "mongo/db/server_parameters.h"
#include "mongo/db/session_catalog.h"
#include "mongo/db/views/durable_view_catalog.h"
#include "mongo/s/client/shard_registry.h"
@@ -63,6 +64,7 @@ using repl::OplogEntry;
namespace {
MONGO_FAIL_POINT_DEFINE(failCollectionUpdates);
+MONGO_EXPORT_SERVER_PARAMETER(storeFindAndModifyImagesInSideCollection, bool, false);
const auto documentKeyDecoration = OperationContext::declareDecoration<BSONObj>();
@@ -77,7 +79,8 @@ repl::OpTime logOperation(OperationContext* opCtx,
const OperationSessionInfo& sessionInfo,
StmtId stmtId,
const repl::OplogLink& oplogLink,
- const OplogSlot& oplogSlot) {
+ const OplogSlot& oplogSlot,
+ boost::optional<repl::RetryImageEnum> needsRetryImage) {
auto& times = OpObserver::Times::get(opCtx).reservedOpTimes;
auto opTime = repl::logOp(opCtx,
opstr,
@@ -90,7 +93,8 @@ repl::OpTime logOperation(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- oplogSlot);
+ oplogSlot,
+ needsRetryImage);
times.push_back(opTime);
return opTime;
@@ -169,11 +173,21 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
Session* session,
const OplogUpdateEntryArgs& args) {
BSONObj storeObj;
+ boost::optional<repl::RetryImageEnum> needsRetryImage;
+ const auto storeImagesInSideCollection = storeFindAndModifyImagesInSideCollection.load() &&
+ serverGlobalParams.featureCompatibility.getVersion() >=
+ ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40;
if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PreImage) {
invariant(args.preImageDoc);
storeObj = *args.preImageDoc;
+ if (storeImagesInSideCollection && opCtx->getTxnNumber()) {
+ needsRetryImage = repl::RetryImageEnum::kPreImage;
+ }
} else if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PostImage) {
storeObj = args.updatedDoc;
+ if (storeImagesInSideCollection && opCtx->getTxnNumber()) {
+ needsRetryImage = repl::RetryImageEnum::kPostImage;
+ }
}
OperationSessionInfo sessionInfo;
@@ -188,7 +202,7 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
OpTimeBundle opTimes;
opTimes.wallClockTime = getWallClockTimeForOpLog(opCtx);
- if (!storeObj.isEmpty() && opCtx->getTxnNumber()) {
+ if (!storeImagesInSideCollection && !storeObj.isEmpty() && opCtx->getTxnNumber()) {
auto noteUpdateOpTime = logOperation(opCtx,
"n",
args.nss,
@@ -200,10 +214,10 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
sessionInfo,
args.stmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
opTimes.prePostImageOpTime = noteUpdateOpTime;
-
if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PreImage) {
oplogLink.preImageOpTime = noteUpdateOpTime;
} else if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PostImage) {
@@ -222,7 +236,8 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
sessionInfo,
args.stmtId,
oplogLink,
- OplogSlot());
+ OplogSlot(),
+ needsRetryImage);
return opTimes;
}
@@ -249,21 +264,29 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
OpTimeBundle opTimes;
opTimes.wallClockTime = getWallClockTimeForOpLog(opCtx);
+ boost::optional<repl::RetryImageEnum> needsRetryImage;
if (deletedDoc && opCtx->getTxnNumber()) {
- auto noteOplog = logOperation(opCtx,
- "n",
- nss,
- uuid,
- deletedDoc.get(),
- nullptr,
- false,
- opTimes.wallClockTime,
- sessionInfo,
- stmtId,
- {},
- OplogSlot());
- opTimes.prePostImageOpTime = noteOplog;
- oplogLink.preImageOpTime = noteOplog;
+ if (storeFindAndModifyImagesInSideCollection.load() &&
+ serverGlobalParams.featureCompatibility.getVersion() >=
+ ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40) {
+ needsRetryImage = repl::RetryImageEnum::kPreImage;
+ } else {
+ auto noteOplog = logOperation(opCtx,
+ "n",
+ nss,
+ uuid,
+ deletedDoc.get(),
+ nullptr,
+ false,
+ opTimes.wallClockTime,
+ sessionInfo,
+ stmtId,
+ {},
+ OplogSlot(),
+ {});
+ opTimes.prePostImageOpTime = noteOplog;
+ oplogLink.preImageOpTime = noteOplog;
+ }
}
auto& documentKey = documentKeyDecoration(opCtx);
@@ -278,7 +301,8 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- OplogSlot());
+ OplogSlot(),
+ needsRetryImage);
return opTimes;
}
@@ -304,7 +328,8 @@ OpTimeBundle replLogApplyOps(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- OplogSlot());
+ OplogSlot(),
+ {});
return times;
}
@@ -344,7 +369,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
} else {
logOperation(opCtx,
"i",
@@ -357,7 +383,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -574,7 +601,8 @@ void OpObserverImpl::onInternalOpMessage(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
}
void OpObserverImpl::onCreateCollection(OperationContext* opCtx,
@@ -619,7 +647,8 @@ void OpObserverImpl::onCreateCollection(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- createOpTime);
+ createOpTime,
+ {});
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -666,7 +695,8 @@ void OpObserverImpl::onCollMod(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -704,7 +734,8 @@ void OpObserverImpl::onDropDatabase(OperationContext* opCtx, const std::string&
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
uassert(
50714, "dropping the admin database is not allowed.", dbName != NamespaceString::kAdminDb);
@@ -738,7 +769,8 @@ repl::OpTime OpObserverImpl::onDropCollection(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
}
uassert(50715,
@@ -779,7 +811,8 @@ void OpObserverImpl::onDropIndex(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
AuthorizationManager::get(opCtx->getServiceContext())
->logOp(opCtx, "c", cmdNss, cmdObj, &indexInfo);
@@ -815,7 +848,8 @@ repl::OpTime OpObserverImpl::preRenameCollection(OperationContext* const opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
return {};
}
@@ -893,7 +927,8 @@ void OpObserverImpl::onEmptyCapped(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
}
AuthorizationManager::get(opCtx->getServiceContext())
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 8b1bf69fed5..ce2cf2ed50b 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -360,7 +360,8 @@ OplogDocWriter _logOpWriter(OperationContext* opCtx,
Date_t wallTime,
const OperationSessionInfo& sessionInfo,
StmtId statementId,
- const OplogLink& oplogLink) {
+ const OplogLink& oplogLink,
+ boost::optional<repl::RetryImageEnum> needsRetryImage) {
BSONObjBuilder b(256);
b.append("ts", optime.getTimestamp());
@@ -379,6 +380,10 @@ OplogDocWriter _logOpWriter(OperationContext* opCtx,
if (o2)
b.append("o2", *o2);
+ if (needsRetryImage) {
+ b.append("needsRetryImage", repl::RetryImage_serializer(*needsRetryImage));
+ }
+
invariant(wallTime != Date_t{});
b.appendDate("wall", wallTime);
@@ -458,7 +463,8 @@ OpTime logOp(OperationContext* opCtx,
const OperationSessionInfo& sessionInfo,
StmtId statementId,
const OplogLink& oplogLink,
- const OplogSlot& oplogSlot) {
+ const OplogSlot& oplogSlot,
+ boost::optional<repl::RetryImageEnum> needsRetryImage) {
auto replCoord = ReplicationCoordinator::get(opCtx);
// For commands, the test below is on the command ns and therefore does not check for
// specific namespaces such as system.profile. This is the caller's responsibility.
@@ -499,7 +505,8 @@ OpTime logOp(OperationContext* opCtx,
wallClockTime,
sessionInfo,
statementId,
- oplogLink);
+ oplogLink,
+ needsRetryImage);
const DocWriter* basePtr = &writer;
auto timestamp = slot.opTime.getTimestamp();
_logOpsInner(opCtx, nss, &basePtr, &timestamp, 1, oplog, slot.opTime);
@@ -571,7 +578,8 @@ std::vector<OpTime> logInsertOps(OperationContext* opCtx,
wallClockTime,
sessionInfo,
begin[i].stmtId,
- oplogLink));
+ oplogLink,
+ {}));
oplogLink.prevOpTime = insertStatementOplogSlot.opTime;
timestamps[i] = oplogLink.prevOpTime.getTimestamp();
opTimes.push_back(insertStatementOplogSlot.opTime);
diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h
index bda617909c4..40cd7fea97e 100644
--- a/src/mongo/db/repl/oplog.h
+++ b/src/mongo/db/repl/oplog.h
@@ -38,6 +38,7 @@
#include "mongo/bson/timestamp.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/logical_session_id.h"
+#include "mongo/db/repl/oplog_entry_gen.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/stdx/functional.h"
@@ -144,7 +145,8 @@ OpTime logOp(OperationContext* opCtx,
const OperationSessionInfo& sessionInfo,
StmtId stmtId,
const OplogLink& oplogLink,
- const OplogSlot& oplogSlot);
+ const OplogSlot& oplogSlot,
+ boost::optional<repl::RetryImageEnum> needsRetryImage);
// Flush out the cached pointer to the oplog.
// Used by the closeDatabase command to ensure we don't cache closed things.
diff --git a/src/mongo/db/repl/oplog_entry.idl b/src/mongo/db/repl/oplog_entry.idl
index f7a7fa6f949..0a30146c53f 100644
--- a/src/mongo/db/repl/oplog_entry.idl
+++ b/src/mongo/db/repl/oplog_entry.idl
@@ -46,6 +46,13 @@ enums:
kUpdate: "u"
kDelete: "d"
kNoop: "n"
+ RetryImage:
+ description: "Dictates whether a pre-image or post-image is to be stored on behalf of this
+ retryable write."
+ type: string
+ values:
+ kPreImage: "preImage"
+ kPostImage: "postImage"
structs:
ReplOperation:
@@ -141,3 +148,9 @@ structs:
optional: true
description: "The optime of another oplog entry that contains the document
after an update was applied."
+
+ needsRetryImage:
+ type: RetryImage
+ optional: true
+ description: "Identifies whether a secondary should store a pre-image or post-image
+ associated with this oplog entry."
diff --git a/src/mongo/db/repl/oplog_test.cpp b/src/mongo/db/repl/oplog_test.cpp
index d98220eebb9..dbc8b434885 100644
--- a/src/mongo/db/repl/oplog_test.cpp
+++ b/src/mongo/db/repl/oplog_test.cpp
@@ -113,7 +113,8 @@ TEST_F(OplogTest, LogOpReturnsOpTimeOnSuccessfulInsertIntoOplogCollection) {
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
ASSERT_FALSE(opTime.isNull());
wunit.commit();
}
@@ -236,7 +237,8 @@ OpTime _logOpNoopWithMsg(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
ASSERT_FALSE(opTime.isNull());
ASSERT(opTimeNssMap->find(opTime) == opTimeNssMap->end())
diff --git a/src/mongo/db/s/session_catalog_migration_destination.cpp b/src/mongo/db/s/session_catalog_migration_destination.cpp
index bcea0d02b51..c35928016d8 100644
--- a/src/mongo/db/s/session_catalog_migration_destination.cpp
+++ b/src/mongo/db/s/session_catalog_migration_destination.cpp
@@ -291,7 +291,8 @@ ProcessOplogResult processSessionOplog(const BSONObj& oplogBSON,
sessionInfo,
stmtId,
oplogLink,
- OplogSlot());
+ OplogSlot(),
+ {});
const auto& oplogOpTime = result.oplogTime;
uassert(40633,
diff --git a/src/mongo/db/session_test.cpp b/src/mongo/db/session_test.cpp
index 60eb1134448..59e2a5ee057 100644
--- a/src/mongo/db/session_test.cpp
+++ b/src/mongo/db/session_test.cpp
@@ -131,7 +131,8 @@ protected:
osi,
stmtId,
link,
- OplogSlot());
+ OplogSlot(),
+ {});
}
void bumpTxnNumberFromDifferentOpCtx(Session* session, TxnNumber newTxnNum) {
@@ -529,7 +530,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) {
osi,
1,
{},
- OplogSlot());
+ OplogSlot(),
+ {});
session.onWriteOpCompletedOnPrimary(opCtx(), txnNum, {1}, opTime, wallClockTime);
wuow.commit();
@@ -556,7 +558,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) {
osi,
kIncompleteHistoryStmtId,
link,
- OplogSlot());
+ OplogSlot(),
+ {});
session.onWriteOpCompletedOnPrimary(
opCtx(), txnNum, {kIncompleteHistoryStmtId}, opTime, wallClockTime);