summaryrefslogtreecommitdiff
path: root/src/mongo/db/repl/oplog.cpp
diff options
context:
space:
mode:
authorDaniel Gottlieb <daniel.gottlieb@mongodb.com>2021-05-07 15:27:39 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-07 20:55:23 +0000
commite557b13c8cff42b717bd85495e44e14eb888c482 (patch)
treeaed97ac112b668cf52abca04d0e9dbfaf7872974 /src/mongo/db/repl/oplog.cpp
parentf696fa0f8836290a56b5a3ceaf754a41036627ae (diff)
downloadmongo-e557b13c8cff42b717bd85495e44e14eb888c482.tar.gz
SERVER-56374: Add ability to write retryable findAndModify updates to `config.image_collection`.
Diffstat (limited to 'src/mongo/db/repl/oplog.cpp')
-rw-r--r--src/mongo/db/repl/oplog.cpp88
1 files changed, 88 insertions, 0 deletions
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index b36fefd5aa2..89fa8482d5d 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -75,6 +75,7 @@
#include "mongo/db/repl/apply_ops.h"
#include "mongo/db/repl/bgsync.h"
#include "mongo/db/repl/dbcheck.h"
+#include "mongo/db/repl/image_collection_entry_gen.h"
#include "mongo/db/repl/local_oplog_info.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/repl_client_info.h"
@@ -165,6 +166,24 @@ void applyImportCollectionDefault(OperationContext* opCtx,
"isDryRun"_attr = isDryRun);
}
+/**
+ * Remove this function when proper lifetime of the config.image_collection is in place.
+ */
+void createConfigImagesIfNecessary_temporary(OperationContext* opCtx) {
+ AutoGetCollection configImages(
+ opCtx, NamespaceString::kConfigImagesNamespace, LockMode::MODE_IX);
+ if (configImages.getCollection()) {
+ return;
+ }
+
+ repl::UnreplicatedWritesBlock unreplicated(opCtx);
+ CollectionOptions options;
+ options.uuid = UUID::gen();
+ invariant(DatabaseHolder::get(opCtx)
+ ->getDb(opCtx, NamespaceString::kConfigImagesNamespace.db())
+ ->createCollection(opCtx, NamespaceString::kConfigImagesNamespace, options));
+}
+
} // namespace
ApplyImportCollectionFn applyImportCollection = applyImportCollectionDefault;
@@ -1391,6 +1410,11 @@ Status applyOperation_inlock(OperationContext* opCtx,
request.setUpdateModification(std::move(updateMod));
request.setUpsert(upsert);
request.setFromOplogApplication(true);
+ if (op.getNeedsRetryImage() == repl::RetryImageEnum::kPreImage) {
+ request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_OLD);
+ } else if (op.getNeedsRetryImage() == repl::RetryImageEnum::kPostImage) {
+ request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_NEW);
+ }
Timestamp timestamp;
if (assignOperationTimestamp) {
@@ -1398,6 +1422,36 @@ Status applyOperation_inlock(OperationContext* opCtx,
}
const StringData ns = op.getNss().ns();
+ // Operations that were part of a retryable findAndModify have two formats for
+ // replicating pre/post images. The classic format has primaries writing explicit noop
+ // oplog entries that contain the necessary details for reconstructed a response to a
+ // retried operation.
+ //
+ // In the new format, we "implicitly" replicate the necessary data. Oplog entries may
+ // contain an optional field, `needsRetryImage` with a value of `preImage` or
+ // `postImage`. When applying these oplog entries, we also retrieve the pre/post image
+ // retrieved by the query system and write that value into `config.image_collection` as
+ // part of the same oplog application transaction. The `config.image_collection`
+ // documents are keyed by the oplog entries logical session id, which is the same as the
+ // `config.transactions` table.
+ //
+ // Batches of oplog entries can contain multiple oplog entries from the same logical
+ // session. Thus updates to `config.image_collection` documents can be
+ // concurrent. Secondaries already coalesce (read: intentionally ignore) some writes to
+ // `config.transactions`, we may also omit some writes to `config.image_collection`, so
+ // long as the last write persists. To accomplish this we update
+ // `config.image_collection` entries with an upsert. The query predicate is `{_id:
+ // <lsid>, ts $lt <oplogEntry.ts>}`. This can result in a WriteConflictException when
+ // two writers are concurrently updating/inserting the same document.
+ //
+ // However, when an upsert turns into an insert, a writer can also observe a
+ // DuplicateKeyException as its `ts` clause can hide the document from being
+ // updated. Following up the failed update with an insert turns into a
+ // DuplicateKeyException. This is safe, but to break an infinite loop, we retry the
+ // operation with a regular update as opposed to an upsert. We're guaranteed to not need
+ // to insert a document. We only have to make sure we didn't race with an insert that
+ // won, but with an earlier `ts`.
+ bool upsertConfigImage = true;
auto status = writeConflictRetry(opCtx, "applyOps_update", ns, [&] {
WriteUnitOfWork wuow(opCtx);
if (timestamp != Timestamp::min()) {
@@ -1472,6 +1526,40 @@ Status applyOperation_inlock(OperationContext* opCtx,
invariant(!oplogApplicationEnforcesSteadyStateConstraints);
}
+ if (op.getNeedsRetryImage()) {
+ createConfigImagesIfNecessary_temporary(opCtx);
+ AutoGetCollection autoColl(
+ opCtx, NamespaceString::kConfigImagesNamespace, LockMode::MODE_IX);
+ repl::ImageEntry image;
+ image.set_id(op.getSessionId().get());
+ image.setTs(op.getTimestamp());
+ switch (op.getNeedsRetryImage().get()) {
+ case repl::RetryImageEnum::kPreImage:
+ image.setImageKind(repl::RetryImageEnum::kPreImage);
+ break;
+ case repl::RetryImageEnum::kPostImage:
+ image.setImageKind(repl::RetryImageEnum::kPostImage);
+ break;
+ }
+ image.setImage(ur.requestedDocImage);
+
+ auto request = UpdateRequest();
+ request.setNamespaceString(NamespaceString("config.image_collection"));
+ request.setQuery(BSON("_id" << image.get_id().toBSON() << "ts"
+ << BSON("$lt" << image.getTs())));
+ request.setUpsert(upsertConfigImage);
+ request.setUpdateModification(
+ write_ops::UpdateModification::parseFromClassicUpdate(image.toBSON()));
+ request.setFromOplogApplication(true);
+ try {
+ ::mongo::update(opCtx, autoColl.getDb(), request);
+ } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+ // We can get a duplicate key when two upserts race on inserting a document.
+ upsertConfigImage = false;
+ throw WriteConflictException();
+ }
+ }
+
wuow.commit();
return Status::OK();
});