summary | refs | log | tree | commit | diff
path: root/src/mongo
diff options
context:
space:
mode:
author: Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-05-28 00:06:26 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-05-28 00:39:00 +0000
commit: f9f3b16400713f97fca753bb00bebd109b2667f0 (patch)
tree: c2fe77e1f3200c5d9a992a25d625c24aff7c6f3a /src/mongo
parent: fcfc73be325777faf3964289f599b63ef3131c3a (diff)
download: mongo-f9f3b16400713f97fca753bb00bebd109b2667f0.tar.gz
SERVER-56713: BACKPORT-8977 [RRFaM] Avoid creating images while in initial sync
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/op_observer_impl.cpp 2
-rw-r--r-- src/mongo/db/repl/oplog.cpp 52
2 files changed, 41 insertions, 13 deletions
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index 8d01ddd1583..4ba2ccf847b 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -610,6 +610,8 @@ void OpObserverImpl::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArg
txnParticipant.addTransactionOperation(opCtx, operation);
} else {
opTime = replLogUpdate(opCtx, args, storeImagesInSideCollection);
+ // Check if we're in a retryable write that should save the image to
+ // `config.image_collection`.
if (storeImagesInSideCollection && opCtx->getTxnNumber() &&
args.updateArgs.storeDocOption != CollectionUpdateArgs::StoreDocOption::None) {
BSONObj imageDoc;
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index cebb59e3990..0e6829f0644 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -331,6 +331,10 @@ void createIndexForApplyOps(OperationContext* opCtx,
}
}
+/**
+ * @param image may be an empty BSONObj (see `BSONObj::isEmpty`) to signal that the node is in
+ * initial sync and must invalidate the relevant image collection data.
+ */
void writeToImageCollection(OperationContext* opCtx,
const BSONObj& op,
const BSONObj& image,
@@ -342,9 +346,14 @@ void writeToImageCollection(OperationContext* opCtx,
LogicalSessionId::parse(IDLParserErrorContext("ParseSessionIdWhenWritingToImageCollection"),
op.getField(OplogEntryBase::kSessionIdFieldName).Obj());
imageEntry.set_id(sessionId);
+ imageEntry.setTxnNumber(op.getField(OplogEntryBase::kTxnNumberFieldName).numberLong());
imageEntry.setTs(op["ts"].timestamp());
imageEntry.setImageKind(imageKind);
imageEntry.setImage(image);
+ if (image.isEmpty()) {
+ imageEntry.setInvalidated(true);
+ imageEntry.setInvalidatedReason("initial sync"_sd);
+ }
UpdateRequest request(NamespaceString::kConfigImagesNamespace);
request.setQuery(
@@ -1695,10 +1704,12 @@ Status applyOperation_inlock(OperationContext* opCtx,
imageKind = repl::RetryImage_parse(
IDLParserErrorContext("applyUpdate"),
op.getField(OplogEntryBase::kNeedsRetryImageFieldName).String());
- if (imageKind == repl::RetryImageEnum::kPreImage) {
- request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_OLD);
- } else if (imageKind == repl::RetryImageEnum::kPostImage) {
- request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_NEW);
+ if (mode != OplogApplication::Mode::kInitialSync) {
+ if (imageKind == repl::RetryImageEnum::kPreImage) {
+ request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_OLD);
+ } else if (imageKind == repl::RetryImageEnum::kPostImage) {
+ request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_NEW);
+ }
}
}
@@ -1756,8 +1767,14 @@ Status applyOperation_inlock(OperationContext* opCtx,
}
if (op.hasField(OplogEntryBase::kNeedsRetryImageFieldName)) {
invariant(imageKind);
- writeToImageCollection(
- opCtx, op, ur.requestedDocImage, *imageKind, &upsertConfigImage);
+ writeToImageCollection(opCtx,
+ op,
+ // If we did not request an image because we're in
+ // initial sync, the value passed in here is conveniently
+ // the empty BSONObj.
+ ur.requestedDocImage,
+ *imageKind,
+ &upsertConfigImage);
}
wuow.commit();
return Status::OK();
@@ -1804,17 +1821,26 @@ Status applyOperation_inlock(OperationContext* opCtx,
op.hasField(OplogEntryBase::kNeedsRetryImageFieldName);
DeleteRequest request(requestNss);
request.setQuery(deleteCriteria);
- if (kNeedsRetryImage) {
+ if (mode != OplogApplication::Mode::kInitialSync && kNeedsRetryImage) {
+ // When in initial sync, we'll pass an empty image into
+ // `writeToImageCollection`.
request.setReturnDeleted(true);
}
DeleteResult result = deleteObject(opCtx, collection, request);
- if (result.nDeleted == 1 && kNeedsRetryImage) {
- writeToImageCollection(opCtx,
- op,
- result.requestedPreImage.get(),
- repl::RetryImageEnum::kPreImage,
- &upsertConfigImage);
+ if (kNeedsRetryImage) {
+ // Even if `result.nDeleted` is 0, we want to perform a write to the
+ // imageCollection to advance the txnNumber/ts and invalidate the image. This
+ // isn't strictly necessary for correctness -- the `config.transactions` table
+ // is responsible for whether to retry. The motivation here is to simply reduce
+ // the number of states related documents in the two collections can be in.
+ BSONObj imageDoc;
+ if (result.nDeleted > 0 && mode != OplogApplication::Mode::kInitialSync) {
+ imageDoc = result.requestedPreImage.get();
+ }
+
+ writeToImageCollection(
+ opCtx, op, imageDoc, repl::RetryImageEnum::kPreImage, &upsertConfigImage);
}
} else
verify(opType[1] == 'b'); // "db" advertisement