diff options
author | Andrew Shuvalov <andrew.shuvalov@mongodb.com> | 2021-05-28 00:06:26 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-05-28 00:39:00 +0000 |
commit | f9f3b16400713f97fca753bb00bebd109b2667f0 (patch) | |
tree | c2fe77e1f3200c5d9a992a25d625c24aff7c6f3a /src/mongo | |
parent | fcfc73be325777faf3964289f599b63ef3131c3a (diff) | |
download | mongo-f9f3b16400713f97fca753bb00bebd109b2667f0.tar.gz |
SERVER-56713: BACKPORT-8977 [RRFaM] Avoid creating images while in initial sync
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/op_observer_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 52 |
2 files changed, 41 insertions, 13 deletions
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index 8d01ddd1583..4ba2ccf847b 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -610,6 +610,8 @@ void OpObserverImpl::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArg txnParticipant.addTransactionOperation(opCtx, operation); } else { opTime = replLogUpdate(opCtx, args, storeImagesInSideCollection); + // Check if we're in a retryable write that should save the image to + // `config.image_collection`. if (storeImagesInSideCollection && opCtx->getTxnNumber() && args.updateArgs.storeDocOption != CollectionUpdateArgs::StoreDocOption::None) { BSONObj imageDoc; diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index cebb59e3990..0e6829f0644 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -331,6 +331,10 @@ void createIndexForApplyOps(OperationContext* opCtx, } } +/** + * @param dataImage can be BSONObj::isEmpty to signal the node is in initial sync and must + * invalidate relevant image collection data. + */ void writeToImageCollection(OperationContext* opCtx, const BSONObj& op, const BSONObj& image, @@ -342,9 +346,14 @@ void writeToImageCollection(OperationContext* opCtx, LogicalSessionId::parse(IDLParserErrorContext("ParseSessionIdWhenWritingToImageCollection"), op.getField(OplogEntryBase::kSessionIdFieldName).Obj()); imageEntry.set_id(sessionId); + imageEntry.setTxnNumber(op.getField(OplogEntryBase::kTxnNumberFieldName).numberLong()); imageEntry.setTs(op["ts"].timestamp()); imageEntry.setImageKind(imageKind); imageEntry.setImage(image); + if (image.isEmpty()) { + imageEntry.setInvalidated(true); + imageEntry.setInvalidatedReason("initial sync"_sd); + } UpdateRequest request(NamespaceString::kConfigImagesNamespace); request.setQuery( @@ -1695,10 +1704,12 @@ Status applyOperation_inlock(OperationContext* opCtx, imageKind = repl::RetryImage_parse( IDLParserErrorContext("applyUpdate"), op.getField(OplogEntryBase::kNeedsRetryImageFieldName).String()); - if (imageKind == repl::RetryImageEnum::kPreImage) { - request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_OLD); - } else if (imageKind == repl::RetryImageEnum::kPostImage) { - request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_NEW); + if (mode != OplogApplication::Mode::kInitialSync) { + if (imageKind == repl::RetryImageEnum::kPreImage) { + request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_OLD); + } else if (imageKind == repl::RetryImageEnum::kPostImage) { + request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_NEW); + } } } @@ -1756,8 +1767,14 @@ Status applyOperation_inlock(OperationContext* opCtx, } if (op.hasField(OplogEntryBase::kNeedsRetryImageFieldName)) { invariant(imageKind); - writeToImageCollection( - opCtx, op, ur.requestedDocImage, *imageKind, &upsertConfigImage); + writeToImageCollection(opCtx, + op, + // If we did not request an image because we're in + // initial sync, the value passed in here is conveniently + // the empty BSONObj. + ur.requestedDocImage, + *imageKind, + &upsertConfigImage); } wuow.commit(); return Status::OK(); @@ -1804,17 +1821,26 @@ Status applyOperation_inlock(OperationContext* opCtx, op.hasField(OplogEntryBase::kNeedsRetryImageFieldName); DeleteRequest request(requestNss); request.setQuery(deleteCriteria); - if (kNeedsRetryImage) { + if (mode != OplogApplication::Mode::kInitialSync && kNeedsRetryImage) { + // When in initial sync, we'll pass an empty image into + // `writeToImageCollection`. request.setReturnDeleted(true); } DeleteResult result = deleteObject(opCtx, collection, request); - if (result.nDeleted == 1 && kNeedsRetryImage) { - writeToImageCollection(opCtx, - op, - result.requestedPreImage.get(), - repl::RetryImageEnum::kPreImage, - &upsertConfigImage); + if (kNeedsRetryImage) { + // Even if `result.nDeleted` is 0, we want to perform a write to the + // imageCollection to advance the txnNumber/ts and invalidate the image. This + // isn't strictly necessary for correctness -- the `config.transactions` table + // is responsible for whether to retry. The motivation here is to simply reduce + // the number of states related documents in the two collections can be in. + BSONObj imageDoc; + if (result.nDeleted > 0 && mode != OplogApplication::Mode::kInitialSync) { + imageDoc = result.requestedPreImage.get(); + } + + writeToImageCollection( + opCtx, op, imageDoc, repl::RetryImageEnum::kPreImage, &upsertConfigImage); } } else verify(opType[1] == 'b'); // "db" advertisement |