author    | Daniel Gottlieb <daniel.gottlieb@mongodb.com>    | 2022-09-07 14:23:22 -0400
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-03 20:50:01 +0000
commit    | ab5364bc82fba17b2347942aac0a8afff9d4ae23 (patch)
tree      | 8b5f494f80d0c2570f281053ced742593ddff262 /src
parent    | 9709ef3d8cbb715196c5609339b06793a4551886 (diff)
download  | mongo-ab5364bc82fba17b2347942aac0a8afff9d4ae23.tar.gz
SERVER-69001: Have initial sync persist its last oplog time into the minvalid document.
(cherry picked from commit ff2fffdf496ac1bc039cd8c84024cc6159cf80b6)
(cherry picked from commit 14b1ea6d58cf2a2169b2a07268fa2266419703b4)
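In short, the commit moves the minValid write out from under the initial syncer's mutex: the fetcher result is validated under the lock, the lock is dropped for the durable setMinValid() call, and it is re-acquired to record the stop timestamp. Below is a minimal standalone sketch of that locking shape; the names SyncerSketch, persistStopTime, and writeMinValidToDisk are illustrative stand-ins, not the real InitialSyncer API.

#include <mutex>

// Hedged sketch of the locking pattern the diff below introduces; all names
// here are stand-ins, not the real InitialSyncer members.
class SyncerSketch {
public:
    void persistStopTime(long long fetchedOpTime) {
        {
            // Validate in-memory state under the mutex, as the real callback
            // does via _checkForShutdownAndConvertStatus_inlock().
            std::lock_guard<std::mutex> lock(_mutex);
            if (fetchedOpTime <= 0)
                return;  // stands in for the error/early-return paths
        }
        // Mutex released: the possibly slow durable write happens outside the
        // lock, mirroring the new setMinValid() call in the diff.
        writeMinValidToDisk(fetchedOpTime);

        // Re-acquire the mutex to publish the stop timestamp, as the new code
        // does before setting _initialSyncState->stopTimestamp.
        std::lock_guard<std::mutex> lock(_mutex);
        _stopTimestamp = fetchedOpTime;
    }

private:
    static void writeMinValidToDisk(long long) { /* stand-in for disk I/O */ }

    std::mutex _mutex;
    long long _stopTimestamp = 0;
};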
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp                       | 96
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_impl.cpp |  7
2 files changed, 57 insertions, 46 deletions
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp
index e63d48e920c..68d3da1ffe9 100644
--- a/src/mongo/db/repl/initial_syncer.cpp
+++ b/src/mongo/db/repl/initial_syncer.cpp
@@ -1384,53 +1384,63 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForStopTimestamp(
     std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
     OpTimeAndWallTime resultOpTimeAndWallTime = {OpTime(), Date_t()};
     {
-        stdx::lock_guard<Latch> lock(_mutex);
-        auto status = _checkForShutdownAndConvertStatus_inlock(
-            result.getStatus(), "error fetching last oplog entry for stop timestamp");
-        if (_shouldRetryError(lock, status)) {
-            auto scheduleStatus =
-                (*_attemptExec)
-                    ->scheduleWork([this,
-                                    onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
-                        // It is not valid to schedule the retry from within this callback,
-                        // hence we schedule a lambda to schedule the retry.
-                        stdx::lock_guard<Latch> lock(_mutex);
-                        // Since the stopTimestamp is retrieved after we have done all the work of
-                        // retrieving collection data, we handle retries within this class by
-                        // retrying for 'initialSyncTransientErrorRetryPeriodSeconds' (default 24
-                        // hours). This is the same retry strategy used when retrieving collection
-                        // data, and avoids retrieving all the data and then throwing it away due to
-                        // a transient network outage.
-                        auto status = _scheduleLastOplogEntryFetcher_inlock(
-                            [=](const StatusWith<mongo::Fetcher::QueryResponse>& status,
-                                mongo::Fetcher::NextAction*,
-                                mongo::BSONObjBuilder*) {
-                                _lastOplogEntryFetcherCallbackForStopTimestamp(status,
-                                                                               onCompletionGuard);
-                            },
-                            kInitialSyncerHandlesRetries);
-                        if (!status.isOK()) {
-                            onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
-                        }
-                    });
-            if (scheduleStatus.isOK())
+        {
+            stdx::lock_guard<Latch> lock(_mutex);
+            auto status = _checkForShutdownAndConvertStatus_inlock(
+                result.getStatus(), "error fetching last oplog entry for stop timestamp");
+            if (_shouldRetryError(lock, status)) {
+                auto scheduleStatus =
+                    (*_attemptExec)
+                        ->scheduleWork(
+                            [this, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
+                                // It is not valid to schedule the retry from within this callback,
+                                // hence we schedule a lambda to schedule the retry.
+                                stdx::lock_guard<Latch> lock(_mutex);
+                                // Since the stopTimestamp is retrieved after we have done all the
+                                // work of retrieving collection data, we handle retries within this
+                                // class by retrying for
+                                // 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 hours).
+                                // This is the same retry strategy used when retrieving collection
+                                // data, and avoids retrieving all the data and then throwing it
+                                // away due to a transient network outage.
+                                auto status = _scheduleLastOplogEntryFetcher_inlock(
+                                    [=](const StatusWith<mongo::Fetcher::QueryResponse>& status,
+                                        mongo::Fetcher::NextAction*,
+                                        mongo::BSONObjBuilder*) {
+                                        _lastOplogEntryFetcherCallbackForStopTimestamp(
+                                            status, onCompletionGuard);
+                                    },
+                                    kInitialSyncerHandlesRetries);
+                                if (!status.isOK()) {
+                                    onCompletionGuard->setResultAndCancelRemainingWork_inlock(
+                                        lock, status);
+                                }
+                            });
+                if (scheduleStatus.isOK())
+                    return;
+                // If scheduling failed, we're shutting down and cannot retry.
+                // So just continue with the original failed status.
+            }
+            if (!status.isOK()) {
+                onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
                 return;
-            // If scheduling failed, we're shutting down and cannot retry.
-            // So just continue with the original failed status.
-        }
-        if (!status.isOK()) {
-            onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
-            return;
-        }
+            }
 
-        auto&& optimeStatus = parseOpTimeAndWallTime(result);
-        if (!optimeStatus.isOK()) {
-            onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
-                                                                      optimeStatus.getStatus());
-            return;
+            auto&& optimeStatus = parseOpTimeAndWallTime(result);
+            if (!optimeStatus.isOK()) {
+                onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
+                                                                          optimeStatus.getStatus());
+                return;
+            }
+            resultOpTimeAndWallTime = optimeStatus.getValue();
         }
-        resultOpTimeAndWallTime = optimeStatus.getValue();
+        // Release the _mutex to write to disk.
+        auto opCtx = makeOpCtx();
+        _replicationProcess->getConsistencyMarkers()->setMinValid(opCtx.get(),
+                                                                  resultOpTimeAndWallTime.opTime);
+
+        stdx::lock_guard<Latch> lock(_mutex);
         _initialSyncState->stopTimestamp = resultOpTimeAndWallTime.opTime.getTimestamp();
 
         // If the beginFetchingTimestamp is different from the stopTimestamp, it indicates that
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
index 91c222a0d97..8b7605620aa 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
@@ -216,15 +216,16 @@ void ReplicationConsistencyMarkersImpl::setMinValid(OperationContext* opCtx,
                 "Setting minvalid to exactly",
                 "minValidString"_attr = minValid.toString(),
                 "minValidBSON"_attr = minValid.toBSON());
+
     TimestampedBSONObj update;
     update.obj = BSON("$set" << BSON(MinValidDocument::kMinValidTimestampFieldName
                                      << minValid.getTimestamp()
                                      << MinValidDocument::kMinValidTermFieldName
                                      << minValid.getTerm()));
 
-    // This method is only used with storage engines that do not support recover to stable
-    // timestamp. As a result, their timestamps do not matter.
-    invariant(!opCtx->getServiceContext()->getStorageEngine()->supportsRecoverToStableTimestamp());
+    // We do not provide a timestamp when we set the initial sync flag. Initial sync can only
+    // occur right when we start up, and thus there cannot be any checkpoints being taken. This
+    // write should go into the next checkpoint.
     update.timestamp = Timestamp();
 
     _updateMinValidDocument(opCtx, update);
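One detail worth calling out from the second hunk: update.timestamp stays the null Timestamp(), but the justification changes. The old invariant tied the untimestamped write to engines without recover-to-stable-timestamp support; the new comment argues that initial sync only runs at startup, before any checkpoint can be in progress, so the untimestamped write is simply carried into the next checkpoint. Below is a simplified sketch of that null-timestamp convention; the Timestamp and TimestampedUpdate types and makeMinValidUpdate are illustrative stand-ins, not the real TimestampedBSONObj API.

#include <cstdint>
#include <string>

// Illustrative stand-ins for the real repl types.
struct Timestamp {
    uint32_t secs = 0;
    uint32_t inc = 0;
    bool isNull() const { return secs == 0 && inc == 0; }  // Timestamp() is null
};

struct TimestampedUpdate {
    std::string setDocJson;  // stands in for the $set BSON built in the hunk
    Timestamp timestamp;     // null => apply the write untimestamped
};

TimestampedUpdate makeMinValidUpdate(uint32_t secs, uint32_t inc, long long term) {
    TimestampedUpdate update;
    update.setDocJson = "{$set: {ts: " + std::to_string(secs) + "." + std::to_string(inc) +
        ", t: " + std::to_string(term) + "}}";
    // Deliberately the null Timestamp(): during initial sync no checkpoint can
    // be in progress, so this untimestamped write rides into the next
    // checkpoint, which is the reasoning given in the new comment above.
    update.timestamp = Timestamp();
    return update;
}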