diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2022-09-07 14:23:22 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-15 19:50:40 +0000 |
commit | ff2fffdf496ac1bc039cd8c84024cc6159cf80b6 (patch) | |
tree | ba11c4a785f01c2c8692ba234e4745347fa25373 /src/mongo/db | |
parent | 84f7412daaf4c44c4fd325230076101e0d95c05f (diff) | |
download | mongo-ff2fffdf496ac1bc039cd8c84024cc6159cf80b6.tar.gz |
SERVER-69001: Have initial sync persist its last oplog time into the minvalid document.
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 96 |
1 file changed, 53 insertions, 43 deletions
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index 4c28e28fd56..fae8ee042e0 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -1400,53 +1400,63 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForStopTimestamp( std::shared_ptr<OnCompletionGuard> onCompletionGuard) { OpTimeAndWallTime resultOpTimeAndWallTime = {OpTime(), Date_t()}; { - stdx::lock_guard<Latch> lock(_mutex); - auto status = _checkForShutdownAndConvertStatus_inlock( - result.getStatus(), "error fetching last oplog entry for stop timestamp"); - if (_shouldRetryError(lock, status)) { - auto scheduleStatus = - (*_attemptExec) - ->scheduleWork([this, - onCompletionGuard](executor::TaskExecutor::CallbackArgs args) { - // It is not valid to schedule the retry from within this callback, - // hence we schedule a lambda to schedule the retry. - stdx::lock_guard<Latch> lock(_mutex); - // Since the stopTimestamp is retrieved after we have done all the work of - // retrieving collection data, we handle retries within this class by - // retrying for 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 - // hours). This is the same retry strategy used when retrieving collection - // data, and avoids retrieving all the data and then throwing it away due to - // a transient network outage. 
- auto status = _scheduleLastOplogEntryFetcher_inlock( - [=](const StatusWith<mongo::Fetcher::QueryResponse>& status, - mongo::Fetcher::NextAction*, - mongo::BSONObjBuilder*) { - _lastOplogEntryFetcherCallbackForStopTimestamp(status, - onCompletionGuard); - }, - kInitialSyncerHandlesRetries); - if (!status.isOK()) { - onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); - } - }); - if (scheduleStatus.isOK()) + { + stdx::lock_guard<Latch> lock(_mutex); + auto status = _checkForShutdownAndConvertStatus_inlock( + result.getStatus(), "error fetching last oplog entry for stop timestamp"); + if (_shouldRetryError(lock, status)) { + auto scheduleStatus = + (*_attemptExec) + ->scheduleWork( + [this, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) { + // It is not valid to schedule the retry from within this callback, + // hence we schedule a lambda to schedule the retry. + stdx::lock_guard<Latch> lock(_mutex); + // Since the stopTimestamp is retrieved after we have done all the + // work of retrieving collection data, we handle retries within this + // class by retrying for + // 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 hours). + // This is the same retry strategy used when retrieving collection + // data, and avoids retrieving all the data and then throwing it + // away due to a transient network outage. + auto status = _scheduleLastOplogEntryFetcher_inlock( + [=](const StatusWith<mongo::Fetcher::QueryResponse>& status, + mongo::Fetcher::NextAction*, + mongo::BSONObjBuilder*) { + _lastOplogEntryFetcherCallbackForStopTimestamp( + status, onCompletionGuard); + }, + kInitialSyncerHandlesRetries); + if (!status.isOK()) { + onCompletionGuard->setResultAndCancelRemainingWork_inlock( + lock, status); + } + }); + if (scheduleStatus.isOK()) + return; + // If scheduling failed, we're shutting down and cannot retry. + // So just continue with the original failed status. 
+ } + if (!status.isOK()) { + onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); return; - // If scheduling failed, we're shutting down and cannot retry. - // So just continue with the original failed status. - } - if (!status.isOK()) { - onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); - return; - } + } - auto&& optimeStatus = parseOpTimeAndWallTime(result); - if (!optimeStatus.isOK()) { - onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, - optimeStatus.getStatus()); - return; + auto&& optimeStatus = parseOpTimeAndWallTime(result); + if (!optimeStatus.isOK()) { + onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, + optimeStatus.getStatus()); + return; + } + resultOpTimeAndWallTime = optimeStatus.getValue(); } - resultOpTimeAndWallTime = optimeStatus.getValue(); + // Release the _mutex to write to disk. + auto opCtx = makeOpCtx(); + _replicationProcess->getConsistencyMarkers()->setMinValid( + opCtx.get(), resultOpTimeAndWallTime.opTime, true); + + stdx::lock_guard<Latch> lock(_mutex); _initialSyncState->stopTimestamp = resultOpTimeAndWallTime.opTime.getTimestamp(); // If the beginFetchingTimestamp is different from the stopTimestamp, it indicates that |