author     Daniel Gottlieb <daniel.gottlieb@mongodb.com>     2022-09-07 14:23:22 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-09-15 19:50:40 +0000
commit     ff2fffdf496ac1bc039cd8c84024cc6159cf80b6 (patch)
tree       ba11c4a785f01c2c8692ba234e4745347fa25373 /src/mongo/db
parent     84f7412daaf4c44c4fd325230076101e0d95c05f (diff)
download   mongo-ff2fffdf496ac1bc039cd8c84024cc6159cf80b6.tar.gz
SERVER-69001: Have initial sync persist its last oplog time into the minvalid document.
Diffstat (limited to 'src/mongo/db')
-rw-r--r--  src/mongo/db/repl/initial_syncer.cpp | 96
1 file changed, 53 insertions(+), 43 deletions(-)
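
What the patch does, in short: the stop optime returned by the last-oplog-entry fetcher is still computed and validated under _mutex, but the new code then drops the lock, writes that optime into the minvalid document through _replicationProcess->getConsistencyMarkers()->setMinValid(...), and only reacquires the lock to record _initialSyncState->stopTimestamp. Below is a minimal, self-contained C++ sketch of that "compute under the lock, persist outside it, reacquire to publish" shape. It is not the MongoDB implementation: SyncState, persistMinValid, and onLastOplogEntryFetched are hypothetical stand-ins, and the optime is reduced to a plain integer.

    #include <iostream>
    #include <mutex>
    #include <optional>

    struct SyncState {
        std::optional<long long> stopTimestamp;  // guarded by the mutex passed below
    };

    // Stand-in for the durable write to the minvalid document. Doing this while
    // holding the state mutex would stall other threads on disk I/O, which is
    // what the patch avoids by releasing _mutex first.
    void persistMinValid(long long opTime) {
        std::cout << "persisting minValid opTime=" << opTime << "\n";
    }

    void onLastOplogEntryFetched(std::mutex& mutex, SyncState& state, long long fetchedOpTime) {
        long long resultOpTime = 0;
        {
            // Step 1: validate the fetcher result and copy out what we need
            // while holding the lock (the new inner scope in the diff).
            std::lock_guard<std::mutex> lock(mutex);
            resultOpTime = fetchedOpTime;
        }

        // Step 2: the lock is released here, so the disk write (setMinValid in
        // the real code) does not block other users of the mutex.
        persistMinValid(resultOpTime);

        // Step 3: reacquire the lock and publish the in-memory stop timestamp.
        std::lock_guard<std::mutex> lock(mutex);
        state.stopTimestamp = resultOpTime;
    }

    int main() {
        std::mutex mutex;
        SyncState state;
        onLastOplogEntryFetched(mutex, state, 42);
        std::cout << "stopTimestamp=" << *state.stopTimestamp << "\n";
        return 0;
    }

The design point worth noting is that the error-handling and retry paths in the diff all return while still inside the locked scope, so the disk write only ever happens with a valid, fully parsed optime in hand.
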
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp
index 4c28e28fd56..fae8ee042e0 100644
--- a/src/mongo/db/repl/initial_syncer.cpp
+++ b/src/mongo/db/repl/initial_syncer.cpp
@@ -1400,53 +1400,63 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForStopTimestamp(
std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
OpTimeAndWallTime resultOpTimeAndWallTime = {OpTime(), Date_t()};
{
- stdx::lock_guard<Latch> lock(_mutex);
- auto status = _checkForShutdownAndConvertStatus_inlock(
- result.getStatus(), "error fetching last oplog entry for stop timestamp");
- if (_shouldRetryError(lock, status)) {
- auto scheduleStatus =
- (*_attemptExec)
- ->scheduleWork([this,
- onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
- // It is not valid to schedule the retry from within this callback,
- // hence we schedule a lambda to schedule the retry.
- stdx::lock_guard<Latch> lock(_mutex);
- // Since the stopTimestamp is retrieved after we have done all the work of
- // retrieving collection data, we handle retries within this class by
- // retrying for 'initialSyncTransientErrorRetryPeriodSeconds' (default 24
- // hours). This is the same retry strategy used when retrieving collection
- // data, and avoids retrieving all the data and then throwing it away due to
- // a transient network outage.
- auto status = _scheduleLastOplogEntryFetcher_inlock(
- [=](const StatusWith<mongo::Fetcher::QueryResponse>& status,
- mongo::Fetcher::NextAction*,
- mongo::BSONObjBuilder*) {
- _lastOplogEntryFetcherCallbackForStopTimestamp(status,
- onCompletionGuard);
- },
- kInitialSyncerHandlesRetries);
- if (!status.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
- }
- });
- if (scheduleStatus.isOK())
+ {
+ stdx::lock_guard<Latch> lock(_mutex);
+ auto status = _checkForShutdownAndConvertStatus_inlock(
+ result.getStatus(), "error fetching last oplog entry for stop timestamp");
+ if (_shouldRetryError(lock, status)) {
+ auto scheduleStatus =
+ (*_attemptExec)
+ ->scheduleWork(
+ [this, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
+ // It is not valid to schedule the retry from within this callback,
+ // hence we schedule a lambda to schedule the retry.
+ stdx::lock_guard<Latch> lock(_mutex);
+ // Since the stopTimestamp is retrieved after we have done all the
+ // work of retrieving collection data, we handle retries within this
+ // class by retrying for
+ // 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 hours).
+ // This is the same retry strategy used when retrieving collection
+ // data, and avoids retrieving all the data and then throwing it
+ // away due to a transient network outage.
+ auto status = _scheduleLastOplogEntryFetcher_inlock(
+ [=](const StatusWith<mongo::Fetcher::QueryResponse>& status,
+ mongo::Fetcher::NextAction*,
+ mongo::BSONObjBuilder*) {
+ _lastOplogEntryFetcherCallbackForStopTimestamp(
+ status, onCompletionGuard);
+ },
+ kInitialSyncerHandlesRetries);
+ if (!status.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(
+ lock, status);
+ }
+ });
+ if (scheduleStatus.isOK())
+ return;
+ // If scheduling failed, we're shutting down and cannot retry.
+ // So just continue with the original failed status.
+ }
+ if (!status.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
return;
- // If scheduling failed, we're shutting down and cannot retry.
- // So just continue with the original failed status.
- }
- if (!status.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
- return;
- }
+ }
- auto&& optimeStatus = parseOpTimeAndWallTime(result);
- if (!optimeStatus.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
- optimeStatus.getStatus());
- return;
+ auto&& optimeStatus = parseOpTimeAndWallTime(result);
+ if (!optimeStatus.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
+ optimeStatus.getStatus());
+ return;
+ }
+ resultOpTimeAndWallTime = optimeStatus.getValue();
}
- resultOpTimeAndWallTime = optimeStatus.getValue();
+ // Release the _mutex to write to disk.
+ auto opCtx = makeOpCtx();
+ _replicationProcess->getConsistencyMarkers()->setMinValid(
+ opCtx.get(), resultOpTimeAndWallTime.opTime, true);
+
+ stdx::lock_guard<Latch> lock(_mutex);
_initialSyncState->stopTimestamp = resultOpTimeAndWallTime.opTime.getTimestamp();
// If the beginFetchingTimestamp is different from the stopTimestamp, it indicates that
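
One detail carried over from the old code is the comment "It is not valid to schedule the retry from within this callback, hence we schedule a lambda to schedule the retry": on a transient error, the callback does not call the fetcher again directly; it asks the task executor to run a small lambda later, and that lambda is what schedules the next fetch attempt. The sketch below shows only that shape with a toy single-threaded queue; ToyExecutor, drain, and fetchOnce are hypothetical names, not MongoDB's executor::TaskExecutor API, and the real code additionally bounds retries by 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 hours), which the sketch omits.

    #include <functional>
    #include <iostream>
    #include <queue>

    // Toy stand-in for a task executor: work items are queued and run later.
    struct ToyExecutor {
        std::queue<std::function<void()>> work;
        void scheduleWork(std::function<void()> fn) {
            work.push(std::move(fn));
        }
        void drain() {
            while (!work.empty()) {
                auto fn = std::move(work.front());
                work.pop();
                fn();
            }
        }
    };

    int main() {
        ToyExecutor exec;
        int attempts = 0;

        // The fetch callback never reschedules itself directly; on a transient
        // error it enqueues a lambda whose only job is to schedule the next
        // fetch attempt.
        std::function<void()> fetchOnce = [&]() {
            ++attempts;
            bool transientError = attempts < 3;  // pretend the first two attempts fail
            if (transientError) {
                exec.scheduleWork([&]() { exec.scheduleWork(fetchOnce); });
                return;
            }
            std::cout << "fetch succeeded after " << attempts << " attempts\n";
        };

        exec.scheduleWork(fetchOnce);
        exec.drain();
        return 0;
    }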