diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2022-09-07 14:23:22 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-15 19:50:40 +0000 |
commit | ff2fffdf496ac1bc039cd8c84024cc6159cf80b6 (patch) | |
tree | ba11c4a785f01c2c8692ba234e4745347fa25373 /src/mongo/db | |
parent | 84f7412daaf4c44c4fd325230076101e0d95c05f (diff) | |
download | mongo-ff2fffdf496ac1bc039cd8c84024cc6159cf80b6.tar.gz |
SERVER-69001: Have initial sync persist its last oplog time into the minvalid document.
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 96 |
1 file changed, 53 insertions, 43 deletions
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index 4c28e28fd56..fae8ee042e0 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -1400,53 +1400,63 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForStopTimestamp( std::shared_ptr<OnCompletionGuard> onCompletionGuard) { OpTimeAndWallTime resultOpTimeAndWallTime = {OpTime(), Date_t()}; { - stdx::lock_guard<Latch> lock(_mutex); - auto status = _checkForShutdownAndConvertStatus_inlock( - result.getStatus(), "error fetching last oplog entry for stop timestamp"); - if (_shouldRetryError(lock, status)) { - auto scheduleStatus = - (*_attemptExec) - ->scheduleWork([this, - onCompletionGuard](executor::TaskExecutor::CallbackArgs args) { - // It is not valid to schedule the retry from within this callback, - // hence we schedule a lambda to schedule the retry. - stdx::lock_guard<Latch> lock(_mutex); - // Since the stopTimestamp is retrieved after we have done all the work of - // retrieving collection data, we handle retries within this class by - // retrying for 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 - // hours). This is the same retry strategy used when retrieving collection - // data, and avoids retrieving all the data and then throwing it away due to - // a transient network outage. 
- auto status = _scheduleLastOplogEntryFetcher_inlock( - [=](const StatusWith<mongo::Fetcher::QueryResponse>& status, - mongo::Fetcher::NextAction*, - mongo::BSONObjBuilder*) { - _lastOplogEntryFetcherCallbackForStopTimestamp(status, - onCompletionGuard); - }, - kInitialSyncerHandlesRetries); - if (!status.isOK()) { - onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); - } - }); - if (scheduleStatus.isOK()) + { + stdx::lock_guard<Latch> lock(_mutex); + auto status = _checkForShutdownAndConvertStatus_inlock( + result.getStatus(), "error fetching last oplog entry for stop timestamp"); + if (_shouldRetryError(lock, status)) { + auto scheduleStatus = + (*_attemptExec) + ->scheduleWork( + [this, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) { + // It is not valid to schedule the retry from within this callback, + // hence we schedule a lambda to schedule the retry. + stdx::lock_guard<Latch> lock(_mutex); + // Since the stopTimestamp is retrieved after we have done all the + // work of retrieving collection data, we handle retries within this + // class by retrying for + // 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 hours). + // This is the same retry strategy used when retrieving collection + // data, and avoids retrieving all the data and then throwing it + // away due to a transient network outage. + auto status = _scheduleLastOplogEntryFetcher_inlock( + [=](const StatusWith<mongo::Fetcher::QueryResponse>& status, + mongo::Fetcher::NextAction*, + mongo::BSONObjBuilder*) { + _lastOplogEntryFetcherCallbackForStopTimestamp( + status, onCompletionGuard); + }, + kInitialSyncerHandlesRetries); + if (!status.isOK()) { + onCompletionGuard->setResultAndCancelRemainingWork_inlock( + lock, status); + } + }); + if (scheduleStatus.isOK()) + return; + // If scheduling failed, we're shutting down and cannot retry. + // So just continue with the original failed status. 
+ } + if (!status.isOK()) { + onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); return; - // If scheduling failed, we're shutting down and cannot retry. - // So just continue with the original failed status. - } - if (!status.isOK()) { - onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); - return; - } + } - auto&& optimeStatus = parseOpTimeAndWallTime(result); - if (!optimeStatus.isOK()) { - onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, - optimeStatus.getStatus()); - return; + auto&& optimeStatus = parseOpTimeAndWallTime(result); + if (!optimeStatus.isOK()) { + onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, + optimeStatus.getStatus()); + return; + } + resultOpTimeAndWallTime = optimeStatus.getValue(); } - resultOpTimeAndWallTime = optimeStatus.getValue(); + // Release the _mutex to write to disk. + auto opCtx = makeOpCtx(); + _replicationProcess->getConsistencyMarkers()->setMinValid( + opCtx.get(), resultOpTimeAndWallTime.opTime, true); + + stdx::lock_guard<Latch> lock(_mutex); _initialSyncState->stopTimestamp = resultOpTimeAndWallTime.opTime.getTimestamp(); // If the beginFetchingTimestamp is different from the stopTimestamp, it indicates that |