author     Daniel Gottlieb <daniel.gottlieb@mongodb.com>      2022-09-07 14:23:22 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2022-11-03 20:50:01 +0000
commit     ab5364bc82fba17b2347942aac0a8afff9d4ae23 (patch)
tree       8b5f494f80d0c2570f281053ced742593ddff262 /src
parent     9709ef3d8cbb715196c5609339b06793a4551886 (diff)
download   mongo-ab5364bc82fba17b2347942aac0a8afff9d4ae23.tar.gz
SERVER-69001: Have initial sync persist its last oplog time into the minvalid document.
(cherry picked from commit ff2fffdf496ac1bc039cd8c84024cc6159cf80b6) (cherry picked from commit 14b1ea6d58cf2a2169b2a07268fa2266419703b4)
Diffstat (limited to 'src')
-rw-r--r--  src/mongo/db/repl/initial_syncer.cpp                         96
-rw-r--r--  src/mongo/db/repl/replication_consistency_markers_impl.cpp    7
2 files changed, 57 insertions(+), 46 deletions(-)
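As the commit message says, initial sync now persists the last oplog time it fetched from the sync source into the minValid document before recording it in memory as the stop timestamp. Below is a minimal standalone sketch of that ordering; OpTime, ConsistencyMarkers, and InitialSyncState here are simplified stand-ins invented for illustration, not the real MongoDB classes.

    // Sketch only: persist the fetched optime durably, then publish it as the
    // stop timestamp. Types are hypothetical placeholders, not mongod code.
    #include <cstdint>
    #include <iostream>

    struct OpTime {
        std::uint64_t timestamp = 0;  // stand-in for the oplog timestamp
        std::int64_t term = -1;       // stand-in for the election term
    };

    struct ConsistencyMarkers {
        OpTime minValid;
        // Durably record how far the oplog must be applied for consistency.
        void setMinValid(const OpTime& opTime) { minValid = opTime; }
    };

    struct InitialSyncState {
        std::uint64_t stopTimestamp = 0;
    };

    int main() {
        ConsistencyMarkers markers;
        InitialSyncState state;

        // Pretend this came back from the sync source's last oplog entry.
        OpTime lastFetched{42, 1};

        // New behavior: write the optime into minValid *before* adopting it
        // as the in-memory stop timestamp.
        markers.setMinValid(lastFetched);
        state.stopTimestamp = lastFetched.timestamp;

        std::cout << "minValid ts=" << markers.minValid.timestamp
                  << " stopTimestamp=" << state.stopTimestamp << "\n";
    }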
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp
index e63d48e920c..68d3da1ffe9 100644
--- a/src/mongo/db/repl/initial_syncer.cpp
+++ b/src/mongo/db/repl/initial_syncer.cpp
@@ -1384,53 +1384,63 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForStopTimestamp(
std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
OpTimeAndWallTime resultOpTimeAndWallTime = {OpTime(), Date_t()};
{
- stdx::lock_guard<Latch> lock(_mutex);
- auto status = _checkForShutdownAndConvertStatus_inlock(
- result.getStatus(), "error fetching last oplog entry for stop timestamp");
- if (_shouldRetryError(lock, status)) {
- auto scheduleStatus =
- (*_attemptExec)
- ->scheduleWork([this,
- onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
- // It is not valid to schedule the retry from within this callback,
- // hence we schedule a lambda to schedule the retry.
- stdx::lock_guard<Latch> lock(_mutex);
- // Since the stopTimestamp is retrieved after we have done all the work of
- // retrieving collection data, we handle retries within this class by
- // retrying for 'initialSyncTransientErrorRetryPeriodSeconds' (default 24
- // hours). This is the same retry strategy used when retrieving collection
- // data, and avoids retrieving all the data and then throwing it away due to
- // a transient network outage.
- auto status = _scheduleLastOplogEntryFetcher_inlock(
- [=](const StatusWith<mongo::Fetcher::QueryResponse>& status,
- mongo::Fetcher::NextAction*,
- mongo::BSONObjBuilder*) {
- _lastOplogEntryFetcherCallbackForStopTimestamp(status,
- onCompletionGuard);
- },
- kInitialSyncerHandlesRetries);
- if (!status.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
- }
- });
- if (scheduleStatus.isOK())
+ {
+ stdx::lock_guard<Latch> lock(_mutex);
+ auto status = _checkForShutdownAndConvertStatus_inlock(
+ result.getStatus(), "error fetching last oplog entry for stop timestamp");
+ if (_shouldRetryError(lock, status)) {
+ auto scheduleStatus =
+ (*_attemptExec)
+ ->scheduleWork(
+ [this, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
+ // It is not valid to schedule the retry from within this callback,
+ // hence we schedule a lambda to schedule the retry.
+ stdx::lock_guard<Latch> lock(_mutex);
+ // Since the stopTimestamp is retrieved after we have done all the
+ // work of retrieving collection data, we handle retries within this
+ // class by retrying for
+ // 'initialSyncTransientErrorRetryPeriodSeconds' (default 24 hours).
+ // This is the same retry strategy used when retrieving collection
+ // data, and avoids retrieving all the data and then throwing it
+ // away due to a transient network outage.
+ auto status = _scheduleLastOplogEntryFetcher_inlock(
+ [=](const StatusWith<mongo::Fetcher::QueryResponse>& status,
+ mongo::Fetcher::NextAction*,
+ mongo::BSONObjBuilder*) {
+ _lastOplogEntryFetcherCallbackForStopTimestamp(
+ status, onCompletionGuard);
+ },
+ kInitialSyncerHandlesRetries);
+ if (!status.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(
+ lock, status);
+ }
+ });
+ if (scheduleStatus.isOK())
+ return;
+ // If scheduling failed, we're shutting down and cannot retry.
+ // So just continue with the original failed status.
+ }
+ if (!status.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
return;
- // If scheduling failed, we're shutting down and cannot retry.
- // So just continue with the original failed status.
- }
- if (!status.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
- return;
- }
+ }
- auto&& optimeStatus = parseOpTimeAndWallTime(result);
- if (!optimeStatus.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
- optimeStatus.getStatus());
- return;
+ auto&& optimeStatus = parseOpTimeAndWallTime(result);
+ if (!optimeStatus.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
+ optimeStatus.getStatus());
+ return;
+ }
+ resultOpTimeAndWallTime = optimeStatus.getValue();
}
- resultOpTimeAndWallTime = optimeStatus.getValue();
+ // Release the _mutex to write to disk.
+ auto opCtx = makeOpCtx();
+ _replicationProcess->getConsistencyMarkers()->setMinValid(opCtx.get(),
+ resultOpTimeAndWallTime.opTime);
+
+ stdx::lock_guard<Latch> lock(_mutex);
_initialSyncState->stopTimestamp = resultOpTimeAndWallTime.opTime.getTimestamp();
// If the beginFetchingTimestamp is different from the stopTimestamp, it indicates that
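Most of the hunk above is re-indentation: the existing validation and parsing logic is wrapped in an inner block so that _mutex is released before the new setMinValid() call (a write to disk) and then reacquired to record stopTimestamp. A condensed sketch of that locking pattern follows, with std::mutex standing in for Latch and a stub in place of the consistency-marker write; the member names are illustrative only.

    // Sketch only: scope the lock around in-memory work, drop it for the
    // durable write, reacquire it to publish state.
    #include <mutex>

    struct Sketch {
        std::mutex _mutex;
        unsigned long long _stopTimestamp = 0;

        void persistMinValid(unsigned long long ts) {
            // Placeholder for the durable write done by the consistency markers.
            (void)ts;
        }

        void onLastOplogEntryFetched(unsigned long long fetchedTs) {
            unsigned long long resultTs = 0;
            {
                // Validate and capture the fetch result under _mutex only.
                std::lock_guard<std::mutex> lock(_mutex);
                resultTs = fetchedTs;
            }
            // Release the _mutex to write to disk, mirroring the comment in
            // the diff above.
            persistMinValid(resultTs);

            // Reacquire the lock to publish the in-memory stop timestamp.
            std::lock_guard<std::mutex> lock(_mutex);
            _stopTimestamp = resultTs;
        }
    };

    int main() {
        Sketch s;
        s.onLastOplogEntryFetched(42);
    }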
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
index 91c222a0d97..8b7605620aa 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
@@ -216,15 +216,16 @@ void ReplicationConsistencyMarkersImpl::setMinValid(OperationContext* opCtx,
"Setting minvalid to exactly",
"minValidString"_attr = minValid.toString(),
"minValidBSON"_attr = minValid.toBSON());
+
TimestampedBSONObj update;
update.obj =
BSON("$set" << BSON(MinValidDocument::kMinValidTimestampFieldName
<< minValid.getTimestamp() << MinValidDocument::kMinValidTermFieldName
<< minValid.getTerm()));
- // This method is only used with storage engines that do not support recover to stable
- // timestamp. As a result, their timestamps do not matter.
- invariant(!opCtx->getServiceContext()->getStorageEngine()->supportsRecoverToStableTimestamp());
+ // We do not provide a timestamp when we set the initial sync flag. Initial sync can only
+ // occur right when we start up, and thus there cannot be any checkpoints being taken. This
+ // write should go into the next checkpoint.
update.timestamp = Timestamp();
_updateMinValidDocument(opCtx, update);
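For context, the update built above is a $set on the minValid timestamp and term, applied with an empty Timestamp() so the write is untimestamped and simply becomes part of whatever checkpoint the storage engine takes next. A rough sketch of that semantics with plain structs in place of BSON and TimestampedBSONObj (MinValidDoc, TimestampedUpdate, and applySet are invented names for illustration):

    // Sketch only: ts == 0 models Timestamp(), i.e. an untimestamped write.
    #include <cstdint>
    #include <iostream>

    struct MinValidDoc {
        std::uint64_t minValidTimestamp = 0;
        std::int64_t minValidTerm = -1;
    };

    struct TimestampedUpdate {
        std::uint64_t ts = 0;          // 0 == untimestamped
        std::uint64_t minValidTs = 0;  // value for the $set on the timestamp
        std::int64_t minValidTerm = -1;
    };

    // Apply the "$set"; an untimestamped update just rides into the next
    // checkpoint rather than being pinned to a specific storage timestamp.
    void applySet(MinValidDoc& doc, const TimestampedUpdate& update) {
        doc.minValidTimestamp = update.minValidTs;
        doc.minValidTerm = update.minValidTerm;
    }

    int main() {
        MinValidDoc doc;
        applySet(doc, TimestampedUpdate{/*ts=*/0, /*minValidTs=*/42, /*minValidTerm=*/1});
        std::cout << "minValid: ts=" << doc.minValidTimestamp
                  << " t=" << doc.minValidTerm << "\n";
    }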