diff options
-rw-r--r-- | src/mongo/base/error_codes.yml | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/bgsync.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_fetcher.cpp | 249 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_fetcher.h | 22 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_fetcher_test.cpp | 99 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_source_resolver.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_source_resolver_test.cpp | 4 |
7 files changed, 290 insertions, 123 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml index fb49ced4c25..93f4842e92a 100644 --- a/src/mongo/base/error_codes.yml +++ b/src/mongo/base/error_codes.yml @@ -366,6 +366,10 @@ error_codes: - {code: 313,name: ResumableRangeDeleterDisabled} - {code: 314,name: ObjectIsBusy} + + # The code below is for internal use only and must never be returned in a network response + - {code: 315,name: TooStaleToSyncFromSource} + # Error codes 4000-8999 are reserved. # Non-sequential error codes for compatibility only) diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index a81b0ebbb11..52d8ef2e718 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -338,7 +338,7 @@ void BackgroundSync::_produce() { numSyncSourceSelections.increment(1); - if (syncSourceResp.syncSourceStatus == ErrorCodes::OplogStartMissing) { + if (syncSourceResp.syncSourceStatus == ErrorCodes::TooStaleToSyncFromSource) { // All (accessible) sync sources are too far ahead of us. if (_replCoord->getMemberState().primary()) { LOGV2_WARNING(21115, @@ -560,12 +560,13 @@ void BackgroundSync::_produce() { return; } + Seconds blacklistDuration(60); if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) { // This is bad because it means that our source // has not returned oplog entries in ascending ts order, and they need to be. LOGV2_WARNING( - 21120, "{error}", "Fetcher returned error", "error"_attr = redact(fetcherReturnStatus)); + 21120, "Oplog fetcher returned error", "error"_attr = redact(fetcherReturnStatus)); // Do not blacklist the server here, it will be blacklisted when we try to reuse it, // if it can't return a matching oplog start from the last fetch oplog ts field. return; @@ -581,19 +582,27 @@ void BackgroundSync::_produce() { mongo::sleepmillis(100); } } + } else if (fetcherReturnStatus.code() == ErrorCodes::TooStaleToSyncFromSource) { + LOGV2_WARNING( + 2806800, + "Oplog fetcher discovered we are too stale to sync from sync source. Blacklisting " + "sync source", + "syncSource"_attr = source, + "blacklistDuration"_attr = blacklistDuration); + _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration); } else if (fetcherReturnStatus == ErrorCodes::InvalidBSON) { - Seconds blacklistDuration(60); - LOGV2_WARNING(21121, - "Fetcher got invalid BSON while querying oplog. Blacklisting sync source " - "{syncSource} for {blacklistDuration}.", - "Fetcher got invalid BSON while querying oplog. Blacklisting sync source", - "syncSource"_attr = source, - "blacklistDuration"_attr = blacklistDuration); + LOGV2_WARNING( + 21121, + "Oplog fetcher got invalid BSON while querying oplog. Blacklisting sync source " + "{syncSource} for {blacklistDuration}.", + "Oplog fetcher got invalid BSON while querying oplog. Blacklisting sync source", + "syncSource"_attr = source, + "blacklistDuration"_attr = blacklistDuration); _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration); } else if (!fetcherReturnStatus.isOK()) { LOGV2_WARNING(21122, - "Fetcher stopped querying remote oplog with error: {error}", - "Fetcher stopped querying remote oplog with error", + "Oplog fetcher stopped querying remote oplog with error: {error}", + "Oplog fetcher stopped querying remote oplog with error", "error"_attr = redact(fetcherReturnStatus)); } } diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp index 7426a544de1..37cc0d9d4cd 100644 --- a/src/mongo/db/repl/oplog_fetcher.cpp +++ b/src/mongo/db/repl/oplog_fetcher.cpp @@ -113,105 +113,6 @@ Milliseconds calculateAwaitDataTimeout(const ReplSetConfig& config) { // We never wait longer than 30 seconds. return std::min((config.getElectionTimeoutPeriod() / 2), maximumAwaitDataTimeoutMS); } - -/** - * Checks the first batch of results from query. - * 'documents' are the first batch of results returned from tailing the remote oplog. - * 'lastFetched' optime should be consistent with the predicate in the query. - * 'remoteLastOpApplied' is the last OpTime applied on the sync source. This is optional for - * compatibility with 3.4 servers that do not send OplogQueryMetadata. - * 'requiredRBID' is a RollbackID received when we chose the sync source that we use here to - * guarantee we have not rolled back since we confirmed the sync source had our minValid. - * 'remoteRBID' is a RollbackId for the sync source returned in this oplog query. This is optional - * for compatibility with 3.4 servers that do not send OplogQueryMetadata. - * 'requireFresherSyncSource' is a boolean indicating whether we should require the sync source's - * oplog to be ahead of ours. If false, the sync source's oplog is allowed to be at the same point - * as ours, but still cannot be behind ours. - * - * Returns OplogStartMissing if we cannot find the optime of the last fetched operation in - * the remote oplog. - */ -Status checkRemoteOplogStart(const OplogFetcher::Documents& documents, - OpTime lastFetched, - OpTime remoteLastOpApplied, - int requiredRBID, - int remoteRBID, - bool requireFresherSyncSource) { - // Once we establish our cursor, we need to ensure that our upstream node hasn't rolled back - // since that could cause it to not have our required minValid point. The cursor will be - // killed if the upstream node rolls back so we don't need to keep checking once the cursor - // is established. - if (remoteRBID != requiredRBID) { - return Status(ErrorCodes::InvalidSyncSource, - "Upstream node rolled back after choosing it as a sync source. Choosing " - "new sync source."); - } - - // Sometimes our remoteLastOpApplied may be stale; if we received a document with an - // opTime later than remoteLastApplied, we can assume the remote is at least up to that - // opTime. - if (!documents.empty()) { - const auto docOpTime = OpTime::parseFromOplogEntry(documents.back()); - if (docOpTime.isOK()) { - remoteLastOpApplied = std::max(remoteLastOpApplied, docOpTime.getValue()); - } - } - - // The sync source could be behind us if it rolled back after we selected it. We could have - // failed to detect the rollback if it occurred between sync source selection (when we check the - // candidate is ahead of us) and sync source resolution (when we got 'requiredRBID'). If the - // sync source is now behind us, choose a new sync source to prevent going into rollback. - if (remoteLastOpApplied < lastFetched) { - return Status(ErrorCodes::InvalidSyncSource, - str::stream() - << "Sync source's last applied OpTime " << remoteLastOpApplied.toString() - << " is older than our last fetched OpTime " << lastFetched.toString() - << ". Choosing new sync source."); - } - - // If 'requireFresherSyncSource' is true, we must check that the sync source's - // lastApplied is ahead of us to prevent forming a cycle. Although we check for - // this condition in sync source selection, if an undetected rollback occurred between sync - // source selection and sync source resolution, this condition may no longer hold. - // 'requireFresherSyncSource' is false for initial sync, since no other node can sync off an - // initial syncing node, so we do not need to check for cycles. In addition, it would be - // problematic to check this condition for initial sync, since the 'lastFetched' OpTime will - // almost always equal the 'remoteLastApplied', since we fetch the sync source's last applied - // OpTime to determine where to start our OplogFetcher. - if (requireFresherSyncSource && remoteLastOpApplied <= lastFetched) { - return Status(ErrorCodes::InvalidSyncSource, - str::stream() - << "Sync source must be ahead of me. My last fetched oplog optime: " - << lastFetched.toString() << ", latest oplog optime of sync source: " - << remoteLastOpApplied.toString()); - } - - // At this point we know that our sync source has our minValid and is not behind us, so if our - // history diverges from our sync source's we should prefer its history and roll back ours. - - // Since we checked for rollback and our sync source is ahead of us, an empty batch means that - // we have a higher timestamp on our last fetched OpTime than our sync source's last applied - // OpTime, but a lower term. When this occurs, we must roll back our inconsistent oplog entry. - if (documents.empty()) { - return Status(ErrorCodes::OplogStartMissing, "Received an empty batch from sync source."); - } - - const auto& o = documents.front(); - auto opTimeResult = OpTime::parseFromOplogEntry(o); - if (!opTimeResult.isOK()) { - return Status(ErrorCodes::InvalidBSON, - str::stream() << "our last optime fetched: " << lastFetched.toString() - << ". failed to parse optime from first oplog on source: " - << o.toString() << ": " << opTimeResult.getStatus().toString()); - } - auto opTime = opTimeResult.getValue(); - if (opTime != lastFetched) { - std::string message = str::stream() << "Our last optime fetched: " << lastFetched.toString() - << ". source's GTE: " << opTime.toString(); - return Status(ErrorCodes::OplogStartMissing, message); - } - return Status::OK(); -} } // namespace @@ -749,16 +650,9 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) { } auto oqMetadata = oqMetadataResult.getValue(); - // This lastFetched value is the last OpTime from the previous batch. - auto lastFetched = _getLastOpTimeFetched(); - if (_firstBatch) { - auto status = checkRemoteOplogStart(documents, - lastFetched, - oqMetadata.getLastOpApplied(), - _requiredRBID, - oqMetadata.getRBID(), - _requireFresherSyncSource); + auto status = + _checkRemoteOplogStart(documents, oqMetadata.getLastOpApplied(), oqMetadata.getRBID()); if (!status.isOK()) { // Stop oplog fetcher and execute rollback if necessary. return status; @@ -785,6 +679,9 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) { } } + // This lastFetched value is the last OpTime from the previous batch. + auto lastFetched = _getLastOpTimeFetched(); + auto validateResult = OplogFetcher::validateDocuments( documents, _firstBatch, lastFetched.getTimestamp(), _startingPoint); if (!validateResult.isOK()) { @@ -862,6 +759,142 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) { return Status::OK(); } +Status OplogFetcher::_checkRemoteOplogStart(const OplogFetcher::Documents& documents, + OpTime remoteLastOpApplied, + int remoteRBID) { + using namespace fmt::literals; + + // Once we establish our cursor, we need to ensure that our upstream node hasn't rolled back + // since that could cause it to not have our required minValid point. The cursor will be + // killed if the upstream node rolls back so we don't need to keep checking once the cursor + // is established. + if (remoteRBID != _requiredRBID) { + return Status(ErrorCodes::InvalidSyncSource, + "Upstream node rolled back after choosing it as a sync source. Choosing " + "new sync source."); + } + + // Sometimes our remoteLastOpApplied may be stale; if we received a document with an + // opTime later than remoteLastApplied, we can assume the remote is at least up to that + // opTime. + if (!documents.empty()) { + const auto docOpTime = OpTime::parseFromOplogEntry(documents.back()); + if (docOpTime.isOK()) { + remoteLastOpApplied = std::max(remoteLastOpApplied, docOpTime.getValue()); + } + } + + auto lastFetched = _getLastOpTimeFetched(); + + // The sync source could be behind us if it rolled back after we selected it. We could have + // failed to detect the rollback if it occurred between sync source selection (when we check the + // candidate is ahead of us) and sync source resolution (when we got '_requiredRBID'). If the + // sync source is now behind us, choose a new sync source to prevent going into rollback. + if (remoteLastOpApplied < lastFetched) { + return Status(ErrorCodes::InvalidSyncSource, + "Sync source's last applied OpTime {} is older than our last fetched OpTime " + "{}. Choosing new sync source."_format(remoteLastOpApplied.toString(), + lastFetched.toString())); + } + + // If '_requireFresherSyncSource' is true, we must check that the sync source's + // lastApplied is ahead of us to prevent forming a cycle. Although we check for + // this condition in sync source selection, if an undetected rollback occurred between sync + // source selection and sync source resolution, this condition may no longer hold. + // '_requireFresherSyncSource' is false for initial sync, since no other node can sync off an + // initial syncing node, so we do not need to check for cycles. In addition, it would be + // problematic to check this condition for initial sync, since the 'lastFetched' OpTime will + // almost always equal the 'remoteLastApplied', since we fetch the sync source's last applied + // OpTime to determine where to start our OplogFetcher. + if (_requireFresherSyncSource && remoteLastOpApplied <= lastFetched) { + return Status(ErrorCodes::InvalidSyncSource, + "Sync source must be ahead of me. My last fetched oplog optime: {}, latest " + "oplog optime of sync source: {}"_format(lastFetched.toString(), + remoteLastOpApplied.toString())); + } + + // At this point we know that our sync source has our minValid and is not behind us, so if our + // history diverges from our sync source's we should prefer its history and roll back ours. + + // Since we checked for rollback and our sync source is ahead of us, an empty batch means that + // we have a higher timestamp on our last fetched OpTime than our sync source's last applied + // OpTime, but a lower term. When this occurs, we must roll back our inconsistent oplog entry. + if (documents.empty()) { + return Status(ErrorCodes::OplogStartMissing, "Received an empty batch from sync source."); + } + + const auto& o = documents.front(); + auto opTimeResult = OpTime::parseFromOplogEntry(o); + + if (!opTimeResult.isOK()) { + return Status(ErrorCodes::InvalidBSON, + "our last optime fetched: {}. failed to parse optime from first oplog in " + "batch on source: {}: {}"_format(lastFetched.toString(), + o.toString(), + opTimeResult.getStatus().toString())); + } + auto opTime = opTimeResult.getValue(); + if (opTime != lastFetched) { + Status status = _checkTooStaleToSyncFromSource(lastFetched, opTime); + + // We should never return an OK status here. + invariant(!status.isOK()); + return status; + } + return Status::OK(); +} + +Status OplogFetcher::_checkTooStaleToSyncFromSource(const OpTime lastFetched, + const OpTime firstOpTimeInDocument) { + // Check to see if the sync source's first oplog entry is later than 'lastFetched'. If it is, we + // are too stale to sync from this node. If it isn't, we should go into rollback instead. + BSONObj remoteFirstOplogEntry; + try { + // Query for the first oplog entry in the sync source's oplog. + auto query = Query().sort(BSON("$natural" << 1)); + // Since this function is called after the first batch, the exhaust stream has not been + // started yet. As a result, using the same connection is safe. + remoteFirstOplogEntry = _conn->findOne(_nss.ns(), query); + } catch (DBException& e) { + // If an error occurs with the query, throw an error. + return Status(ErrorCodes::TooStaleToSyncFromSource, e.reason()); + } + + using namespace fmt::literals; + + StatusWith<OpTime> remoteFirstOpTimeResult = OpTime::parseFromOplogEntry(remoteFirstOplogEntry); + if (!remoteFirstOpTimeResult.isOK()) { + return Status( + ErrorCodes::InvalidBSON, + "failed to parse optime from first entry in source's oplog: {}: {}"_format( + remoteFirstOplogEntry.toString(), remoteFirstOpTimeResult.getStatus().toString())); + } + + auto remoteFirstOpTime = remoteFirstOpTimeResult.getValue(); + if (remoteFirstOpTime.isNull()) { + return Status(ErrorCodes::InvalidBSON, + "optime of first entry in source's oplog cannot be null: {}"_format( + remoteFirstOplogEntry.toString())); + } + + // remoteFirstOpTime may come from a very old config, so we cannot compare their terms. + if (lastFetched.getTimestamp() < remoteFirstOpTime.getTimestamp()) { + // We are too stale to sync from our current sync source. + return Status(ErrorCodes::TooStaleToSyncFromSource, + "we are too stale to sync from the sync source's oplog. our last fetched " + "timestamp is earlier than the sync source's first timestamp. our last " + "optime fetched: {}. sync source's first optime: {}"_format( + lastFetched.toString(), remoteFirstOpTime.toString())); + } + + // If we are not too stale to sync from the source, we should go into rollback. + std::string message = + "the sync source's oplog and our oplog have diverged, going into rollback. our last optime " + "fetched: {}. source's GTE: {}"_format(lastFetched.toString(), + firstOpTimeInDocument.toString()); + return Status(ErrorCodes::OplogStartMissing, message); +} + bool OplogFetcher::OplogFetcherRestartDecisionDefault::shouldContinue(OplogFetcher* fetcher, Status status) { if (_numRestarts == _maxRestarts) { diff --git a/src/mongo/db/repl/oplog_fetcher.h b/src/mongo/db/repl/oplog_fetcher.h index 7d66b38c109..0292dc5e7b0 100644 --- a/src/mongo/db/repl/oplog_fetcher.h +++ b/src/mongo/db/repl/oplog_fetcher.h @@ -346,6 +346,28 @@ private: */ Milliseconds _getRetriedFindMaxTime() const; + /** + * Checks the first batch of results from query. + * 'documents' are the first batch of results returned from tailing the remote oplog. + * 'remoteLastOpApplied' is the last OpTime applied on the sync source. + * 'remoteRBID' is a RollbackId for the sync source returned in this oplog query. This is + * optional for compatibility with 3.4 servers that do not send OplogQueryMetadata. + * + * Returns TooStaleToSyncFromSource if we are too stale to sync from our source. + * Returns OplogStartMissing if we should go into rollback. + */ + Status _checkRemoteOplogStart(const OplogFetcher::Documents& documents, + OpTime remoteLastOpApplied, + int remoteRBID); + + /** + * Distinguishes between needing to rollback and being too stale to sync from our sync source. + * This will be called when we check the first batch of results and our last fetched optime does + * not equal the first document in that batch. This function should never return Status::OK(). + */ + Status _checkTooStaleToSyncFromSource(const OpTime lastFetched, + const OpTime firstOpTimeInDocument); + // Protects member data of this OplogFetcher. mutable Mutex _mutex = MONGO_MAKE_LATCH("OplogFetcher::_mutex"); diff --git a/src/mongo/db/repl/oplog_fetcher_test.cpp b/src/mongo/db/repl/oplog_fetcher_test.cpp index 170e506a5d6..bb22b7537fe 100644 --- a/src/mongo/db/repl/oplog_fetcher_test.cpp +++ b/src/mongo/db/repl/oplog_fetcher_test.cpp @@ -951,12 +951,106 @@ TEST_F(OplogFetcherTest, MetadataIsNotProcessedOnBatchThatTriggersRollback) { auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); auto entry = makeNoopOplogEntry(Seconds(456)); + // Set the remote node's first oplog entry to equal to lastFetched. + auto remoteFirstOplogEntry = makeNoopOplogEntry(lastFetched); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + ASSERT_EQUALS( ErrorCodes::OplogStartMissing, processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); + ASSERT_FALSE(dataReplicatorExternalState->metadataWasProcessed); } +TEST_F(OplogFetcherTest, TooStaleToSyncFromSyncSource) { + CursorId cursorId = 22LL; + auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + auto entry = makeNoopOplogEntry(Seconds(456)); + + // Set the remote node's first oplog entry to be later than lastFetched, so we have fallen off + // the sync source's oplog. + auto remoteFirstOplogEntry = makeNoopOplogEntry(Seconds(200)); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + + ASSERT_EQUALS( + ErrorCodes::TooStaleToSyncFromSource, + processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); +} + +TEST_F(OplogFetcherTest, NotTooStaleShouldReturnOplogStartMissing) { + CursorId cursorId = 22LL; + auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + auto entry = makeNoopOplogEntry(Seconds(456)); + + // Set the remote node's first oplog entry to be earlier than lastFetched, so we have not fallen + // off the sync source's oplog. + auto remoteFirstOplogEntry = makeNoopOplogEntry(Seconds(1)); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + + ASSERT_EQUALS( + ErrorCodes::OplogStartMissing, + processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); +} + +TEST_F(OplogFetcherTest, BadRemoteFirstOplogEntryReturnsInvalidBSON) { + CursorId cursorId = 22LL; + auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + auto entry = makeNoopOplogEntry(Seconds(456)); + + // Set the remote node's first oplog entry to be an invalid BSON. + auto remoteFirstOplogEntry = BSON("ok" << false); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + + ASSERT_EQUALS( + ErrorCodes::InvalidBSON, + processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); +} + +TEST_F(OplogFetcherTest, EmptyRemoteFirstOplogEntryReturnsInvalidBSON) { + CursorId cursorId = 22LL; + auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + auto entry = makeNoopOplogEntry(Seconds(456)); + + // Set the remote node's first oplog entry to be an empty BSON. + auto remoteFirstOplogEntry = BSONObj(); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + + ASSERT_EQUALS( + ErrorCodes::InvalidBSON, + processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); +} + +TEST_F(OplogFetcherTest, RemoteFirstOplogEntryWithNullTimestampReturnsInvalidBSON) { + CursorId cursorId = 22LL; + auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + auto entry = makeNoopOplogEntry(Seconds(456)); + + // Set the remote node's first oplog entry to have a null timestamp. + auto remoteFirstOplogEntry = makeNoopOplogEntry(Seconds(0)); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + + ASSERT_EQUALS( + ErrorCodes::InvalidBSON, + processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); +} + +TEST_F(OplogFetcherTest, RemoteFirstOplogEntryWithExtraFieldsReturnsOplogStartMissing) { + CursorId cursorId = 22LL; + auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + auto entry = makeNoopOplogEntry(Seconds(456)); + + // Set the remote node's first oplog entry to include extra fields. + auto remoteFirstOplogEntry = BSON("ts" << Timestamp(1, 0) << "t" << 1 << "extra" + << "field"); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + + // We should have parsed the OpTime correctly and realized that we have not fallen off the sync + // source's oplog, so we should go into rollback. + ASSERT_EQUALS( + ErrorCodes::OplogStartMissing, + processSingleBatch(makeFirstBatch(cursorId, {entry}, {metadataObj}))->getStatus()); +} + TEST_F(OplogFetcherTest, FailingInitialCreateNewCursorNoRetriesShutsDownOplogFetcher) { ASSERT_EQUALS(ErrorCodes::InvalidSyncSource, processSingleBatch(Message())->getStatus()); } @@ -1457,6 +1551,11 @@ TEST_F( CursorId cursorId = 22LL; auto entry = makeNoopOplogEntry({{Seconds(456), 0}, lastFetched.getTerm()}); auto metadataObj = makeOplogBatchMetadata(replSetMetadata, oqMetadata); + + // Set the remote node's first oplog entry to equal to lastFetched. + auto remoteFirstOplogEntry = makeNoopOplogEntry(lastFetched); + _mockServer->insert(nss.ns(), remoteFirstOplogEntry); + ASSERT_EQUALS(ErrorCodes::OplogStartMissing, processSingleBatch(makeFirstBatch(cursorId, {entry}, metadataObj))->getStatus()); } diff --git a/src/mongo/db/repl/sync_source_resolver.cpp b/src/mongo/db/repl/sync_source_resolver.cpp index 84cb3183933..4b4a9a178a2 100644 --- a/src/mongo/db/repl/sync_source_resolver.cpp +++ b/src/mongo/db/repl/sync_source_resolver.cpp @@ -342,7 +342,7 @@ void SyncSourceResolver::_firstOplogEntryFetcherCallback( if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) { // We're too stale to use this sync source. const auto blacklistDuration = kTooStaleBlacklistDuration; - const auto until = _taskExecutor->now() + Minutes(1); + const auto until = _taskExecutor->now() + blacklistDuration; LOGV2(21771, "We are too stale to use {candidate} as a sync source. Blacklisting this sync source " @@ -553,7 +553,7 @@ Status SyncSourceResolver::_chooseAndProbeNextSyncSource(OpTime earliestOpTimeSe } SyncSourceResolverResponse response; - response.syncSourceStatus = {ErrorCodes::OplogStartMissing, "too stale to catch up"}; + response.syncSourceStatus = {ErrorCodes::TooStaleToSyncFromSource, "too stale to catch up"}; response.earliestOpTimeSeen = earliestOpTimeSeen; return _finishCallback(response); } diff --git a/src/mongo/db/repl/sync_source_resolver_test.cpp b/src/mongo/db/repl/sync_source_resolver_test.cpp index 6366392bd3a..1e4380d2fca 100644 --- a/src/mongo/db/repl/sync_source_resolver_test.cpp +++ b/src/mongo/db/repl/sync_source_resolver_test.cpp @@ -428,7 +428,7 @@ TEST_F(SyncSourceResolverTest, } TEST_F(SyncSourceResolverTest, - SyncSourceResolverReturnsOplogStartMissingAndEarliestOpTimeAvailableWhenAllSourcesTooFresh) { + SyncSourceResolverReturnsTooStaleAndEarliestOpTimeAvailableWhenAllSourcesTooFresh) { HostAndPort candidate1("node1", 12345); HostAndPort candidate2("node2", 12345); HostAndPort candidate3("node3", 12345); @@ -448,7 +448,7 @@ TEST_F(SyncSourceResolverTest, _resolver->join(); ASSERT_FALSE(_resolver->isActive()); - ASSERT_EQUALS(ErrorCodes::OplogStartMissing, _response.syncSourceStatus); + ASSERT_EQUALS(ErrorCodes::TooStaleToSyncFromSource, _response.syncSourceStatus); ASSERT_EQUALS(Timestamp(200, 2), _response.earliestOpTimeSeen.getTimestamp()); } |