diff options
| field | value | date |
|---|---|---|
| author | Wenbin Zhu <wenbin.zhu@mongodb.com> | 2021-07-02 22:01:22 +0000 |
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-07-20 23:16:52 +0000 |
| commit | 744675fe9ca246317dc8d68aaeb2e9070c06989c (patch) | |
| tree | aaef49ac9d00f4f49a95942c530ae77e65d7d381 | |
| parent | a395774d97caeaeb9f2019f6430332b152f3314d (diff) | |
| download | mongo-744675fe9ca246317dc8d68aaeb2e9070c06989c.tar.gz | |
SERVER-58258 Fix race between 'replSetGetStatus' request and initial syncer clearing state.
(cherry picked from commit f4860f196c4c7cc335955361865f3511d0c73606)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | jstests/replsets/initial_sync_replSetGetStatus.js | 16 |
| -rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 40 |

2 files changed, 39 insertions, 17 deletions
diff --git a/jstests/replsets/initial_sync_replSetGetStatus.js b/jstests/replsets/initial_sync_replSetGetStatus.js index 64958be9dd7..9b17141112d 100644 --- a/jstests/replsets/initial_sync_replSetGetStatus.js +++ b/jstests/replsets/initial_sync_replSetGetStatus.js @@ -34,8 +34,10 @@ let secondary = replSet.add( {rsConfig: {votes: 0, priority: 0}, setParameter: {'collectionClonerBatchSize': 2}}); secondary.setSecondaryOk(); -var failPointBeforeCopying = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases'); -var failPointBeforeFinish = configureFailPoint(secondary, 'initialSyncHangBeforeFinish'); +const failPointBeforeCopying = + configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases'); +const failPointBeforeFinish = configureFailPoint(secondary, 'initialSyncHangBeforeFinish'); +const failPointAfterFinish = configureFailPoint(secondary, 'initialSyncHangAfterFinish'); let failPointAfterNumDocsCopied = configureFailPoint(secondary, 'initialSyncHangDuringCollectionClone', @@ -151,9 +153,10 @@ assert.eq(endOfCloningRes.initialSyncStatus.approxTotalDataSize, assert.eq(endOfCloningRes.initialSyncStatus.approxTotalBytesCopied, fooCollRes.approxBytesCopied + barCollRes.approxBytesCopied + bytesCopiedAdminDb); -// Let initial sync finish and get into secondary state. failPointBeforeFinish.off(); -replSet.awaitSecondaryNodes(60 * 1000); + +// Wait until the 'initialSync' field has been cleared before issuing 'replSetGetStatus'. +failPointAfterFinish.wait(); // Test that replSetGetStatus returns the correct results after initial sync is finished. res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1})); @@ -162,6 +165,11 @@ assert(!res.initialSyncStatus, assert.commandFailedWithCode(secondary.adminCommand({replSetGetStatus: 1, initialSync: "m"}), ErrorCodes.TypeMismatch); + +// Let initial sync finish and get into secondary state. 
+failPointAfterFinish.off(); +replSet.awaitSecondaryNodes(60 * 1000); + assert.eq(0, secondary.getDB('local')['temp_oplog_buffer'].find().itcount(), "Oplog buffer was not dropped after initial sync"); diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index cab70266214..c56ef6794f0 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -122,6 +122,9 @@ MONGO_FAIL_POINT_DEFINE(initialSyncFassertIfApplyingBatchFails); // Failpoint which causes the initial sync function to hang before stopping the oplog fetcher. MONGO_FAIL_POINT_DEFINE(initialSyncHangBeforeCompletingOplogFetching); +// Failpoint which causes the initial sync function to hang after finishing. +MONGO_FAIL_POINT_DEFINE(initialSyncHangAfterFinish); + // Failpoints for synchronization, shared with cloners. extern FailPoint initialSyncFuzzerSynchronizationPoint1; extern FailPoint initialSyncFuzzerSynchronizationPoint2; @@ -1851,22 +1854,33 @@ void InitialSyncer::_finishCallback(StatusWith<OpTimeAndWallTime> lastApplied, // before InitialSyncer::join() returns. onCompletion = {}; - stdx::lock_guard<Latch> lock(_mutex); - invariant(_state != State::kComplete); - _state = State::kComplete; - _stateCondition.notify_all(); + { + stdx::lock_guard<Latch> lock(_mutex); + invariant(_state != State::kComplete); + _state = State::kComplete; + _stateCondition.notify_all(); - // Clear the initial sync progress after an initial sync attempt has been successfully - // completed. - if (lastApplied.isOK() && !MONGO_unlikely(skipClearInitialSyncState.shouldFail())) { - _initialSyncState.reset(); + // Clear the initial sync progress after an initial sync attempt has been successfully + // completed. + if (lastApplied.isOK() && !MONGO_unlikely(skipClearInitialSyncState.shouldFail())) { + _initialSyncState.reset(); + } + + // Destroy shared references to executors. 
+ _attemptExec = nullptr; + _clonerAttemptExec = nullptr; + _clonerExec = nullptr; + _exec = nullptr; } - // Destroy shared references to executors. - _attemptExec = nullptr; - _clonerAttemptExec = nullptr; - _clonerExec = nullptr; - _exec = nullptr; + if (MONGO_unlikely(initialSyncHangAfterFinish.shouldFail())) { + LOGV2(5825800, + "initial sync finished - initialSyncHangAfterFinish fail point " + "enabled. Blocking until fail point is disabled."); + while (MONGO_unlikely(initialSyncHangAfterFinish.shouldFail()) && !_isShuttingDown()) { + mongo::sleepsecs(1); + } + } } Status InitialSyncer::_scheduleLastOplogEntryFetcher_inlock( |