author    Wenbin Zhu <wenbin.zhu@mongodb.com>              2021-07-02 22:01:22 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-07-20 23:16:52 +0000
commit    744675fe9ca246317dc8d68aaeb2e9070c06989c (patch)
tree      aaef49ac9d00f4f49a95942c530ae77e65d7d381
parent    a395774d97caeaeb9f2019f6430332b152f3314d (diff)
download  mongo-744675fe9ca246317dc8d68aaeb2e9070c06989c.tar.gz
SERVER-58258 Fix race between 'replSetGetStatus' request and initial syncer clearing state.
(cherry picked from commit f4860f196c4c7cc335955361865f3511d0c73606)
-rw-r--r--  jstests/replsets/initial_sync_replSetGetStatus.js  16
-rw-r--r--  src/mongo/db/repl/initial_syncer.cpp               40
2 files changed, 39 insertions, 17 deletions
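
Background for the change below: the test could issue 'replSetGetStatus' between the moment initial sync finished and the moment the syncer cleared '_initialSyncState', so the assertion on the cleared 'initialSync' field raced with teardown. The fix adds an 'initialSyncHangAfterFinish' fail point that parks the syncer after teardown, and scopes the lock so the park happens with '_mutex' already released. A minimal sketch of that shape, with std::mutex standing in for Latch and an atomic standing in for the fail point (all names here are illustrative, not MongoDB APIs):

```cpp
#include <atomic>
#include <chrono>
#include <iostream>
#include <mutex>
#include <optional>
#include <thread>

std::mutex mtx;                              // stand-in for _mutex
std::optional<int> initialSyncState{1};      // stand-in for _initialSyncState
std::atomic<bool> hangAfterFinish{true};     // stand-in for the fail point
std::atomic<bool> shuttingDown{false};

// Shape of the fixed _finishCallback: clear state inside a scoped lock,
// then hang at the fail point only after the lock is released.
void finishCallback() {
    {
        std::lock_guard<std::mutex> lk(mtx);
        initialSyncState.reset();            // teardown happens under the lock
    }                                        // lock released before hanging

    while (hangAfterFinish.load() && !shuttingDown.load()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
}

// Shape of the status read: if finishCallback() hung while still holding
// the lock, this would block for as long as the fail point stays enabled.
bool statusReportsInitialSync() {
    std::lock_guard<std::mutex> lk(mtx);
    return initialSyncState.has_value();
}

int main() {
    std::thread syncer(finishCallback);
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    std::cout << std::boolalpha << statusReportsInitialSync() << '\n';  // false
    hangAfterFinish = false;                 // release the hang
    syncer.join();
}
```

With the syncer parked at the fail point, the cleared state is observable deterministically, which is exactly the window the updated jstest uses for its 'replSetGetStatus' assertions.
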
diff --git a/jstests/replsets/initial_sync_replSetGetStatus.js b/jstests/replsets/initial_sync_replSetGetStatus.js
index 64958be9dd7..9b17141112d 100644
--- a/jstests/replsets/initial_sync_replSetGetStatus.js
+++ b/jstests/replsets/initial_sync_replSetGetStatus.js
@@ -34,8 +34,10 @@ let secondary = replSet.add(
{rsConfig: {votes: 0, priority: 0}, setParameter: {'collectionClonerBatchSize': 2}});
secondary.setSecondaryOk();
-var failPointBeforeCopying = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases');
-var failPointBeforeFinish = configureFailPoint(secondary, 'initialSyncHangBeforeFinish');
+const failPointBeforeCopying =
+ configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases');
+const failPointBeforeFinish = configureFailPoint(secondary, 'initialSyncHangBeforeFinish');
+const failPointAfterFinish = configureFailPoint(secondary, 'initialSyncHangAfterFinish');
let failPointAfterNumDocsCopied =
configureFailPoint(secondary,
'initialSyncHangDuringCollectionClone',
@@ -151,9 +153,10 @@ assert.eq(endOfCloningRes.initialSyncStatus.approxTotalDataSize,
assert.eq(endOfCloningRes.initialSyncStatus.approxTotalBytesCopied,
fooCollRes.approxBytesCopied + barCollRes.approxBytesCopied + bytesCopiedAdminDb);
-// Let initial sync finish and get into secondary state.
failPointBeforeFinish.off();
-replSet.awaitSecondaryNodes(60 * 1000);
+
+// Wait until the 'initialSync' field has been cleared before issuing 'replSetGetStatus'.
+failPointAfterFinish.wait();
// Test that replSetGetStatus returns the correct results after initial sync is finished.
res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
@@ -162,6 +165,11 @@ assert(!res.initialSyncStatus,
assert.commandFailedWithCode(secondary.adminCommand({replSetGetStatus: 1, initialSync: "m"}),
ErrorCodes.TypeMismatch);
+
+// Let initial sync finish and get into secondary state.
+failPointAfterFinish.off();
+replSet.awaitSecondaryNodes(60 * 1000);
+
assert.eq(0,
secondary.getDB('local')['temp_oplog_buffer'].find().itcount(),
"Oplog buffer was not dropped after initial sync");
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp
index cab70266214..c56ef6794f0 100644
--- a/src/mongo/db/repl/initial_syncer.cpp
+++ b/src/mongo/db/repl/initial_syncer.cpp
@@ -122,6 +122,9 @@ MONGO_FAIL_POINT_DEFINE(initialSyncFassertIfApplyingBatchFails);
// Failpoint which causes the initial sync function to hang before stopping the oplog fetcher.
MONGO_FAIL_POINT_DEFINE(initialSyncHangBeforeCompletingOplogFetching);
+// Failpoint which causes the initial sync function to hang after finishing.
+MONGO_FAIL_POINT_DEFINE(initialSyncHangAfterFinish);
+
// Failpoints for synchronization, shared with cloners.
extern FailPoint initialSyncFuzzerSynchronizationPoint1;
extern FailPoint initialSyncFuzzerSynchronizationPoint2;
@@ -1851,22 +1854,33 @@ void InitialSyncer::_finishCallback(StatusWith<OpTimeAndWallTime> lastApplied,
// before InitialSyncer::join() returns.
onCompletion = {};
- stdx::lock_guard<Latch> lock(_mutex);
- invariant(_state != State::kComplete);
- _state = State::kComplete;
- _stateCondition.notify_all();
+ {
+ stdx::lock_guard<Latch> lock(_mutex);
+ invariant(_state != State::kComplete);
+ _state = State::kComplete;
+ _stateCondition.notify_all();
- // Clear the initial sync progress after an initial sync attempt has been successfully
- // completed.
- if (lastApplied.isOK() && !MONGO_unlikely(skipClearInitialSyncState.shouldFail())) {
- _initialSyncState.reset();
+ // Clear the initial sync progress after an initial sync attempt has been successfully
+ // completed.
+ if (lastApplied.isOK() && !MONGO_unlikely(skipClearInitialSyncState.shouldFail())) {
+ _initialSyncState.reset();
+ }
+
+ // Destroy shared references to executors.
+ _attemptExec = nullptr;
+ _clonerAttemptExec = nullptr;
+ _clonerExec = nullptr;
+ _exec = nullptr;
}
- // Destroy shared references to executors.
- _attemptExec = nullptr;
- _clonerAttemptExec = nullptr;
- _clonerExec = nullptr;
- _exec = nullptr;
+ if (MONGO_unlikely(initialSyncHangAfterFinish.shouldFail())) {
+ LOGV2(5825800,
+ "initial sync finished - initialSyncHangAfterFinish fail point "
+ "enabled. Blocking until fail point is disabled.");
+ while (MONGO_unlikely(initialSyncHangAfterFinish.shouldFail()) && !_isShuttingDown()) {
+ mongo::sleepsecs(1);
+ }
+ }
}
Status InitialSyncer::_scheduleLastOplogEntryFetcher_inlock(
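
For completeness, the jstest's 'failPointAfterFinish.wait()' / '.off()' sequence above is a rendezvous: the test blocks until the server thread is parked at the fail point, performs its checks, then releases the hang. A stand-alone sketch of that handshake with plain atomics (illustrative only; MongoDB's real FailPoint mechanism works differently):

```cpp
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

std::atomic<bool> hangAfterFinish{true};   // configureFailPoint(..., enabled)
std::atomic<bool> reachedFailPoint{false}; // lets the driver detect the hang

void serverThread() {
    // ... finish initial sync and clear state (as in the diff above) ...
    reachedFailPoint = true;               // the driver's wait() can return now
    while (hangAfterFinish.load()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
}

int main() {
    std::thread server(serverThread);

    // failPointAfterFinish.wait(): block until the server is parked.
    while (!reachedFailPoint.load()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }

    // Safe window: state is guaranteed cleared and the server cannot proceed.
    std::cout << "run replSetGetStatus assertions here\n";

    hangAfterFinish = false;               // failPointAfterFinish.off()
    server.join();
}
```
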