diff options
author | Samyukta Lanka <samy.lanka@mongodb.com> | 2019-11-12 20:16:46 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-11-12 20:16:46 +0000 |
commit | e13eebf4a091b3725e35312114684518fb7afde5 (patch) | |
tree | 40d47e65b6fca912e5064360bd20d7c988652900 | |
parent | 2d8acdf659431d334b2a18ef2dc19162c8cd416f (diff) | |
download | mongo-e13eebf4a091b3725e35312114684518fb7afde5.tar.gz |
SERVER=44014 Add missing synchronization points to initial sync fuzzer
-rw-r--r-- | jstests/replsets/initial_sync_test_fixture_test.js | 85 | ||||
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 44 |
2 files changed, 105 insertions, 24 deletions
diff --git a/jstests/replsets/initial_sync_test_fixture_test.js b/jstests/replsets/initial_sync_test_fixture_test.js index c112bc58ef7..9d7f57b19c6 100644 --- a/jstests/replsets/initial_sync_test_fixture_test.js +++ b/jstests/replsets/initial_sync_test_fixture_test.js @@ -20,6 +20,28 @@ load("jstests/libs/check_log.js"); load("jstests/replsets/libs/initial_sync_test.js"); /** + * Helper function to check that specific messages appeared or did not appear in the logs. + */ +function checkLogForMsg(node, msg, contains) { + if (contains) { + assert(checkLog.checkContainsOnce(node, msg)); + } else { + assert(!checkLog.checkContainsOnce(node, msg)); + } +} + +/** + * Helper function to check that specific messages appeared or did not appear in the logs. If we + * expect the log message to appear, this will show that the node is paused after getting the + * specified timestamp. + */ +function checkLogForGetTimestampMsg(node, timestampName, timestamp, contains) { + let msg = "Initial Syncer got the " + timestampName + ": { ts: " + timestamp; + + checkLogForMsg(node, msg, contains); +} + +/** * Helper function to check that specific messages appeared or did not appear in the logs. If * the command was listIndexes and we expect the message to appear, we also add the collection * UUID to make sure that it corresponds to the expected collection. @@ -31,11 +53,7 @@ function checkLogForCollectionClonerMsg(node, commandName, dbname, contains, col msg += ": " + collUUID; } - if (contains) { - assert(checkLog.checkContainsOnce(node, msg)); - } else { - assert(!checkLog.checkContainsOnce(node, msg)); - } + checkLogForMsg(node, msg, contains); } /** @@ -62,7 +80,8 @@ assert.commandWorked(db.foo.insert({a: 1})); assert.commandWorked(db.bar.insert({b: 1})); // Prepare a transaction so that we know that step() can restart the secondary even if there is -// a prepared transaction. +// a prepared transaction. The prepareTimestamp will be used as the beginFetchingTimestamp and +// beginApplyingTimestamp during initial sync. const session = primary.startSession({causalConsistency: false}); const sessionDB = session.getDatabase("test"); const sessionColl = sessionDB.getCollection("foo"); @@ -70,11 +89,8 @@ session.startTransaction(); assert.commandWorked(sessionColl.insert({c: 1})); let prepareTimestamp = PrepareHelpers.prepareTransaction(session); -// Do same listDatabases command as CollectionCloner. -const databases = - assert.commandWorked(primary.adminCommand({listDatabases: 1, nameOnly: true})).databases; - -// This step call restarts the secondary and causes it to go into initial sync. +// This step call restarts the secondary and causes it to go into initial sync. It will pause +// initial sync after the node has fetched the defaultBeginFetchingTimestamp. assert(!initialSyncTest.step()); secondary = initialSyncTest.getSecondary(); @@ -84,12 +100,59 @@ secondary.setSlaveOk(); assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}), ErrorCodes.NotMasterOrSecondary); +// Make sure that we see that the node got the defaultBeginFetchingTimestamp, but hasn't gotten the +// beginFetchingTimestamp yet. +checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, true); +checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, false); +checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, false); + +// This step call will resume initial sync and pause it again after the node gets the +// beginFetchingTimestamp from its sync source. +assert(!initialSyncTest.step()); + +// Make sure that we cannot read from this node yet. +assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}), + ErrorCodes.NotMasterOrSecondary); + +// Make sure that we see that the node got the beginFetchingTimestamp, but hasn't gotten the +// beginApplyingTimestamp yet. +checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, false); +checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, true); +checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, false); + +// This step call will resume initial sync and pause it again after the node gets the +// beginApplyingTimestamp from its sync source. +assert(!initialSyncTest.step()); + +// Make sure that we cannot read from this node yet. +assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}), + ErrorCodes.NotMasterOrSecondary); + +// Make sure that we see that the node got the beginApplyingTimestamp, but that we don't see the +// listDatabases call yet. +checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, false); +checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, false); +checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, true); +checkLogForCollectionClonerMsg(secondary, "listDatabases", "admin", false); + +// This step call will resume initial sync and pause it again after the node gets the +// listDatabases result from its sync source. +assert(!initialSyncTest.step()); + +// Make sure that we cannot read from this node yet. +assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}), + ErrorCodes.NotMasterOrSecondary); + // Make sure that we saw the listDatabases call in the log messages, but didn't see any // listCollections or listIndexes call. checkLogForCollectionClonerMsg(secondary, "listDatabases", "admin", true); checkLogForCollectionClonerMsg(secondary, "listCollections", "admin", false); checkLogForCollectionClonerMsg(secondary, "listIndexes", "admin", false); +// Do same listDatabases command as CollectionCloner. +const databases = + assert.commandWorked(primary.adminCommand({listDatabases: 1, nameOnly: true})).databases; + // Iterate over the databases and collections in the same order that the test fixture would so // that we can check the log messages to make sure initial sync is paused as expected. for (let dbObj of databases) { diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index f1afb44ce7f..a1feb89f2d5 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -170,6 +170,21 @@ StatusWith<OpTimeAndWallTime> parseOpTimeAndWallTime(const QueryResponseStatus& return OpTimeAndWallTime::parseOpTimeAndWallTimeFromOplogEntry(docs.front()); } +void pauseAtInitialSyncFuzzerSyncronizationPoints(std::string msg) { + // Set and unset by the InitialSyncTest fixture to cause initial sync to pause so that the + // Initial Sync Fuzzer can run commands on the sync source. + if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint1.shouldFail())) { + log() << msg; + log() << "initialSyncFuzzerSynchronizationPoint1 fail point enabled."; + initialSyncFuzzerSynchronizationPoint1.pauseWhileSet(); + } + + if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint2.shouldFail())) { + log() << "initialSyncFuzzerSynchronizationPoint2 fail point enabled."; + initialSyncFuzzerSynchronizationPoint2.pauseWhileSet(); + } +} + } // namespace InitialSyncer::InitialSyncer( @@ -701,6 +716,10 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForDefaultBeginFetchingOpTime( // beginFetchingTimestamp. const auto& defaultBeginFetchingOpTime = opTimeResult.getValue().opTime; + std::string logMsg = str::stream() << "Initial Syncer got the defaultBeginFetchingTimestamp: " + << defaultBeginFetchingOpTime.toString(); + pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg); + status = _scheduleGetBeginFetchingOpTime_inlock(onCompletionGuard, defaultBeginFetchingOpTime); if (!status.isOK()) { onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status); @@ -794,6 +813,10 @@ void InitialSyncer::_getBeginFetchingOpTimeCallback( } } + std::string logMsg = str::stream() + << "Initial Syncer got the beginFetchingTimestamp: " << beginFetchingOpTime.toString(); + pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg); + if (MONGO_unlikely(initialSyncHangAfterGettingBeginFetchingTimestamp.shouldFail())) { log() << "initialSyncHangAfterGettingBeginFetchingTimestamp fail point enabled."; initialSyncHangAfterGettingBeginFetchingTimestamp.pauseWhileSet(); @@ -833,6 +856,10 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForBeginApplyingTimestamp( const auto& lastOpTime = opTimeResult.getValue().opTime; + std::string logMsg = str::stream() + << "Initial Syncer got the beginApplyingTimestamp: " << lastOpTime.toString(); + pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg); + BSONObjBuilder queryBob; queryBob.append("find", NamespaceString::kServerConfigurationNamespace.coll()); auto filterBob = BSONObjBuilder(queryBob.subobjStart("filter")); @@ -1193,19 +1220,10 @@ void InitialSyncer::_getNextApplierBatchCallback( return; } - // Set and unset by the InitialSyncTest fixture to cause initial sync to pause so that the - // Initial Sync Fuzzer can run commands on the sync source. - if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint1.shouldFail())) { - log() << "Initial Syncer is about to apply the next oplog batch of size: " - << batchResult.getValue().size(); - log() << "initialSyncFuzzerSynchronizationPoint1 fail point enabled."; - initialSyncFuzzerSynchronizationPoint1.pauseWhileSet(); - } - - if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint2.shouldFail())) { - log() << "initialSyncFuzzerSynchronizationPoint2 fail point enabled."; - initialSyncFuzzerSynchronizationPoint2.pauseWhileSet(); - } + std::string logMsg = str::stream() + << "Initial Syncer is about to apply the next oplog batch of size: " + << batchResult.getValue().size(); + pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg); if (MONGO_unlikely(failInitialSyncBeforeApplyingBatch.shouldFail())) { log() << "initial sync - failInitialSyncBeforeApplyingBatch fail point enabled. Pausing" |