summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamyukta Lanka <samy.lanka@mongodb.com>2019-11-12 20:16:46 +0000
committerevergreen <evergreen@mongodb.com>2019-11-12 20:16:46 +0000
commite13eebf4a091b3725e35312114684518fb7afde5 (patch)
tree40d47e65b6fca912e5064360bd20d7c988652900
parent2d8acdf659431d334b2a18ef2dc19162c8cd416f (diff)
downloadmongo-e13eebf4a091b3725e35312114684518fb7afde5.tar.gz
SERVER-44014 Add missing synchronization points to initial sync fuzzer
-rw-r--r--jstests/replsets/initial_sync_test_fixture_test.js85
-rw-r--r--src/mongo/db/repl/initial_syncer.cpp44
2 files changed, 105 insertions, 24 deletions
diff --git a/jstests/replsets/initial_sync_test_fixture_test.js b/jstests/replsets/initial_sync_test_fixture_test.js
index c112bc58ef7..9d7f57b19c6 100644
--- a/jstests/replsets/initial_sync_test_fixture_test.js
+++ b/jstests/replsets/initial_sync_test_fixture_test.js
@@ -20,6 +20,28 @@ load("jstests/libs/check_log.js");
load("jstests/replsets/libs/initial_sync_test.js");
/**
+ * Helper function to check that specific messages appeared or did not appear in the logs.
+ */
+function checkLogForMsg(node, msg, contains) {
+ if (contains) {
+ assert(checkLog.checkContainsOnce(node, msg));
+ } else {
+ assert(!checkLog.checkContainsOnce(node, msg));
+ }
+}
+
+/**
+ * Helper function to check that specific messages appeared or did not appear in the logs. If we
+ * expect the log message to appear, this will show that the node is paused after getting the
+ * specified timestamp.
+ */
+function checkLogForGetTimestampMsg(node, timestampName, timestamp, contains) {
+ let msg = "Initial Syncer got the " + timestampName + ": { ts: " + timestamp;
+
+ checkLogForMsg(node, msg, contains);
+}
+
+/**
* Helper function to check that specific messages appeared or did not appear in the logs. If
* the command was listIndexes and we expect the message to appear, we also add the collection
* UUID to make sure that it corresponds to the expected collection.
@@ -31,11 +53,7 @@ function checkLogForCollectionClonerMsg(node, commandName, dbname, contains, col
msg += ": " + collUUID;
}
- if (contains) {
- assert(checkLog.checkContainsOnce(node, msg));
- } else {
- assert(!checkLog.checkContainsOnce(node, msg));
- }
+ checkLogForMsg(node, msg, contains);
}
/**
@@ -62,7 +80,8 @@ assert.commandWorked(db.foo.insert({a: 1}));
assert.commandWorked(db.bar.insert({b: 1}));
// Prepare a transaction so that we know that step() can restart the secondary even if there is
-// a prepared transaction.
+// a prepared transaction. The prepareTimestamp will be used as the beginFetchingTimestamp and
+// beginApplyingTimestamp during initial sync.
const session = primary.startSession({causalConsistency: false});
const sessionDB = session.getDatabase("test");
const sessionColl = sessionDB.getCollection("foo");
@@ -70,11 +89,8 @@ session.startTransaction();
assert.commandWorked(sessionColl.insert({c: 1}));
let prepareTimestamp = PrepareHelpers.prepareTransaction(session);
-// Do same listDatabases command as CollectionCloner.
-const databases =
- assert.commandWorked(primary.adminCommand({listDatabases: 1, nameOnly: true})).databases;
-
-// This step call restarts the secondary and causes it to go into initial sync.
+// This step call restarts the secondary and causes it to go into initial sync. It will pause
+// initial sync after the node has fetched the defaultBeginFetchingTimestamp.
assert(!initialSyncTest.step());
secondary = initialSyncTest.getSecondary();
@@ -84,12 +100,59 @@ secondary.setSlaveOk();
assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
ErrorCodes.NotMasterOrSecondary);
+// Make sure that we see that the node got the defaultBeginFetchingTimestamp, but hasn't gotten the
+// beginFetchingTimestamp yet.
+checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, true);
+checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, false);
+checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, false);
+
+// This step call will resume initial sync and pause it again after the node gets the
+// beginFetchingTimestamp from its sync source.
+assert(!initialSyncTest.step());
+
+// Make sure that we cannot read from this node yet.
+assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
+ ErrorCodes.NotMasterOrSecondary);
+
+// Make sure that we see that the node got the beginFetchingTimestamp, but hasn't gotten the
+// beginApplyingTimestamp yet.
+checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, false);
+checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, true);
+checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, false);
+
+// This step call will resume initial sync and pause it again after the node gets the
+// beginApplyingTimestamp from its sync source.
+assert(!initialSyncTest.step());
+
+// Make sure that we cannot read from this node yet.
+assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
+ ErrorCodes.NotMasterOrSecondary);
+
+// Make sure that we see that the node got the beginApplyingTimestamp, but that we don't see the
+// listDatabases call yet.
+checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, false);
+checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, false);
+checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, true);
+checkLogForCollectionClonerMsg(secondary, "listDatabases", "admin", false);
+
+// This step call will resume initial sync and pause it again after the node gets the
+// listDatabases result from its sync source.
+assert(!initialSyncTest.step());
+
+// Make sure that we cannot read from this node yet.
+assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
+ ErrorCodes.NotMasterOrSecondary);
+
// Make sure that we saw the listDatabases call in the log messages, but didn't see any
// listCollections or listIndexes call.
checkLogForCollectionClonerMsg(secondary, "listDatabases", "admin", true);
checkLogForCollectionClonerMsg(secondary, "listCollections", "admin", false);
checkLogForCollectionClonerMsg(secondary, "listIndexes", "admin", false);
+// Do same listDatabases command as CollectionCloner.
+const databases =
+ assert.commandWorked(primary.adminCommand({listDatabases: 1, nameOnly: true})).databases;
+
// Iterate over the databases and collections in the same order that the test fixture would so
// that we can check the log messages to make sure initial sync is paused as expected.
for (let dbObj of databases) {
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp
index f1afb44ce7f..a1feb89f2d5 100644
--- a/src/mongo/db/repl/initial_syncer.cpp
+++ b/src/mongo/db/repl/initial_syncer.cpp
@@ -170,6 +170,21 @@ StatusWith<OpTimeAndWallTime> parseOpTimeAndWallTime(const QueryResponseStatus&
return OpTimeAndWallTime::parseOpTimeAndWallTimeFromOplogEntry(docs.front());
}
+void pauseAtInitialSyncFuzzerSyncronizationPoints(std::string msg) {
+ // Set and unset by the InitialSyncTest fixture to cause initial sync to pause so that the
+ // Initial Sync Fuzzer can run commands on the sync source.
+ if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint1.shouldFail())) {
+ log() << msg;
+ log() << "initialSyncFuzzerSynchronizationPoint1 fail point enabled.";
+ initialSyncFuzzerSynchronizationPoint1.pauseWhileSet();
+ }
+
+ if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint2.shouldFail())) {
+ log() << "initialSyncFuzzerSynchronizationPoint2 fail point enabled.";
+ initialSyncFuzzerSynchronizationPoint2.pauseWhileSet();
+ }
+}
+
} // namespace
InitialSyncer::InitialSyncer(
@@ -701,6 +716,10 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForDefaultBeginFetchingOpTime(
// beginFetchingTimestamp.
const auto& defaultBeginFetchingOpTime = opTimeResult.getValue().opTime;
+ std::string logMsg = str::stream() << "Initial Syncer got the defaultBeginFetchingTimestamp: "
+ << defaultBeginFetchingOpTime.toString();
+ pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg);
+
status = _scheduleGetBeginFetchingOpTime_inlock(onCompletionGuard, defaultBeginFetchingOpTime);
if (!status.isOK()) {
onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
@@ -794,6 +813,10 @@ void InitialSyncer::_getBeginFetchingOpTimeCallback(
}
}
+ std::string logMsg = str::stream()
+ << "Initial Syncer got the beginFetchingTimestamp: " << beginFetchingOpTime.toString();
+ pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg);
+
if (MONGO_unlikely(initialSyncHangAfterGettingBeginFetchingTimestamp.shouldFail())) {
log() << "initialSyncHangAfterGettingBeginFetchingTimestamp fail point enabled.";
initialSyncHangAfterGettingBeginFetchingTimestamp.pauseWhileSet();
@@ -833,6 +856,10 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForBeginApplyingTimestamp(
const auto& lastOpTime = opTimeResult.getValue().opTime;
+ std::string logMsg = str::stream()
+ << "Initial Syncer got the beginApplyingTimestamp: " << lastOpTime.toString();
+ pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg);
+
BSONObjBuilder queryBob;
queryBob.append("find", NamespaceString::kServerConfigurationNamespace.coll());
auto filterBob = BSONObjBuilder(queryBob.subobjStart("filter"));
@@ -1193,19 +1220,10 @@ void InitialSyncer::_getNextApplierBatchCallback(
return;
}
- // Set and unset by the InitialSyncTest fixture to cause initial sync to pause so that the
- // Initial Sync Fuzzer can run commands on the sync source.
- if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint1.shouldFail())) {
- log() << "Initial Syncer is about to apply the next oplog batch of size: "
- << batchResult.getValue().size();
- log() << "initialSyncFuzzerSynchronizationPoint1 fail point enabled.";
- initialSyncFuzzerSynchronizationPoint1.pauseWhileSet();
- }
-
- if (MONGO_unlikely(initialSyncFuzzerSynchronizationPoint2.shouldFail())) {
- log() << "initialSyncFuzzerSynchronizationPoint2 fail point enabled.";
- initialSyncFuzzerSynchronizationPoint2.pauseWhileSet();
- }
+ std::string logMsg = str::stream()
+ << "Initial Syncer is about to apply the next oplog batch of size: "
+ << batchResult.getValue().size();
+ pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg);
if (MONGO_unlikely(failInitialSyncBeforeApplyingBatch.shouldFail())) {
log() << "initial sync - failInitialSyncBeforeApplyingBatch fail point enabled. Pausing"