summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenety Goh <benety@mongodb.com>2016-05-18 14:29:12 -0400
committerBenety Goh <benety@mongodb.com>2016-05-20 13:32:28 -0400
commite6e7e099aaf11afb626d2cd95dd1d339433f6e7f (patch)
treee52a55d689183a5b51f682dd2cc9d4b657631ed5
parent99c8710c68d6008428f45f3730846db2e7143d71 (diff)
downloadmongo-e6e7e099aaf11afb626d2cd95dd1d339433f6e7f.tar.gz
SERVER-23308 added initial sync tests for handling failed updates on missing documents
-rw-r--r--jstests/replsets/initial_sync_update_missing_doc1.js67
-rw-r--r--jstests/replsets/initial_sync_update_missing_doc2.js85
-rw-r--r--src/mongo/db/repl/rs_initialsync.cpp11
-rw-r--r--src/mongo/db/repl/sync_tail.cpp12
4 files changed, 175 insertions, 0 deletions
diff --git a/jstests/replsets/initial_sync_update_missing_doc1.js b/jstests/replsets/initial_sync_update_missing_doc1.js
new file mode 100644
index 00000000000..31968e4a345
--- /dev/null
+++ b/jstests/replsets/initial_sync_update_missing_doc1.js
@@ -0,0 +1,67 @@
+/**
+ * Initial sync runs in several phases - the first 3 are as follows:
+ * 1) fetches the last oplog entry (op_start1) on the source;
+ * 2) copies all non-local databases from the source; and
+ * 3) fetches and applies operations from the source after op_start1.
+ *
+ * This test updates and deletes a document on the source between phases 1 and 2. The
+ * secondary will initially fail to apply the update operation in phase 3 and subsequently have
+ * to attempt to check the source for a new copy of the document. The absence of the document on
+ * the source indicates that the secondary is free to ignore the failed update operation.
+ */
+
+(function() {
+ var name = 'initial_sync_update_missing_doc1';
+ var replSet = new ReplSetTest({
+ name: name,
+ nodes: [{}, {rsConfig: {arbiterOnly: true}}],
+ });
+
+ replSet.startSet();
+ replSet.initiate();
+ var primary = replSet.getPrimary();
+
+ var coll = primary.getDB('test').getCollection(name);
+ assert.writeOK(coll.insert({_id: 0, x: 1}));
+
+ // Add a secondary node but make it hang after retrieving the last op on the source
+ // but before copying databases.
+ var secondary = replSet.add();
+ secondary.setSlaveOk();
+
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'}));
+ replSet.reInitiate();
+
+ // Wait for fail point message to be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000);
+ };
+ checkLog(secondary, 'initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled');
+
+ assert.writeOK(coll.update({_id: 0}, {x: 2}, {upsert: false, writeConcern: {w: 1}}));
+ assert.writeOK(coll.remove({_id: 0}, {justOne: true, writeConcern: {w: 1}}));
+
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'}));
+
+ checkLog(secondary, 'update of non-mod failed');
+ checkLog(secondary, 'adding missing object');
+ checkLog(secondary, 'missing object not found on source. presumably deleted later in oplog');
+ checkLog(secondary, 'initial sync done');
+
+ replSet.awaitReplication();
+ assert.eq(0,
+ secondary.getDB('test').getCollection(name).count(),
+ 'collection successfully synced to secondary');
+
+ replSet.stopSet();
+})();
diff --git a/jstests/replsets/initial_sync_update_missing_doc2.js b/jstests/replsets/initial_sync_update_missing_doc2.js
new file mode 100644
index 00000000000..802347a1567
--- /dev/null
+++ b/jstests/replsets/initial_sync_update_missing_doc2.js
@@ -0,0 +1,85 @@
+/**
+ * Initial sync runs in several phases - the first 3 are as follows:
+ * 1) fetches the last oplog entry (op_start1) on the source;
+ * 2) copies all non-local databases from the source; and
+ * 3) fetches and applies operations from the source after op_start1.
+ *
+ * This test updates and deletes a document on the source between phases 1 and 2. The
+ * secondary will initially fail to apply the update operation in phase 3 and subsequently have
+ * to attempt to check the source for a new copy of the document. Before the secondary checks the
+ * source, we insert a new copy of the document on the source so that the secondary can fetch it.
+ */
+
+(function() {
+ var name = 'initial_sync_update_missing_doc2';
+ var replSet = new ReplSetTest({
+ name: name,
+ nodes: [{}, {rsConfig: {arbiterOnly: true}}],
+ });
+
+ replSet.startSet();
+ replSet.initiate();
+ var primary = replSet.getPrimary();
+
+ var coll = primary.getDB('test').getCollection(name);
+ assert.writeOK(coll.insert({_id: 0, x: 1}));
+
+ // Add a secondary node but make it hang after retrieving the last op on the source
+ // but before copying databases.
+ var secondary = replSet.add();
+ secondary.setSlaveOk();
+
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'}));
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeGettingMissingDocument', mode: 'alwaysOn'}));
+ replSet.reInitiate();
+
+ // Wait for fail point message to be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000);
+ };
+ checkLog(secondary, 'initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled');
+
+ assert.writeOK(coll.update({_id: 0}, {x: 2}, {upsert: false, writeConcern: {w: 1}}));
+ assert.writeOK(coll.remove({_id: 0}, {justOne: true, writeConcern: {w: 1}}));
+
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'}));
+
+ checkLog(secondary, 'update of non-mod failed');
+ checkLog(secondary, 'adding missing object');
+
+ checkLog(secondary,
+ 'initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled');
+ var doc = {
+ _id: 0,
+ x: 3
+ };
+ // Re-insert deleted document.
+ assert.writeOK(coll.insert(doc, {writeConcern: {w: 1}}));
+
+ secondary.getDB('test').setLogLevel(1, 'replication');
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeGettingMissingDocument', mode: 'off'}));
+
+ checkLog(secondary, 'inserted missing doc:');
+ secondary.getDB('test').setLogLevel(0, 'replication');
+
+ checkLog(secondary, 'initial sync done');
+
+ replSet.awaitReplication();
+ var coll = secondary.getDB('test').getCollection(name);
+ assert.eq(1, coll.count(), 'collection successfully synced to secondary');
+ assert.eq(doc, coll.findOne(), 'document on secondary matches primary');
+
+ replSet.stopSet();
+})();
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp
index 15a5657eed5..2f31e9a63d7 100644
--- a/src/mongo/db/repl/rs_initialsync.cpp
+++ b/src/mongo/db/repl/rs_initialsync.cpp
@@ -68,6 +68,9 @@ using std::string;
// Failpoint which fails initial sync and leaves on oplog entry in the buffer.
MONGO_FP_DECLARE(failInitSyncWithBufferedEntriesLeft);
+// Failpoint which causes the initial sync function to hang before copying databases.
+MONGO_FP_DECLARE(initialSyncHangBeforeCopyingDatabases);
+
/**
* Truncates the oplog (removes any documents) and resets internal variables that were
* originally initialized or affected by using values from the oplog at startup time. These
@@ -355,6 +358,14 @@ Status _initialSync() {
log() << "initial sync drop all databases";
dropAllDatabasesExceptLocal(&txn);
+ if (MONGO_FAIL_POINT(initialSyncHangBeforeCopyingDatabases)) {
+ log() << "initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled. "
+ "Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(initialSyncHangBeforeCopyingDatabases)) {
+ mongo::sleepsecs(1);
+ }
+ }
+
log() << "initial sync clone all databases";
list<string> dbs = r.conn()->getDatabaseNames();
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 313da81150c..fd98522e69b 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -114,6 +114,10 @@ static ServerStatusMetricField<Counter64> displayOpsApplied("repl.apply.ops", &o
MONGO_FP_DECLARE(rsSyncApplyStop);
+// Failpoint which causes SyncTail::getMissingDoc() to hang before it enters its retry loop for a
+// failed operation.
+MONGO_FP_DECLARE(initialSyncHangBeforeGettingMissingDocument);
+
// Number and time of each ApplyOps worker pool round
static TimerStats applyBatchStats;
static ServerStatusMetricField<TimerStats> displayOpBatchesApplied("repl.apply.batches",
@@ -872,6 +876,14 @@ BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONO
return BSONObj();
}
+ if (MONGO_FAIL_POINT(initialSyncHangBeforeGettingMissingDocument)) {
+ log() << "initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled. "
+ "Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(initialSyncHangBeforeGettingMissingDocument)) {
+ mongo::sleepsecs(1);
+ }
+ }
+
const int retryMax = 3;
for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
if (retryCount != 1) {