diff options
-rw-r--r-- | jstests/replsets/initial_sync_update_missing_doc1.js | 67 | ||||
-rw-r--r-- | jstests/replsets/initial_sync_update_missing_doc2.js | 85 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_initialsync.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_tail.cpp | 12 |
4 files changed, 175 insertions, 0 deletions
diff --git a/jstests/replsets/initial_sync_update_missing_doc1.js b/jstests/replsets/initial_sync_update_missing_doc1.js new file mode 100644 index 00000000000..31968e4a345 --- /dev/null +++ b/jstests/replsets/initial_sync_update_missing_doc1.js @@ -0,0 +1,67 @@ +/** + * Initial sync runs in several phases - the first 3 are as follows: + * 1) fetches the last oplog entry (op_start1) on the source; + * 2) copies all non-local databases from the source; and + * 3) fetches and applies operations from the source after op_start1. + * + * This test updates and deletes a document on the source between phases 1 and 2. The + * secondary will initially fail to apply the update operation in phase 3 and subsequently have + * to attempt to check the source for a new copy of the document. The absence of the document on + * the source indicates that the secondary is free to ignore the failed update operation. + */ + +(function() { + var name = 'initial_sync_update_missing_doc1'; + var replSet = new ReplSetTest({ + name: name, + nodes: [{}, {rsConfig: {arbiterOnly: true}}], + }); + + replSet.startSet(); + replSet.initiate(); + var primary = replSet.getPrimary(); + + var coll = primary.getDB('test').getCollection(name); + assert.writeOK(coll.insert({_id: 0, x: 1})); + + // Add a secondary node but make it hang after retrieving the last op on the source + // but before copying databases. + var secondary = replSet.add(); + secondary.setSlaveOk(); + + assert.commandWorked(secondary.getDB('admin').runCommand( + {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'})); + replSet.reInitiate(); + + // Wait for fail point message to be logged. 
+ var checkLog = function(node, msg) { + assert.soon(function() { + var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log; + for (var i = 0; i < logMessages.length; i++) { + if (logMessages[i].indexOf(msg) != -1) { + return true; + } + } + return false; + }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000); + }; + checkLog(secondary, 'initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled'); + + assert.writeOK(coll.update({_id: 0}, {x: 2}, {upsert: false, writeConcern: {w: 1}})); + assert.writeOK(coll.remove({_id: 0}, {justOne: true, writeConcern: {w: 1}})); + + assert.commandWorked(secondary.getDB('admin').runCommand( + {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'})); + + checkLog(secondary, 'update of non-mod failed'); + checkLog(secondary, 'adding missing object'); + checkLog(secondary, 'missing object not found on source. presumably deleted later in oplog'); + checkLog(secondary, 'initial sync done'); + + replSet.awaitReplication(); + assert.eq(0, + secondary.getDB('test').getCollection(name).count(), + 'collection successfully synced to secondary'); + + replSet.stopSet(); +})(); diff --git a/jstests/replsets/initial_sync_update_missing_doc2.js b/jstests/replsets/initial_sync_update_missing_doc2.js new file mode 100644 index 00000000000..802347a1567 --- /dev/null +++ b/jstests/replsets/initial_sync_update_missing_doc2.js @@ -0,0 +1,85 @@ +/** + * Initial sync runs in several phases - the first 3 are as follows: + * 1) fetches the last oplog entry (op_start1) on the source; + * 2) copies all non-local databases from the source; and + * 3) fetches and applies operations from the source after op_start1. + * + * This test updates and deletes a document on the source between phases 1 and 2. The + * secondary will initially fail to apply the update operation in phase 3 and subsequently have + * to attempt to check the source for a new copy of the document. 
Before the secondary checks the + * source, we insert a new copy of the document on the source so that the secondary can fetch it. + */ + +(function() { + var name = 'initial_sync_update_missing_doc2'; + var replSet = new ReplSetTest({ + name: name, + nodes: [{}, {rsConfig: {arbiterOnly: true}}], + }); + + replSet.startSet(); + replSet.initiate(); + var primary = replSet.getPrimary(); + + var coll = primary.getDB('test').getCollection(name); + assert.writeOK(coll.insert({_id: 0, x: 1})); + + // Add a secondary node but make it hang after retrieving the last op on the source + // but before copying databases. + var secondary = replSet.add(); + secondary.setSlaveOk(); + + assert.commandWorked(secondary.getDB('admin').runCommand( + {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'})); + assert.commandWorked(secondary.getDB('admin').runCommand( + {configureFailPoint: 'initialSyncHangBeforeGettingMissingDocument', mode: 'alwaysOn'})); + replSet.reInitiate(); + + // Wait for fail point message to be logged. 
+ var checkLog = function(node, msg) { + assert.soon(function() { + var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log; + for (var i = 0; i < logMessages.length; i++) { + if (logMessages[i].indexOf(msg) != -1) { + return true; + } + } + return false; + }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000); + }; + checkLog(secondary, 'initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled'); + + assert.writeOK(coll.update({_id: 0}, {x: 2}, {upsert: false, writeConcern: {w: 1}})); + assert.writeOK(coll.remove({_id: 0}, {justOne: true, writeConcern: {w: 1}})); + + assert.commandWorked(secondary.getDB('admin').runCommand( + {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'})); + + checkLog(secondary, 'update of non-mod failed'); + checkLog(secondary, 'adding missing object'); + + checkLog(secondary, + 'initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled'); + var doc = { + _id: 0, + x: 3 + }; + // Re-insert deleted document. 
+ assert.writeOK(coll.insert(doc, {writeConcern: {w: 1}})); + + secondary.getDB('test').setLogLevel(1, 'replication'); + assert.commandWorked(secondary.getDB('admin').runCommand( + {configureFailPoint: 'initialSyncHangBeforeGettingMissingDocument', mode: 'off'})); + + checkLog(secondary, 'inserted missing doc:'); + secondary.getDB('test').setLogLevel(0, 'replication'); + + checkLog(secondary, 'initial sync done'); + + replSet.awaitReplication(); + var coll = secondary.getDB('test').getCollection(name); + assert.eq(1, coll.count(), 'collection successfully synced to secondary'); + assert.eq(doc, coll.findOne(), 'document on secondary matches primary'); + + replSet.stopSet(); +})(); diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp index 15a5657eed5..2f31e9a63d7 100644 --- a/src/mongo/db/repl/rs_initialsync.cpp +++ b/src/mongo/db/repl/rs_initialsync.cpp @@ -68,6 +68,9 @@ using std::string; // Failpoint which fails initial sync and leaves on oplog entry in the buffer. MONGO_FP_DECLARE(failInitSyncWithBufferedEntriesLeft); +// Failpoint which causes the initial sync function to hang before copying databases. +MONGO_FP_DECLARE(initialSyncHangBeforeCopyingDatabases); + /** * Truncates the oplog (removes any documents) and resets internal variables that were * originally initialized or affected by using values from the oplog at startup time. These @@ -355,6 +358,14 @@ Status _initialSync() { log() << "initial sync drop all databases"; dropAllDatabasesExceptLocal(&txn); + if (MONGO_FAIL_POINT(initialSyncHangBeforeCopyingDatabases)) { + log() << "initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled. 
" + "Blocking until fail point is disabled."; + while (MONGO_FAIL_POINT(initialSyncHangBeforeCopyingDatabases)) { + mongo::sleepsecs(1); + } + } + log() << "initial sync clone all databases"; list<string> dbs = r.conn()->getDatabaseNames(); diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 313da81150c..fd98522e69b 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -114,6 +114,10 @@ static ServerStatusMetricField<Counter64> displayOpsApplied("repl.apply.ops", &o MONGO_FP_DECLARE(rsSyncApplyStop); +// Failpoint which causes the initial sync function to hang before calling shouldRetry on a failed +// operation. +MONGO_FP_DECLARE(initialSyncHangBeforeGettingMissingDocument); + // Number and time of each ApplyOps worker pool round static TimerStats applyBatchStats; static ServerStatusMetricField<TimerStats> displayOpBatchesApplied("repl.apply.batches", @@ -872,6 +876,14 @@ BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONO return BSONObj(); } + if (MONGO_FAIL_POINT(initialSyncHangBeforeGettingMissingDocument)) { + log() << "initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled. " + "Blocking until fail point is disabled."; + while (MONGO_FAIL_POINT(initialSyncHangBeforeGettingMissingDocument)) { + mongo::sleepsecs(1); + } + } + const int retryMax = 3; for (int retryCount = 1; retryCount <= retryMax; ++retryCount) { if (retryCount != 1) { |