author     Matthew Russotto <matthew.russotto@10gen.com>      2020-01-24 14:43:40 -0500
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2020-01-27 21:56:56 +0000
commit     6ccdc0ce67811ac9ed4699f9cf006d803720a3d7
tree       207c594efe43fbd972b5071ad07fc1bd0a0877ab /jstests/replsets/initial_sync_survives_restart.js
parent     960e71f038a268cff71f52eb6b16e6f020a7840c
SERVER-42705 Create test that initial sync survives sync source restart
create mode 100644 jstests/replsets/initial_sync_oplog_fetcher_survives_restart.js
create mode 100644 jstests/replsets/initial_sync_survives_restart.js
Diffstat (limited to 'jstests/replsets/initial_sync_survives_restart.js')
-rw-r--r--  jstests/replsets/initial_sync_survives_restart.js  115
1 files changed, 115 insertions, 0 deletions
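
The test below drives each cloner stage through a sync-source restart by pausing the syncing node on server failpoints. A minimal sketch of the failpoint helper pattern it relies on (not part of this commit) is shown here, assuming a connection `secondaryDb` to the syncing node; the helper and failpoint names match those used in the diff (jstests/libs/fail_point_util.js).

    // Sketch only: pause the syncing node just before the CollectionCloner 'query' stage.
    load("jstests/libs/fail_point_util.js");
    const fp = configureFailPoint(
        secondaryDb, "hangBeforeClonerStage", {cloner: "CollectionCloner", stage: "query"});
    fp.wait();  // Block the test until the cloner has actually reached the failpoint.
    // ... restart or otherwise perturb the sync source here ...
    fp.off();   // Release the failpoint so the cloner can retry the stage.
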
diff --git a/jstests/replsets/initial_sync_survives_restart.js b/jstests/replsets/initial_sync_survives_restart.js
new file mode 100644
index 00000000000..fdf8cc6db63
--- /dev/null
+++ b/jstests/replsets/initial_sync_survives_restart.js
@@ -0,0 +1,115 @@
+/**
+ * Tests that initial sync survives a restart during each stage of the cloning process.
+ * @tags: [requires_persistence, requires_fcv_44]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+const testName = "initial_sync_survives_restart";
+const rst = new ReplSetTest({name: testName, nodes: 1});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const primaryDb = primary.getDB("test");
+// Add some data to be cloned.
+assert.commandWorked(primaryDb.test.insert([{a: 1}, {b: 2}, {c: 3}]));
+
+jsTest.log("Adding a new node to the replica set");
+const secondary = rst.add({
+    rsConfig: {priority: 0},
+    setParameter: {
+        'failpoint.initialSyncHangBeforeCopyingDatabases': tojson({mode: 'alwaysOn'}),
+        // This test is specifically testing that the cloners stop, so we turn off the
+        // oplog fetcher to ensure that we don't inadvertently test that instead.
+        'failpoint.hangBeforeStartingOplogFetcher': tojson({mode: 'alwaysOn'}),
+        'numInitialSyncAttempts': 1,
+    }
+});
+rst.reInitiate();
+rst.waitForState(secondary, ReplSetTest.State.STARTUP_2);
+
+function retryStage(rst, {cloner, stage, extraData}) {
+    const nRetries = 2;
+    let primary = rst.getPrimary();
+    const secondary = rst.getSecondary();
+    const secondaryDb = secondary.getDB("test");
+    const failPointData = Object.merge(extraData || {}, {cloner: cloner, stage: stage});
+    // Set us up to stop right before the given stage.
+    const beforeStageFailPoint =
+        configureFailPoint(secondaryDb, "hangBeforeClonerStage", failPointData);
+    // Set us up to stop after the given stage. This will also release the failpoint for the
+    // previous stage, if it was set.
+    const afterStageFailPoint =
+        configureFailPoint(secondaryDb, "hangAfterClonerStage", failPointData);
+    // Release the initial failpoint if set.
+    assert.commandWorked(secondaryDb.adminCommand(
+        {configureFailPoint: "initialSyncHangBeforeCopyingDatabases", mode: "off"}));
+
+    beforeStageFailPoint.wait();
+
+    jsTestLog("Testing restart of sync source in cloner " + cloner + " stage " + stage);
+
+    // We stop the node and wait for it, then start it separately, to avoid the clone completing
+    // before the node actually stops.
+    rst.stop(primary, null, null, {forRestart: true, waitPid: true});
+
+    // Release the syncing node fail point to allow retries while the node is down and restarting.
+    beforeStageFailPoint.off();
+
+    // Make sure some retries happen while the sync source is completely down.
+    let beforeRetryFailPoint = configureFailPoint(
+        secondaryDb, "hangBeforeRetryingClonerStage", failPointData, {skip: nRetries});
+    beforeRetryFailPoint.wait();
+    beforeRetryFailPoint.off();
+
+    // Turning on rsSyncApplyStop prevents the sync source from coming out of RECOVERING,
+    // so we can ensure the syncing node does some retries while the sync source is not ready.
+    const options = {
+        setParameter: {'failpoint.rsSyncApplyStop': tojson({mode: 'alwaysOn'})},
+        waitForConnect: true
+    };
+    primary = rst.start(primary, options, true /* restart */);
+
+    // Wait for the sync source to be in RECOVERING.
+    assert.commandWorked(primary.adminCommand({
+        waitForFailPoint: "rsSyncApplyStop",
+        timesEntered: 1,
+        maxTimeMS: kDefaultWaitForFailPointTimeout
+    }));
+
+    // Make sure some retries happen while the sync source is available and in "RECOVERING".
+    beforeRetryFailPoint = configureFailPoint(
+        secondaryDb, "hangBeforeRetryingClonerStage", failPointData, {skip: nRetries});
+    beforeRetryFailPoint.wait();
+    beforeRetryFailPoint.off();
+
+    // Now let the sync source finish recovering and keep retrying.
+    assert.commandWorked(
+        primary.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}));
+    afterStageFailPoint.wait();
+    jsTestLog("Cloner " + cloner + " stage " + stage + " complete.");
+    return afterStageFailPoint;
+}
+retryStage(rst, {cloner: "AllDatabaseCloner", stage: "connect"});
+retryStage(rst, {cloner: "AllDatabaseCloner", stage: "listDatabases"});
+retryStage(rst,
+           {cloner: "DatabaseCloner", stage: "listCollections", extraData: {database: 'test'}});
+retryStage(rst, {cloner: "CollectionCloner", stage: "count", extraData: {nss: 'test.test'}});
+retryStage(rst, {cloner: "CollectionCloner", stage: "listIndexes", extraData: {nss: 'test.test'}});
+const afterStageFailPoint =
+    retryStage(rst, {cloner: "CollectionCloner", stage: "query", extraData: {nss: 'test.test'}});
+
+jsTestLog("Releasing the oplog fetcher failpoint.");
+assert.commandWorked(secondary.getDB("test").adminCommand(
+    {configureFailPoint: "hangBeforeStartingOplogFetcher", mode: "off"}));
+
+jsTestLog("Releasing the final cloner failpoint.");
+afterStageFailPoint.off();
+jsTestLog("Waiting for initial sync to complete.");
+// Wait for initial sync to complete.
+rst.waitForState(secondary, ReplSetTest.State.SECONDARY);
+rst.stopSet();
+})();
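
As a hedged follow-up (not part of the committed test): once rst.waitForState(secondary, ReplSetTest.State.SECONDARY) returns, one could additionally confirm that the seed documents survived the repeated sync-source restarts by reading them directly from the new secondary, for example:

    // Hypothetical extra check, not in the commit above: verify the cloned data
    // on the new secondary after initial sync completes.
    secondary.setSlaveOk();  // permit reads while the node is in SECONDARY state (shell helper of this era)
    const clonedDocs = secondary.getDB("test").test.find().itcount();
    assert.eq(3, clonedDocs, "expected the three seeded documents after initial sync");
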