author | Matthew Russotto <matthew.russotto@mongodb.com> | 2021-04-29 16:07:24 -0400
---|---|---
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-29 21:04:41 +0000
commit | 8a7e9a21fd0e10ddc1b41345e5bea1a82141061b (patch) |
tree | 66439d7dfb6aebd0649c66483a77fdd475596f45 /jstests/replsets/startup_recovery_for_restore.js |
parent | d9fbb80aa08741261580ebf3473e1b31ed795b7c (diff) |
download | mongo-8a7e9a21fd0e10ddc1b41345e5bea1a82141061b.tar.gz |
SERVER-55766 Introduce an optimized "for restore" startup replication recovery mechanism
Diffstat (limited to 'jstests/replsets/startup_recovery_for_restore.js')
-rw-r--r-- | jstests/replsets/startup_recovery_for_restore.js | 160
1 file changed, 160 insertions, 0 deletions
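The test below drives the new mechanism in two phases: a standalone restart with `startupRecoveryForRestore` (together with `recoverFromOplogAsStandalone` and `takeUnstableCheckpointOnShutdown`) to run the optimized recovery, then an ordinary restart to rejoin the replica set. A minimal sketch of that restart sequence, assuming an initiated `ReplSetTest` (`rst`); the setParameter names are taken from the diff, everything else is illustrative:

```js
// Sketch only: parameter names come from the test below; the ReplSetTest
// instance (rst) and choice of node are assumed.
let restoreNode = rst.getSecondaries()[0];

// Phase 1: restart standalone so startup recovery runs in the optimized
// "for restore" mode. History before the recovery point is not preserved.
restoreNode = rst.restart(restoreNode, {
    noReplSet: true,
    setParameter: {
        startupRecoveryForRestore: true,
        recoverFromOplogAsStandalone: true,
        takeUnstableCheckpointOnShutdown: true
    }
});

// Phase 2: restart as an ordinary replica-set member and let it catch up.
restoreNode = rst.restart(restoreNode, {noReplSet: false});
rst.awaitSecondaryNodes(undefined, [restoreNode]);
```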
diff --git a/jstests/replsets/startup_recovery_for_restore.js b/jstests/replsets/startup_recovery_for_restore.js
new file mode 100644
index 00000000000..bd5b5a98db7
--- /dev/null
+++ b/jstests/replsets/startup_recovery_for_restore.js
@@ -0,0 +1,160 @@
+/*
+ * Tests that we can recover from a node with a lagged stable timestamp using the special
+ * "for restore" mode, but not read from older points-in-time on the recovered node.
+ *
+ * This test only makes sense for storage engines that support recover to stable timestamp.
+ * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication,
+ * requires_majority_read_concern, uses_transactions, uses_prepare_transaction,
+ * # We don't expect to do this while upgrading.
+ * multiversion_incompatible]
+ */
+
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+
+const dbName = TestData.testName;
+
+const logLevel = tojson({storage: {recovery: 2}});
+
+const rst = new ReplSetTest({
+    nodes: [{}, {}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
+    settings: {chainingAllowed: false}
+});
+
+const startParams = {
+    logComponentVerbosity: logLevel,
+    replBatchLimitOperations: 100
+};
+const nodes = rst.startSet({setParameter: startParams});
+let restoreNode = nodes[1];
+rst.initiateWithHighElectionTimeout();
+const primary = rst.getPrimary();
+const db = primary.getDB(dbName);
+const collName = "testcoll";
+const sentinelCollName = "sentinelcoll";
+const coll = db[collName];
+const paddingStr = "XXXXXXXXX";
+
+// Pre-load some documents.
+const nPreDocs = 2;
+coll.insert([{_id: "pre1"}, {_id: "pre2"}]);
+rst.awaitReplication();
+
+const holdOpTime = assert.commandWorked(db.runCommand({find: collName, limit: 1})).operationTime;
+
+// Keep the stable timestamp from moving on the node we're going to restart in restore mode.
+assert.commandWorked(restoreNode.adminCommand({
+    configureFailPoint: 'holdStableTimestampAtSpecificTimestamp',
+    mode: 'alwaysOn',
+    data: {"timestamp": holdOpTime}
+}));
+
+// Insert a bunch of documents.
+let bulk = coll.initializeUnorderedBulkOp();
+const nDocs = 1000;
+jsTestLog("Inserting " + nDocs + " documents with snapshotting disabled on one node.");
+for (let id = 1; id <= nDocs; id++) {
+    bulk.insert({_id: id, paddingStr: paddingStr});
+}
+bulk.execute();
+rst.awaitReplication();
+
+jsTestLog("Stopping replication on secondaries to hold back majority commit point.");
+let stopReplProducer2 = configureFailPoint(nodes[2], 'stopReplProducer');
+let stopReplProducer3 = configureFailPoint(nodes[3], 'stopReplProducer');
+
+jsTestLog("Writing first sentinel document.");
+const sentinel1Timestamp =
+    assert.commandWorked(db.runCommand({insert: sentinelCollName, documents: [{_id: "s1"}]}))
+        .operationTime;
+
+const nExtraDocs = 50;
+jsTestLog("Inserting " + nExtraDocs + " documents with majority point held back.");
+bulk = coll.initializeUnorderedBulkOp();
+for (let id = 1; id <= nExtraDocs; id++) {
+    bulk.insert({_id: (id + nDocs), paddingStr: paddingStr});
+}
+bulk.execute();
+const lastId = nDocs + nExtraDocs;
+
+const penultimateOpTime =
+    assert.commandWorked(db.runCommand({find: collName, limit: 1})).operationTime;
+
+const sentinel2Timestamp =
+    assert.commandWorked(db.runCommand({insert: sentinelCollName, documents: [{_id: "s2"}]}))
+        .operationTime;
+
+rst.awaitReplication(undefined, undefined, [restoreNode]);
+
+jsTestLog("Restarting restore node with the --startupRecoveryForRestore flag");
+restoreNode = rst.restart(restoreNode, {
+    noReplSet: true,
+    setParameter: Object.merge(startParams, {
+        startupRecoveryForRestore: true,
+        recoverFromOplogAsStandalone: true,
+        takeUnstableCheckpointOnShutdown: true
+    })
+});
+// Make sure we can read something after standalone recovery.
+assert.eq(2, restoreNode.getDB(dbName)[sentinelCollName].find({}).itcount());
+
+jsTestLog("Restarting restore node again, in repl set mode");
+restoreNode = rst.restart(restoreNode, {noReplSet: false, setParameter: startParams});
+
+rst.awaitSecondaryNodes(undefined, [restoreNode]);
+jsTestLog("Finished restarting restore node");
+
+const restoreDb = restoreNode.getDB(dbName);
+
+jsTestLog("Checking restore node untimestamped read.");
+// Basic test: should see all docs with untimestamped read.
+assert.eq(nPreDocs + nDocs + nExtraDocs, coll.find().itcount());
+assert.eq(nPreDocs + nDocs + nExtraDocs, restoreDb[collName].find().itcount());
+
+// For the remaining checks we step up the restored node so we can do atClusterTime reads on it.
+// They are necessarily speculative because we are preventing majority optimes from advancing.
+
+jsTestLog("Stepping up restore node");
+rst.stepUp(restoreNode, {awaitReplicationBeforeStepUp: false});
+
+// Should also be able to read at the final sentinel optime on restore node.
+const restoreNodeSession = restoreNode.startSession({causalConsistency: false});
+restoreNodeSession.startTransaction(
+    {readConcern: {level: "snapshot", atClusterTime: sentinel2Timestamp}});
+const restoreNodeSessionDb = restoreNodeSession.getDatabase(dbName);
+jsTestLog("Checking top-of-oplog read works on restored node.");
+
+let res = assert.commandWorked(
+    restoreNodeSessionDb.runCommand({find: collName, filter: {"_id": lastId}}));
+assert.eq(1, res.cursor.firstBatch.length);
+assert.docEq({_id: lastId, paddingStr: paddingStr}, res.cursor.firstBatch[0]);
+
+// Must abort because majority is not advancing.
+restoreNodeSession.abortTransaction();
+
+// Should NOT be able to read at the first sentinel optime on the restore node.
+restoreNodeSession.startTransaction(
+    {readConcern: {level: "snapshot", atClusterTime: sentinel1Timestamp}});
+jsTestLog(
+    "Checking restore node majority optime read, which should fail, because the restore node does not have that history.");
+res = assert.commandFailedWithCode(
+    restoreNodeSessionDb.runCommand({find: collName, filter: {"_id": {"$gte": nDocs}}}),
+    ErrorCodes.SnapshotTooOld);
+restoreNodeSession.abortTransaction();
+
+// Should NOT be able to read at the penultimate optime on the restore node either.
+jsTestLog(
+    "Checking restore node top-of-oplog minus 1 read, which should fail, because the restore node does not have that history.");
+restoreNodeSession.startTransaction(
+    {readConcern: {level: "snapshot", atClusterTime: penultimateOpTime}});
+res = assert.commandFailedWithCode(
+    restoreNodeSessionDb.runCommand({find: collName, filter: {"_id": lastId}}),
+    ErrorCodes.SnapshotTooOld);
+restoreNodeSession.abortTransaction();
+
+// Allow set to become current and shut down with ordinary dbHash verification.
+stopReplProducer2.off();
+stopReplProducer3.off();
+rst.stopSet();
+})();
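To run the new test locally, a resmoke invocation along these lines should work (the suite name is an assumption; any replica-set suite satisfying the test's tags would do):

```sh
python buildscripts/resmoke.py run --suites=replica_sets \
    jstests/replsets/startup_recovery_for_restore.js
```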