path: root/jstests/replsets/startup_recovery_for_restore.js
author     Matthew Russotto <matthew.russotto@mongodb.com>    2021-04-29 16:07:24 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2021-04-29 21:04:41 +0000
commit     8a7e9a21fd0e10ddc1b41345e5bea1a82141061b (patch)
tree       66439d7dfb6aebd0649c66483a77fdd475596f45 /jstests/replsets/startup_recovery_for_restore.js
parent     d9fbb80aa08741261580ebf3473e1b31ed795b7c (diff)
download   mongo-8a7e9a21fd0e10ddc1b41345e5bea1a82141061b.tar.gz
SERVER-55766 Introduce an optimized "for restore" startup replication recovery mechanism
Diffstat (limited to 'jstests/replsets/startup_recovery_for_restore.js')
-rw-r--r--  jstests/replsets/startup_recovery_for_restore.js  160
1 file changed, 160 insertions, 0 deletions
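
SERVER-55766 adds a startupRecoveryForRestore server parameter: when a node's data files come from a backup whose last stable checkpoint lags far behind its oplog, startup recovery can replay the oplog in an optimized mode that does not retain history from before the top of the oplog. A minimal, hypothetical sketch of how the three parameters exercised by this test would be combined in the shell, outside of ReplSetTest (the MongoRunner call and the dbpath are illustrative assumptions, not part of this commit):

// Illustrative sketch: run the "for restore" recovery as a standalone pass over a data
// directory copied from a backup, then shut down so the node can be restarted normally.
const conn = MongoRunner.runMongod({
    dbpath: "/path/to/restored/dbpath",  // assumption: data files copied from a backup
    noCleanData: true,                   // keep the existing data files
    setParameter: {
        startupRecoveryForRestore: true,
        recoverFromOplogAsStandalone: true,
        takeUnstableCheckpointOnShutdown: true,
    },
});
MongoRunner.stopMongod(conn);
// The node would then be restarted with its replica set configuration, much as the test
// below does via rst.restart(restoreNode, {noReplSet: false, ...}).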
diff --git a/jstests/replsets/startup_recovery_for_restore.js b/jstests/replsets/startup_recovery_for_restore.js
new file mode 100644
index 00000000000..bd5b5a98db7
--- /dev/null
+++ b/jstests/replsets/startup_recovery_for_restore.js
@@ -0,0 +1,160 @@
+/*
+ * Tests that we can recover from a node with a lagged stable timestamp using the special
+ * "for restore" mode, but not read from older points-in-time on the recovered node.
+ *
+ * This test only makes sense for storage engines that support recover to stable timestamp.
+ * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication,
+ * requires_majority_read_concern, uses_transactions, uses_prepare_transaction,
+ * # We don't expect to do this while upgrading.
+ * multiversion_incompatible]
+ */
+
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+
+const dbName = TestData.testName;
+
+const logLevel = tojson({storage: {recovery: 2}});
+
+const rst = new ReplSetTest({
+ nodes: [{}, {}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
+ settings: {chainingAllowed: false}
+});
+
+const startParams = {
+ logComponentVerbosity: logLevel,
+ replBatchLimitOperations: 100
+};
+const nodes = rst.startSet({setParameter: startParams});
+let restoreNode = nodes[1];
+rst.initiateWithHighElectionTimeout();
+const primary = rst.getPrimary();
+const db = primary.getDB(dbName);
+const collName = "testcoll";
+const sentinelCollName = "sentinelcoll";
+const coll = db[collName];
+const paddingStr = "XXXXXXXXX";
+
+// Pre-load some documents.
+const nPreDocs = 2;
+coll.insert([{_id: "pre1"}, {_id: "pre2"}]);
+rst.awaitReplication();
+
+const holdOpTime = assert.commandWorked(db.runCommand({find: collName, limit: 1})).operationTime;
+
+// Keep the stable timestamp from moving on the node we're going to restart in restore mode.
+assert.commandWorked(restoreNode.adminCommand({
+ configureFailPoint: 'holdStableTimestampAtSpecificTimestamp',
+ mode: 'alwaysOn',
+ data: {"timestamp": holdOpTime}
+}));
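+// With the stable timestamp pinned at holdOpTime, this node cannot take newer stable
+// checkpoints, so when it restarts its recovery must replay everything written after that
+// point, just as a node restored from an old backup would.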
+
+// Insert a bunch of documents.
+let bulk = coll.initializeUnorderedBulkOp();
+const nDocs = 1000;
+jsTestLog("Inserting " + nDocs + " documents with snapshotting disabled on one node.");
+for (let id = 1; id <= nDocs; id++) {
+ bulk.insert({_id: id, paddingStr: paddingStr});
+}
+bulk.execute();
+rst.awaitReplication();
+
+jsTestLog("Stopping replication on secondaries to hold back majority commit point.");
+let stopReplProducer2 = configureFailPoint(nodes[2], 'stopReplProducer');
+let stopReplProducer3 = configureFailPoint(nodes[3], 'stopReplProducer');
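+// With the oplog fetchers stopped on two of the four voting nodes, writes can no longer be
+// acknowledged by a majority (3 of 4), so the majority commit point stays behind the
+// documents inserted below.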
+
+jsTestLog("Writing first sentinel document.");
+const sentinel1Timestamp =
+ assert.commandWorked(db.runCommand({insert: sentinelCollName, documents: [{_id: "s1"}]}))
+ .operationTime;
+
+const nExtraDocs = 50;
+jsTestLog("Inserting " + nExtraDocs + " documents with majority point held back.");
+bulk = coll.initializeUnorderedBulkOp();
+for (let id = 1; id <= nExtraDocs; id++) {
+ bulk.insert({_id: (id + nDocs), paddingStr: paddingStr});
+}
+bulk.execute();
+const lastId = nDocs + nExtraDocs;
+
+const penultimateOpTime =
+ assert.commandWorked(db.runCommand({find: collName, limit: 1})).operationTime;
+
+const sentinel2Timestamp =
+ assert.commandWorked(db.runCommand({insert: sentinelCollName, documents: [{_id: "s2"}]}))
+ .operationTime;
+
+rst.awaitReplication(undefined, undefined, [restoreNode]);
+
+jsTestLog("Restarting restore node with the --startupRecoveryForRestore flag");
+restoreNode = rst.restart(restoreNode, {
+ noReplSet: true,
+ setParameter: Object.merge(startParams, {
+ startupRecoveryForRestore: true,
+ recoverFromOplogAsStandalone: true,
+ takeUnstableCheckpointOnShutdown: true
+ })
+});
+// Make sure we can read something after standalone recovery.
+assert.eq(2, restoreNode.getDB(dbName)[sentinelCollName].find({}).itcount());
+
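+// The standalone pass above only performs recovery; to rejoin the set the node needs a
+// second, ordinary restart with its replica set configuration.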
+jsTestLog("Restarting restore node again, in repl set mode");
+restoreNode = rst.restart(restoreNode, {noReplSet: false, setParameter: startParams});
+
+rst.awaitSecondaryNodes(undefined, [restoreNode]);
+jsTestLog("Finished restarting restore node");
+
+const restoreDb = restoreNode.getDB(dbName);
+
+jsTestLog("Checking restore node untimestamped read.");
+// Basic test: should see all docs with untimestamped read.
+assert.eq(nPreDocs + nDocs + nExtraDocs, coll.find().itcount());
+assert.eq(nPreDocs + nDocs + nExtraDocs, restoreDb[collName].find().itcount());
+
+// For the remaining checks we step up the restored node so we can do atClusterTime reads on it.
+// They are necessarily speculative because we are preventing majority optimes from advancing.
+
+jsTestLog("Stepping up restore node");
+rst.stepUp(restoreNode, {awaitReplicationBeforeStepUp: false});
+
+// Should also be able to read at the final sentinel optime on restore node.
+const restoreNodeSession = restoreNode.startSession({causalConsistency: false});
+restoreNodeSession.startTransaction(
+ {readConcern: {level: "snapshot", atClusterTime: sentinel2Timestamp}});
+const restoreNodeSessionDb = restoreNodeSession.getDatabase(dbName);
+jsTestLog("Checking top-of-oplog read works on restored node.");
+
+let res = assert.commandWorked(
+ restoreNodeSessionDb.runCommand({find: collName, filter: {"_id": lastId}}));
+assert.eq(1, res.cursor.firstBatch.length);
+assert.docEq({_id: lastId, paddingStr: paddingStr}, res.cursor.firstBatch[0]);
+
+// Must abort rather than commit, because the majority commit point is not advancing.
+restoreNodeSession.abortTransaction();
+
+// Should NOT be able to read at the first sentinel optime on the restore node.
+restoreNodeSession.startTransaction(
+ {readConcern: {level: "snapshot", atClusterTime: sentinel1Timestamp}});
+jsTestLog(
+ "Checking restore node majority optime read, which should fail, because the restore node does not have that history.");
+res = assert.commandFailedWithCode(
+ restoreNodeSessionDb.runCommand({find: collName, filter: {"_id": {"$gte": nDocs}}}),
+ ErrorCodes.SnapshotTooOld);
+restoreNodeSession.abortTransaction();
+
+// Should NOT be able to read at the penultimate optime on the restore node either.
+jsTestLog(
+ "Checking restore node top-of-oplog minus 1 read, which should fail, because the restore node does not have that history.");
+restoreNodeSession.startTransaction(
+ {readConcern: {level: "snapshot", atClusterTime: penultimateOpTime}});
+res = assert.commandFailedWithCode(
+ restoreNodeSessionDb.runCommand({find: collName, filter: {"_id": lastId}}),
+ ErrorCodes.SnapshotTooOld);
+restoreNodeSession.abortTransaction();
+
+// Allow the set to catch up, then shut down with the ordinary dbHash consistency checks.
+stopReplProducer2.off();
+stopReplProducer3.off();
+rst.stopSet();
+})();