summaryrefslogtreecommitdiff
path: root/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
diff options
context:
space:
mode:
authorDianna Hohensee <dianna.hohensee@mongodb.com>2020-02-26 13:14:49 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-27 13:54:12 +0000
commit3a59e217a4b34234acbd6a404f98276a87435ee0 (patch)
tree0e0890c6e5bd0db9d962b694acf0f3ee5edb19b5 /jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
parent77c6d1044cff7b113881a1c97f5dca63567fbe81 (diff)
downloadmongo-3a59e217a4b34234acbd6a404f98276a87435ee0.tar.gz
SERVER-41386 Test that the replica set's majority commit point can move forward because of secondaries without primary durable writes
SERVER-41387 Test that oplogTruncateAfterPoint will clear oplog holes during startup recovery after primary crash create mode 100644 jstests/noPassthrough/non_durable_writes_on_primary_can_reach_majority.js create mode 100644 jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
Diffstat (limited to 'jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js')
-rw-r--r--jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js93
1 files changed, 93 insertions, 0 deletions
diff --git a/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js b/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
new file mode 100644
index 00000000000..14d50245f98
--- /dev/null
+++ b/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
@@ -0,0 +1,93 @@
+/**
+ * Test that a confirmed write against a primary is truncated during startup recovery if the primary
+ * crashed while there were oplog holes behind that write.
+ *
+ * There must be more than 1 voting node, otherwise the write concern behavior changes to waiting
+ * for no holes for writes with {j: true} write concern, and no confirmed writes will be truncated.
+ *
+ * @tags: [
+ * # The primary is restarted and must retain its data.
+ * requires_persistence,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+const rst = new ReplSetTest({name: jsTest.name(), nodes: 2});
+rst.startSet();
+// Make sure there are no election timeouts. This should prevent primary stepdown. Normally we would
+// set the secondary node votes to 0, but that would affect the feature that is being tested.
+rst.initiateWithHighElectionTimeout();
+
+const primary = rst.getPrimary();
+const dbName = "testDB";
+const collName = jsTest.name();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB[collName];
+
+assert.commandWorked(primaryDB.createCollection(collName, {writeConcern: {w: "majority"}}));
+
+const failPoint = configureFailPoint(primaryDB,
+ "hangAfterCollectionInserts",
+ {collectionNS: primaryColl.getFullName(), first_id: "b"});
+
+try {
+ // Hold back the durable timestamp by leaving an uncommitted transaction hanging.
+
+ TestData.dbName = dbName;
+ TestData.collName = collName;
+
+ startParallelShell(() => {
+ jsTestLog("Insert a document that will hang before the insert completes.");
+ // Crashing the server while this command is running may cause the parallel shell code to
+ // error and stop executing. We will therefore ignore the result of this command and
+ // parallel shell. Test correctness is guaranteed by waiting for the failpoint this command
+ // hits.
+ db.getSiblingDB(TestData.dbName)[TestData.collName].insert({_id: "b"});
+ }, primary.port);
+
+ jsTest.log("Wait for async insert to hit the failpoint.");
+ failPoint.wait();
+
+ // Execute an insert with confirmation that it made it to disk ({j: true}).
+ //
+ // The primary's durable timestamp should be pinned by the prior hanging uncommitted write. So
+ // this second write will have an oplog hole behind it and will be truncated after a crash.
+ assert.commandWorked(
+ primaryColl.insert({_id: "writeAfterHole"}, {writeConcern: {w: 1, j: true}}));
+
+ const findResult = primaryColl.findOne({_id: "writeAfterHole"});
+ assert.eq(findResult, {"_id": "writeAfterHole"});
+
+ jsTest.log("Force a checkpoint so the primary has data on startup recovery after a crash");
+ assert.commandWorked(primary.adminCommand({fsync: 1}));
+
+ // Crash and restart the primary, which should truncate the second successful write, because
+ // the first write never committed and left a hole in the oplog.
+ rst.stop(primary, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL});
+} catch (error) {
+ // Turn off the failpoint before allowing the test to end, so nothing hangs while the server
+ // shuts down or in post-test hooks.
+ failPoint.off();
+ throw error;
+}
+
+rst.start(primary);
+
+// Wait for the restarted node to complete startup recovery and start accepting user requests.
+// Note: no new primary will be elected because of the high election timeout set on the replica set.
+assert.soonNoExcept(function() {
+ const nodeState = assert.commandWorked(primary.adminCommand("replSetGetStatus")).myState;
+ return nodeState == ReplSetTest.State.SECONDARY;
+});
+
+// Confirm that the write with the oplog hole behind it is now gone (truncated) as expected.
+primary.setSlaveOk();
+const find = primary.getDB(dbName).getCollection(collName).findOne({_id: "writeAfterHole"});
+assert.eq(find, null);
+
+rst.stopSet();
+})();