diff options
author | Dianna Hohensee <dianna.hohensee@mongodb.com> | 2020-02-26 13:14:49 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-02-27 13:54:12 +0000 |
commit | 3a59e217a4b34234acbd6a404f98276a87435ee0 (patch) | |
tree | 0e0890c6e5bd0db9d962b694acf0f3ee5edb19b5 /jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js | |
parent | 77c6d1044cff7b113881a1c97f5dca63567fbe81 (diff) | |
download | mongo-3a59e217a4b34234acbd6a404f98276a87435ee0.tar.gz |
SERVER-41386 Test that the replica set's majority commit point can move forward because of secondaries without primary durable writes
SERVER-41387 Test that oplogTruncateAfterPoint will clear oplog holes during startup recovery
after primary crash
create mode 100644 jstests/noPassthrough/non_durable_writes_on_primary_can_reach_majority.js
create mode 100644 jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
Diffstat (limited to 'jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js')
-rw-r--r-- | jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js | 93 |
1 files changed, 93 insertions, 0 deletions
/**
 * Verifies that a write acknowledged by a primary is truncated during startup recovery when the
 * primary crashed while an oplog hole existed behind that write.
 *
 * The replica set must have more than 1 voting node. Otherwise the write concern behavior changes
 * to waiting for no holes for writes with {j: true} write concern, and no confirmed write would
 * ever be truncated.
 *
 * @tags: [
 *   # The primary is restarted and must retain its data.
 *   requires_persistence,
 * ]
 */

(() => {
"use strict";

load("jstests/libs/fail_point_util.js");

const testName = jsTest.name();
const dbName = "testDB";
const collName = testName;
// _id of the acknowledged write that is expected to disappear after the crash.
const truncatedDocId = "writeAfterHole";
// Signal number handed to ReplSetTest.stop() to crash the node instead of shutting it down cleanly.
const kSigKill = 9;

const replTest = new ReplSetTest({name: testName, nodes: 2});
replTest.startSet();
// A high election timeout should keep the primary from stepping down. Giving the secondary 0 votes
// would be the usual approach, but that would change the behavior under test.
replTest.initiateWithHighElectionTimeout();

const primaryConn = replTest.getPrimary();
const testDB = primaryConn.getDB(dbName);
const testColl = testDB.getCollection(collName);

assert.commandWorked(testDB.createCollection(collName, {writeConcern: {w: "majority"}}));

// Makes the insert of {_id: "b"} into the test collection hang after it has been performed.
const hangInsertFailPoint = configureFailPoint(
    testDB,
    "hangAfterCollectionInserts",
    {collectionNS: testColl.getFullName(), first_id: "b"});

try {
    // Leave an uncommitted insert hanging so that the durable timestamp is held back behind it.
    // The parallel shell function reads its target namespace from TestData.
    TestData.dbName = dbName;
    TestData.collName = collName;

    startParallelShell(function() {
        jsTestLog("Insert a document that will hang before the insert completes.");
        // The server is crashed while this insert is still in flight, which may make the parallel
        // shell error out and stop. Its result is therefore deliberately ignored; correctness is
        // ensured by waiting on the failpoint that this insert hits.
        const hangingColl = db.getSiblingDB(TestData.dbName).getCollection(TestData.collName);
        hangingColl.insert({_id: "b"});
    }, primaryConn.port);

    jsTest.log("Wait for async insert to hit the failpoint.");
    hangInsertFailPoint.wait();

    // Perform a second insert that is confirmed to have reached disk ({j: true}).
    //
    // The hanging uncommitted write should pin the primary's durable timestamp, so this write has
    // an oplog hole behind it and is expected to be truncated after the crash.
    assert.commandWorked(
        testColl.insert({_id: truncatedDocId}, {writeConcern: {w: 1, j: true}}));

    const docBeforeCrash = testColl.findOne({_id: truncatedDocId});
    assert.eq(docBeforeCrash, {"_id": truncatedDocId});

    jsTest.log("Force a checkpoint so the primary has data on startup recovery after a crash");
    assert.commandWorked(primaryConn.adminCommand({fsync: 1}));

    // Crash the primary. Because the first write never committed and left a hole in the oplog,
    // the second (successful) write should be truncated when the node is restarted.
    replTest.stop(primaryConn, kSigKill, {allowedExitCode: MongoRunner.EXIT_SIGKILL});
} catch (err) {
    // Release the failpoint before the test ends so that nothing hangs during server shutdown or
    // in post-test hooks, then surface the original failure.
    hangInsertFailPoint.off();
    throw err;
}

replTest.start(primaryConn);

// Wait until the restarted node has finished startup recovery and accepts user requests.
// Note: the high election timeout configured above means no new primary will be elected.
assert.soonNoExcept(() => {
    const status = assert.commandWorked(primaryConn.adminCommand("replSetGetStatus"));
    return status.myState == ReplSetTest.State.SECONDARY;
});

// The write that had the oplog hole behind it must now be gone (truncated).
primaryConn.setSlaveOk();
const recoveredColl = primaryConn.getDB(dbName).getCollection(collName);
const docAfterRecovery = recoveredColl.findOne({_id: truncatedDocId});
assert.eq(docAfterRecovery, null);

replTest.stopSet();
})();