author     Jason Chan <jason.chan@mongodb.com>  2021-04-28 19:53:03 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-05-04 22:29:28 +0000
commit     320e2d28396c250dfd69640dcf865dff50ea0b55 (patch)
tree       e5ddaba95d3c5ee27ee386137a74435b17637993 /jstests
parent     4aa27885874b90e098c1225fccb10f4daa3b3d38 (diff)
SERVER-55305 Add new step to replication rollback to restore the txns table to be consistent with the stableTimestamp
Diffstat (limited to 'jstests')
-rw-r--r--  jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application.js  183
-rw-r--r--  jstests/replsets/rollback_with_coalesced_txn_table_updates_from_vectored_inserts.js  115
2 files changed, 298 insertions, 0 deletions
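
As background for the two tests in this diff: 'config.transactions' keeps one record per logical session, storing the highest txnNumber seen and the opTime of the session's last write. When many statements for the same session are applied together (a secondary oplog batch, or a vectored insert on the primary), that record is updated only once, with the opTime of the last statement. The mongo shell snippet below is a minimal sketch of what such a record looks like and of why rolling back to the stable timestamp can leave it pointing past the data; it is not part of this commit, and the session id and timestamps shown are hypothetical.

// Minimal sketch (not part of this commit): inspecting the session record that the new rollback
// step restores. The session id here is hypothetical; for a fresh session the lookup returns null.
const exampleLsid = {id: UUID()};
const txnsColl = db.getSiblingDB("config").getCollection("transactions");
printjson(txnsColl.findOne({"_id.id": exampleLsid.id}));
// After a retryable write, the session's record typically looks like:
//   {
//     _id: {id: UUID("..."), uid: BinData(0, "...")},
//     txnNum: NumberLong(1),
//     lastWriteOpTime: {ts: Timestamp(1619639583, 100), t: NumberLong(1)},
//     lastWriteDate: ISODate("2021-04-28T19:53:03Z")
//   }
// If statements 1..100 of a retryable write are coalesced into one update of this record,
// lastWriteOpTime points at statement 100 even when the stable timestamp only covers statement
// 98. Rolling back to the stable timestamp must therefore also rewind this record, which is the
// new rollback step added by SERVER-55305.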
diff --git a/jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application.js b/jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application.js
new file mode 100644
index 00000000000..39ceaa77713
--- /dev/null
+++ b/jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application.js
@@ -0,0 +1,183 @@
+/**
+ * Tests that the rollback procedure will update the 'config.transactions' table to be consistent
+ * with the node's data at the 'stableTimestamp', specifically in the case where multiple derived
+ * ops to the 'config.transactions' table were coalesced into a single operation during secondary
+ * oplog application.
+ * We also test that if a node crashes after oplog truncation during rollback, the update made to
+ * the 'config.transactions' table is still present after the node restarts and recovers.
+ *
+ * @tags: [requires_persistence]
+ */
+
+(function() {
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/write_concern_util.js");
+
+const oplogApplierBatchSize = 100;
+
+function runTest(crashAfterRollbackTruncation) {
+ const rst = new ReplSetTest({
+ nodes: {
+ n0: {},
+ // Set the 'syncdelay' to 1s to speed up checkpointing. Also explicitly set the batch
+ // size for oplog application to ensure the number of retryable write statements being
+ // made majority committed isn't a multiple of it.
+ n1: {syncdelay: 1, setParameter: {replBatchLimitOperations: oplogApplierBatchSize}},
+ // Set the bgSyncOplogFetcherBatchSize to 1 oplog entry to guarantee replication
+ // progress with the stopReplProducerOnDocument failpoint.
+ n2: {setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ n3: {setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ n4: {setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ },
+ // Force secondaries to sync from the primary to guarantee replication progress with the
+ // stopReplProducerOnDocument failpoint. Also disable primary catchup because some
+ // replicated retryable write statements are intentionally not being made majority
+ // committed.
+ settings: {chainingAllowed: false, catchUpTimeoutMillis: 0},
+ });
+ rst.startSet();
+ rst.initiate();
+
+ const primary = rst.getPrimary();
+ const ns = "test.retryable_write_partial_rollback";
+ assert.commandWorked(
+ primary.getCollection(ns).insert({_id: 0, counter: 0}, {writeConcern: {w: 5}}));
+
+ const [secondary1, secondary2, secondary3, secondary4] = rst.getSecondaries();
+
+ // Disable replication on all of the secondaries to manually control the replication progress.
+ const stopReplProducerFailpoints = [secondary1, secondary2, secondary3, secondary4].map(
+ conn => configureFailPoint(conn, 'stopReplProducer'));
+
+ // While replication is still entirely disabled, additionally disable replication partway into
+ // the retryable write on all but the first secondary. The idea is that while secondary1 will
+ // apply all of the oplog entries in a single batch, the other secondaries will only apply up to
+ // counterMajorityCommitted oplog entries.
+ const counterTotal = oplogApplierBatchSize;
+ const counterMajorityCommitted = counterTotal - 2;
+ const stopReplProducerOnDocumentFailpoints = [secondary2, secondary3, secondary4].map(
+ conn => configureFailPoint(conn,
+ 'stopReplProducerOnDocument',
+ {document: {"diff.u.counter": counterMajorityCommitted + 1}}));
+
+ const lsid = ({id: UUID()});
+
+ assert.commandWorked(primary.getCollection(ns).runCommand("update", {
+ updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
+ lsid,
+ txnNumber: NumberLong(1),
+ }));
+
+ const stmtMajorityCommitted = primary.getCollection("local.oplog.rs")
+ .findOne({ns, "o.diff.u.counter": counterMajorityCommitted});
+ assert.neq(null, stmtMajorityCommitted);
+
+ for (const fp of stopReplProducerFailpoints) {
+ fp.off();
+
+ // Wait for the secondary to have applied through the counterMajorityCommitted retryable
+ // write statement. We do this for each secondary individually, starting with secondary1, to
+ // guarantee that secondary1 will advance its stable_timestamp when learning of the other
+ // secondaries also having applied through counterMajorityCommitted.
+ assert.soon(() => {
+ const {optimes: {appliedOpTime, durableOpTime}} =
+ assert.commandWorked(fp.conn.adminCommand({replSetGetStatus: 1}));
+
+ print(`${fp.conn.host}: ${tojsononeline({
+ appliedOpTime,
+ durableOpTime,
+ stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts
+ })}`);
+
+ return bsonWoCompare(appliedOpTime.ts, stmtMajorityCommitted.ts) >= 0 &&
+ bsonWoCompare(durableOpTime.ts, stmtMajorityCommitted.ts) >= 0;
+ });
+ }
+
+ // Wait for secondary1 to have advanced its stable_timestamp.
+ assert.soon(() => {
+ const {lastStableRecoveryTimestamp} =
+ assert.commandWorked(secondary1.adminCommand({replSetGetStatus: 1}));
+
+ print(`${secondary1.host}: ${tojsononeline({
+ lastStableRecoveryTimestamp,
+ stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts
+ })}`);
+
+ return bsonWoCompare(lastStableRecoveryTimestamp, stmtMajorityCommitted.ts) >= 0;
+ });
+
+ // Step up one of the other secondaries and do a write which becomes majority committed to force
+ // secondary1 to go into rollback.
+ rst.freeze(secondary1);
+ assert.commandWorked(secondary2.adminCommand({replSetStepUp: 1}));
+ rst.freeze(primary);
+ rst.awaitNodesAgreeOnPrimary(undefined, undefined, secondary2);
+
+ let hangAfterTruncate;
+ if (crashAfterRollbackTruncation) {
+ hangAfterTruncate = configureFailPoint(secondary1, 'hangAfterOplogTruncationInRollback');
+ }
+
+ for (const fp of stopReplProducerOnDocumentFailpoints) {
+ fp.off();
+ }
+
+ // Wait for secondary2 to be a writable primary.
+ rst.getPrimary();
+
+ // Do a write which becomes majority committed and wait for secondary1 to complete its rollback.
+ assert.commandWorked(
+ secondary2.getCollection("test.dummy").insert({}, {writeConcern: {w: 'majority'}}));
+
+ if (crashAfterRollbackTruncation) {
+ // Entering rollback will close connections so we expect some network errors when waiting
+ // on the failpoint.
+ assert.soonNoExcept(() => {
+ hangAfterTruncate.wait();
+ return true;
+ }, `failed to wait for fail point ${hangAfterTruncate.failPointName}`);
+
+ // Crash the node after it performs oplog truncation.
+ rst.stop(secondary1, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL});
+ rst.restart(secondary1, {
+ "noReplSet": false,
+ setParameter: 'failpoint.stopReplProducer=' + tojson({mode: 'alwaysOn'})
+ });
+ rst.waitForState(secondary1, ReplSetTest.State.SECONDARY);
+ secondary1.setSecondaryOk();
+ // On startup, we expect to see the update persisted in the 'config.transactions' table.
+ let restoredDoc =
+ secondary1.getCollection('config.transactions').findOne({"_id.id": lsid.id});
+ assert.neq(null, restoredDoc);
+ secondary1.adminCommand({configureFailPoint: "stopReplProducer", mode: "off"});
+ }
+
+ // Reconnect to secondary1 after it completes its rollback and step it up to be the new primary.
+ rst.awaitNodesAgreeOnPrimary(undefined, undefined, secondary2);
+ assert.commandWorked(secondary1.adminCommand({replSetFreeze: 0}));
+ rst.stepUp(secondary1, {awaitWritablePrimary: false});
+
+ const docBeforeRetry = secondary1.getCollection(ns).findOne({_id: 0});
+ assert.eq(docBeforeRetry, {_id: 0, counter: counterMajorityCommitted});
+
+ assert.commandWorked(secondary1.getCollection(ns).runCommand("update", {
+ updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
+ lsid,
+ txnNumber: NumberLong(1),
+ writeConcern: {w: 5},
+ }));
+
+ const docAfterRetry = secondary1.getCollection(ns).findOne({_id: 0});
+ assert.eq(docAfterRetry, {_id: 0, counter: counterTotal});
+
+ rst.stopSet();
+}
+
+// Test the general scenario where we perform the appropriate update to the 'config.transactions'
+// table during rollback.
+runTest(false);
+// Extend the test to crash the secondary in the middle of rollback, right after oplog truncation.
+// We assert that the update made to the 'config.transactions' table persists across the restart.
+runTest(true);
+})();
\ No newline at end of file
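
To make the coalescing exercised by the test above concrete, here is an illustrative shell sketch (not part of the commit). It reuses the test's variables (secondary1, ns, lsid) and assumes it runs after secondary1 has applied the full batch but before it enters rollback.

// Illustrative only; assumes the test's variables (secondary1, ns, lsid) and a point in time
// after secondary1 has applied the whole batch but before it rolls back.
const sessionOplog = secondary1.getCollection("local.oplog.rs")
                         .find({ns, "lsid.id": lsid.id})
                         .sort({ts: 1})
                         .toArray();
print(`session oplog entries: ${sessionOplog.length}`);  // one entry per statement, e.g. 100

const sessionRecord = secondary1.getCollection("config.transactions").findOne({"_id.id": lsid.id});
print(`lastWriteOpTime: ${tojsononeline(sessionRecord.lastWriteOpTime)}`);
// Because secondary1 applied the batch in one go, lastWriteOpTime matches the final statement's
// timestamp, while the commit point (and hence secondary1's stable timestamp) stops at statement
// counterMajorityCommitted. The rollback step under test rewinds the record so that it again
// agrees with the collection data at the stable timestamp.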
diff --git a/jstests/replsets/rollback_with_coalesced_txn_table_updates_from_vectored_inserts.js b/jstests/replsets/rollback_with_coalesced_txn_table_updates_from_vectored_inserts.js
new file mode 100644
index 00000000000..e6ca0056c31
--- /dev/null
+++ b/jstests/replsets/rollback_with_coalesced_txn_table_updates_from_vectored_inserts.js
@@ -0,0 +1,115 @@
+/**
+ * Tests that the rollback procedure will update the 'config.transactions' table to be consistent
+ * with the node's data at the 'stableTimestamp', specifically in the case where multiple derived
+ * ops to the 'config.transactions' table were coalesced into a single operation when performing
+ * vectored inserts on the primary.
+ */
+
+(function() {
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/write_concern_util.js");
+
+const rst = new ReplSetTest({
+ nodes: {
+ // Set the syncdelay to 1s to speed up checkpointing.
+ n0: {syncdelay: 1},
+ // Set the bgSyncOplogFetcherBatchSize to 1 oplog entry to guarantee replication progress
+ // with the stopReplProducerOnDocument failpoint.
+ n1: {setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ n2: {setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ },
+ // Force secondaries to sync from the primary to guarantee replication progress with the
+ // stopReplProducerOnDocument failpoint. Also disable primary catchup because some replicated
+ // retryable write statements are intentionally not being made majority committed.
+ settings: {chainingAllowed: false, catchUpTimeoutMillis: 0},
+});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const ns = "test.retryable_write_coalesced_txn_updates";
+assert.commandWorked(primary.getCollection(ns).insert({_id: -1}, {writeConcern: {w: 3}}));
+
+const [secondary1, secondary2] = rst.getSecondaries();
+
+// Disable replication partway into the retryable write on all of the secondaries. The idea is that
+// while the primary will apply all of the writes in a single storage transaction, the secondaries
+// will only apply up to insertBatchMajorityCommitted oplog entries.
+const insertBatchTotal = 20;
+const insertBatchMajorityCommitted = insertBatchTotal - 2;
+const stopReplProducerOnDocumentFailpoints = [secondary1, secondary2].map(
+ conn => configureFailPoint(
+ conn, 'stopReplProducerOnDocument', {document: {"_id": insertBatchMajorityCommitted + 1}}));
+
+const lsid = ({id: UUID()});
+
+assert.commandWorked(primary.getCollection(ns).runCommand("insert", {
+ documents: Array.from({length: insertBatchTotal}, (_, i) => ({_id: i})),
+ lsid,
+ txnNumber: NumberLong(1),
+}));
+
+const stmtMajorityCommitted =
+ primary.getCollection("local.oplog.rs").findOne({ns, "o._id": insertBatchMajorityCommitted});
+assert.neq(null, stmtMajorityCommitted);
+
+// Wait for the primary to have advanced its stable_timestamp.
+assert.soon(() => {
+ const {lastStableRecoveryTimestamp} =
+ assert.commandWorked(primary.adminCommand({replSetGetStatus: 1}));
+
+ const wtStatus = assert.commandWorked(primary.adminCommand({serverStatus: 1})).wiredTiger;
+ const latestMajority =
+ wtStatus["snapshot-window-settings"]["latest majority snapshot timestamp available"];
+
+ print(`${primary.host}: ${tojsononeline({
+ lastStableRecoveryTimestamp,
+ stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts,
+ "latest majority snapshot timestamp available": latestMajority
+ })}`);
+
+ // Make sure 'secondary1' has a 'lastApplied' optime at least as recent as
+ // 'stmtMajorityCommitted.ts'. Otherwise, it can fail to win the election later.
+ const {optimes: {appliedOpTime}} =
+ assert.commandWorked(secondary1.adminCommand({replSetGetStatus: 1}));
+ print(`${secondary1.host}: ${tojsononeline({appliedOpTime})}`);
+
+ return bsonWoCompare(lastStableRecoveryTimestamp, stmtMajorityCommitted.ts) >= 0 &&
+ bsonWoCompare(appliedOpTime.ts, stmtMajorityCommitted.ts) >= 0;
+});
+
+// Step up one of the secondaries and do a write which becomes majority committed to force the
+// current primary to go into rollback.
+assert.commandWorked(secondary1.adminCommand({replSetStepUp: 1}));
+rst.freeze(primary);
+rst.awaitNodesAgreeOnPrimary(undefined, undefined, secondary1);
+
+for (const fp of stopReplProducerOnDocumentFailpoints) {
+ fp.off();
+}
+
+rst.getPrimary(); // Wait for secondary1 to be a writable primary.
+
+// Do a write which becomes majority committed and wait for the old primary to have completed its
+// rollback.
+assert.commandWorked(secondary1.getCollection("test.dummy").insert({}, {writeConcern: {w: 3}}));
+
+// Reconnect to the primary after it completes its rollback and step it up to be the primary again.
+rst.awaitNodesAgreeOnPrimary(undefined, undefined, secondary1);
+assert.commandWorked(primary.adminCommand({replSetFreeze: 0}));
+rst.stepUp(primary);
+
+print(`${primary.host} session txn record: ${
+ tojson(primary.getCollection("config.transactions").findOne({"_id.id": lsid.id}))}`);
+
+// Make sure we don't re-execute operations that have already been inserted by verifying that we
+// don't get a 'DuplicateKeyError'.
+assert.commandWorked(primary.getCollection(ns).runCommand("insert", {
+ documents: Array.from({length: insertBatchTotal}, (_, i) => ({_id: i})),
+ lsid,
+ txnNumber: NumberLong(1),
+ writeConcern: {w: 3},
+}));
+
+rst.stopSet();
+})();
\ No newline at end of file
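
The same inconsistency viewed from the primary in this second test: a vectored insert applies every document and a single 'config.transactions' update in one storage transaction, while each document still gets its own oplog entry. The sketch below is illustrative only, is not part of the commit, and reuses the test's variables (primary, ns, lsid).

// Illustrative only; assumes the test's variables (primary, ns, lsid), inspected before rollback.
primary.getCollection("local.oplog.rs")
    .find({ns, "lsid.id": lsid.id})
    .sort({ts: 1})
    .forEach(entry => print(tojsononeline({ts: entry.ts, _id: entry.o._id})));

const primaryRecord = primary.getCollection("config.transactions").findOne({"_id.id": lsid.id});
print(tojsononeline(primaryRecord));
// Only the final insert's opTime ends up in lastWriteOpTime. The secondaries stop fetching when
// they reach the oplog entry for _id 19, so they only apply through _id 18
// (insertBatchMajorityCommitted), and the majority commit point and the primary's stable
// timestamp stay there. After the primary rolls back to that point, the rewound record lets the
// retried "insert" at the end of the test skip the statements that already executed and re-run
// only the rolled-back one, without raising DuplicateKey errors.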