summaryrefslogtreecommitdiff
path: root/jstests/replsets
diff options
context:
space:
mode:
authorVesselina Ratcheva <vesselina.ratcheva@10gen.com>2022-08-25 19:33:37 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-25 20:48:17 +0000
commit5930149b09f51b1035b7a24556399a8777f9399c (patch)
treedb735a39bf12678f03e52991c9d27e5e01c4ea78 /jstests/replsets
parentd3b68ac11a3283f4afd9ce4c2e519b180caa9879 (diff)
downloadmongo-5930149b09f51b1035b7a24556399a8777f9399c.tar.gz
SERVER-66854 Prevent step-up ops from being killed by killSessions commands
Diffstat (limited to 'jstests/replsets')
-rw-r--r--jstests/replsets/prepared_transaction_kill_during_step_up_refresh.js70
-rw-r--r--jstests/replsets/step_up_kill_abort_transactions.js149
2 files changed, 219 insertions, 0 deletions
diff --git a/jstests/replsets/prepared_transaction_kill_during_step_up_refresh.js b/jstests/replsets/prepared_transaction_kill_during_step_up_refresh.js
new file mode 100644
index 00000000000..2852269417d
--- /dev/null
+++ b/jstests/replsets/prepared_transaction_kill_during_step_up_refresh.js
@@ -0,0 +1,70 @@
+/**
+ * Tests that the work to restore locks for prepared transactions on step up is not killable via
+ * killSessions commands.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+(function() {
+"use strict";
+load("jstests/core/txns/libs/prepare_helpers.js");
+load("jstests/libs/fail_point_util.js");
+load("jstests/replsets/rslib.js"); // For reconnect()
+
+const rst = new ReplSetTest({nodes: 2, name: jsTestName()});
+rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const dbName = "primaryDB";
+const collName = "testcoll";
+
+const primary = rst.getPrimary();
+const newPrimary = rst.getSecondary();
+
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB.getCollection(collName);
+assert.commandWorked(primaryDB.runCommand({create: collName, writeConcern: {w: "majority"}}));
+
+jsTestName("Starting a transaction");
+const session = primary.startSession({causalConsistency: false});
+session.startTransaction({writeConcern: {w: "majority"}});
+const lsid = session.getSessionId().id;
+
+jsTestLog("LSID for our session is " + tojson(lsid));
+
+jsTestLog("Inserting a doc in a transaction.");
+const doc = {
+ _id: "txnDoc"
+};
+assert.commandWorked(session.getDatabase(dbName).getCollection(collName).insert(doc));
+
+jsTestLog("Putting transaction into prepare");
+const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+
+jsTestLog("Setting failpoint on new primary");
+const stepUpFP = configureFailPoint(newPrimary, "hangDuringStepUpPrepareRestoreLocks");
+
+jsTestLog("Stepping up new primary");
+rst.stepUp(newPrimary, {awaitWritablePrimary: false});
+reconnect(primary);
+
+jsTestLog("Waiting on new primary to hit step up failpoint");
+stepUpFP.wait();
+
+jsTestLog("Killing the session");
+const newPrimaryDB = newPrimary.getDB(dbName);
+assert.commandWorked(newPrimaryDB.runCommand({killSessions: [{id: lsid}]}));
+
+jsTestLog("Allowing step up to continue");
+stepUpFP.off();
+assert(newPrimary, rst.getPrimary());
+
+jsTestLog("Committing transaction on the new primary");
+// Create a proxy session to reuse the session state of the old primary.
+const newSession = new _DelegatingDriverSession(newPrimary, session);
+
+assert.commandWorked(PrepareHelpers.commitTransaction(newSession, prepareTimestamp));
+
+assert.eq(doc, primaryColl.findOne({}), primaryColl.find({}).toArray());
+
+rst.stopSet();
+})();
diff --git a/jstests/replsets/step_up_kill_abort_transactions.js b/jstests/replsets/step_up_kill_abort_transactions.js
new file mode 100644
index 00000000000..2c0ca84670a
--- /dev/null
+++ b/jstests/replsets/step_up_kill_abort_transactions.js
@@ -0,0 +1,149 @@
+/**
+ * Tests that the work for aborting in-progress transactions on step up is not killable via
+ * killSessions commands.
+ *
+ * @tags: [
+ * exclude_from_large_txns,
+ * uses_transactions,
+ * ]
+ */
+
+(function() {
+"use strict";
+load("jstests/replsets/rslib.js"); // For reconnect()
+load("jstests/libs/fail_point_util.js");
+
+function getTxnTableEntry(db) {
+ let txnTableEntries = db.getSiblingDB("config")["transactions"].find().toArray();
+ assert.eq(txnTableEntries.length, 1);
+ return txnTableEntries[0];
+}
+
+const rst = new ReplSetTest({  // nodes[0]: old primary, nodes[1]: new primary, nodes[2]: voter.
+ name: jsTestName(),
+ nodes: 3,
+ nodeOptions: {
+ setParameter:
+ // Make it easier to hold a transaction before it completes.
+ {maxNumberOfTransactionOperationsInSingleOplogEntry: 1, bgSyncOplogFetcherBatchSize: 1}
+ },
+});
+
+rst.startSet();
+let config = rst.getReplSetConfig();
+config.members[2].priority = 0;  // nodes[2] is not electable.
+// Disable primary catchup and chaining.
+config.settings = {
+ catchUpTimeoutMillis: 0,
+ chainingAllowed: false
+};
+rst.initiate(config);
+
+setLogVerbosity(rst.nodes, {"replication": {"verbosity": 3}});
+
+const dbName = "testdb";
+const collName = "testcoll";
+
+const primary = rst.nodes[0];  // NOTE(review): assumes nodes[0] is the initial primary - confirm.
+const primaryDB = primary.getDB(dbName);
+const newPrimary = rst.nodes[1];
+const newPrimaryDB = newPrimary.getDB(dbName);
+
+assert.commandWorked(primaryDB.runCommand({create: collName, writeConcern: {w: "majority"}}));
+
+// Prevent the priority: 0 node from fetching new ops so that it can vote for the new primary.
+const stopReplProducerFailPoint = configureFailPoint(rst.nodes[2], 'stopReplProducer');
+
+jsTest.log("Stop secondary oplog replication before the last operation in the transaction.");
+// The stopReplProducerOnDocument failpoint ensures that secondary stops replicating before
+// applying the last operation in the transaction. This depends on the oplog fetcher batch size
+// being 1.
+const stopReplProducerOnDocumentFailPoint = configureFailPoint(
+ newPrimary, "stopReplProducerOnDocument", {document: {"applyOps.o._id": "last in txn"}});
+
+jsTestLog("Start a transaction.");
+const session = primary.startSession({causalConsistency: false});
+const sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction({writeConcern: {w: "majority", wtimeout: 500}});  // Fail fast below.
+
+const lsid = session.getSessionId().id;  // Used later as the killSessions target.
+jsTestLog("LSID for our session is " + tojson(lsid));
+
+jsTestLog("Add inserts to transaction.");
+assert.commandWorked(sessionColl.insert({_id: "first in txn on primary " + primary}));
+assert.commandWorked(sessionColl.insert({_id: "last in txn"}));  // Matches the failpoint filter.
+
+jsTestLog("Confirm we cannot commit the transaction due to insufficient replication.");
+let res = session.commitTransaction_forTesting();
+assert.commandFailedWithCode(res, ErrorCodes.WriteConcernFailed);
+
+jsTestLog("Find the start and commit optimes on the primary.");
+let txnTableEntry = getTxnTableEntry(primaryDB);
+assert.eq(txnTableEntry.state, "committed");  // Committed locally despite the failed write concern.
+const commitOpTime = txnTableEntry.lastWriteOpTime;
+const startOpTime =
+ primaryDB.getSiblingDB("local").oplog.rs.findOne({ts: commitOpTime.ts}).prevOpTime;
+
+jsTestLog("Wait for the new primary to block on fail point.");
+stopReplProducerOnDocumentFailPoint.wait();
+
+jsTestLog("Wait for the new primary to apply the first op of transaction at timestamp: " +
+ tojson(startOpTime));
+assert.soon(() => {
+ const lastOpTime = getLastOpTime(newPrimary);
+ jsTestLog("Current lastOpTime on the new primary: " + tojson(lastOpTime));
+ return rs.compareOpTimes(lastOpTime, startOpTime) >= 0;
+});
+
+// Now the transaction should be in-progress on the new primary.
+txnTableEntry = getTxnTableEntry(newPrimaryDB);
+assert.eq(txnTableEntry.state, "inProgress");
+// The startOpTime should be less than the commit optime.
+assert.eq(rs.compareOpTimes(txnTableEntry.startOpTime, commitOpTime), -1);
+
+jsTestLog("Set step up failpoint on new primary");
+const stepUpFP = configureFailPoint(newPrimary, "hangDuringStepUpAbortInProgressTransactions");
+
+jsTestLog("Step down primary via heartbeat.");
+assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1}));  // NOTE(review): this is an explicit step-up of newPrimary; the log text above looks stale - confirm.
+rst.awaitNodesAgreeOnPrimary();
+reconnect(primary);
+
+jsTestLog("Wait for the new primary to stop replication after primary catch-up.");
+checkLog.contains(newPrimary, "Stopping replication producer");
+
+jsTestLog("Enable replication on the new primary so that it can continue the state transition");
+stopReplProducerOnDocumentFailPoint.off();
+
+jsTestLog("Wait on new primary to hit step up failpoint");
+stepUpFP.wait();
+
+jsTestLog("Attempt to kill the session");
+assert.commandWorked(newPrimaryDB.runCommand({killSessions: [{id: lsid}]}));  // Issued while step-up is paused on the failpoint.
+
+jsTestLog("Allow step up to continue");
+stepUpFP.off();
+assert.eq(rst.getPrimary(), newPrimary);
+stopReplProducerFailPoint.off();  // Re-enable fetching on nodes[2] before awaiting replication.
+rst.awaitReplication();
+
+jsTestLog("Verifying that the transaction has been aborted on the new primary.");
+// Create a proxy session to reuse the session state of the old primary.
+const newSession = new _DelegatingDriverSession(newPrimary, session);
+const newSessionDB = newSession.getDatabase(dbName);
+// The transaction should have been aborted.
+assert.commandFailedWithCode(newSessionDB.adminCommand({
+ commitTransaction: 1,
+ txnNumber: NumberLong(newSession.getTxnNumber_forTesting()),
+ autocommit: false,
+ writeConcern: {w: "majority"}
+}),
+ ErrorCodes.NoSuchTransaction);
+
+jsTestLog("Verifying that the collection was not changed by the transaction.");
+assert.eq(primaryDB.getCollection(collName).find().itcount(), 0);
+assert.eq(newPrimaryDB.getCollection(collName).find().itcount(), 0);
+
+rst.stopSet();
+})();