summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJudah Schvimer <judah.schvimer@10gen.com>2019-10-11 18:20:47 +0000
committerevergreen <evergreen@mongodb.com>2019-10-11 18:20:47 +0000
commit958d004f06c37b404c7164a49cf5c4784bea6c6f (patch)
treefc56c50f921d35bf9e7996fe8894cae4071018d6
parent20883d237ab0e4ee45c3aa6bc5f00602265402b1 (diff)
downloadmongo-958d004f06c37b404c7164a49cf5c4784bea6c6f.tar.gz
SERVER-42987 make it safe to interrupt abortTransaction
(cherry picked from commit 1659ddcfb49050dcf18fef014cd9d5ebf5717650)
-rw-r--r--jstests/replsets/kill_prepared_transaction_commit_abort.js22
-rw-r--r--src/mongo/db/transaction_participant.cpp63
-rw-r--r--src/mongo/db/transaction_participant.h4
-rw-r--r--src/mongo/db/transaction_participant_test.cpp7
4 files changed, 60 insertions, 36 deletions
diff --git a/jstests/replsets/kill_prepared_transaction_commit_abort.js b/jstests/replsets/kill_prepared_transaction_commit_abort.js
index 58a964a2bf8..280cbdc9191 100644
--- a/jstests/replsets/kill_prepared_transaction_commit_abort.js
+++ b/jstests/replsets/kill_prepared_transaction_commit_abort.js
@@ -6,10 +6,6 @@
(function() {
"use strict";
-// TODO (SERVER-42987) Re-enable this test.
-if (true) {
- return;
-}
load("jstests/core/txns/libs/prepare_helpers.js");
load("jstests/libs/parallelTester.js"); // for ScopedThread.
@@ -51,11 +47,10 @@ function killOpThread(host, dbName, collName, shutdownLatch) {
]
};
let ops = nodeDB.currentOp(filter).inprog;
- if (ops.length > 0) {
- print("Going to run 'killOp' on " + ops.length + " ops.");
- }
ops.forEach(op => {
- if (op.opid) {
+ // Let some operations survive so the test terminates.
+ const shouldKill = (Math.random() < 0.8);
+ if (op.opid && shouldKill) {
nodeDB.killOp(op.opid);
}
});
@@ -125,14 +120,6 @@ function commitOrAbortAllTransactions(sessions) {
// The number of sessions and transactions to create.
const numTxns = 100;
-// Make the server sleep a bit right after commit/abort commands start to make it more likely that
-// the kill op thread will be able to find and kill them.
-assert.commandWorked(primary.adminCommand({
- configureFailPoint: "sleepMillisAfterCommandExecutionBegins",
- mode: "alwaysOn",
- data: {millis: 50, commands: {"commitTransaction": 1, "abortTransaction": 1}}
-}));
-
jsTestLog("Creating sessions and preparing " + numTxns + " transactions.");
let sessions = createPreparedTransactions(numTxns);
@@ -140,6 +127,9 @@ jsTestLog("Starting the killOp thread.");
let killThread = new Thread(killOpThread, primary.host, dbName, collName, shutdownLatch);
killThread.start();
+// Sleep for a second to let the killThread begin killing.
+sleep(1000);
+
jsTestLog("Committing/aborting all transactions.");
commitOrAbortAllTransactions(sessions);
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 7fcbcaecb14..49a4bd4cf77 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -1522,27 +1522,56 @@ void TransactionParticipant::Participant::_abortActiveTransaction(
o(lk).transactionMetricsObserver.onTransactionOperation(
opCtx, CurOp::get(opCtx)->debug().additiveMetrics, o().txnState.isPrepared());
}
- // We reserve an oplog slot before aborting the transaction so that no writes that are causally
- // related to the transaction abort enter the oplog at a timestamp earlier than the abort oplog
- // entry. On secondaries, we generate a fake empty oplog slot, since it's not used by the
- // OpObserver.
- boost::optional<OplogSlotReserver> oplogSlotReserver;
- boost::optional<OplogSlot> abortOplogSlot;
- if (opCtx->writesAreReplicated() && p().needToWriteAbortEntry) {
- oplogSlotReserver.emplace(opCtx);
- abortOplogSlot = oplogSlotReserver->getLastSlot();
- }
-
- // Clean up the transaction resources on the opCtx even if the transaction resources on the
- // session were not aborted. This actually aborts the storage-transaction.
- _cleanUpTxnResourceOnOpCtx(opCtx, TerminationCause::kAborted);
- // Write the abort oplog entry. This must be done after aborting the storage transaction, so
- // that the lock state is reset, and there is no max lock timeout on the locker.
auto opObserver = opCtx->getServiceContext()->getOpObserver();
invariant(opObserver);
- opObserver->onTransactionAbort(opCtx, abortOplogSlot);
+ const bool needToWriteAbortEntry = opCtx->writesAreReplicated() && p().needToWriteAbortEntry;
+ if (needToWriteAbortEntry) {
+ // We reserve an oplog slot before aborting the transaction so that no writes that are
+ // causally related to the transaction abort enter the oplog at a timestamp earlier than the
+ // abort oplog entry.
+ OplogSlotReserver oplogSlotReserver(opCtx);
+
+ // Clean up the transaction resources on the opCtx even if the transaction resources on the
+ // session were not aborted. This actually aborts the storage-transaction.
+ _cleanUpTxnResourceOnOpCtx(opCtx, TerminationCause::kAborted);
+
+ try {
+ // If we need to write an abort oplog entry, this function can no longer be interrupted.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
+ // Write the abort oplog entry. This must be done after aborting the storage
+ // transaction, so that the lock state is reset, and there is no max lock timeout on the
+ // locker.
+ opObserver->onTransactionAbort(opCtx, oplogSlotReserver.getLastSlot());
+
+ _finishAbortingActiveTransaction(opCtx, expectedStates);
+ } catch (...) {
+ // It is illegal for aborting a transaction that must write an abort oplog entry to fail
+ // after aborting the storage transaction, so we crash instead.
+ severe()
+ << "Caught exception during abort of transaction that must write abort oplog entry "
+ << opCtx->getTxnNumber() << " on " << _sessionId().toBSON() << ": "
+ << exceptionToStatus();
+ std::terminate();
+ }
+ } else {
+ // Clean up the transaction resources on the opCtx even if the transaction resources on the
+ // session were not aborted. This actually aborts the storage-transaction.
+ //
+ // These functions are allowed to throw. We are not writing an oplog entry, so the only risk
+ // is not cleaning up some internal TransactionParticipant state, updating metrics, or
+ // logging the end of the transaction. That will either be cleaned up in the
+ // ServiceEntryPoint's abortGuard or when the next transaction begins.
+ _cleanUpTxnResourceOnOpCtx(opCtx, TerminationCause::kAborted);
+ opObserver->onTransactionAbort(opCtx, boost::none);
+ _finishAbortingActiveTransaction(opCtx, expectedStates);
+ }
+}
+
+void TransactionParticipant::Participant::_finishAbortingActiveTransaction(
+ OperationContext* opCtx, TransactionState::StateSet expectedStates) {
// Only abort the transaction in session if it's in expected states.
// When the state of active transaction on session is not expected, it means another
// thread has already aborted the transaction on session.
diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h
index ac85e6f8c56..f0f1cde0c28 100644
--- a/src/mongo/db/transaction_participant.h
+++ b/src/mongo/db/transaction_participant.h
@@ -676,6 +676,10 @@ public:
void _abortActiveTransaction(OperationContext* opCtx,
TransactionState::StateSet expectedStates);
+ // Factors out code for clarity from _abortActiveTransaction.
+ void _finishAbortingActiveTransaction(OperationContext* opCtx,
+ TransactionState::StateSet expectedStates);
+
// Aborts a prepared transaction.
void _abortActivePreparedTransaction(OperationContext* opCtx);
diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp
index 5fe79784bbf..9c0f56889c6 100644
--- a/src/mongo/db/transaction_participant_test.cpp
+++ b/src/mongo/db/transaction_participant_test.cpp
@@ -1477,7 +1477,9 @@ TEST_F(TxnParticipantTest, ThrowDuringUnpreparedOnTransactionAbort) {
txnParticipant.abortTransaction(opCtx()), AssertionException, ErrorCodes::OperationFailed);
}
-TEST_F(TxnParticipantTest, ThrowDuringPreparedOnTransactionAbortIsFatal) {
+DEATH_TEST_F(TxnParticipantTest,
+ ThrowDuringPreparedOnTransactionAbortIsFatal,
+ "Caught exception during abort of transaction") {
auto sessionCheckout = checkOutSession();
auto txnParticipant = TransactionParticipant::get(opCtx());
txnParticipant.unstashTransactionResources(opCtx(), "abortTransaction");
@@ -1485,8 +1487,7 @@ TEST_F(TxnParticipantTest, ThrowDuringPreparedOnTransactionAbortIsFatal) {
_opObserver->onTransactionAbortThrowsException = true;
- ASSERT_THROWS_CODE(
- txnParticipant.abortTransaction(opCtx()), AssertionException, ErrorCodes::OperationFailed);
+ txnParticipant.abortTransaction(opCtx());
}
TEST_F(TxnParticipantTest, InterruptedSessionsCannotBePrepared) {