diff options
author | Judah Schvimer <judah.schvimer@10gen.com> | 2019-10-11 18:20:47 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-10-11 18:20:47 +0000 |
commit | 958d004f06c37b404c7164a49cf5c4784bea6c6f (patch) | |
tree | fc56c50f921d35bf9e7996fe8894cae4071018d6 | |
parent | 20883d237ab0e4ee45c3aa6bc5f00602265402b1 (diff) | |
download | mongo-958d004f06c37b404c7164a49cf5c4784bea6c6f.tar.gz |
SERVER-42987 make it safe to interrupt abortTransaction
(cherry picked from commit 1659ddcfb49050dcf18fef014cd9d5ebf5717650)
-rw-r--r-- | jstests/replsets/kill_prepared_transaction_commit_abort.js | 22 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.cpp | 63 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.h | 4 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant_test.cpp | 7 |
4 files changed, 60 insertions, 36 deletions
diff --git a/jstests/replsets/kill_prepared_transaction_commit_abort.js b/jstests/replsets/kill_prepared_transaction_commit_abort.js index 58a964a2bf8..280cbdc9191 100644 --- a/jstests/replsets/kill_prepared_transaction_commit_abort.js +++ b/jstests/replsets/kill_prepared_transaction_commit_abort.js @@ -6,10 +6,6 @@ (function() { "use strict"; -// TODO (SERVER-42987) Re-enable this test. -if (true) { - return; -} load("jstests/core/txns/libs/prepare_helpers.js"); load("jstests/libs/parallelTester.js"); // for ScopedThread. @@ -51,11 +47,10 @@ function killOpThread(host, dbName, collName, shutdownLatch) { ] }; let ops = nodeDB.currentOp(filter).inprog; - if (ops.length > 0) { - print("Going to run 'killOp' on " + ops.length + " ops."); - } ops.forEach(op => { - if (op.opid) { + // Let some operations survive so the test terminates. + const shouldKill = (Math.random() < 0.8); + if (op.opid && shouldKill) { nodeDB.killOp(op.opid); } }); @@ -125,14 +120,6 @@ function commitOrAbortAllTransactions(sessions) { // The number of sessions and transactions to create. const numTxns = 100; -// Make the server sleep a bit right after commit/abort commands start to make it more likely that -// the kill op thread will be able to find and kill them. -assert.commandWorked(primary.adminCommand({ - configureFailPoint: "sleepMillisAfterCommandExecutionBegins", - mode: "alwaysOn", - data: {millis: 50, commands: {"commitTransaction": 1, "abortTransaction": 1}} -})); - jsTestLog("Creating sessions and preparing " + numTxns + " transactions."); let sessions = createPreparedTransactions(numTxns); @@ -140,6 +127,9 @@ jsTestLog("Starting the killOp thread."); let killThread = new Thread(killOpThread, primary.host, dbName, collName, shutdownLatch); killThread.start(); +// Sleep for a second to let the killThread begin killing. +sleep(1000); + jsTestLog("Committing/aborting all transactions."); commitOrAbortAllTransactions(sessions); diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 7fcbcaecb14..49a4bd4cf77 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -1522,27 +1522,56 @@ void TransactionParticipant::Participant::_abortActiveTransaction( o(lk).transactionMetricsObserver.onTransactionOperation( opCtx, CurOp::get(opCtx)->debug().additiveMetrics, o().txnState.isPrepared()); } - // We reserve an oplog slot before aborting the transaction so that no writes that are causally - // related to the transaction abort enter the oplog at a timestamp earlier than the abort oplog - // entry. On secondaries, we generate a fake empty oplog slot, since it's not used by the - // OpObserver. - boost::optional<OplogSlotReserver> oplogSlotReserver; - boost::optional<OplogSlot> abortOplogSlot; - if (opCtx->writesAreReplicated() && p().needToWriteAbortEntry) { - oplogSlotReserver.emplace(opCtx); - abortOplogSlot = oplogSlotReserver->getLastSlot(); - } - - // Clean up the transaction resources on the opCtx even if the transaction resources on the - // session were not aborted. This actually aborts the storage-transaction. - _cleanUpTxnResourceOnOpCtx(opCtx, TerminationCause::kAborted); - // Write the abort oplog entry. This must be done after aborting the storage transaction, so - // that the lock state is reset, and there is no max lock timeout on the locker. auto opObserver = opCtx->getServiceContext()->getOpObserver(); invariant(opObserver); - opObserver->onTransactionAbort(opCtx, abortOplogSlot); + const bool needToWriteAbortEntry = opCtx->writesAreReplicated() && p().needToWriteAbortEntry; + if (needToWriteAbortEntry) { + // We reserve an oplog slot before aborting the transaction so that no writes that are + // causally related to the transaction abort enter the oplog at a timestamp earlier than the + // abort oplog entry. + OplogSlotReserver oplogSlotReserver(opCtx); + + // Clean up the transaction resources on the opCtx even if the transaction resources on the + // session were not aborted. This actually aborts the storage-transaction. + _cleanUpTxnResourceOnOpCtx(opCtx, TerminationCause::kAborted); + + try { + // If we need to write an abort oplog entry, this function can no longer be interrupted. + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); + + // Write the abort oplog entry. This must be done after aborting the storage + // transaction, so that the lock state is reset, and there is no max lock timeout on the + // locker. + opObserver->onTransactionAbort(opCtx, oplogSlotReserver.getLastSlot()); + + _finishAbortingActiveTransaction(opCtx, expectedStates); + } catch (...) { + // It is illegal for aborting a transaction that must write an abort oplog entry to fail + // after aborting the storage transaction, so we crash instead. + severe() + << "Caught exception during abort of transaction that must write abort oplog entry " + << opCtx->getTxnNumber() << " on " << _sessionId().toBSON() << ": " + << exceptionToStatus(); + std::terminate(); + } + } else { + // Clean up the transaction resources on the opCtx even if the transaction resources on the + // session were not aborted. This actually aborts the storage-transaction. + // + // These functions are allowed to throw. We are not writing an oplog entry, so the only risk + // is not cleaning up some internal TransactionParticipant state, updating metrics, or + // logging the end of the transaction. That will either be cleaned up in the + // ServiceEntryPoint's abortGuard or when the next transaction begins. + _cleanUpTxnResourceOnOpCtx(opCtx, TerminationCause::kAborted); + opObserver->onTransactionAbort(opCtx, boost::none); + _finishAbortingActiveTransaction(opCtx, expectedStates); + } +} + +void TransactionParticipant::Participant::_finishAbortingActiveTransaction( + OperationContext* opCtx, TransactionState::StateSet expectedStates) { // Only abort the transaction in session if it's in expected states. // When the state of active transaction on session is not expected, it means another // thread has already aborted the transaction on session. diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h index ac85e6f8c56..f0f1cde0c28 100644 --- a/src/mongo/db/transaction_participant.h +++ b/src/mongo/db/transaction_participant.h @@ -676,6 +676,10 @@ public: void _abortActiveTransaction(OperationContext* opCtx, TransactionState::StateSet expectedStates); + // Factors out code for clarity from _abortActiveTransaction. + void _finishAbortingActiveTransaction(OperationContext* opCtx, + TransactionState::StateSet expectedStates); + // Aborts a prepared transaction. void _abortActivePreparedTransaction(OperationContext* opCtx); diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 5fe79784bbf..9c0f56889c6 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -1477,7 +1477,9 @@ TEST_F(TxnParticipantTest, ThrowDuringUnpreparedOnTransactionAbort) { txnParticipant.abortTransaction(opCtx()), AssertionException, ErrorCodes::OperationFailed); } -TEST_F(TxnParticipantTest, ThrowDuringPreparedOnTransactionAbortIsFatal) { +DEATH_TEST_F(TxnParticipantTest, + ThrowDuringPreparedOnTransactionAbortIsFatal, + "Caught exception during abort of transaction") { auto sessionCheckout = checkOutSession(); auto txnParticipant = TransactionParticipant::get(opCtx()); txnParticipant.unstashTransactionResources(opCtx(), "abortTransaction"); @@ -1485,8 +1487,7 @@ TEST_F(TxnParticipantTest, ThrowDuringPreparedOnTransactionAbortIsFatal) { _opObserver->onTransactionAbortThrowsException = true; - ASSERT_THROWS_CODE( - txnParticipant.abortTransaction(opCtx()), AssertionException, ErrorCodes::OperationFailed); + txnParticipant.abortTransaction(opCtx()); } TEST_F(TxnParticipantTest, InterruptedSessionsCannotBePrepared) { |