diff options
author | Jack Mulrow <jack.mulrow@mongodb.com> | 2019-01-03 12:21:49 -0500 |
---|---|---|
committer | Jack Mulrow <jack.mulrow@mongodb.com> | 2019-01-09 14:16:00 -0500 |
commit | 0c8e948f50cec99860e4ac0d15ba76f33512c9d2 (patch) | |
tree | f9f1ebe145f9723d3a5ebecc698e74a6bbd7a9ea /src | |
parent | 627853bdc19ed147fe5c14e9e458035598c062ea (diff) | |
download | mongo-0c8e948f50cec99860e4ac0d15ba76f33512c9d2.tar.gz |
SERVER-36639 Disallow restarting txn at active txn number if shard has aborted after being prepared
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/op_observer_impl_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.cpp | 67 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.h | 32 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant_test.cpp | 32 |
4 files changed, 97 insertions, 40 deletions
diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp index 991b452a1bc..ea3b80b605e 100644 --- a/src/mongo/db/op_observer_impl_test.cpp +++ b/src/mongo/db/op_observer_impl_test.cpp @@ -755,7 +755,7 @@ TEST_F(OpObserverTransactionTest, TransactionalPreparedAbortTest) { opCtx()->setWriteUnitOfWork(nullptr); opCtx()->lockState()->unsetMaxLockTimeout(); opObserver().onTransactionAbort(opCtx(), abortSlot); - txnParticipant->transitionToAbortedforTest(); + txnParticipant->transitionToAbortedWithPrepareforTest(); repl::OplogInterfaceLocal oplogInterface(opCtx(), NamespaceString::kRsOplogNamespace.ns()); auto oplogIter = oplogInterface.makeIterator(); @@ -807,7 +807,7 @@ TEST_F(OpObserverTransactionTest, TransactionalUnpreparedAbortTest) { AutoGetCollection autoColl(opCtx(), nss, MODE_IX); opObserver().onInserts(opCtx(), nss, uuid, insert.begin(), insert.end(), false); - txnParticipant->transitionToAbortedforTest(); + txnParticipant->transitionToAbortedWithoutPrepareforTest(); opObserver().onTransactionAbort(opCtx(), boost::none); } @@ -891,7 +891,7 @@ TEST_F(OpObserverTransactionTest, AbortingPreparedTransactionWritesToTransaction opCtx()->setWriteUnitOfWork(nullptr); opCtx()->lockState()->unsetMaxLockTimeout(); opObserver().onTransactionAbort(opCtx(), abortSlot); - txnParticipant->transitionToAbortedforTest(); + txnParticipant->transitionToAbortedWithPrepareforTest(); txnParticipant->stashTransactionResources(opCtx()); diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index d25cb130417..066790d2236 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -431,11 +431,8 @@ void TransactionParticipant::beginOrContinue(TxnNumber txnNumber, // The active transaction number can only be reused if the transaction is not in a state // that indicates it has been involved in a two phase commit. In normal operation this check // should never fail. - // - // TODO SERVER-36639: Ensure the active transaction number cannot be reused if the - // transaction is in the abort after prepare state (or any state indicating the participant - // has been involved in a two phase commit). - const auto restartableStates = TransactionState::kInProgress | TransactionState::kAborted; + const auto restartableStates = + TransactionState::kInProgress | TransactionState::kAbortedWithoutPrepare; uassert(50911, str::stream() << "Cannot start a transaction at given transaction number " << txnNumber @@ -675,7 +672,7 @@ void TransactionParticipant::stashTransactionResources(OperationContext* opCtx) stdx::unique_lock<stdx::mutex> lg(_mutex); // Always check session's txnNumber, since it can be modified by migration, which does not - // check out the session. We intentionally do not error if _txnState=kAborted, since we + // check out the session. We intentionally do not error if the transaction is aborted, since we // expect this function to be called at the end of the 'abortTransaction' command. _checkIsActiveTransaction(lg, *opCtx->getTxnNumber(), false); @@ -1127,7 +1124,7 @@ void TransactionParticipant::_finishCommitTransaction(WithLock lk, OperationCont // We must clear the recovery unit and locker so any post-transaction writes can run without // transactional settings such as a read timestamp. - _cleanUpTxnResourceOnOpCtx(lk, opCtx, TransactionState::kCommitted); + _cleanUpTxnResourceOnOpCtx(lk, opCtx, TerminationCause::kCommitted); } void TransactionParticipant::_updateTxnMetricsOnCommit(WithLock lk, @@ -1257,7 +1254,7 @@ void TransactionParticipant::_abortActiveTransaction(stdx::unique_lock<stdx::mut // Clean up the transaction resources on the opCtx even if the transaction resources on the // session were not aborted. This actually aborts the storage-transaction. - _cleanUpTxnResourceOnOpCtx(lock, opCtx, TransactionState::kAborted); + _cleanUpTxnResourceOnOpCtx(lock, opCtx, TerminationCause::kAborted); // Write the abort oplog entry. This must be done after aborting the storage transaction, so // that the lock state is reset, and there is no max lock timeout on the locker. We need to @@ -1300,7 +1297,10 @@ void TransactionParticipant::_abortActiveTransaction(stdx::unique_lock<stdx::mut str::stream() << "Cannot abort transaction in " << _txnState.toString()); } else { // If _activeTxnNumber is higher than ours, it means the transaction is already aborted. - invariant(_txnState.isInSet(lock, TransactionState::kNone | TransactionState::kAborted)); + invariant(_txnState.isInSet(lock, + TransactionState::kNone | + TransactionState::kAbortedWithoutPrepare | + TransactionState::kAbortedWithPrepare)); } } @@ -1319,7 +1319,7 @@ void TransactionParticipant::_abortTransactionOnSession(WithLock wl) { } _logSlowTransaction(wl, &(_txnResourceStash->locker()->getLockerInfo(boost::none))->stats, - TransactionState::kAborted, + TerminationCause::kAborted, _txnResourceStash->getReadConcernArgs()); } else { stdx::lock_guard<stdx::mutex> lm(_metricsMutex); @@ -1332,11 +1332,14 @@ void TransactionParticipant::_abortTransactionOnSession(WithLock wl) { _txnState.isPrepared(lm)); } - _resetTransactionState(wl, TransactionState::kAborted); + const auto nextState = _txnState.isPrepared(wl) ? TransactionState::kAbortedWithPrepare + : TransactionState::kAbortedWithoutPrepare; + _resetTransactionState(wl, nextState); } -void TransactionParticipant::_cleanUpTxnResourceOnOpCtx( - WithLock wl, OperationContext* opCtx, TransactionState::StateFlag terminationCause) { +void TransactionParticipant::_cleanUpTxnResourceOnOpCtx(WithLock wl, + OperationContext* opCtx, + TerminationCause terminationCause) { // Log the transaction if its duration is longer than the slowMS command threshold. _logSlowTransaction( wl, @@ -1471,8 +1474,10 @@ std::string TransactionParticipant::TransactionState::toString(StateFlag state) return "TxnState::CommittingWithPrepare"; case TransactionParticipant::TransactionState::kCommitted: return "TxnState::Committed"; - case TransactionParticipant::TransactionState::kAborted: - return "TxnState::Aborted"; + case TransactionParticipant::TransactionState::kAbortedWithoutPrepare: + return "TxnState::AbortedWithoutPrepare"; + case TransactionParticipant::TransactionState::kAbortedWithPrepare: + return "TxnState::AbortedAfterPrepare"; } MONGO_UNREACHABLE; } @@ -1494,7 +1499,7 @@ bool TransactionParticipant::TransactionState::_isLegalTransition(StateFlag oldS case kNone: case kPrepared: case kCommittingWithoutPrepare: - case kAborted: + case kAbortedWithoutPrepare: return true; default: return false; @@ -1503,18 +1508,25 @@ bool TransactionParticipant::TransactionState::_isLegalTransition(StateFlag oldS case kPrepared: switch (newState) { case kCommittingWithPrepare: - case kAborted: + case kAbortedWithPrepare: return true; default: return false; } MONGO_UNREACHABLE; case kCommittingWithPrepare: + switch (newState) { + case kCommitted: + return true; + default: + return false; + } + MONGO_UNREACHABLE; case kCommittingWithoutPrepare: switch (newState) { case kNone: case kCommitted: - case kAborted: + case kAbortedWithoutPrepare: return true; default: return false; @@ -1523,13 +1535,12 @@ bool TransactionParticipant::TransactionState::_isLegalTransition(StateFlag oldS case kCommitted: switch (newState) { case kNone: - case kInProgress: return true; default: return false; } MONGO_UNREACHABLE; - case kAborted: + case kAbortedWithoutPrepare: switch (newState) { case kNone: case kInProgress: @@ -1538,6 +1549,14 @@ bool TransactionParticipant::TransactionState::_isLegalTransition(StateFlag oldS return false; } MONGO_UNREACHABLE; + case kAbortedWithPrepare: + switch (newState) { + case kNone: + return true; + default: + return false; + } + MONGO_UNREACHABLE; } MONGO_UNREACHABLE; } @@ -1565,11 +1584,9 @@ void TransactionParticipant::_reportTransactionStats(WithLock wl, std::string TransactionParticipant::_transactionInfoForLog( const SingleThreadedLockStats* lockStats, - TransactionState::StateFlag terminationCause, + TerminationCause terminationCause, repl::ReadConcernArgs readConcernArgs) const { invariant(lockStats); - invariant(terminationCause == TransactionState::kCommitted || - terminationCause == TransactionState::kAborted); StringBuilder s; @@ -1593,7 +1610,7 @@ std::string TransactionParticipant::_transactionInfoForLog( s << singleTransactionStats.getOpDebug()->additiveMetrics.report(); std::string terminationCauseString = - terminationCause == TransactionState::kCommitted ? "committed" : "aborted"; + terminationCause == TerminationCause::kCommitted ? "committed" : "aborted"; s << " terminationCause:" << terminationCauseString; auto tickSource = getGlobalServiceContext()->getTickSource(); @@ -1637,7 +1654,7 @@ std::string TransactionParticipant::_transactionInfoForLog( void TransactionParticipant::_logSlowTransaction(WithLock wl, const SingleThreadedLockStats* lockStats, - TransactionState::StateFlag terminationCause, + TerminationCause terminationCause, repl::ReadConcernArgs readConcernArgs) { // Only log multi-document transactions. if (!_txnState.isNone(wl)) { diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h index 2833f9fd14d..c488cd1dc2f 100644 --- a/src/mongo/db/transaction_participant.h +++ b/src/mongo/db/transaction_participant.h @@ -74,6 +74,14 @@ enum class SpeculativeTransactionOpTime { }; /** + * Reason a transaction was terminated. + */ +enum class TerminationCause { + kCommitted, + kAborted, +}; + +/** * A state machine that coordinates a distributed transaction commit with the transaction * coordinator. */ @@ -479,8 +487,8 @@ public: bool committed, const repl::ReadConcernArgs& readConcernArgs) const { stdx::lock_guard<stdx::mutex> lk(_mutex); - TransactionState::StateFlag terminationCause = - committed ? TransactionState::kCommitted : TransactionState::kAborted; + TerminationCause terminationCause = + committed ? TerminationCause::kCommitted : TerminationCause::kAborted; return _transactionInfoForLog(lockStats, terminationCause, readConcernArgs); } @@ -510,9 +518,14 @@ public: _txnState.transitionTo(lk, TransactionState::kPrepared); } - void transitionToAbortedforTest() { + void transitionToAbortedWithoutPrepareforTest() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + _txnState.transitionTo(lk, TransactionState::kAbortedWithoutPrepare); + } + + void transitionToAbortedWithPrepareforTest() { stdx::lock_guard<stdx::mutex> lk(_mutex); - _txnState.transitionTo(lk, TransactionState::kAborted); + _txnState.transitionTo(lk, TransactionState::kAbortedWithPrepare); } private: @@ -560,7 +573,8 @@ private: kCommittingWithoutPrepare = 1 << 3, kCommittingWithPrepare = 1 << 4, kCommitted = 1 << 5, - kAborted = 1 << 6 + kAbortedWithoutPrepare = 1 << 6, + kAbortedWithPrepare = 1 << 7 }; using StateSet = int; @@ -607,7 +621,7 @@ private: } bool isAborted(WithLock) const { - return _state == kAborted; + return _state == kAbortedWithoutPrepare || _state == kAbortedWithPrepare; } std::string toString() const { @@ -696,7 +710,7 @@ private: // Clean up the transaction resources unstashed on operation context. void _cleanUpTxnResourceOnOpCtx(WithLock wl, OperationContext* opCtx, - TransactionState::StateFlag terminationCause); + TerminationCause terminationCause); // Checks if the current transaction number of this transaction still matches with the // parent session as well as the transaction number of the current operation context. @@ -713,7 +727,7 @@ private: // transaction must be committed or aborted when this function is called. void _logSlowTransaction(WithLock wl, const SingleThreadedLockStats* lockStats, - TransactionState::StateFlag terminationCause, + TerminationCause terminationCause, repl::ReadConcernArgs readConcernArgs); // This method returns a string with information about a slow transaction. The format of the @@ -721,7 +735,7 @@ private: // transaction must be completed (committed or aborted) and a valid LockStats reference must be // passed in order for this method to be called. std::string _transactionInfoForLog(const SingleThreadedLockStats* lockStats, - TransactionState::StateFlag terminationCause, + TerminationCause terminationCause, repl::ReadConcernArgs readConcernArgs) const; // Reports transaction stats for both active and inactive transactions using the provided diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 3b8e4712f20..b20d33efa3a 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -1383,9 +1383,27 @@ protected: 50911); } - // TODO SERVER-36639: Add tests that the active transaction number cannot be reused if the - // transaction is in the abort after prepare state (or any state indicating the participant - // has been involved in a two phase commit). + void cannotSpecifyStartTransactionOnAbortedPreparedTransaction() { + auto autocommit = false; + auto startTransaction = true; + auto sessionCheckout = checkOutSession(); + + auto txnParticipant = TransactionParticipant::get(opCtx()); + ASSERT(txnParticipant->inMultiDocumentTransaction()); + + txnParticipant->unstashTransactionResources(opCtx(), "prepareTransaction"); + txnParticipant->prepareTransaction(opCtx(), {}); + ASSERT(txnParticipant->transactionIsPrepared()); + + txnParticipant->abortActiveTransaction(opCtx()); + ASSERT(txnParticipant->transactionIsAborted()); + + startTransaction = true; + ASSERT_THROWS_CODE( + txnParticipant->beginOrContinue(*opCtx()->getTxnNumber(), autocommit, startTransaction), + AssertionException, + 50911); + } }; /** @@ -1424,6 +1442,10 @@ TEST_F(ShardTxnParticipantTest, CannotSpecifyStartTransactionOnStartedRetryableW cannotSpecifyStartTransactionOnStartedRetryableWrite(); } +TEST_F(ShardTxnParticipantTest, CannotSpecifyStartTransactionOnAbortedPreparedTransaction) { + cannotSpecifyStartTransactionOnAbortedPreparedTransaction(); +} + /** * Test fixture for a transaction participant running on a config server. */ @@ -1460,6 +1482,10 @@ TEST_F(ConfigTxnParticipantTest, CannotSpecifyStartTransactionOnStartedRetryable cannotSpecifyStartTransactionOnStartedRetryableWrite(); } +TEST_F(ConfigTxnParticipantTest, CannotSpecifyStartTransactionOnAbortedPreparedTransaction) { + cannotSpecifyStartTransactionOnAbortedPreparedTransaction(); +} + TEST_F(TxnParticipantTest, KillSessionsDuringUnpreparedAbortSucceeds) { auto sessionCheckout = checkOutSession(); auto txnParticipant = TransactionParticipant::get(opCtx()); |