diff options
author | Jack Mulrow <jack.mulrow@mongodb.com> | 2018-12-07 12:15:01 -0500 |
---|---|---|
committer | Jack Mulrow <jack.mulrow@mongodb.com> | 2018-12-10 17:44:54 -0500 |
commit | e6a0068b690c1bfc53f090401e1f066be3574733 (patch) | |
tree | 9e4ba33ceff3adb355843392172ffea9ef7df8b4 /src/mongo/s | |
parent | 3fa51aa6da0aa37c1fa51f01172db8b7fe9a44b2 (diff) | |
download | mongo-e6a0068b690c1bfc53f090401e1f066be3574733.tar.gz |
SERVER-38459 TransactionRouter shouldn't lose original non internally retryable snapshot and stale version errors
Diffstat (limited to 'src/mongo/s')
-rw-r--r-- | src/mongo/s/commands/strategy.cpp | 6 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 2 | ||||
-rw-r--r-- | src/mongo/s/transaction_router.cpp | 10 | ||||
-rw-r--r-- | src/mongo/s/transaction_router.h | 4 | ||||
-rw-r--r-- | src/mongo/s/transaction_router_test.cpp | 31 |
5 files changed, 29 insertions, 24 deletions
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index 8e8bc44f6ed..c2539c426ec 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -478,7 +478,7 @@ void runCommand(OperationContext* opCtx, auto abortGuard = MakeGuard([&] { txnRouter->implicitlyAbortTransaction(opCtx); }); handleCanRetryInTransaction(opCtx, txnRouter, canRetry, ex); - txnRouter->onStaleShardOrDbError(commandName); + txnRouter->onStaleShardOrDbError(commandName, ex.toStatus()); abortGuard.Dismiss(); } @@ -497,7 +497,7 @@ void runCommand(OperationContext* opCtx, auto abortGuard = MakeGuard([&] { txnRouter->implicitlyAbortTransaction(opCtx); }); handleCanRetryInTransaction(opCtx, txnRouter, canRetry, ex); - txnRouter->onStaleShardOrDbError(commandName); + txnRouter->onStaleShardOrDbError(commandName, ex.toStatus()); abortGuard.Dismiss(); } @@ -514,7 +514,7 @@ void runCommand(OperationContext* opCtx, auto abortGuard = MakeGuard([&] { txnRouter->implicitlyAbortTransaction(opCtx); }); handleCanRetryInTransaction(opCtx, txnRouter, canRetry, ex); - txnRouter->onSnapshotError(); + txnRouter->onSnapshotError(ex.toStatus()); abortGuard.Dismiss(); } diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index a344636860d..d26dc9b039a 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -463,7 +463,7 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx, if (auto txnRouter = TransactionRouter::get(opCtx)) { // A transaction can always continue on a stale version error during find because // the operation must be idempotent. - txnRouter->onStaleShardOrDbError(kFindCmdName); + txnRouter->onStaleShardOrDbError(kFindCmdName, ex.toStatus()); } } } diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp index 01e4b18abca..3af5fe824ff 100644 --- a/src/mongo/s/transaction_router.cpp +++ b/src/mongo/s/transaction_router.cpp @@ -365,11 +365,12 @@ bool TransactionRouter::_canContinueOnStaleShardOrDbError(StringData cmdName) co return false; } -void TransactionRouter::onStaleShardOrDbError(StringData cmdName) { +void TransactionRouter::onStaleShardOrDbError(StringData cmdName, const Status& errorStatus) { uassert(ErrorCodes::NoSuchTransaction, str::stream() << "Transaction " << _txnNumber << " was aborted on statement " << _latestStmtId - << " due to cluster data placement change", + << " due to an error from cluster data placement change: " + << errorStatus, _canContinueOnStaleShardOrDbError(cmdName)); // Remove participants created during the current statement so they are sent the correct options @@ -390,11 +391,12 @@ bool TransactionRouter::_canContinueOnSnapshotError() const { return _atClusterTime && _atClusterTime->canChange(_latestStmtId); } -void TransactionRouter::onSnapshotError() { +void TransactionRouter::onSnapshotError(const Status& errorStatus) { uassert(ErrorCodes::NoSuchTransaction, str::stream() << "Transaction " << _txnNumber << " was aborted on statement " << _latestStmtId - << " due to a non-retryable snapshot error", + << " due to a non-retryable snapshot error: " + << errorStatus, _canContinueOnSnapshotError()); // The transaction must be restarted on all participants because a new read timestamp will be diff --git a/src/mongo/s/transaction_router.h b/src/mongo/s/transaction_router.h index 51d7418f350..2cdf38bf389 100644 --- a/src/mongo/s/transaction_router.h +++ b/src/mongo/s/transaction_router.h @@ -172,14 +172,14 @@ public: * Updates the transaction state to allow for a retry of the current command on a stale version * error. Will throw if the transaction cannot be continued. */ - void onStaleShardOrDbError(StringData cmdName); + void onStaleShardOrDbError(StringData cmdName, const Status& errorStatus); /** * Resets the transaction state to allow for a retry attempt. This includes clearing all * participants, clearing the coordinator, and resetting the global read timestamp. Will throw * if the transaction cannot be continued. */ - void onSnapshotError(); + void onSnapshotError(const Status& errorStatus); /** * Updates the transaction tracking state to allow for a retry attempt on a view resolution diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp index e78ae8707fc..05c0bcdcb42 100644 --- a/src/mongo/s/transaction_router_test.cpp +++ b/src/mongo/s/transaction_router_test.cpp @@ -69,6 +69,8 @@ protected: repl::ReadConcernLevel::kAvailableReadConcern, repl::ReadConcernLevel::kLinearizableReadConcern}; + const Status kDummyStatus = {ErrorCodes::InternalError, "dummy"}; + void setUp() override { ShardingTestFixture::setUp(); configTargeter()->setFindHostReturnValue(kTestConfigShardHost); @@ -730,7 +732,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, SnapshotErrorsResetAtClusterTime LogicalClock::get(operationContext())->setClusterTimeFromTrustedSource(laterTime); // Simulate a snapshot error. - txnRouter.onSnapshotError(); + txnRouter.onSnapshotError(kDummyStatus); txnRouter.setDefaultAtClusterTime(operationContext()); @@ -824,7 +826,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, SnapshotErrorsClearsAllParticipa // Simulate a snapshot error and an internal retry that only re-targets one of the original two // shards. - txnRouter.onSnapshotError(); + txnRouter.onSnapshotError(kDummyStatus); txnRouter.setDefaultAtClusterTime(operationContext()); @@ -860,19 +862,19 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnSnapshotErrorThrowsAfterFirstC txnRouter.setDefaultAtClusterTime(operationContext()); // Should not throw. - txnRouter.onSnapshotError(); + txnRouter.onSnapshotError(kDummyStatus); txnRouter.setDefaultAtClusterTime(operationContext()); repl::ReadConcernArgs::get(operationContext()) = repl::ReadConcernArgs(); txnRouter.beginOrContinueTxn(operationContext(), txnNum, false); ASSERT_THROWS_CODE( - txnRouter.onSnapshotError(), AssertionException, ErrorCodes::NoSuchTransaction); + txnRouter.onSnapshotError(kDummyStatus), AssertionException, ErrorCodes::NoSuchTransaction); repl::ReadConcernArgs::get(operationContext()) = repl::ReadConcernArgs(); txnRouter.beginOrContinueTxn(operationContext(), txnNum, false); ASSERT_THROWS_CODE( - txnRouter.onSnapshotError(), AssertionException, ErrorCodes::NoSuchTransaction); + txnRouter.onSnapshotError(kDummyStatus), AssertionException, ErrorCodes::NoSuchTransaction); } TEST_F(TransactionRouterTestWithDefaultSession, ParticipantsRememberStmtIdCreatedAt) { @@ -942,7 +944,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, // Simulate stale error and internal retry that only re-targets one of the original shards. - txnRouter.onStaleShardOrDbError("find"); + txnRouter.onStaleShardOrDbError("find", kDummyStatus); ASSERT_FALSE(txnRouter.getCoordinatorId()); @@ -987,7 +989,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnlyNewlyCreatedParticipantsClea txnRouter.attachTxnFieldsIfNeeded(shard2, {}); txnRouter.attachTxnFieldsIfNeeded(shard3, {}); - txnRouter.onStaleShardOrDbError("find"); + txnRouter.onStaleShardOrDbError("find", kDummyStatus); // Shards 2 and 3 must start a transaction, but shard 1 must not. ASSERT_FALSE(txnRouter.attachTxnFieldsIfNeeded(shard1, {})["startTransaction"].trueValue()); @@ -1016,7 +1018,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, ASSERT_GT(laterTime, kInMemoryLogicalTime); LogicalClock::get(operationContext())->setClusterTimeFromTrustedSource(laterTime); - txnRouter.onStaleShardOrDbError("find"); + txnRouter.onStaleShardOrDbError("find", kDummyStatus); txnRouter.setDefaultAtClusterTime(operationContext()); BSONObj expectedReadConcern = BSON("level" @@ -1051,11 +1053,11 @@ TEST_F(TransactionRouterTestWithDefaultSession, WritesCanOnlyBeRetriedIfFirstOve txnRouter.attachTxnFieldsIfNeeded(shard1, {}); for (auto writeCmd : writeCmds) { - txnRouter.onStaleShardOrDbError(writeCmd); // Should not throw. + txnRouter.onStaleShardOrDbError(writeCmd, kDummyStatus); // Should not throw. } for (auto cmd : otherCmds) { - txnRouter.onStaleShardOrDbError(cmd); // Should not throw. + txnRouter.onStaleShardOrDbError(cmd, kDummyStatus); // Should not throw. } // Advance to the next command. @@ -1064,13 +1066,13 @@ TEST_F(TransactionRouterTestWithDefaultSession, WritesCanOnlyBeRetriedIfFirstOve txnRouter.beginOrContinueTxn(operationContext(), txnNum, false); for (auto writeCmd : writeCmds) { - ASSERT_THROWS_CODE(txnRouter.onStaleShardOrDbError(writeCmd), + ASSERT_THROWS_CODE(txnRouter.onStaleShardOrDbError(writeCmd, kDummyStatus), AssertionException, ErrorCodes::NoSuchTransaction); } for (auto cmd : otherCmds) { - txnRouter.onStaleShardOrDbError(cmd); // Should not throw. + txnRouter.onStaleShardOrDbError(cmd, kDummyStatus); // Should not throw. } } @@ -1414,8 +1416,9 @@ TEST_F(TransactionRouterTestWithDefaultSession, NonSnapshotReadConcernHasNoAtClu ASSERT_FALSE(txnRouter.getAtClusterTime()); // Can't continue on snapshot errors. - ASSERT_THROWS_CODE( - txnRouter.onSnapshotError(), AssertionException, ErrorCodes::NoSuchTransaction); + ASSERT_THROWS_CODE(txnRouter.onSnapshotError(kDummyStatus), + AssertionException, + ErrorCodes::NoSuchTransaction); } } |