author    Jack Mulrow <jack.mulrow@mongodb.com>  2018-12-05 11:22:28 -0500
committer Jack Mulrow <jack.mulrow@mongodb.com>  2018-12-16 14:04:10 -0500
commit    2d94c21d3af7d4b4fc32e6c34791ac24a2a35c0a (patch)
tree      63d4ae1994fcf5849e0974fa7af897f2bf44bf12 /src/mongo
parent    54c2a4560cbcd5b652c3bdc8543404ea6cc2d485 (diff)
download  mongo-2d94c21d3af7d4b4fc32e6c34791ac24a2a35c0a.tar.gz
SERVER-38470 Add more logging to TransactionRouter
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/s/commands/cluster_distinct_cmd.cpp  |  2
-rw-r--r--  src/mongo/s/query/cluster_aggregate.cpp        |  2
-rw-r--r--  src/mongo/s/transaction_router.cpp             | 49
-rw-r--r--  src/mongo/s/transaction_router.h               | 11
-rw-r--r--  src/mongo/s/transaction_router_test.cpp        |  8
5 files changed, 58 insertions, 14 deletions
diff --git a/src/mongo/s/commands/cluster_distinct_cmd.cpp b/src/mongo/s/commands/cluster_distinct_cmd.cpp
index bcc8b9976f0..3e4fe391d73 100644
--- a/src/mongo/s/commands/cluster_distinct_cmd.cpp
+++ b/src/mongo/s/commands/cluster_distinct_cmd.cpp
@@ -201,7 +201,7 @@ public:
auto resolvedAggCmd = resolvedAggRequest.serializeToCommandObj().toBson();
if (auto txnRouter = TransactionRouter::get(opCtx)) {
- txnRouter->onViewResolutionError();
+ txnRouter->onViewResolutionError(nss);
}
BSONObj aggResult = CommandHelpers::runCommandDirectly(
diff --git a/src/mongo/s/query/cluster_aggregate.cpp b/src/mongo/s/query/cluster_aggregate.cpp
index 506842da29c..82d5e5a7966 100644
--- a/src/mongo/s/query/cluster_aggregate.cpp
+++ b/src/mongo/s/query/cluster_aggregate.cpp
@@ -884,7 +884,7 @@ Status ClusterAggregate::retryOnViewError(OperationContext* opCtx,
result->resetToEmpty();
if (auto txnRouter = TransactionRouter::get(opCtx)) {
- txnRouter->onViewResolutionError();
+ txnRouter->onViewResolutionError(requestedNss);
}
// We pass both the underlying collection namespace and the view namespace here. The
diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp
index 2e6e95ce68a..8a6f4623017 100644
--- a/src/mongo/s/transaction_router.cpp
+++ b/src/mongo/s/transaction_router.cpp
@@ -279,10 +279,13 @@ const boost::optional<ShardId>& TransactionRouter::getCoordinatorId() const {
BSONObj TransactionRouter::attachTxnFieldsIfNeeded(const ShardId& shardId, const BSONObj& cmdObj) {
if (auto txnPart = getParticipant(shardId)) {
+ LOG(4) << _txnIdToString()
+ << " Sending transaction fields to existing participant: " << shardId;
return txnPart->attachTxnFieldsIfNeeded(cmdObj, false);
}
auto txnPart = _createParticipant(shardId);
+ LOG(4) << _txnIdToString() << " Sending transaction fields to new participant: " << shardId;
return txnPart.attachTxnFieldsIfNeeded(cmdObj, true);
}
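The two participant messages above are the only ones in this patch logged at level 4; everything else below logs at level 0. A stand-in sketch of the gating behavior (gVerbosity and the macro body are illustrative assumptions, not the server's actual logging implementation):

#include <iostream>

// Stand-in for a verbosity-gated LOG(n) macro: a message is emitted only when
// the configured verbosity is at least n, so LOG(4) lines stay silent at the
// default verbosity of 0 while LOG(0) lines always appear.
static int gVerbosity = 0;
#define LOG(n) if ((n) <= gVerbosity) std::cout

int main() {
    LOG(0) << "visible at default verbosity\n";
    LOG(4) << "suppressed until verbosity >= 4\n";
    gVerbosity = 4;
    LOG(4) << "now visible\n";
    return 0;
}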
@@ -378,14 +381,20 @@ void TransactionRouter::onStaleShardOrDbError(StringData cmdName, const Status&
<< errorStatus,
_canContinueOnStaleShardOrDbError(cmdName));
+ LOG(0) << _txnIdToString()
+ << " Clearing pending participants after stale version error: " << errorStatus;
+
// Remove participants created during the current statement so they are sent the correct options
// if they are targeted again by the retry.
_clearPendingParticipants();
}
-void TransactionRouter::onViewResolutionError() {
+void TransactionRouter::onViewResolutionError(const NamespaceString& nss) {
// The router can always retry on a view resolution error.
+ LOG(0) << _txnIdToString()
+ << " Clearing pending participants after view resolution error on namespace: " << nss;
+
// Requests against views are always routed to the primary shard for its database, but the retry
// on the resolved namespace does not have to re-target the primary, so pending participants
// should be cleared.
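Both retry paths (stale version above, view resolution here) end by clearing the participants created during the current statement. A rough illustration of that idea (Participant, stmtIdCreatedAt, and the map layout are assumptions for illustration, not the router's actual bookkeeping):

#include <map>
#include <string>

// Illustrative sketch: each participant records the statement id at which it
// joined the transaction, and a retryable error erases exactly the
// participants created by the current statement so the retry can re-target
// them with the correct options.
struct Participant {
    int stmtIdCreatedAt;
};

void clearPendingParticipants(std::map<std::string, Participant>& participants,
                              int latestStmtId) {
    for (auto it = participants.begin(); it != participants.end();) {
        if (it->second.stmtIdCreatedAt == latestStmtId) {
            it = participants.erase(it);
        } else {
            ++it;
        }
    }
}

int main() {
    std::map<std::string, Participant> participants{{"shard1", {0}}, {"shard2", {1}}};
    clearPendingParticipants(participants, /*latestStmtId=*/1);  // shard2 removed, shard1 kept
}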
@@ -404,6 +413,10 @@ void TransactionRouter::onSnapshotError(const Status& errorStatus) {
<< errorStatus,
_canContinueOnSnapshotError());
+ LOG(0) << _txnIdToString() << " Clearing pending participants and resetting global snapshot "
+ "timestamp after snapshot error: "
+ << errorStatus << ", previous timestamp: " << _atClusterTime->getTime();
+
// The transaction must be restarted on all participants because a new read timestamp will be
// selected, so clear all pending participants. Snapshot errors are only retryable on the first
// client statement, so all participants should be cleared, including the coordinator.
@@ -412,7 +425,6 @@ void TransactionRouter::onSnapshotError(const Status& errorStatus) {
invariant(!_coordinatorId);
// Reset the global snapshot timestamp so the retry will select a new one.
- invariant(_atClusterTime);
_atClusterTime.reset();
_atClusterTime.emplace();
}
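The new LOG line already dereferences _atClusterTime via getTime(), which is presumably why the invariant just below it became redundant and was removed. The reset-then-emplace pair destroys the chosen timestamp and default-constructs fresh state in place; a minimal sketch of the pattern (AtClusterTime here is a bare stand-in for the router's snapshot state):

#include <boost/optional.hpp>

// Stand-in for the router's per-transaction snapshot state.
struct AtClusterTime {
    // ... chosen timestamp and the statement id that selected it ...
};

int main() {
    boost::optional<AtClusterTime> atClusterTime;
    atClusterTime.emplace();  // created at transaction start for snapshot read concern
    // On a snapshot error the old choice is discarded and an empty state is
    // re-created in place, so the retry selects a new global timestamp.
    atClusterTime.reset();
    atClusterTime.emplace();
}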
@@ -462,6 +474,9 @@ void TransactionRouter::_setAtClusterTime(const boost::optional<LogicalTime>& af
return;
}
+ LOG(0) << _txnIdToString() << " Setting global snapshot timestamp to " << candidateTime
+ << " on statement " << _latestStmtId;
+
_atClusterTime->setTime(candidateTime, _latestStmtId);
}
@@ -529,6 +544,8 @@ void TransactionRouter::beginOrContinueTxn(OperationContext* opCtx,
if (_readConcernArgs.getLevel() == repl::ReadConcernLevel::kSnapshotReadConcern) {
_atClusterTime.emplace();
}
+
+ LOG(0) << _txnIdToString() << " New transaction started";
}
const LogicalSessionId& TransactionRouter::_sessionId() const {
@@ -546,6 +563,9 @@ Shard::CommandResponse TransactionRouter::_commitSingleShardTransaction(Operatio
CommitTransaction commitCmd;
commitCmd.setDbName("admin");
+ LOG(0) << _txnIdToString()
+ << " Committing single shard transaction, single participant: " << shardId;
+
const auto& participant = citer->second;
return uassertStatusOK(shard->runCommandWithFixedRetryAttempts(
opCtx,
@@ -611,6 +631,9 @@ Shard::CommandResponse TransactionRouter::_commitMultiShardTransaction(Operation
_initiatedTwoPhaseCommit = true;
+ LOG(0) << _txnIdToString()
+ << " Committing multi shard transaction, coordinator: " << *_coordinatorId;
+
return uassertStatusOK(coordinatorShard->runCommandWithFixedRetryAttempts(
opCtx,
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
@@ -633,7 +656,7 @@ Shard::CommandResponse TransactionRouter::commitTransaction(OperationContext* op
}
std::vector<AsyncRequestsSender::Response> TransactionRouter::abortTransaction(
- OperationContext* opCtx) {
+ OperationContext* opCtx, bool isImplicit) {
// The router has yet to send any commands to a remote shard for this transaction.
// Return the same error that would have been returned by a shard.
uassert(ErrorCodes::NoSuchTransaction,
@@ -647,6 +670,14 @@ std::vector<AsyncRequestsSender::Response> TransactionRouter::abortTransaction(
abortRequests.emplace_back(ShardId(participantEntry.first), abortCmd);
}
+ if (isImplicit) {
+ LOG(0) << _txnIdToString() << " Implicitly aborting transaction on " << _participants.size()
+ << " shard(s)";
+ } else {
+ LOG(0) << _txnIdToString() << " Aborting transaction on " << _participants.size()
+ << " shard(s)";
+ }
+
return gatherResponses(opCtx,
NamespaceString::kAdminDb,
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
@@ -660,17 +691,21 @@ void TransactionRouter::implicitlyAbortTransaction(OperationContext* opCtx) {
}
if (_initiatedTwoPhaseCommit) {
- LOG(0) << "Router not sending implicit abortTransaction for transaction "
- << *opCtx->getTxnNumber() << " on session " << opCtx->getLogicalSessionId()->toBSON()
- << " because already initiated two phase commit for the transaction";
+ LOG(0) << _txnIdToString() << " Router not sending implicit abortTransaction because "
+ "already initiated two phase commit for the transaction";
return;
}
try {
- abortTransaction(opCtx);
+ abortTransaction(opCtx, true /*isImplicit*/);
} catch (...) {
// Ignore any exceptions.
}
}
+std::string TransactionRouter::_txnIdToString() {
+ return str::stream() << "(txnNumber: " << _txnNumber << ", lsid: " << _sessionId().getId()
+ << ")";
+}
+
} // namespace mongo
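Every message added by the patch is prefixed with the id produced by this helper. A self-contained approximation of the resulting prefix (std::ostringstream standing in for the server's str::stream, with a placeholder lsid):

#include <iostream>
#include <sstream>
#include <string>

// Approximation of _txnIdToString() above: formats the transaction number and
// logical session id into the prefix attached to each new log message.
std::string txnIdToString(long long txnNumber, const std::string& lsid) {
    std::ostringstream ss;
    ss << "(txnNumber: " << txnNumber << ", lsid: " << lsid << ")";
    return ss.str();
}

int main() {
    // Prints, e.g.: (txnNumber: 0, lsid: <uuid>) New transaction started
    std::cout << txnIdToString(0, "<uuid>") << " New transaction started\n";
}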
diff --git a/src/mongo/s/transaction_router.h b/src/mongo/s/transaction_router.h
index 2cdf38bf389..9791ed2e00e 100644
--- a/src/mongo/s/transaction_router.h
+++ b/src/mongo/s/transaction_router.h
@@ -185,7 +185,7 @@ public:
* Updates the transaction tracking state to allow for a retry attempt on a view resolution
* error.
*/
- void onViewResolutionError();
+ void onViewResolutionError(const NamespaceString& nss);
/**
* Computes and sets the atClusterTime for the current transaction based on the given query
@@ -233,7 +233,8 @@ public:
/**
* Sends abort to all participants and returns the responses from all shards.
*/
- std::vector<AsyncRequestsSender::Response> abortTransaction(OperationContext* opCtx);
+ std::vector<AsyncRequestsSender::Response> abortTransaction(OperationContext* opCtx,
+ bool isImplicit = false);
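Since isImplicit defaults to false, existing explicit-abort call sites compile unchanged and only the implicit-abort path opts in. A usage sketch (router stands for a TransactionRouter, as in the tests):

router.abortTransaction(opCtx);                       // explicit: logs "Aborting transaction on N shard(s)"
router.abortTransaction(opCtx, true /*isImplicit*/);  // implicit: logs "Implicitly aborting transaction on N shard(s)"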
/**
* Sends abort to all shards in the current participant list. Will retry on retryable errors,
@@ -317,6 +318,12 @@ private:
*/
void _verifyParticipantAtClusterTime(const Participant& participant);
+ /**
+ * Returns a string with the active transaction's transaction number and logical session id
+ * (i.e. the transaction id).
+ */
+ std::string _txnIdToString();
+
// The currently active transaction number on this transaction router (i.e. on the session)
TxnNumber _txnNumber{kUninitializedTxnNumber};
diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp
index 05c0bcdcb42..95c0e0bd7f1 100644
--- a/src/mongo/s/transaction_router_test.cpp
+++ b/src/mongo/s/transaction_router_test.cpp
@@ -71,6 +71,8 @@ protected:
const Status kDummyStatus = {ErrorCodes::InternalError, "dummy"};
+ const NamespaceString kViewNss = NamespaceString("test.foo");
+
void setUp() override {
ShardingTestFixture::setUp();
configTargeter()->setFindHostReturnValue(kTestConfigShardHost);
@@ -1031,7 +1033,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
<< "test"));
ASSERT_BSONOBJ_EQ(expectedReadConcern, newCmd["readConcern"].Obj());
- txnRouter.onViewResolutionError();
+ txnRouter.onViewResolutionError(kViewNss);
txnRouter.setDefaultAtClusterTime(operationContext());
newCmd = txnRouter.attachTxnFieldsIfNeeded(shard1,
@@ -1184,7 +1186,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnViewResolutionErrorClearsAllNe
// Simulate a view resolution error on the first client statement that leads to a retry which
// targets the same shard.
- txnRouter.onViewResolutionError();
+ txnRouter.onViewResolutionError(kViewNss);
// The only participant was the coordinator, so it should have been reset.
ASSERT_FALSE(txnRouter.getCoordinatorId());
@@ -1203,7 +1205,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnViewResolutionErrorClearsAllNe
// Simulate a view resolution error.
- txnRouter.onViewResolutionError();
+ txnRouter.onViewResolutionError(kViewNss);
// Only the new participant shard was reset.
firstShardCmd = txnRouter.attachTxnFieldsIfNeeded(shard1, {});