summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2019-07-10 18:37:46 +0200
committerKaloian Manassiev <kaloian.manassiev@mongodb.com>2019-07-14 08:14:07 -0400
commit903207938dc05f9e3f4ca546232d8a7ceda99e4c (patch)
tree1afc0550abe5a044e74e3fce1e4a48a4b98b286a
parente6644474d876eb99579101e81d38c363feef07cd (diff)
downloadmongo-903207938dc05f9e3f4ca546232d8a7ceda99e4c.tar.gz
SERVER-41204 Output the transaction abort reason in the slow log line
-rw-r--r--src/mongo/base/status.cpp11
-rw-r--r--src/mongo/base/status.h6
-rw-r--r--src/mongo/db/s/transaction_coordinator.cpp9
-rw-r--r--src/mongo/db/s/transaction_coordinator.h1
-rw-r--r--src/mongo/db/s/transaction_coordinator_document.idl12
-rw-r--r--src/mongo/db/s/transaction_coordinator_structures.cpp9
-rw-r--r--src/mongo/db/s/transaction_coordinator_structures.h9
-rw-r--r--src/mongo/db/s/transaction_coordinator_structures_test.cpp28
-rw-r--r--src/mongo/db/s/transaction_coordinator_test.cpp69
-rw-r--r--src/mongo/db/s/transaction_coordinator_util.cpp119
-rw-r--r--src/mongo/db/s/transaction_coordinator_util.h7
11 files changed, 204 insertions, 76 deletions
diff --git a/src/mongo/base/status.cpp b/src/mongo/base/status.cpp
index 696ff79e5e0..ae4235c0bc3 100644
--- a/src/mongo/base/status.cpp
+++ b/src/mongo/base/status.cpp
@@ -143,4 +143,15 @@ std::string Status::toString() const {
return sb.str();
}
+void Status::serializeErrorToBSON(BSONObjBuilder* builder) const {
+ invariant(!isOK());
+
+ builder->append("code", code());
+ builder->append("codeName", ErrorCodes::errorString(code()));
+ builder->append("errmsg", reason());
+
+ if (auto ei = extraInfo())
+ ei->serialize(builder);
+}
+
} // namespace mongo
diff --git a/src/mongo/base/status.h b/src/mongo/base/status.h
index d3faa8fd0ec..c31ef5e83bb 100644
--- a/src/mongo/base/status.h
+++ b/src/mongo/base/status.h
@@ -194,6 +194,12 @@ public:
std::string toString() const;
/**
+ * May only be called if the status is *not OK*. Serializes the code, code name and reason in
+ * the canonical code/codeName/errmsg format used in the server command responses.
+ */
+ void serializeErrorToBSON(BSONObjBuilder* builder) const;
+
+ /**
* Returns true if this Status's code is a member of the given category.
*/
template <ErrorCategory category>
diff --git a/src/mongo/db/s/transaction_coordinator.cpp b/src/mongo/db/s/transaction_coordinator.cpp
index e189f75cd33..e9c59b0b17b 100644
--- a/src/mongo/db/s/transaction_coordinator.cpp
+++ b/src/mongo/db/s/transaction_coordinator.cpp
@@ -187,11 +187,7 @@ TransactionCoordinator::TransactionCoordinator(ServiceContext* serviceContext,
return Future<void>::makeReady();
}
- return txn::persistDecision(*_scheduler,
- _lsid,
- _txnNumber,
- *_participants,
- _decision->getCommitTimestamp())
+ return txn::persistDecision(*_scheduler, _lsid, _txnNumber, *_participants, *_decision)
.then([this] {
stdx::lock_guard<stdx::mutex> lg(_mutex);
_decisionDurable = true;
@@ -264,6 +260,7 @@ TransactionCoordinator::TransactionCoordinator(ServiceContext* serviceContext,
}
TransactionCoordinator::~TransactionCoordinator() {
+ invariant(_completionPromisesFired);
invariant(_completionPromises.empty());
}
@@ -399,7 +396,7 @@ std::string TransactionCoordinator::_twoPhaseCommitInfoForLog(
break;
case txn::CommitDecision::kAbort:
s << ", terminationCause:aborted";
- // TODO: abortCause, abortSource
+ s << ", terminationDetails: " << *decision.getAbortStatus();
break;
default:
MONGO_UNREACHABLE;
diff --git a/src/mongo/db/s/transaction_coordinator.h b/src/mongo/db/s/transaction_coordinator.h
index a482628a897..2bfb54e74ba 100644
--- a/src/mongo/db/s/transaction_coordinator.h
+++ b/src/mongo/db/s/transaction_coordinator.h
@@ -160,6 +160,7 @@ private:
// Protects the state below
mutable stdx::mutex _mutex;
+ // Tracks which step of the 2PC coordination is currently (or was most recently) executing
Step _step{Step::kInactive};
// Promise/future pair which will be signaled when the coordinator has completed
diff --git a/src/mongo/db/s/transaction_coordinator_document.idl b/src/mongo/db/s/transaction_coordinator_document.idl
index 78dadac283b..39f1bd0bef3 100644
--- a/src/mongo/db/s/transaction_coordinator_document.idl
+++ b/src/mongo/db/s/transaction_coordinator_document.idl
@@ -45,6 +45,13 @@ types:
serializer: "::mongo::txn::writeCommitDecisionEnumProperty"
deserializer: "::mongo::txn::readCommitDecisionEnumProperty"
+ status:
+ bson_serialization_type: any
+ description: "Serializes/deserializes the general server Status type"
+ cpp_type: "::mongo::Status"
+ serializer: "::mongo::txn::writeStatusProperty"
+ deserializer: "::mongo::txn::readStatusProperty"
+
structs:
CoordinatorCommitDecision:
description: "An object representing the coordinator's commit decision."
@@ -59,6 +66,11 @@ structs:
type: timestamp
description: "If the decision is 'commit', contains the chosen commit timestamp,
otherwise it will not be set"
+ abortStatus:
+ optional: true
+ type: status
+ description: "If the decision is 'abort', contains the reason the shard aborted,
+ otherwise it will not be set"
TransactionCoordinatorDocument:
description: "A document used for majority confirming the coordinator's state changes"
diff --git a/src/mongo/db/s/transaction_coordinator_structures.cpp b/src/mongo/db/s/transaction_coordinator_structures.cpp
index 2088ff6d5c7..65e9a840f99 100644
--- a/src/mongo/db/s/transaction_coordinator_structures.cpp
+++ b/src/mongo/db/s/transaction_coordinator_structures.cpp
@@ -75,5 +75,14 @@ logger::LogstreamBuilder& operator<<(logger::LogstreamBuilder& stream,
MONGO_UNREACHABLE;
}
+Status readStatusProperty(const BSONElement& statusBSON) {
+ return Status(ErrorCodes::Error(statusBSON["code"].Int()), statusBSON["errmsg"].String());
+}
+
+void writeStatusProperty(const Status& status, StringData fieldName, BSONObjBuilder* builder) {
+ BSONObjBuilder statusBuilder(builder->subobjStart(fieldName));
+ status.serializeErrorToBSON(&statusBuilder);
+}
+
} // namespace txn
} // namespace mongo
diff --git a/src/mongo/db/s/transaction_coordinator_structures.h b/src/mongo/db/s/transaction_coordinator_structures.h
index 76f7890990d..13462d2a273 100644
--- a/src/mongo/db/s/transaction_coordinator_structures.h
+++ b/src/mongo/db/s/transaction_coordinator_structures.h
@@ -33,6 +33,7 @@
#include <vector>
#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/logger/logstream_builder.h"
#include "mongo/s/shard_id.h"
@@ -49,7 +50,7 @@ enum class PrepareVote {
using CommitDecision = PrepareVote;
/**
- * String serializer/deserializer for the commit decision property values.
+ * String serializer/deserializer for the commit decision enum values.
*/
CommitDecision readCommitDecisionEnumProperty(StringData decision);
StringData writeCommitDecisionEnumProperty(CommitDecision decision);
@@ -57,5 +58,11 @@ StringData writeCommitDecisionEnumProperty(CommitDecision decision);
logger::LogstreamBuilder& operator<<(logger::LogstreamBuilder& stream,
const CommitDecision& decision);
+/**
+ * Optional serializer/deserializer for the generic server 'Status' type.
+ */
+Status readStatusProperty(const BSONElement& statusBSON);
+void writeStatusProperty(const Status& status, StringData fieldName, BSONObjBuilder* builder);
+
} // namespace txn
} // namespace mongo
diff --git a/src/mongo/db/s/transaction_coordinator_structures_test.cpp b/src/mongo/db/s/transaction_coordinator_structures_test.cpp
index 6ccc636a222..f29b442559b 100644
--- a/src/mongo/db/s/transaction_coordinator_structures_test.cpp
+++ b/src/mongo/db/s/transaction_coordinator_structures_test.cpp
@@ -36,15 +36,39 @@ namespace mongo {
namespace txn {
namespace {
-TEST(CoordinatorCommitDecisionTest, SerializeNoCommitTimestamp) {
+TEST(CoordinatorCommitDecisionTest, SerializeCommitHasTimestampAndNoAbortStatus) {
CoordinatorCommitDecision decision(CommitDecision::kCommit);
+ decision.setCommitTimestamp(Timestamp(100, 200));
+
auto obj = decision.toBSON();
ASSERT_BSONOBJ_EQ(BSON("decision"
- << "commit"),
+ << "commit"
+ << "commitTimestamp"
+ << Timestamp(100, 200)),
obj);
}
+TEST(CoordinatorCommitDecisionTest, SerializeAbortHasNoTimestampAndAbortStatus) {
+ CoordinatorCommitDecision decision(CommitDecision::kAbort);
+ decision.setAbortStatus(Status(ErrorCodes::InternalError, "Test error"));
+
+ auto obj = decision.toBSON();
+ auto expectedObj = BSON("decision"
+ << "abort"
+ << "abortStatus"
+ << BSON("code" << 1 << "codeName"
+ << "InternalError"
+ << "errmsg"
+ << "Test error"));
+
+ ASSERT_BSONOBJ_EQ(expectedObj, obj);
+
+ auto deserializedDecision =
+ CoordinatorCommitDecision::parse(IDLParserErrorContext("AbortTest"), expectedObj);
+ ASSERT_BSONOBJ_EQ(obj, deserializedDecision.toBSON());
+}
+
} // namespace
} // namespace txn
} // namespace mongo
diff --git a/src/mongo/db/s/transaction_coordinator_test.cpp b/src/mongo/db/s/transaction_coordinator_test.cpp
index fc5857570cb..ddf0bb96fa6 100644
--- a/src/mongo/db/s/transaction_coordinator_test.cpp
+++ b/src/mongo/db/s/transaction_coordinator_test.cpp
@@ -54,7 +54,8 @@ using TransactionCoordinatorDocument = txn::TransactionCoordinatorDocument;
const Hours kLongFutureTimeout(8);
const StatusWith<BSONObj> kNoSuchTransaction =
- BSON("ok" << 0 << "code" << ErrorCodes::NoSuchTransaction);
+ BSON("ok" << 0 << "code" << ErrorCodes::NoSuchTransaction << "errmsg"
+ << "No such transaction exists");
const StatusWith<BSONObj> kOk = BSON("ok" << 1);
const Timestamp kDummyPrepareTimestamp = Timestamp(1, 1);
@@ -63,6 +64,7 @@ StatusWith<BSONObj> makePrepareOkResponse(const Timestamp& timestamp) {
}
const StatusWith<BSONObj> kPrepareOk = makePrepareOkResponse(kDummyPrepareTimestamp);
+const StatusWith<BSONObj> kPrepareOkNoTimestamp = BSON("ok" << 1);
class TransactionCoordinatorTestBase : public TransactionCoordinatorTestFixture {
protected:
@@ -274,12 +276,15 @@ TEST_F(TransactionCoordinatorDriverTest,
getServiceContext(), aws, kTwoShardIdList[0], makeDummyPrepareCommand(_lsid, _txnNumber));
assertPrepareSentAndRespondWithRetryableError();
- aws.shutdown({ErrorCodes::TransactionCoordinatorReachedAbortDecision, "Retry interrupted"});
+ const auto shutdownStatus =
+ Status{ErrorCodes::TransactionCoordinatorReachedAbortDecision, "Retry interrupted"};
+ aws.shutdown(shutdownStatus);
advanceClockAndExecuteScheduledTasks();
auto response = future.get();
ASSERT(response.vote == boost::none);
ASSERT(response.prepareTimestamp == boost::none);
+ ASSERT_EQ(shutdownStatus.code(), response.abortReason->code());
}
TEST_F(TransactionCoordinatorDriverTest,
@@ -307,6 +312,8 @@ TEST_F(TransactionCoordinatorDriverTest,
auto response = future.get();
ASSERT(response.vote == txn::PrepareVote::kAbort);
ASSERT(response.prepareTimestamp == boost::none);
+ ASSERT(response.abortReason);
+ ASSERT_EQ(ErrorCodes::NoSuchTransaction, response.abortReason->code());
}
TEST_F(TransactionCoordinatorDriverTest,
@@ -321,6 +328,8 @@ TEST_F(TransactionCoordinatorDriverTest,
auto response = future.get();
ASSERT(response.vote == txn::PrepareVote::kAbort);
ASSERT(response.prepareTimestamp == boost::none);
+ ASSERT(response.abortReason);
+ ASSERT_EQ(ErrorCodes::NoSuchTransaction, response.abortReason->code());
}
TEST_F(TransactionCoordinatorDriverTest,
@@ -332,7 +341,10 @@ TEST_F(TransactionCoordinatorDriverTest,
[&](const executor::RemoteCommandRequest& request) { return kPrepareOk; }});
auto decision = future.get().decision();
+
ASSERT(decision.getDecision() == txn::CommitDecision::kAbort);
+ ASSERT(decision.getAbortStatus());
+ ASSERT_EQ(ErrorCodes::NoSuchTransaction, decision.getAbortStatus()->code());
}
TEST_F(TransactionCoordinatorDriverTest,
@@ -345,6 +357,8 @@ TEST_F(TransactionCoordinatorDriverTest,
auto decision = future.get().decision();
ASSERT(decision.getDecision() == txn::CommitDecision::kAbort);
+ ASSERT(decision.getAbortStatus());
+ ASSERT_EQ(ErrorCodes::NoSuchTransaction, decision.getAbortStatus()->code());
}
TEST_F(TransactionCoordinatorDriverTest,
@@ -353,10 +367,12 @@ TEST_F(TransactionCoordinatorDriverTest,
auto future = txn::sendPrepare(getServiceContext(), aws, _lsid, _txnNumber, kTwoShardIdList);
onCommands({[&](const executor::RemoteCommandRequest& request) { return kNoSuchTransaction; },
- [&](const executor::RemoteCommandRequest& request) { return kPrepareOk; }});
+ [&](const executor::RemoteCommandRequest& request) { return kNoSuchTransaction; }});
auto decision = future.get().decision();
ASSERT(decision.getDecision() == txn::CommitDecision::kAbort);
+ ASSERT(decision.getAbortStatus());
+ ASSERT_EQ(ErrorCodes::NoSuchTransaction, decision.getAbortStatus()->code());
}
TEST_F(TransactionCoordinatorDriverTest,
@@ -372,6 +388,7 @@ TEST_F(TransactionCoordinatorDriverTest,
auto decision = future.get().decision();
ASSERT(decision.getDecision() == txn::CommitDecision::kCommit);
+ ASSERT(!decision.getAbortStatus());
ASSERT_EQ(maxPrepareTimestamp, *decision.getCommitTimestamp());
}
@@ -388,6 +405,7 @@ TEST_F(TransactionCoordinatorDriverTest,
auto decision = future.get().decision();
ASSERT(decision.getDecision() == txn::CommitDecision::kCommit);
+ ASSERT(!decision.getAbortStatus());
ASSERT_EQ(maxPrepareTimestamp, *decision.getCommitTimestamp());
}
@@ -404,9 +422,27 @@ TEST_F(TransactionCoordinatorDriverTest,
auto decision = future.get().decision();
ASSERT(decision.getDecision() == txn::CommitDecision::kCommit);
+ ASSERT(!decision.getAbortStatus());
ASSERT_EQ(maxPrepareTimestamp, *decision.getCommitTimestamp());
}
+TEST_F(TransactionCoordinatorDriverTest,
+ SendPrepareReturnsAbortDecisionWhenNoPreparedTimestampIsReturned) {
+ const auto timestamp = Timestamp(1, 1);
+
+ txn::AsyncWorkScheduler aws(getServiceContext());
+ auto future = txn::sendPrepare(getServiceContext(), aws, _lsid, _txnNumber, kTwoShardIdList);
+
+ assertPrepareSentAndRespondWithSuccess(timestamp);
+ assertCommandSentAndRespondWith(
+ PrepareTransaction::kCommandName, kPrepareOkNoTimestamp, WriteConcernOptions::Majority);
+
+ auto decision = future.get().decision();
+
+ ASSERT(decision.getDecision() == txn::CommitDecision::kAbort);
+ ASSERT(decision.getAbortStatus());
+ ASSERT_EQ(ErrorCodes::InternalError, decision.getAbortStatus()->code());
+}
class TransactionCoordinatorDriverPersistenceTest : public TransactionCoordinatorDriverTest {
protected:
@@ -465,7 +501,17 @@ protected:
TxnNumber txnNumber,
const std::vector<ShardId>& participants,
const boost::optional<Timestamp>& commitTimestamp) {
- txn::persistDecision(*_aws, lsid, txnNumber, participants, commitTimestamp).get();
+ txn::persistDecision(*_aws, lsid, txnNumber, participants, [&] {
+ txn::CoordinatorCommitDecision decision;
+ if (commitTimestamp) {
+ decision.setDecision(txn::CommitDecision::kCommit);
+ decision.setCommitTimestamp(commitTimestamp);
+ } else {
+ decision.setDecision(txn::CommitDecision::kAbort);
+ decision.setAbortStatus(Status(ErrorCodes::NoSuchTransaction, "Test abort status"));
+ }
+ return decision;
+ }()).get();
auto allCoordinatorDocs = txn::readAllCoordinatorDocs(opCtx);
ASSERT_EQUALS(allCoordinatorDocs.size(), size_t(1));
@@ -573,8 +619,11 @@ TEST_F(TransactionCoordinatorDriverPersistenceTest,
TEST_F(TransactionCoordinatorDriverPersistenceTest,
PersistCommitDecisionWhenNoDocumentForTransactionExistsCanBeInterruptedAndReturnsError) {
- Future<void> future = txn::persistDecision(
- *_aws, _lsid, _txnNumber, _participants, _commitTimestamp /* commit */);
+ Future<void> future = txn::persistDecision(*_aws, _lsid, _txnNumber, _participants, [&] {
+ txn::CoordinatorCommitDecision decision(txn::CommitDecision::kCommit);
+ decision.setCommitTimestamp(_commitTimestamp);
+ return decision;
+ }());
_aws->shutdown({ErrorCodes::TransactionCoordinatorSteppingDown, "Shutdown for test"});
ASSERT_THROWS_CODE(
@@ -639,7 +688,11 @@ TEST_F(TransactionCoordinatorDriverPersistenceTest,
// Delete the document for the first transaction and check that only the second transaction's
// document still exists.
- txn::persistDecision(*_aws, _lsid, txnNumber1, _participants, boost::none /* abort */).get();
+ txn::persistDecision(*_aws, _lsid, txnNumber1, _participants, [&] {
+ txn::CoordinatorCommitDecision decision(txn::CommitDecision::kAbort);
+ decision.setAbortStatus(Status(ErrorCodes::NoSuchTransaction, "Test abort error"));
+ return decision;
+ }()).get();
txn::deleteCoordinatorDoc(*_aws, _lsid, txnNumber1).get();
allCoordinatorDocs = txn::readAllCoordinatorDocs(operationContext());
@@ -2027,6 +2080,8 @@ TEST_F(TransactionCoordinatorMetricsTest, SlowLogLineIncludesTerminationCauseFor
stopCapturingLogMessages();
ASSERT_EQUALS(1, countLogLinesContaining("terminationCause:aborted"));
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("terminationDetails: NoSuchTransaction: from shard s2"));
}
TEST_F(TransactionCoordinatorMetricsTest, SlowLogLineIncludesNumParticipants) {
diff --git a/src/mongo/db/s/transaction_coordinator_util.cpp b/src/mongo/db/s/transaction_coordinator_util.cpp
index 05b9ff6437b..5baab08bc83 100644
--- a/src/mongo/db/s/transaction_coordinator_util.cpp
+++ b/src/mongo/db/s/transaction_coordinator_util.cpp
@@ -212,10 +212,11 @@ void PrepareVoteConsensus::registerVote(const PrepareResponse& vote) {
if (vote.vote == PrepareVote::kCommit) {
++_numCommitVotes;
_maxPrepareTimestamp = std::max(_maxPrepareTimestamp, *vote.prepareTimestamp);
- } else if (vote.vote == PrepareVote::kAbort) {
- ++_numAbortVotes;
} else {
- ++_numNoVotes;
+ vote.vote == PrepareVote::kAbort ? ++_numAbortVotes : ++_numNoVotes;
+
+ if (!_abortStatus)
+ _abortStatus.emplace(*vote.abortReason);
}
}
@@ -227,7 +228,9 @@ CoordinatorCommitDecision PrepareVoteConsensus::decision() const {
decision.setDecision(CommitDecision::kCommit);
decision.setCommitTimestamp(_maxPrepareTimestamp);
} else {
+ invariant(_abortStatus);
decision.setDecision(CommitDecision::kAbort);
+ decision.setAbortStatus(*_abortStatus);
}
return decision;
}
@@ -287,8 +290,9 @@ void persistDecisionBlocking(OperationContext* opCtx,
const LogicalSessionId& lsid,
TxnNumber txnNumber,
const std::vector<ShardId>& participantList,
- const boost::optional<Timestamp>& commitTimestamp) {
- LOG(3) << "Going to write decision " << (commitTimestamp ? "commit" : "abort") << " for "
+ const txn::CoordinatorCommitDecision& decision) {
+ const bool isCommit = decision.getDecision() == txn::CommitDecision::kCommit;
+ LOG(3) << "Going to write decision " << (isCommit ? "commit" : "abort") << " for "
<< lsid.getId() << ':' << txnNumber;
if (MONGO_FAIL_POINT(hangBeforeWritingDecision)) {
@@ -313,18 +317,9 @@ void persistDecisionBlocking(OperationContext* opCtx,
// if an earlier attempt to write the decision failed waiting for writeConcern.
BSONObj noDecision = BSON(TransactionCoordinatorDocument::kDecisionFieldName
<< BSON("$exists" << false));
- BSONObj sameDecision;
- if (commitTimestamp) {
- sameDecision = BSON(TransactionCoordinatorDocument::kDecisionFieldName
- << BSON(TransactionCoordinatorDocument::kDecisionFieldName
- << "commit"
- << "commitTimestamp"
- << *commitTimestamp));
- } else {
- sameDecision =
- BSON(TransactionCoordinatorDocument::kDecisionFieldName
- << BSON(TransactionCoordinatorDocument::kDecisionFieldName << "abort"));
- }
+ BSONObj sameDecision =
+ BSON(TransactionCoordinatorDocument::kDecisionFieldName << decision.toBSON());
+
entry.setQ(BSON(TransactionCoordinatorDocument::kIdFieldName
<< sessionInfo.toBSON()
<< "$and"
@@ -332,19 +327,13 @@ void persistDecisionBlocking(OperationContext* opCtx,
<< "$or"
<< BSON_ARRAY(noDecision << sameDecision)));
- // Update with decision.
- TransactionCoordinatorDocument doc;
- doc.setId(sessionInfo);
- doc.setParticipants(std::move(participantList));
- txn::CoordinatorCommitDecision decision;
- if (commitTimestamp) {
- decision.setDecision(CommitDecision::kCommit);
- decision.setCommitTimestamp(commitTimestamp);
- } else {
- decision.setDecision(CommitDecision::kAbort);
- }
- doc.setDecision(decision);
- entry.setU(doc.toBSON());
+ entry.setU([&] {
+ TransactionCoordinatorDocument doc;
+ doc.setId(sessionInfo);
+ doc.setParticipants(std::move(participantList));
+ doc.setDecision(decision);
+ return doc.toBSON();
+ }());
return entry;
}()});
@@ -361,11 +350,11 @@ void persistDecisionBlocking(OperationContext* opCtx,
// exists. Note that this is best-effort: the document may have been deleted or manually
// changed since the update above ran.
const auto doc = client.findOne(
- NamespaceString::kTransactionCoordinatorsNamespace.toString(),
+ NamespaceString::kTransactionCoordinatorsNamespace.ns(),
QUERY(TransactionCoordinatorDocument::kIdFieldName << sessionInfo.toBSON()));
uasserted(51026,
str::stream() << "While attempting to write decision "
- << (commitTimestamp ? "'commit'" : "'abort'")
+ << (isCommit ? "'commit'" : "'abort'")
<< " for"
<< lsid.getId()
<< ':'
@@ -376,8 +365,8 @@ void persistDecisionBlocking(OperationContext* opCtx,
<< doc);
}
- LOG(3) << "Wrote decision " << (commitTimestamp ? "commit" : "abort") << " for " << lsid.getId()
- << ':' << txnNumber;
+ LOG(3) << "Wrote decision " << (isCommit ? "commit" : "abort") << " for " << lsid.getId() << ':'
+ << txnNumber;
MONGO_FAIL_POINT_BLOCK(hangBeforeWaitingForDecisionWriteConcern, fp) {
LOG(0) << "Hit hangBeforeWaitingForDecisionWriteConcern failpoint";
@@ -403,17 +392,17 @@ Future<void> persistDecision(txn::AsyncWorkScheduler& scheduler,
const LogicalSessionId& lsid,
TxnNumber txnNumber,
const txn::ParticipantsList& participants,
- const boost::optional<Timestamp>& commitTimestamp) {
- return txn::doWhile(
- scheduler,
- boost::none /* no need for a backoff */,
- [](const Status& s) { return shouldRetryPersistingCoordinatorState(s); },
- [&scheduler, lsid, txnNumber, participants, commitTimestamp] {
- return scheduler.scheduleWork(
- [lsid, txnNumber, participants, commitTimestamp](OperationContext* opCtx) {
- persistDecisionBlocking(opCtx, lsid, txnNumber, participants, commitTimestamp);
- });
- });
+ const txn::CoordinatorCommitDecision& decision) {
+ return txn::doWhile(scheduler,
+ boost::none /* no need for a backoff */,
+ [](const Status& s) { return shouldRetryPersistingCoordinatorState(s); },
+ [&scheduler, lsid, txnNumber, participants, decision] {
+ return scheduler.scheduleWork(
+ [lsid, txnNumber, participants, decision](OperationContext* opCtx) {
+ persistDecisionBlocking(
+ opCtx, lsid, txnNumber, participants, decision);
+ });
+ });
}
Future<void> sendCommit(ServiceContext* service,
@@ -596,33 +585,45 @@ Future<PrepareResponse> sendPrepareToShard(ServiceContext* service,
auto prepareTimestampField = response.data["prepareTimestamp"];
if (prepareTimestampField.eoo() ||
prepareTimestampField.timestamp().isNull()) {
- LOG(0) << "Coordinator shard received an OK response to "
- "prepareTransaction "
- "without a prepareTimestamp from shard "
- << shardId << ", which is not expected behavior. "
- "Interpreting the response from "
- << shardId << " as a vote to abort";
- return PrepareResponse{shardId, PrepareVote::kAbort, boost::none};
+ Status abortStatus(ErrorCodes::InternalError,
+ str::stream()
+ << "Coordinator shard received an OK response "
+ "to prepareTransaction without a "
+ "prepareTimestamp from shard "
+ << shardId
+ << ", which is not an expected behavior. "
+ "Interpreting the response as vote to abort");
+ LOG(0) << redact(abortStatus);
+
+ return PrepareResponse{
+ shardId, PrepareVote::kAbort, boost::none, abortStatus};
}
LOG(3) << "Coordinator shard received a vote to commit from shard "
<< shardId
<< " with prepareTimestamp: " << prepareTimestampField.timestamp();
- return PrepareResponse{
- shardId, PrepareVote::kCommit, prepareTimestampField.timestamp()};
+
+ return PrepareResponse{shardId,
+ PrepareVote::kCommit,
+ prepareTimestampField.timestamp(),
+ boost::none};
}
LOG(3) << "Coordinator shard received " << status << " from shard " << shardId
<< " for " << commandObj;
if (ErrorCodes::isVoteAbortError(status.code())) {
- return PrepareResponse{shardId, PrepareVote::kAbort, boost::none};
+ return PrepareResponse{
+ shardId,
+ PrepareVote::kAbort,
+ boost::none,
+ status.withContext(str::stream() << "from shard " << shardId)};
}
uassertStatusOK(status);
MONGO_UNREACHABLE;
})
- .onError<ErrorCodes::ShardNotFound>([shardId, isLocalShard](const Status&) {
+ .onError<ErrorCodes::ShardNotFound>([shardId, isLocalShard](const Status& status) {
invariant(!isLocalShard);
// ShardNotFound may indicate that the participant shard has been removed (it
// could also mean the participant shard was recently added and this node
@@ -632,14 +633,14 @@ Future<PrepareResponse> sendPrepareToShard(ServiceContext* service,
// treat ShardNotFound as a vote to abort, which is always safe since the node
// must then send abort.
return Future<PrepareResponse>::makeReady(
- {shardId, CommitDecision::kAbort, boost::none});
+ {shardId, CommitDecision::kAbort, boost::none, status});
});
});
return std::move(f).onError<ErrorCodes::TransactionCoordinatorReachedAbortDecision>(
- [shardId](const Status&) {
+ [shardId](const Status& status) {
LOG(3) << "Prepare stopped retrying due to retrying being cancelled";
- return PrepareResponse{shardId, boost::none, boost::none};
+ return PrepareResponse{shardId, boost::none, boost::none, status};
});
}
diff --git a/src/mongo/db/s/transaction_coordinator_util.h b/src/mongo/db/s/transaction_coordinator_util.h
index 8721c0bd2d3..d6e87bb4baf 100644
--- a/src/mongo/db/s/transaction_coordinator_util.h
+++ b/src/mongo/db/s/transaction_coordinator_util.h
@@ -79,6 +79,8 @@ private:
int _numNoVotes{0};
Timestamp _maxPrepareTimestamp;
+
+ boost::optional<Status> _abortStatus;
};
/**
@@ -130,7 +132,7 @@ Future<void> persistDecision(txn::AsyncWorkScheduler& scheduler,
const LogicalSessionId& lsid,
TxnNumber txnNumber,
const txn::ParticipantsList& participants,
- const boost::optional<Timestamp>& commitTimestamp);
+ const txn::CoordinatorCommitDecision& decision);
/**
* Sends commit to all shards and returns a future that will be resolved when all participants have
@@ -195,6 +197,9 @@ struct PrepareResponse {
// Will only be set if the vote was kCommit
boost::optional<Timestamp> prepareTimestamp;
+
+ // Will only be set if the vote was kAbort or no value
+ boost::optional<Status> abortReason;
};
Future<PrepareResponse> sendPrepareToShard(ServiceContext* service,
txn::AsyncWorkScheduler& scheduler,