summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2019-07-15 12:52:12 -0400
committerKaloian Manassiev <kaloian.manassiev@mongodb.com>2019-07-24 02:24:31 -0400
commit88be1e9ba104dffbc23aae7a817a944cf6058eff (patch)
tree4b3a5d6520f0ac79e3c40ecb81ec7464dc43bd56
parente0db896c785dc85de0ddb4ec32859c142b91166c (diff)
downloadmongo-88be1e9ba104dffbc23aae7a817a944cf6058eff.tar.gz
SERVER-41369 Terminate the TransactionCoordinator retry logic if read concern majority is not available
(cherry picked from commit e3c26cf2f4c6ea3d597f42360442c0b6320e84bc)
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml1
-rw-r--r--jstests/replsets/prepare_transaction_fails_on_standalone.js3
-rw-r--r--jstests/replsets/prepare_transaction_fails_with_arbiters.js3
-rw-r--r--jstests/replsets/prepare_transaction_fails_without_majority_reads.js3
-rw-r--r--jstests/sharding/multi_shard_transaction_without_majority_reads.js39
-rw-r--r--jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js2
-rw-r--r--src/mongo/base/error_codes.err2
-rw-r--r--src/mongo/db/s/transaction_coordinator.cpp19
-rw-r--r--src/mongo/db/s/transaction_coordinator.h2
-rw-r--r--src/mongo/db/s/transaction_coordinator_test.cpp51
-rw-r--r--src/mongo/db/s/transaction_coordinator_util.cpp2
-rw-r--r--src/mongo/db/s/txn_two_phase_commit_cmds.cpp23
12 files changed, 128 insertions, 22 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index d7d1ca1c1d5..9652027c838 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -97,6 +97,7 @@ selector:
- jstests/sharding/shard_collection_existing_zones.js
- jstests/sharding/single_shard_transaction_with_arbiter.js
- jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js
+ - jstests/sharding/multi_shard_transaction_without_majority_reads.js
- jstests/sharding/snapshot_cursor_commands_mongos.js
- jstests/sharding/transactions_causal_consistency.js
- jstests/sharding/transactions_distinct_not_allowed_on_sharded_collections.js
diff --git a/jstests/replsets/prepare_transaction_fails_on_standalone.js b/jstests/replsets/prepare_transaction_fails_on_standalone.js
index 998119ac34e..14eb17656a5 100644
--- a/jstests/replsets/prepare_transaction_fails_on_standalone.js
+++ b/jstests/replsets/prepare_transaction_fails_on_standalone.js
@@ -14,7 +14,8 @@
assert.commandWorked(testDB.runCommand({create: collName}));
- assert.commandFailedWithCode(testDB.adminCommand({prepareTransaction: 1}), 51239);
+ assert.commandFailedWithCode(testDB.adminCommand({prepareTransaction: 1}),
+ ErrorCodes.ReadConcernMajorityNotEnabled);
MongoRunner.stopMongod(standalone);
}());
diff --git a/jstests/replsets/prepare_transaction_fails_with_arbiters.js b/jstests/replsets/prepare_transaction_fails_with_arbiters.js
index e0529b8195d..672ef7c147a 100644
--- a/jstests/replsets/prepare_transaction_fails_with_arbiters.js
+++ b/jstests/replsets/prepare_transaction_fails_with_arbiters.js
@@ -34,7 +34,8 @@
session.startTransaction();
assert.commandWorked(sessionColl.insert({_id: 42}));
- assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}), 50995);
+ assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}),
+ ErrorCodes.ReadConcernMajorityNotEnabled);
rst.stopSet();
})();
diff --git a/jstests/replsets/prepare_transaction_fails_without_majority_reads.js b/jstests/replsets/prepare_transaction_fails_without_majority_reads.js
index ef816b0fb07..30cbeac87ee 100644
--- a/jstests/replsets/prepare_transaction_fails_without_majority_reads.js
+++ b/jstests/replsets/prepare_transaction_fails_without_majority_reads.js
@@ -27,7 +27,8 @@
session.startTransaction();
assert.commandWorked(sessionColl.insert({_id: 42}));
- assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}), 50993);
+ assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}),
+ ErrorCodes.ReadConcernMajorityNotEnabled);
rst.stopSet();
})();
diff --git a/jstests/sharding/multi_shard_transaction_without_majority_reads.js b/jstests/sharding/multi_shard_transaction_without_majority_reads.js
new file mode 100644
index 00000000000..dafdf503f84
--- /dev/null
+++ b/jstests/sharding/multi_shard_transaction_without_majority_reads.js
@@ -0,0 +1,39 @@
+/**
+ * Test that multi-shard transactions will fail with a non-transient error when run against shards
+ * with disabled majority read concern.
+ *
+ * @tags: [uses_transactions]
+ */
+
+(function() {
+ 'use strict';
+
+ const st = new ShardingTest({shards: 2, rs: {nodes: 1, enableMajorityReadConcern: 'false'}});
+
+ assert.commandWorked(st.s0.adminCommand({enableSharding: 'TestDB'}));
+ st.ensurePrimaryShard('TestDB', st.shard0.shardName);
+ assert.commandWorked(st.s0.adminCommand({shardCollection: 'TestDB.TestColl', key: {_id: 1}}));
+
+ const coll = st.s0.getDB('TestDB').TestColl;
+ assert.writeOK(coll.insert({_id: -1, x: 0}));
+ assert.writeOK(coll.insert({_id: 1, x: 0}));
+ assert.commandWorked(st.s0.adminCommand({split: 'TestDB.TestColl', middle: {_id: 1}}));
+ assert.commandWorked(st.s0.adminCommand(
+ {moveChunk: 'TestDB.TestColl', find: {_id: 1}, to: st.shard1.shardName}));
+
+ assert.writeOK(coll.update({_id: -1}, {$inc: {x: 1}}));
+ assert.writeOK(coll.update({_id: 1}, {$inc: {x: 1}}));
+
+ const session = st.s0.startSession();
+ const sessionColl = session.getDatabase('TestDB').TestColl;
+
+ session.startTransaction();
+
+ assert.writeOK(sessionColl.update({_id: -1}, {$inc: {x: 1}}));
+ assert.writeOK(sessionColl.update({_id: 1}, {$inc: {x: 1}}));
+
+ assert.commandFailedWithCode(session.commitTransaction_forTesting(),
+ ErrorCodes.ReadConcernMajorityNotEnabled);
+
+ st.stop();
+})();
diff --git a/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js b/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js
index c5c3e45c880..03dd211baf7 100644
--- a/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js
+++ b/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js
@@ -27,7 +27,7 @@
shards: 1,
rs: {
nodes: [
- {/* primary */ enableMajorityReadConcern: false},
+ {/* primary */ enableMajorityReadConcern: 'false'},
{/* secondary */ rsConfig: {priority: 0}}
]
}
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err
index b649efaac01..43b1656368b 100644
--- a/src/mongo/base/error_codes.err
+++ b/src/mongo/base/error_codes.err
@@ -359,6 +359,6 @@ error_class("ExceededTimeLimitError", ["ExceededTimeLimit", "MaxTimeMSExpired",
error_class("SnapshotError", ["SnapshotTooOld", "SnapshotUnavailable", "StaleChunkHistory", "MigrationConflict"])
-error_class("VoteAbortError", ["NoSuchTransaction", "TransactionTooOld"])
+error_class("VoteAbortError", ["NoSuchTransaction", "ReadConcernMajorityNotEnabled", "TransactionTooOld"])
error_class("NonResumableChangeStreamError", ["ChangeStreamFatalError", "ChangeStreamHistoryLost"])
diff --git a/src/mongo/db/s/transaction_coordinator.cpp b/src/mongo/db/s/transaction_coordinator.cpp
index 32fae28ed89..95815c552d9 100644
--- a/src/mongo/db/s/transaction_coordinator.cpp
+++ b/src/mongo/db/s/transaction_coordinator.cpp
@@ -249,19 +249,28 @@ TransactionCoordinator::TransactionCoordinator(ServiceContext* serviceContext,
_serviceContext->getPreciseClockSource()->now());
}
- _decisionPromise.emplaceValue(_decision->getDecision());
-
switch (_decision->getDecision()) {
- case CommitDecision::kCommit:
+ case CommitDecision::kCommit: {
+ _decisionPromise.emplaceValue(CommitDecision::kCommit);
+
return txn::sendCommit(_serviceContext,
*_scheduler,
_lsid,
_txnNumber,
*_participants,
*_decision->getCommitTimestamp());
- case CommitDecision::kAbort:
+ }
+ case CommitDecision::kAbort: {
+ const auto& abortStatus = *_decision->getAbortStatus();
+
+ if (abortStatus == ErrorCodes::ReadConcernMajorityNotEnabled)
+ _decisionPromise.setError(abortStatus);
+ else
+ _decisionPromise.emplaceValue(CommitDecision::kAbort);
+
return txn::sendAbort(
_serviceContext, *_scheduler, _lsid, _txnNumber, *_participants);
+ }
default:
MONGO_UNREACHABLE;
};
@@ -324,7 +333,7 @@ void TransactionCoordinator::continueCommit(const TransactionCoordinatorDocument
_kickOffCommitPromise.emplaceValue();
}
-SharedSemiFuture<CommitDecision> TransactionCoordinator::getDecision() {
+SharedSemiFuture<CommitDecision> TransactionCoordinator::getDecision() const {
return _decisionPromise.getFuture();
}
diff --git a/src/mongo/db/s/transaction_coordinator.h b/src/mongo/db/s/transaction_coordinator.h
index 2bfb54e74ba..87b7252a6dc 100644
--- a/src/mongo/db/s/transaction_coordinator.h
+++ b/src/mongo/db/s/transaction_coordinator.h
@@ -98,7 +98,7 @@ public:
*
* TODO (SERVER-37364): Remove this when it is no longer needed by the coordinator service.
*/
- SharedSemiFuture<txn::CommitDecision> getDecision();
+ SharedSemiFuture<txn::CommitDecision> getDecision() const;
/**
* Returns a future which can be listened on for when all the asynchronous activity spawned by
diff --git a/src/mongo/db/s/transaction_coordinator_test.cpp b/src/mongo/db/s/transaction_coordinator_test.cpp
index 8717e8683c0..cbed3eb021a 100644
--- a/src/mongo/db/s/transaction_coordinator_test.cpp
+++ b/src/mongo/db/s/transaction_coordinator_test.cpp
@@ -466,9 +466,29 @@ TEST_F(TransactionCoordinatorDriverTest,
ASSERT(decision.getDecision() == txn::CommitDecision::kAbort);
ASSERT(decision.getAbortStatus());
- ASSERT_EQ(ErrorCodes::InternalError, decision.getAbortStatus()->code());
+ ASSERT_EQ(50993, int(decision.getAbortStatus()->code()));
}
+TEST_F(TransactionCoordinatorDriverTest,
+ SendPrepareReturnsErrorWhenOneShardReturnsReadConcernMajorityNotEnabled) {
+ txn::AsyncWorkScheduler aws(getServiceContext());
+ auto future = txn::sendPrepare(getServiceContext(), aws, _lsid, _txnNumber, kTwoShardIdList);
+
+ assertPrepareSentAndRespondWithSuccess(Timestamp(100, 1));
+ assertCommandSentAndRespondWith(
+ PrepareTransaction::kCommandName,
+ BSON("ok" << 0 << "code" << ErrorCodes::ReadConcernMajorityNotEnabled << "errmsg"
+ << "Read concern majority not enabled"),
+ WriteConcernOptions::Majority);
+
+ auto decision = future.get().decision();
+
+ ASSERT(decision.getDecision() == txn::CommitDecision::kAbort);
+ ASSERT(decision.getAbortStatus());
+ ASSERT_EQ(ErrorCodes::ReadConcernMajorityNotEnabled, decision.getAbortStatus()->code());
+}
+
+
class TransactionCoordinatorDriverPersistenceTest : public TransactionCoordinatorDriverTest {
protected:
void setUp() override {
@@ -901,6 +921,35 @@ TEST_F(TransactionCoordinatorTest,
coordinator.onCompletion().get();
}
+TEST_F(TransactionCoordinatorTest,
+ RunCommitProducesReadConcernMajorityNotEnabledIfEitherShardReturnsIt) {
+ TransactionCoordinator coordinator(
+ getServiceContext(),
+ _lsid,
+ _txnNumber,
+ std::make_unique<txn::AsyncWorkScheduler>(getServiceContext()),
+ Date_t::max());
+ coordinator.runCommit(kTwoShardIdList);
+ auto commitDecisionFuture = coordinator.getDecision();
+
+ // One participant votes commit and other encounters retryable error
+ onCommands({[&](const executor::RemoteCommandRequest& request) { return kPrepareOk; },
+ [&](const executor::RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::ReadConcernMajorityNotEnabled
+ << "errmsg"
+ << "Read concern majority not enabled");
+ }});
+
+ assertAbortSentAndRespondWithSuccess();
+ assertAbortSentAndRespondWithSuccess();
+
+ ASSERT_THROWS_CODE(
+ commitDecisionFuture.get(), AssertionException, ErrorCodes::ReadConcernMajorityNotEnabled);
+
+ coordinator.onCompletion().get();
+}
+
+
class TransactionCoordinatorMetricsTest : public TransactionCoordinatorTestBase {
public:
void setUp() override {
diff --git a/src/mongo/db/s/transaction_coordinator_util.cpp b/src/mongo/db/s/transaction_coordinator_util.cpp
index 0d3bde20418..f49da0ac61f 100644
--- a/src/mongo/db/s/transaction_coordinator_util.cpp
+++ b/src/mongo/db/s/transaction_coordinator_util.cpp
@@ -548,7 +548,7 @@ Future<PrepareResponse> sendPrepareToShard(ServiceContext* service,
auto prepareTimestampField = response.data["prepareTimestamp"];
if (prepareTimestampField.eoo() ||
prepareTimestampField.timestamp().isNull()) {
- Status abortStatus(ErrorCodes::InternalError,
+ Status abortStatus(ErrorCodes::Error(50993),
str::stream()
<< "Coordinator shard received an OK response "
"to prepareTransaction without a "
diff --git a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
index 1dd07473d67..3cb6b8c1cbe 100644
--- a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
+++ b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
@@ -73,20 +73,23 @@ public:
uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands());
}
- // We automatically fail 'prepareTransaction' against a primary that has
- // 'enableMajorityReadConcern' set to 'false'.
- uassert(50993,
+ // If a node has majority read concern disabled, replication must use the legacy
+ // 'rollbackViaRefetch' algortithm, which does not support prepareTransaction oplog
+ // entries
+ uassert(ErrorCodes::ReadConcernMajorityNotEnabled,
"'prepareTransaction' is not supported with 'enableMajorityReadConcern=false'",
serverGlobalParams.enableMajorityReadConcern);
- // We do not allow preparing a transaction if the replica set has any arbiters.
+ // Replica sets with arbiters are able to continually accept majority writes without
+ // actually being able to commit them (e.g. PSA with a downed secondary), which in turn
+ // will impact the liveness of 2PC transactions
const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
- uassert(50995,
+ uassert(ErrorCodes::ReadConcernMajorityNotEnabled,
"'prepareTransaction' is not supported for replica sets with arbiters",
!replCoord->setContainsArbiter());
- // We do not allow the prepareTransaction command to run on a standalone.
- uassert(51239,
+ // Standalone nodes do not support transactions at all
+ uassert(ErrorCodes::ReadConcernMajorityNotEnabled,
"'prepareTransaction' is not supported on standalone nodes.",
replCoord->isReplEnabled());
@@ -256,8 +259,10 @@ public:
if (coordinatorDecisionFuture) {
auto swCommitDecision = coordinatorDecisionFuture->getNoThrow(opCtx);
- // The coordinator can only return NoSuchTransaction if cancelIfCommitNotYetStarted
- // was called, which can happen in one of 3 cases:
+ // The coordinator can only throw NoSuchTransaction (as opposed to propagating an
+ // Abort decision due to NoSuchTransaction reported by a shard) if
+ // cancelIfCommitNotYetStarted was called, which can happen in one of 3 cases:
+ //
// 1) The deadline to receive coordinateCommit passed
// 2) Transaction with a newer txnNumber started on the session before
// coordinateCommit was received