diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2019-07-15 12:52:12 -0400 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2019-07-23 17:25:35 -0400 |
commit | e3c26cf2f4c6ea3d597f42360442c0b6320e84bc (patch) | |
tree | 37442818ec0c0555dfd203f330a2abd1fd7ace1a | |
parent | 7d687264de65258764dca70ce46754c4765912ce (diff) | |
download | mongo-e3c26cf2f4c6ea3d597f42360442c0b6320e84bc.tar.gz |
SERVER-41369 Terminate the TransactionCoordinator retry logic if read concern majority is not available
12 files changed, 128 insertions, 22 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index 2f323b8a32c..867784fac04 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -99,6 +99,7 @@ selector: - jstests/sharding/shard_collection_existing_zones.js - jstests/sharding/single_shard_transaction_with_arbiter.js - jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js + - jstests/sharding/multi_shard_transaction_without_majority_reads.js - jstests/sharding/snapshot_cursor_commands_mongos.js - jstests/sharding/transactions_causal_consistency.js - jstests/sharding/transactions_distinct_not_allowed_on_sharded_collections.js diff --git a/jstests/replsets/prepare_transaction_fails_on_standalone.js b/jstests/replsets/prepare_transaction_fails_on_standalone.js index 998119ac34e..14eb17656a5 100644 --- a/jstests/replsets/prepare_transaction_fails_on_standalone.js +++ b/jstests/replsets/prepare_transaction_fails_on_standalone.js @@ -14,7 +14,8 @@ assert.commandWorked(testDB.runCommand({create: collName})); - assert.commandFailedWithCode(testDB.adminCommand({prepareTransaction: 1}), 51239); + assert.commandFailedWithCode(testDB.adminCommand({prepareTransaction: 1}), + ErrorCodes.ReadConcernMajorityNotEnabled); MongoRunner.stopMongod(standalone); }()); diff --git a/jstests/replsets/prepare_transaction_fails_with_arbiters.js b/jstests/replsets/prepare_transaction_fails_with_arbiters.js index e0529b8195d..672ef7c147a 100644 --- a/jstests/replsets/prepare_transaction_fails_with_arbiters.js +++ b/jstests/replsets/prepare_transaction_fails_with_arbiters.js @@ -34,7 +34,8 @@ session.startTransaction(); assert.commandWorked(sessionColl.insert({_id: 42})); - assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}), 50995); + assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}), + ErrorCodes.ReadConcernMajorityNotEnabled); rst.stopSet(); })(); diff --git a/jstests/replsets/prepare_transaction_fails_without_majority_reads.js b/jstests/replsets/prepare_transaction_fails_without_majority_reads.js index ef816b0fb07..30cbeac87ee 100644 --- a/jstests/replsets/prepare_transaction_fails_without_majority_reads.js +++ b/jstests/replsets/prepare_transaction_fails_without_majority_reads.js @@ -27,7 +27,8 @@ session.startTransaction(); assert.commandWorked(sessionColl.insert({_id: 42})); - assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}), 50993); + assert.commandFailedWithCode(sessionDB.adminCommand({prepareTransaction: 1}), + ErrorCodes.ReadConcernMajorityNotEnabled); rst.stopSet(); })(); diff --git a/jstests/sharding/multi_shard_transaction_without_majority_reads.js b/jstests/sharding/multi_shard_transaction_without_majority_reads.js new file mode 100644 index 00000000000..dafdf503f84 --- /dev/null +++ b/jstests/sharding/multi_shard_transaction_without_majority_reads.js @@ -0,0 +1,39 @@ +/** + * Test that multi-shard transactions will fail with a non-transient error when run against shards + * with disabled majority read concern. + * + * @tags: [uses_transactions] + */ + +(function() { + 'use strict'; + + const st = new ShardingTest({shards: 2, rs: {nodes: 1, enableMajorityReadConcern: 'false'}}); + + assert.commandWorked(st.s0.adminCommand({enableSharding: 'TestDB'})); + st.ensurePrimaryShard('TestDB', st.shard0.shardName); + assert.commandWorked(st.s0.adminCommand({shardCollection: 'TestDB.TestColl', key: {_id: 1}})); + + const coll = st.s0.getDB('TestDB').TestColl; + assert.writeOK(coll.insert({_id: -1, x: 0})); + assert.writeOK(coll.insert({_id: 1, x: 0})); + assert.commandWorked(st.s0.adminCommand({split: 'TestDB.TestColl', middle: {_id: 1}})); + assert.commandWorked(st.s0.adminCommand( + {moveChunk: 'TestDB.TestColl', find: {_id: 1}, to: st.shard1.shardName})); + + assert.writeOK(coll.update({_id: -1}, {$inc: {x: 1}})); + assert.writeOK(coll.update({_id: 1}, {$inc: {x: 1}})); + + const session = st.s0.startSession(); + const sessionColl = session.getDatabase('TestDB').TestColl; + + session.startTransaction(); + + assert.writeOK(sessionColl.update({_id: -1}, {$inc: {x: 1}})); + assert.writeOK(sessionColl.update({_id: 1}, {$inc: {x: 1}})); + + assert.commandFailedWithCode(session.commitTransaction_forTesting(), + ErrorCodes.ReadConcernMajorityNotEnabled); + + st.stop(); +})(); diff --git a/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js b/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js index c5c3e45c880..03dd211baf7 100644 --- a/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js +++ b/jstests/sharding/single_shard_transaction_without_majority_reads_lagged.js @@ -27,7 +27,7 @@ shards: 1, rs: { nodes: [ - {/* primary */ enableMajorityReadConcern: false}, + {/* primary */ enableMajorityReadConcern: 'false'}, {/* secondary */ rsConfig: {priority: 0}} ] } diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index b29aedffeb6..bab631510b2 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -358,6 +358,6 @@ error_class("ExceededTimeLimitError", ["ExceededTimeLimit", "MaxTimeMSExpired", error_class("SnapshotError", ["SnapshotTooOld", "SnapshotUnavailable", "StaleChunkHistory", "MigrationConflict"]) -error_class("VoteAbortError", ["NoSuchTransaction", "TransactionTooOld"]) +error_class("VoteAbortError", ["NoSuchTransaction", "ReadConcernMajorityNotEnabled", "TransactionTooOld"]) error_class("NonResumableChangeStreamError", ["ChangeStreamFatalError", "ChangeStreamHistoryLost"]) diff --git a/src/mongo/db/s/transaction_coordinator.cpp b/src/mongo/db/s/transaction_coordinator.cpp index 3085db606e4..eaf91e54c06 100644 --- a/src/mongo/db/s/transaction_coordinator.cpp +++ b/src/mongo/db/s/transaction_coordinator.cpp @@ -249,19 +249,28 @@ TransactionCoordinator::TransactionCoordinator(ServiceContext* serviceContext, _serviceContext->getPreciseClockSource()->now()); } - _decisionPromise.emplaceValue(_decision->getDecision()); - switch (_decision->getDecision()) { - case CommitDecision::kCommit: + case CommitDecision::kCommit: { + _decisionPromise.emplaceValue(CommitDecision::kCommit); + return txn::sendCommit(_serviceContext, *_scheduler, _lsid, _txnNumber, *_participants, *_decision->getCommitTimestamp()); - case CommitDecision::kAbort: + } + case CommitDecision::kAbort: { + const auto& abortStatus = *_decision->getAbortStatus(); + + if (abortStatus == ErrorCodes::ReadConcernMajorityNotEnabled) + _decisionPromise.setError(abortStatus); + else + _decisionPromise.emplaceValue(CommitDecision::kAbort); + return txn::sendAbort( _serviceContext, *_scheduler, _lsid, _txnNumber, *_participants); + } default: MONGO_UNREACHABLE; }; @@ -324,7 +333,7 @@ void TransactionCoordinator::continueCommit(const TransactionCoordinatorDocument _kickOffCommitPromise.emplaceValue(); } -SharedSemiFuture<CommitDecision> TransactionCoordinator::getDecision() { +SharedSemiFuture<CommitDecision> TransactionCoordinator::getDecision() const { return _decisionPromise.getFuture(); } diff --git a/src/mongo/db/s/transaction_coordinator.h b/src/mongo/db/s/transaction_coordinator.h index 2bfb54e74ba..87b7252a6dc 100644 --- a/src/mongo/db/s/transaction_coordinator.h +++ b/src/mongo/db/s/transaction_coordinator.h @@ -98,7 +98,7 @@ public: * * TODO (SERVER-37364): Remove this when it is no longer needed by the coordinator service. */ - SharedSemiFuture<txn::CommitDecision> getDecision(); + SharedSemiFuture<txn::CommitDecision> getDecision() const; /** * Returns a future which can be listened on for when all the asynchronous activity spawned by diff --git a/src/mongo/db/s/transaction_coordinator_test.cpp b/src/mongo/db/s/transaction_coordinator_test.cpp index 32134a82a71..7e88a292067 100644 --- a/src/mongo/db/s/transaction_coordinator_test.cpp +++ b/src/mongo/db/s/transaction_coordinator_test.cpp @@ -466,9 +466,29 @@ TEST_F(TransactionCoordinatorDriverTest, ASSERT(decision.getDecision() == txn::CommitDecision::kAbort); ASSERT(decision.getAbortStatus()); - ASSERT_EQ(ErrorCodes::InternalError, decision.getAbortStatus()->code()); + ASSERT_EQ(50993, int(decision.getAbortStatus()->code())); } +TEST_F(TransactionCoordinatorDriverTest, + SendPrepareReturnsErrorWhenOneShardReturnsReadConcernMajorityNotEnabled) { + txn::AsyncWorkScheduler aws(getServiceContext()); + auto future = txn::sendPrepare(getServiceContext(), aws, _lsid, _txnNumber, kTwoShardIdList); + + assertPrepareSentAndRespondWithSuccess(Timestamp(100, 1)); + assertCommandSentAndRespondWith( + PrepareTransaction::kCommandName, + BSON("ok" << 0 << "code" << ErrorCodes::ReadConcernMajorityNotEnabled << "errmsg" + << "Read concern majority not enabled"), + WriteConcernOptions::Majority); + + auto decision = future.get().decision(); + + ASSERT(decision.getDecision() == txn::CommitDecision::kAbort); + ASSERT(decision.getAbortStatus()); + ASSERT_EQ(ErrorCodes::ReadConcernMajorityNotEnabled, decision.getAbortStatus()->code()); +} + + class TransactionCoordinatorDriverPersistenceTest : public TransactionCoordinatorDriverTest { protected: void setUp() override { @@ -901,6 +921,35 @@ TEST_F(TransactionCoordinatorTest, coordinator.onCompletion().get(); } +TEST_F(TransactionCoordinatorTest, + RunCommitProducesReadConcernMajorityNotEnabledIfEitherShardReturnsIt) { + TransactionCoordinator coordinator( + getServiceContext(), + _lsid, + _txnNumber, + std::make_unique<txn::AsyncWorkScheduler>(getServiceContext()), + Date_t::max()); + coordinator.runCommit(kTwoShardIdList); + auto commitDecisionFuture = coordinator.getDecision(); + + // One participant votes commit and other encounters retryable error + onCommands({[&](const executor::RemoteCommandRequest& request) { return kPrepareOk; }, + [&](const executor::RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::ReadConcernMajorityNotEnabled + << "errmsg" + << "Read concern majority not enabled"); + }}); + + assertAbortSentAndRespondWithSuccess(); + assertAbortSentAndRespondWithSuccess(); + + ASSERT_THROWS_CODE( + commitDecisionFuture.get(), AssertionException, ErrorCodes::ReadConcernMajorityNotEnabled); + + coordinator.onCompletion().get(); +} + + class TransactionCoordinatorMetricsTest : public TransactionCoordinatorTestBase { public: void setUp() override { diff --git a/src/mongo/db/s/transaction_coordinator_util.cpp b/src/mongo/db/s/transaction_coordinator_util.cpp index 0d3bde20418..f49da0ac61f 100644 --- a/src/mongo/db/s/transaction_coordinator_util.cpp +++ b/src/mongo/db/s/transaction_coordinator_util.cpp @@ -548,7 +548,7 @@ Future<PrepareResponse> sendPrepareToShard(ServiceContext* service, auto prepareTimestampField = response.data["prepareTimestamp"]; if (prepareTimestampField.eoo() || prepareTimestampField.timestamp().isNull()) { - Status abortStatus(ErrorCodes::InternalError, + Status abortStatus(ErrorCodes::Error(50993), str::stream() << "Coordinator shard received an OK response " "to prepareTransaction without a " diff --git a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp index 1dd07473d67..3cb6b8c1cbe 100644 --- a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp +++ b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp @@ -73,20 +73,23 @@ public: uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands()); } - // We automatically fail 'prepareTransaction' against a primary that has - // 'enableMajorityReadConcern' set to 'false'. - uassert(50993, + // If a node has majority read concern disabled, replication must use the legacy + // 'rollbackViaRefetch' algortithm, which does not support prepareTransaction oplog + // entries + uassert(ErrorCodes::ReadConcernMajorityNotEnabled, "'prepareTransaction' is not supported with 'enableMajorityReadConcern=false'", serverGlobalParams.enableMajorityReadConcern); - // We do not allow preparing a transaction if the replica set has any arbiters. + // Replica sets with arbiters are able to continually accept majority writes without + // actually being able to commit them (e.g. PSA with a downed secondary), which in turn + // will impact the liveness of 2PC transactions const auto replCoord = repl::ReplicationCoordinator::get(opCtx); - uassert(50995, + uassert(ErrorCodes::ReadConcernMajorityNotEnabled, "'prepareTransaction' is not supported for replica sets with arbiters", !replCoord->setContainsArbiter()); - // We do not allow the prepareTransaction command to run on a standalone. - uassert(51239, + // Standalone nodes do not support transactions at all + uassert(ErrorCodes::ReadConcernMajorityNotEnabled, "'prepareTransaction' is not supported on standalone nodes.", replCoord->isReplEnabled()); @@ -256,8 +259,10 @@ public: if (coordinatorDecisionFuture) { auto swCommitDecision = coordinatorDecisionFuture->getNoThrow(opCtx); - // The coordinator can only return NoSuchTransaction if cancelIfCommitNotYetStarted - // was called, which can happen in one of 3 cases: + // The coordinator can only throw NoSuchTransaction (as opposed to propagating an + // Abort decision due to NoSuchTransaction reported by a shard) if + // cancelIfCommitNotYetStarted was called, which can happen in one of 3 cases: + // // 1) The deadline to receive coordinateCommit passed // 2) Transaction with a newer txnNumber started on the session before // coordinateCommit was received |