diff options
author | Esha Maharishi <esha.maharishi@mongodb.com> | 2020-05-19 11:25:17 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-21 16:30:55 +0000 |
commit | 539db971814c1d2dbcea6f1fa28f59eea737a16b (patch) | |
tree | af5d32c53f0be57d1cbd47d6f5a57ca3285aaf79 | |
parent | 51d9fe12b5d19720e72dcd7db0f2f17dd9a19212 (diff) | |
download | mongo-539db971814c1d2dbcea6f1fa28f59eea737a16b.tar.gz |
SERVER-48307 Disable single-write-shard transaction commit optimization
(cherry picked from commit e18f218d59237398962b8b5e3730f28c0f06e587)
6 files changed, 104 insertions, 48 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index b04b769e8ac..d99a2a3e644 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -136,6 +136,7 @@ selector: - jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js - jstests/sharding/txn_commit_optimizations_for_read_only_shards.js - jstests/sharding/txn_being_applied_to_secondary_cannot_be_killed.js + - jstests/sharding/txn_single_write_shard_failover.js - jstests/sharding/txn_with_several_routers.js - jstests/sharding/txn_writes_during_movechunk.js - jstests/sharding/update_sharded.js diff --git a/jstests/noPassthrough/router_transactions_metrics.js b/jstests/noPassthrough/router_transactions_metrics.js index df20826b81b..3c8fd681336 100644 --- a/jstests/noPassthrough/router_transactions_metrics.js +++ b/jstests/noPassthrough/router_transactions_metrics.js @@ -373,6 +373,7 @@ jsTest.log("Failed single shard transaction."); verifyServerStatusValues(st, expectedStats); })(); +// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization. jsTest.log("Successful single write shard transaction."); (() => { startSingleWriteShardTransaction(); @@ -382,13 +383,14 @@ jsTest.log("Successful single write shard transaction."); expectedStats.currentOpen -= 1; expectedStats.currentInactive -= 1; expectedStats.totalCommitted += 1; - expectedStats.commitTypes.singleWriteShard.initiated += 1; - expectedStats.commitTypes.singleWriteShard.successful += 1; + expectedStats.commitTypes.twoPhaseCommit.initiated += 1; + expectedStats.commitTypes.twoPhaseCommit.successful += 1; expectedStats.totalParticipantsAtCommit += 2; - expectedStats.totalRequestsTargeted += 2; + expectedStats.totalRequestsTargeted += 1; verifyServerStatusValues(st, expectedStats); })(); +// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization. jsTest.log("Failed single write shard transaction."); (() => { startSingleWriteShardTransaction(); @@ -401,12 +403,10 @@ jsTest.log("Failed single write shard transaction."); expectedStats.currentInactive -= 1; expectedStats.totalAborted += 1; expectedStats.abortCause["NoSuchTransaction"] += 1; - expectedStats.commitTypes.singleWriteShard.initiated += 1; + expectedStats.commitTypes.twoPhaseCommit.initiated += 1; expectedStats.totalParticipantsAtCommit += 2; - // In a single write shard commit, all read shards are committed first, then the - // write shards, so if committing on a read shard fails, the write shards aren't targeted. - // The implicit abort after will target all shards. - expectedStats.totalRequestsTargeted += 1 + 2; + // There are no implicit aborts after two phase commit, so the coordinator is targeted once. + expectedStats.totalRequestsTargeted += 1; verifyServerStatusValues(st, expectedStats); })(); diff --git a/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js b/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js index fc6137f2ff7..efcc35b3c54 100644 --- a/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js +++ b/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js @@ -183,10 +183,12 @@ const transactionTypes = { writeReadSingleShardExpectSingleShardCommit: txnNumber => { return [writeShard0(txnNumber), readShard0(txnNumber)]; }, - readOneShardWriteOtherShardExpectSingleWriteShardCommit: txnNumber => { + // TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization. + readOneShardWriteOtherShardExpectTwoPhaseCommit: txnNumber => { return [readShard0(txnNumber), writeShard1(txnNumber)]; }, - writeOneShardReadOtherShardExpectSingleWriteShardCommit: txnNumber => { + // TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization. + writeOneShardReadOtherShardExpectTwoPhaseCommit: txnNumber => { return [writeShard0(txnNumber), readShard1(txnNumber)]; }, readOneShardWriteTwoOtherShardsExpectTwoPhaseCommit: txnNumber => { diff --git a/jstests/sharding/txn_single_write_shard_failover.js b/jstests/sharding/txn_single_write_shard_failover.js new file mode 100644 index 00000000000..9c112fc5f65 --- /dev/null +++ b/jstests/sharding/txn_single_write_shard_failover.js @@ -0,0 +1,75 @@ +/** + * Runs a single-write-shard transaction which commits, but for which the client retries commit and + * a read-only shard fails over before the second commit attempt. + * + * The second commit attempt should still return a commit decision. + * + * This test was written to reproduce the bug in the original single-write-shard transaction commit + * optimization where if a read-only shard failed over before the client sent a second commit + * attempt, the second commit attempt would return NoSuchTransaction with TransientTransactionError, + * causing the client to retry the whole transaction at a higher transaction number and the + * transaction's write to be applied twice. + * + * TODO (SERVER-48341): Remove requires_fcv_46 after backporting SERVER-48307 to 4.4. + * requires_find_command because legacy queries cannot be run in a session. + * @tags: [requires_find_command, requires_fcv_46, uses_transactions, uses_multi_shard_transaction] + */ + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); + +// This test causes a failover on a shard. +TestData.skipCheckingUUIDsConsistentAcrossCluster = true; + +const db1Name = "db1"; +const coll1Name = "foo"; +const ns1 = db1Name + "." + coll1Name; + +const db2Name = "db2"; +const coll2Name = "bar"; +const ns2 = db2Name + "." + coll2Name; + +const st = new ShardingTest({ + shards: {rs0: {nodes: 2}, rs1: {nodes: 1}}, + config: 1, + other: { + mongosOptions: {verbose: 3}, + } +}); + +jsTest.log("Create two databases on different primary shards."); +// enableSharding creates the databases. +assert.commandWorked(st.s.adminCommand({enableSharding: db1Name})); +assert.commandWorked(st.s.adminCommand({enableSharding: db2Name})); +assert.commandWorked(st.s.adminCommand({movePrimary: db1Name, to: st.shard0.shardName})); +assert.commandWorked(st.s.adminCommand({movePrimary: db2Name, to: st.shard1.shardName})); + +jsTest.log("Insert data on both shards."); +// This ensures all nodes refresh their routing caches. +st.s.getDB(db1Name).getCollection(coll1Name).insert({_id: "dummy"}); +st.s.getDB(db2Name).getCollection(coll2Name).insert({_id: "dummy"}); + +jsTest.log("Run a single-write-shard transaction and commit it."); +const session = st.s.startSession(); +session.startTransaction(); +session.getDatabase(db1Name).getCollection(coll1Name).findOne({_id: "readOperationOnShard0"}); +session.getDatabase(db2Name).getCollection(coll2Name).insert({_id: "writeOperationOnShard1"}); +// Use adminCommand so we can pass writeConcern. +assert.commandWorked(st.s.adminCommand({ + commitTransaction: 1, + lsid: session.getSessionId(), + txnNumber: session.getTxnNumber_forTesting(), + autocommit: false, + writeConcern: {w: "majority"}, +})); + +jsTest.log("Induce a failover on the read shard."); +assert.commandWorked(st.rs0.getPrimary().adminCommand({replSetStepDown: 60, force: true})); + +jsTest.log("Make second attempt to commit, should still return that the transaction committed"); +assert.commandWorked(session.commitTransaction_forTesting()); + +st.stop(); +})(); diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp index 5e73c898bfd..714122db647 100644 --- a/src/mongo/s/transaction_router.cpp +++ b/src/mongo/s/transaction_router.cpp @@ -1064,25 +1064,6 @@ BSONObj TransactionRouter::Router::_commitTransaction( return sendCommitDirectlyToShards(opCtx, readOnlyShards); } - if (writeShards.size() == 1) { - LOG(3) << txnIdToString() << " Committing single-write-shard transaction with " - << readOnlyShards.size() - << " read-only shards, write shard: " << writeShards.front(); - { - stdx::lock_guard<Client> lk(*opCtx->getClient()); - o(lk).commitType = CommitType::kSingleWriteShard; - _onStartCommit(lk, opCtx); - } - - const auto readOnlyShardsResponse = sendCommitDirectlyToShards(opCtx, readOnlyShards); - - if (!getStatusFromCommandResult(readOnlyShardsResponse).isOK() || - !getWriteConcernStatusFromCommandResult(readOnlyShardsResponse).isOK()) { - return readOnlyShardsResponse; - } - return sendCommitDirectlyToShards(opCtx, writeShards); - } - { stdx::lock_guard<Client> lk(*opCtx->getClient()); o(lk).commitType = CommitType::kTwoPhaseCommit; diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp index a02b6126c5c..17b122340c6 100644 --- a/src/mongo/s/transaction_router_test.cpp +++ b/src/mongo/s/transaction_router_test.cpp @@ -1007,8 +1007,9 @@ TEST_F(TransactionRouterTestWithDefaultSession, ASSERT(expectedHostAndPorts == seenHostAndPorts); } +// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization. TEST_F(TransactionRouterTestWithDefaultSession, - SendCommitDirectlyToReadOnlyShardsThenWriteShardForMultipleParticipantsOnlyOneDidAWrite) { + SendCoordinateCommitForMultipleParticipantsOnlyOneDidAWrite) { TxnNumber txnNum{3}; auto txnRouter = TransactionRouter::get(operationContext()); @@ -1035,26 +1036,23 @@ TEST_F(TransactionRouterTestWithDefaultSession, ASSERT_EQ("admin", request.dbname); auto cmdName = request.cmdObj.firstElement().fieldNameStringData(); - ASSERT_EQ(cmdName, "commitTransaction"); - - checkSessionDetails(request.cmdObj, getSessionId(), txnNum, true); - - return BSON("ok" << 1); - }); + ASSERT_EQ(cmdName, "coordinateCommitTransaction"); - onCommand([&](const RemoteCommandRequest& request) { - ASSERT_EQ(hostAndPort2, request.target); - ASSERT_EQ("admin", request.dbname); + std::set<std::string> expectedParticipants = {shard1.toString(), shard2.toString()}; + auto participantElements = request.cmdObj["participants"].Array(); + ASSERT_EQ(expectedParticipants.size(), participantElements.size()); - auto cmdName = request.cmdObj.firstElement().fieldNameStringData(); - ASSERT_EQ(cmdName, "commitTransaction"); + for (const auto& element : participantElements) { + auto shardId = element["shardId"].valuestr(); + ASSERT_EQ(1ull, expectedParticipants.count(shardId)); + expectedParticipants.erase(shardId); + } - checkSessionDetails(request.cmdObj, getSessionId(), txnNum, false); + checkSessionDetails(request.cmdObj, getSessionId(), txnNum, true); return BSON("ok" << 1); }); - future.default_timed_get(); } @@ -2993,8 +2991,7 @@ protected: startCapturingLogMessages(); auto future = launchAsync( [&] { txnRouter().commitTransaction(operationContext(), kDummyRecoveryToken); }); - expectCommitTransaction(); - expectCommitTransaction(); + expectCoordinateCommitTransaction(); future.default_timed_get(); stopCapturingLogMessages(); } @@ -3262,15 +3259,15 @@ TEST_F(TransactionRouterMetricsTest, SlowLoggingCommitType_SingleShard) { ASSERT_EQUALS(0, countLogLinesContaining("coordinator:")); } +// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization. TEST_F(TransactionRouterMetricsTest, SlowLoggingCommitType_SingleWriteShard) { beginSlowTxnWithDefaultTxnNumber(); runSingleWriteShardCommit(); - ASSERT_EQUALS(1, countLogLinesContaining("commitType:singleWriteShard,")); + ASSERT_EQUALS(1, countLogLinesContaining("commitType:twoPhaseCommit,")); + ASSERT_EQUALS(1, countLogLinesContaining("coordinator:")); ASSERT_EQUALS(1, countLogLinesContaining("numParticipants:2")); ASSERT_EQUALS(1, countLogLinesContaining("commitDurationMicros:")); - - ASSERT_EQUALS(0, countLogLinesContaining("coordinator:")); } TEST_F(TransactionRouterMetricsTest, SlowLoggingCommitType_ReadOnly) { |