summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEsha Maharishi <esha.maharishi@mongodb.com>2020-05-19 11:25:17 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-21 16:30:55 +0000
commit539db971814c1d2dbcea6f1fa28f59eea737a16b (patch)
treeaf5d32c53f0be57d1cbd47d6f5a57ca3285aaf79
parent51d9fe12b5d19720e72dcd7db0f2f17dd9a19212 (diff)
downloadmongo-539db971814c1d2dbcea6f1fa28f59eea737a16b.tar.gz
SERVER-48307 Disable single-write-shard transaction commit optimization
(cherry picked from commit e18f218d59237398962b8b5e3730f28c0f06e587)
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml1
-rw-r--r--jstests/noPassthrough/router_transactions_metrics.js16
-rw-r--r--jstests/sharding/txn_commit_optimizations_for_read_only_shards.js6
-rw-r--r--jstests/sharding/txn_single_write_shard_failover.js75
-rw-r--r--src/mongo/s/transaction_router.cpp19
-rw-r--r--src/mongo/s/transaction_router_test.cpp35
6 files changed, 104 insertions, 48 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index b04b769e8ac..d99a2a3e644 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -136,6 +136,7 @@ selector:
- jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js
- jstests/sharding/txn_commit_optimizations_for_read_only_shards.js
- jstests/sharding/txn_being_applied_to_secondary_cannot_be_killed.js
+ - jstests/sharding/txn_single_write_shard_failover.js
- jstests/sharding/txn_with_several_routers.js
- jstests/sharding/txn_writes_during_movechunk.js
- jstests/sharding/update_sharded.js
diff --git a/jstests/noPassthrough/router_transactions_metrics.js b/jstests/noPassthrough/router_transactions_metrics.js
index df20826b81b..3c8fd681336 100644
--- a/jstests/noPassthrough/router_transactions_metrics.js
+++ b/jstests/noPassthrough/router_transactions_metrics.js
@@ -373,6 +373,7 @@ jsTest.log("Failed single shard transaction.");
verifyServerStatusValues(st, expectedStats);
})();
+// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization.
jsTest.log("Successful single write shard transaction.");
(() => {
startSingleWriteShardTransaction();
@@ -382,13 +383,14 @@ jsTest.log("Successful single write shard transaction.");
expectedStats.currentOpen -= 1;
expectedStats.currentInactive -= 1;
expectedStats.totalCommitted += 1;
- expectedStats.commitTypes.singleWriteShard.initiated += 1;
- expectedStats.commitTypes.singleWriteShard.successful += 1;
+ expectedStats.commitTypes.twoPhaseCommit.initiated += 1;
+ expectedStats.commitTypes.twoPhaseCommit.successful += 1;
expectedStats.totalParticipantsAtCommit += 2;
- expectedStats.totalRequestsTargeted += 2;
+ expectedStats.totalRequestsTargeted += 1;
verifyServerStatusValues(st, expectedStats);
})();
+// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization.
jsTest.log("Failed single write shard transaction.");
(() => {
startSingleWriteShardTransaction();
@@ -401,12 +403,10 @@ jsTest.log("Failed single write shard transaction.");
expectedStats.currentInactive -= 1;
expectedStats.totalAborted += 1;
expectedStats.abortCause["NoSuchTransaction"] += 1;
- expectedStats.commitTypes.singleWriteShard.initiated += 1;
+ expectedStats.commitTypes.twoPhaseCommit.initiated += 1;
expectedStats.totalParticipantsAtCommit += 2;
- // In a single write shard commit, all read shards are committed first, then the
- // write shards, so if committing on a read shard fails, the write shards aren't targeted.
- // The implicit abort after will target all shards.
- expectedStats.totalRequestsTargeted += 1 + 2;
+ // There are no implicit aborts after two phase commit, so the coordinator is targeted once.
+ expectedStats.totalRequestsTargeted += 1;
verifyServerStatusValues(st, expectedStats);
})();
diff --git a/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js b/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js
index fc6137f2ff7..efcc35b3c54 100644
--- a/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js
+++ b/jstests/sharding/txn_commit_optimizations_for_read_only_shards.js
@@ -183,10 +183,12 @@ const transactionTypes = {
writeReadSingleShardExpectSingleShardCommit: txnNumber => {
return [writeShard0(txnNumber), readShard0(txnNumber)];
},
- readOneShardWriteOtherShardExpectSingleWriteShardCommit: txnNumber => {
+ // TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization.
+ readOneShardWriteOtherShardExpectTwoPhaseCommit: txnNumber => {
return [readShard0(txnNumber), writeShard1(txnNumber)];
},
- writeOneShardReadOtherShardExpectSingleWriteShardCommit: txnNumber => {
+ // TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization.
+ writeOneShardReadOtherShardExpectTwoPhaseCommit: txnNumber => {
return [writeShard0(txnNumber), readShard1(txnNumber)];
},
readOneShardWriteTwoOtherShardsExpectTwoPhaseCommit: txnNumber => {
diff --git a/jstests/sharding/txn_single_write_shard_failover.js b/jstests/sharding/txn_single_write_shard_failover.js
new file mode 100644
index 00000000000..9c112fc5f65
--- /dev/null
+++ b/jstests/sharding/txn_single_write_shard_failover.js
@@ -0,0 +1,75 @@
+/**
+ * Runs a single-write-shard transaction which commits, but for which the client retries commit and
+ * a read-only shard fails over before the second commit attempt.
+ *
+ * The second commit attempt should still return a commit decision.
+ *
+ * This test was written to reproduce the bug in the original single-write-shard transaction commit
+ * optimization where if a read-only shard failed over before the client sent a second commit
+ * attempt, the second commit attempt would return NoSuchTransaction with TransientTransactionError,
+ * causing the client to retry the whole transaction at a higher transaction number and the
+ * transaction's write to be applied twice.
+ *
+ * TODO (SERVER-48341): Remove requires_fcv_46 after backporting SERVER-48307 to 4.4.
+ * requires_find_command because legacy queries cannot be run in a session.
+ * @tags: [requires_find_command, requires_fcv_46, uses_transactions, uses_multi_shard_transaction]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+
+// This test causes a failover on a shard.
+TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
+
+const db1Name = "db1";
+const coll1Name = "foo";
+const ns1 = db1Name + "." + coll1Name;
+
+const db2Name = "db2";
+const coll2Name = "bar";
+const ns2 = db2Name + "." + coll2Name;
+
+const st = new ShardingTest({
+ shards: {rs0: {nodes: 2}, rs1: {nodes: 1}},
+ config: 1,
+ other: {
+ mongosOptions: {verbose: 3},
+ }
+});
+
+jsTest.log("Create two databases on different primary shards.");
+// enableSharding creates the databases.
+assert.commandWorked(st.s.adminCommand({enableSharding: db1Name}));
+assert.commandWorked(st.s.adminCommand({enableSharding: db2Name}));
+assert.commandWorked(st.s.adminCommand({movePrimary: db1Name, to: st.shard0.shardName}));
+assert.commandWorked(st.s.adminCommand({movePrimary: db2Name, to: st.shard1.shardName}));
+
+jsTest.log("Insert data on both shards.");
+// This ensures all nodes refresh their routing caches.
+st.s.getDB(db1Name).getCollection(coll1Name).insert({_id: "dummy"});
+st.s.getDB(db2Name).getCollection(coll2Name).insert({_id: "dummy"});
+
+jsTest.log("Run a single-write-shard transaction and commit it.");
+const session = st.s.startSession();
+session.startTransaction();
+session.getDatabase(db1Name).getCollection(coll1Name).findOne({_id: "readOperationOnShard0"});
+session.getDatabase(db2Name).getCollection(coll2Name).insert({_id: "writeOperationOnShard1"});
+// Use adminCommand so we can pass writeConcern.
+assert.commandWorked(st.s.adminCommand({
+ commitTransaction: 1,
+ lsid: session.getSessionId(),
+ txnNumber: session.getTxnNumber_forTesting(),
+ autocommit: false,
+ writeConcern: {w: "majority"},
+}));
+
+jsTest.log("Induce a failover on the read shard.");
+assert.commandWorked(st.rs0.getPrimary().adminCommand({replSetStepDown: 60, force: true}));
+
+jsTest.log("Make second attempt to commit, should still return that the transaction committed");
+assert.commandWorked(session.commitTransaction_forTesting());
+
+st.stop();
+})();
diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp
index 5e73c898bfd..714122db647 100644
--- a/src/mongo/s/transaction_router.cpp
+++ b/src/mongo/s/transaction_router.cpp
@@ -1064,25 +1064,6 @@ BSONObj TransactionRouter::Router::_commitTransaction(
return sendCommitDirectlyToShards(opCtx, readOnlyShards);
}
- if (writeShards.size() == 1) {
- LOG(3) << txnIdToString() << " Committing single-write-shard transaction with "
- << readOnlyShards.size()
- << " read-only shards, write shard: " << writeShards.front();
- {
- stdx::lock_guard<Client> lk(*opCtx->getClient());
- o(lk).commitType = CommitType::kSingleWriteShard;
- _onStartCommit(lk, opCtx);
- }
-
- const auto readOnlyShardsResponse = sendCommitDirectlyToShards(opCtx, readOnlyShards);
-
- if (!getStatusFromCommandResult(readOnlyShardsResponse).isOK() ||
- !getWriteConcernStatusFromCommandResult(readOnlyShardsResponse).isOK()) {
- return readOnlyShardsResponse;
- }
- return sendCommitDirectlyToShards(opCtx, writeShards);
- }
-
{
stdx::lock_guard<Client> lk(*opCtx->getClient());
o(lk).commitType = CommitType::kTwoPhaseCommit;
diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp
index a02b6126c5c..17b122340c6 100644
--- a/src/mongo/s/transaction_router_test.cpp
+++ b/src/mongo/s/transaction_router_test.cpp
@@ -1007,8 +1007,9 @@ TEST_F(TransactionRouterTestWithDefaultSession,
ASSERT(expectedHostAndPorts == seenHostAndPorts);
}
+// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization.
TEST_F(TransactionRouterTestWithDefaultSession,
- SendCommitDirectlyToReadOnlyShardsThenWriteShardForMultipleParticipantsOnlyOneDidAWrite) {
+ SendCoordinateCommitForMultipleParticipantsOnlyOneDidAWrite) {
TxnNumber txnNum{3};
auto txnRouter = TransactionRouter::get(operationContext());
@@ -1035,26 +1036,23 @@ TEST_F(TransactionRouterTestWithDefaultSession,
ASSERT_EQ("admin", request.dbname);
auto cmdName = request.cmdObj.firstElement().fieldNameStringData();
- ASSERT_EQ(cmdName, "commitTransaction");
-
- checkSessionDetails(request.cmdObj, getSessionId(), txnNum, true);
-
- return BSON("ok" << 1);
- });
+ ASSERT_EQ(cmdName, "coordinateCommitTransaction");
- onCommand([&](const RemoteCommandRequest& request) {
- ASSERT_EQ(hostAndPort2, request.target);
- ASSERT_EQ("admin", request.dbname);
+ std::set<std::string> expectedParticipants = {shard1.toString(), shard2.toString()};
+ auto participantElements = request.cmdObj["participants"].Array();
+ ASSERT_EQ(expectedParticipants.size(), participantElements.size());
- auto cmdName = request.cmdObj.firstElement().fieldNameStringData();
- ASSERT_EQ(cmdName, "commitTransaction");
+ for (const auto& element : participantElements) {
+ auto shardId = element["shardId"].valuestr();
+ ASSERT_EQ(1ull, expectedParticipants.count(shardId));
+ expectedParticipants.erase(shardId);
+ }
- checkSessionDetails(request.cmdObj, getSessionId(), txnNum, false);
+ checkSessionDetails(request.cmdObj, getSessionId(), txnNum, true);
return BSON("ok" << 1);
});
-
future.default_timed_get();
}
@@ -2993,8 +2991,7 @@ protected:
startCapturingLogMessages();
auto future = launchAsync(
[&] { txnRouter().commitTransaction(operationContext(), kDummyRecoveryToken); });
- expectCommitTransaction();
- expectCommitTransaction();
+ expectCoordinateCommitTransaction();
future.default_timed_get();
stopCapturingLogMessages();
}
@@ -3262,15 +3259,15 @@ TEST_F(TransactionRouterMetricsTest, SlowLoggingCommitType_SingleShard) {
ASSERT_EQUALS(0, countLogLinesContaining("coordinator:"));
}
+// TODO (SERVER-48340): Re-enable the single-write-shard transaction commit optimization.
TEST_F(TransactionRouterMetricsTest, SlowLoggingCommitType_SingleWriteShard) {
beginSlowTxnWithDefaultTxnNumber();
runSingleWriteShardCommit();
- ASSERT_EQUALS(1, countLogLinesContaining("commitType:singleWriteShard,"));
+ ASSERT_EQUALS(1, countLogLinesContaining("commitType:twoPhaseCommit,"));
+ ASSERT_EQUALS(1, countLogLinesContaining("coordinator:"));
ASSERT_EQUALS(1, countLogLinesContaining("numParticipants:2"));
ASSERT_EQUALS(1, countLogLinesContaining("commitDurationMicros:"));
-
- ASSERT_EQUALS(0, countLogLinesContaining("coordinator:"));
}
TEST_F(TransactionRouterMetricsTest, SlowLoggingCommitType_ReadOnly) {