summary refs log tree commit diff
diff options
context:
space:
mode:
author Esha Maharishi <esha.maharishi@mongodb.com> 2019-01-31 15:04:42 -0500
committer Esha Maharishi <esha.maharishi@mongodb.com> 2019-01-31 19:01:42 -0500
commit 9606de0f0f3166b9c8fcff033f2476af2937f685 (patch)
tree 187e4e382deaab3626fa5abb0324cf763fb75471
parent f4656acfee11569a796e06d14e4825ab54d39ecc (diff)
download mongo-9606de0f0f3166b9c8fcff033f2476af2937f685.tar.gz
SERVER-39316 Test two-phase abort path without 'config server is coordinator' override in txn_failover_two_phase_commit.js
-rw-r--r-- jstests/sharding/txn_failover_two_phase_commit.js 154
1 files changed, 87 insertions, 67 deletions
diff --git a/jstests/sharding/txn_failover_two_phase_commit.js b/jstests/sharding/txn_failover_two_phase_commit.js
index cac9e0f1224..0fc9f1b7a2c 100644
--- a/jstests/sharding/txn_failover_two_phase_commit.js
+++ b/jstests/sharding/txn_failover_two_phase_commit.js
@@ -6,6 +6,10 @@
* @tags: [uses_transactions, uses_multi_shard_transaction]
*/
+// The UUID consistency check uses connections to shards cached on the ShardingTest object, but this
+// test causes failovers on a shard, so the cached connection is not usable.
+TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
+
(function() {
'use strict';
@@ -23,11 +27,10 @@
let failpointCounter = 0;
- const runTest = function(sameNodeStepsUpAfterFailover) {
-
- jsTest.log("Testing all scenarios with sameNodeStepsUpAfterFailover: " +
- sameNodeStepsUpAfterFailover);
+ let lsid = {id: UUID()};
+ let txnNumber = 0;
+ const runTest = function(sameNodeStepsUpAfterFailover, overrideCoordinatorToBeConfigServer) {
let stepDownSecs; // The amount of time the node has to wait before becoming primary again.
let numCoordinatorNodes;
if (sameNodeStepsUpAfterFailover) {
@@ -38,28 +41,42 @@
stepDownSecs = 3;
}
- let st = new ShardingTest({
- shards: 3, // number of *regular shards*
- config: numCoordinatorNodes, // number of replica set *nodes* in *config shard*
- causallyConsistent: true,
- other: {
- mongosOptions: {
- // This failpoint is needed because it is not yet possible to step down a node
- // with a prepared transaction.
- setParameter:
- {"failpoint.sendCoordinateCommitToConfigServer": "{'mode': 'alwaysOn'}"},
- verbose: 3
- },
- configOptions: {
- // This failpoint is needed because of the other failpoint: the config server
- // will not have a local participant, so coordinateCommitTransaction cannot fall
- // back to recovering the decision from the local participant.
- setParameter: {"failpoint.doNotForgetCoordinator": "{'mode': 'alwaysOn'}"},
+ let st, coordinatorReplSetTest;
+ if (overrideCoordinatorToBeConfigServer) {
+ st = new ShardingTest({
+ shards: 3, // number of *regular shards*
+ config: numCoordinatorNodes, // number of replica set *nodes* in *config shard*
+ causallyConsistent: true,
+ other: {
+ mongosOptions: {
+ // This failpoint is needed because it is not yet possible to step down a
+ // node with a prepared transaction.
+ setParameter: {
+ "failpoint.sendCoordinateCommitToConfigServer": "{'mode': 'alwaysOn'}"
+ },
+ verbose: 3
+ },
+ configOptions: {
+ // This failpoint is needed because of the other failpoint: the config
+ // server will not have a local participant, so coordinateCommitTransaction
+ // cannot fall back to recovering the decision from the local participant.
+ setParameter: {"failpoint.doNotForgetCoordinator": "{'mode': 'alwaysOn'}"},
+ }
}
- }
- });
+ });
+
+ coordinatorReplSetTest = st.configRS;
+ } else {
+ st = new ShardingTest({
+ shards: 3,
+ rs0: {nodes: numCoordinatorNodes},
+ causallyConsistent: true,
+ other: {mongosOptions: {verbose: 3}}
+ });
+
+ coordinatorReplSetTest = st.rs0;
+ }
- let coordinatorReplSetTest = st.configRS;
let participant0 = st.shard0;
let participant1 = st.shard1;
let participant2 = st.shard2;
@@ -67,9 +84,6 @@
let expectedParticipantList =
[participant0.shardName, participant1.shardName, participant2.shardName];
- let lsid = {id: UUID()};
- let txnNumber = 0;
-
const runCommitThroughMongosInParallelShellExpectSuccess = function() {
const runCommitExpectSuccessCode = "assert.commandWorked(db.adminCommand({" +
"commitTransaction: 1," + "lsid: " + tojson(lsid) + "," + "txnNumber: NumberLong(" +
@@ -101,13 +115,6 @@
assert.commandWorked(
st.s.adminCommand({moveChunk: ns, find: {_id: 10}, to: participant2.shardName}));
- // These forced refreshes are not strictly necessary; they just prevent extra TXN log
- // lines from the shards starting, aborting, and restarting the transaction due to
- // needing to refresh after the transaction has started.
- assert.commandWorked(participant0.adminCommand({_flushRoutingTableCacheUpdates: ns}));
- assert.commandWorked(participant1.adminCommand({_flushRoutingTableCacheUpdates: ns}));
- assert.commandWorked(participant2.adminCommand({_flushRoutingTableCacheUpdates: ns}));
-
// Start a new transaction by inserting a document onto each shard.
assert.commandWorked(st.s.getDB(dbName).runCommand({
insert: collName,
@@ -121,17 +128,27 @@
};
const testCommitProtocol = function(makeAParticipantAbort, failpoint, expectAbortResponse) {
- jsTest.log("Testing commit protocol with makeAParticipantAbort: " +
- makeAParticipantAbort + ", failpoint: " + failpoint +
- ", and expectAbortResponse: " + expectAbortResponse);
+ jsTest.log("Testing commit protocol with sameNodeStepsUpAfterFailover: " +
+ sameNodeStepsUpAfterFailover + ", overrideCoordinatorToBeConfigServer: " +
+ overrideCoordinatorToBeConfigServer + ", makeAParticipantAbort: " +
+ makeAParticipantAbort + ", expectAbortResponse: " + expectAbortResponse +
+ ", and failpoint: " + failpoint);
txnNumber++;
setUp();
+ coordinatorReplSetTest.awaitNodesAgreeOnPrimary();
+ let coordPrimary = coordinatorReplSetTest.getPrimary();
+
if (makeAParticipantAbort) {
+ // In order to test coordinator failover for a coordinator colocated with a
+ // participant, the participant colocated with the coordinator must fail to prepare,
+ // because prepare does not yet support failover.
+ let nodeToAbort = overrideCoordinatorToBeConfigServer ? participant2 : coordPrimary;
+
// Manually abort the transaction on one of the participants, so that the
// participant fails to prepare.
- assert.commandWorked(participant2.adminCommand({
+ assert.commandWorked(nodeToAbort.adminCommand({
abortTransaction: 1,
lsid: lsid,
txnNumber: NumberLong(txnNumber),
@@ -140,9 +157,6 @@
}));
}
- coordinatorReplSetTest.awaitNodesAgreeOnPrimary();
- let coordPrimary = coordinatorReplSetTest.getPrimary();
-
assert.commandWorked(coordPrimary.adminCommand({
configureFailPoint: failpoint,
mode: "alwaysOn",
@@ -213,35 +227,41 @@
// Run through all the failpoints when all participants respond to prepare with vote commit.
//
- ++failpointCounter;
-
- // Note: If the coordinator fails over before making the participant list durable, the
- // transaction will abort even if all participants could have committed. Further note that
- // this is a property of the coordinator only - in general, the coordinator is co-located
- // with a participant and in 4.2, participants abort if they fail over before prepare. This
- // is really testing that even if the participant's unprepared transaction was able to
- // survive failover at some future time (for example, in the near future for read-only
- // transactions, or in the far future if we add support for multi-master), then the
- // transaction would nevertheless abort due to the design of the coordinator.
- testCommitProtocol(false /* all participants can commit */,
- "hangBeforeWritingParticipantList",
- true /* expect abort decision */);
-
- testCommitProtocol(false /* all participants can commit */,
- "hangBeforeWritingDecision",
- false /* expect commit decision */);
- testCommitProtocol(
- false /* all participants can commit */, "hangBeforeDeletingCoordinatorDoc", false
- /* expect commit decision */);
-
+ // We only test two-phase commit (as opposed to two-phase abort) if the coordinator is
+ // overridden to be the config server, because prepare does not yet support failover.
+ if (overrideCoordinatorToBeConfigServer) {
+ ++failpointCounter;
+
+ // Note: If the coordinator fails over before making the participant list durable, the
+ // transaction will abort even if all participants could have committed. This is a
+ // property of the coordinator only, and would be true even if a participant's
+ // in-progress transaction could survive failover.
+ testCommitProtocol(false /* all participants can commit */,
+ "hangBeforeWritingParticipantList",
+ true /* expect abort decision */);
+
+ testCommitProtocol(false /* all participants can commit */,
+ "hangBeforeWritingDecision",
+ false /* expect commit decision */);
+ testCommitProtocol(
+ false /* all participants can commit */, "hangBeforeDeletingCoordinatorDoc", false
+ /* expect commit decision */);
+ }
st.stop();
};
- // Same node *always* steps back up after stepping down.
- runTest(true);
+ //
+ // Coordinator is co-located with a participant
+ //
+
+ runTest(true /* same node always steps up after stepping down */, false);
+ runTest(false /* same node always steps up after stepping down */, false);
+
+ //
+ // Override coordinator to be config server
+ //
- // Same or different node can step back up after stepping down (but most likely a different node
- // will).
- runTest(false);
+ runTest(true /* same node always steps up after stepping down */, true);
+ runTest(false /* same node always steps up after stepping down */, true);
})();