diff options
| field | value | date |
|---|---|---|
| author | Esha Maharishi <esha.maharishi@mongodb.com> | 2019-01-31 15:04:42 -0500 |
| committer | Esha Maharishi <esha.maharishi@mongodb.com> | 2019-01-31 19:01:42 -0500 |
| commit | 9606de0f0f3166b9c8fcff033f2476af2937f685 (patch) | |
| tree | 187e4e382deaab3626fa5abb0324cf763fb75471 | |
| parent | f4656acfee11569a796e06d14e4825ab54d39ecc (diff) | |
| download | mongo-9606de0f0f3166b9c8fcff033f2476af2937f685.tar.gz | |
SERVER-39316 Test two-phase abort path without 'config server is coordinator' override in txn_failover_two_phase_commit.js
-rw-r--r-- | jstests/sharding/txn_failover_two_phase_commit.js | 154 |
1 files changed, 87 insertions, 67 deletions
diff --git a/jstests/sharding/txn_failover_two_phase_commit.js b/jstests/sharding/txn_failover_two_phase_commit.js index cac9e0f1224..0fc9f1b7a2c 100644 --- a/jstests/sharding/txn_failover_two_phase_commit.js +++ b/jstests/sharding/txn_failover_two_phase_commit.js @@ -6,6 +6,10 @@ * @tags: [uses_transactions, uses_multi_shard_transaction] */ +// The UUID consistency check uses connections to shards cached on the ShardingTest object, but this +// test causes failovers on a shard, so the cached connection is not usable. +TestData.skipCheckingUUIDsConsistentAcrossCluster = true; + (function() { 'use strict'; @@ -23,11 +27,10 @@ let failpointCounter = 0; - const runTest = function(sameNodeStepsUpAfterFailover) { - - jsTest.log("Testing all scenarios with sameNodeStepsUpAfterFailover: " + - sameNodeStepsUpAfterFailover); + let lsid = {id: UUID()}; + let txnNumber = 0; + const runTest = function(sameNodeStepsUpAfterFailover, overrideCoordinatorToBeConfigServer) { let stepDownSecs; // The amount of time the node has to wait before becoming primary again. let numCoordinatorNodes; if (sameNodeStepsUpAfterFailover) { @@ -38,28 +41,42 @@ stepDownSecs = 3; } - let st = new ShardingTest({ - shards: 3, // number of *regular shards* - config: numCoordinatorNodes, // number of replica set *nodes* in *config shard* - causallyConsistent: true, - other: { - mongosOptions: { - // This failpoint is needed because it is not yet possible to step down a node - // with a prepared transaction. - setParameter: - {"failpoint.sendCoordinateCommitToConfigServer": "{'mode': 'alwaysOn'}"}, - verbose: 3 - }, - configOptions: { - // This failpoint is needed because of the other failpoint: the config server - // will not have a local participant, so coordinateCommitTransaction cannot fall - // back to recovering the decision from the local participant. 
- setParameter: {"failpoint.doNotForgetCoordinator": "{'mode': 'alwaysOn'}"}, + let st, coordinatorReplSetTest; + if (overrideCoordinatorToBeConfigServer) { + st = new ShardingTest({ + shards: 3, // number of *regular shards* + config: numCoordinatorNodes, // number of replica set *nodes* in *config shard* + causallyConsistent: true, + other: { + mongosOptions: { + // This failpoint is needed because it is not yet possible to step down a + // node with a prepared transaction. + setParameter: { + "failpoint.sendCoordinateCommitToConfigServer": "{'mode': 'alwaysOn'}" + }, + verbose: 3 + }, + configOptions: { + // This failpoint is needed because of the other failpoint: the config + // server will not have a local participant, so coordinateCommitTransaction + // cannot fall back to recovering the decision from the local participant. + setParameter: {"failpoint.doNotForgetCoordinator": "{'mode': 'alwaysOn'}"}, + } } - } - }); + }); + + coordinatorReplSetTest = st.configRS; + } else { + st = new ShardingTest({ + shards: 3, + rs0: {nodes: numCoordinatorNodes}, + causallyConsistent: true, + other: {mongosOptions: {verbose: 3}} + }); + + coordinatorReplSetTest = st.rs0; + } - let coordinatorReplSetTest = st.configRS; let participant0 = st.shard0; let participant1 = st.shard1; let participant2 = st.shard2; @@ -67,9 +84,6 @@ let expectedParticipantList = [participant0.shardName, participant1.shardName, participant2.shardName]; - let lsid = {id: UUID()}; - let txnNumber = 0; - const runCommitThroughMongosInParallelShellExpectSuccess = function() { const runCommitExpectSuccessCode = "assert.commandWorked(db.adminCommand({" + "commitTransaction: 1," + "lsid: " + tojson(lsid) + "," + "txnNumber: NumberLong(" + @@ -101,13 +115,6 @@ assert.commandWorked( st.s.adminCommand({moveChunk: ns, find: {_id: 10}, to: participant2.shardName})); - // These forced refreshes are not strictly necessary; they just prevent extra TXN log - // lines from the shards starting, aborting, and 
restarting the transaction due to - // needing to refresh after the transaction has started. - assert.commandWorked(participant0.adminCommand({_flushRoutingTableCacheUpdates: ns})); - assert.commandWorked(participant1.adminCommand({_flushRoutingTableCacheUpdates: ns})); - assert.commandWorked(participant2.adminCommand({_flushRoutingTableCacheUpdates: ns})); - // Start a new transaction by inserting a document onto each shard. assert.commandWorked(st.s.getDB(dbName).runCommand({ insert: collName, @@ -121,17 +128,27 @@ }; const testCommitProtocol = function(makeAParticipantAbort, failpoint, expectAbortResponse) { - jsTest.log("Testing commit protocol with makeAParticipantAbort: " + - makeAParticipantAbort + ", failpoint: " + failpoint + - ", and expectAbortResponse: " + expectAbortResponse); + jsTest.log("Testing commit protocol with sameNodeStepsUpAfterFailover: " + + sameNodeStepsUpAfterFailover + ", overrideCoordinatorToBeConfigServer: " + + overrideCoordinatorToBeConfigServer + ", makeAParticipantAbort: " + + makeAParticipantAbort + ", expectAbortResponse: " + expectAbortResponse + + ", and failpoint: " + failpoint); txnNumber++; setUp(); + coordinatorReplSetTest.awaitNodesAgreeOnPrimary(); + let coordPrimary = coordinatorReplSetTest.getPrimary(); + if (makeAParticipantAbort) { + // In order to test coordinator failover for a coordinator colocated with a + // participant, the participant colocated with the coordinator must fail to prepare, + // because prepare does not yet support failover. + let nodeToAbort = overrideCoordinatorToBeConfigServer ? participant2 : coordPrimary; + // Manually abort the transaction on one of the participants, so that the // participant fails to prepare. 
- assert.commandWorked(participant2.adminCommand({ + assert.commandWorked(nodeToAbort.adminCommand({ abortTransaction: 1, lsid: lsid, txnNumber: NumberLong(txnNumber), @@ -140,9 +157,6 @@ })); } - coordinatorReplSetTest.awaitNodesAgreeOnPrimary(); - let coordPrimary = coordinatorReplSetTest.getPrimary(); - assert.commandWorked(coordPrimary.adminCommand({ configureFailPoint: failpoint, mode: "alwaysOn", @@ -213,35 +227,41 @@ // Run through all the failpoints when all participants respond to prepare with vote commit. // - ++failpointCounter; - - // Note: If the coordinator fails over before making the participant list durable, the - // transaction will abort even if all participants could have committed. Further note that - // this is a property of the coordinator only - in general, the coordinator is co-located - // with a participant and in 4.2, participants abort if they fail over before prepare. This - // is really testing that even if the participant's unprepared transaction was able to - // survive failover at some future time (for example, in the near future for read-only - // transactions, or in the far future if we add support for multi-master), then the - // transaction would nevertheless abort due to the design of the coordinator. - testCommitProtocol(false /* all participants can commit */, - "hangBeforeWritingParticipantList", - true /* expect abort decision */); - - testCommitProtocol(false /* all participants can commit */, - "hangBeforeWritingDecision", - false /* expect commit decision */); - testCommitProtocol( - false /* all participants can commit */, "hangBeforeDeletingCoordinatorDoc", false - /* expect commit decision */); - + // We only test two-phase commit (as opposed to two-phase abort) if the coordinator is + // overridden to be the config server, because prepare does not yet support failover. 
+ if (overrideCoordinatorToBeConfigServer) { + ++failpointCounter; + + // Note: If the coordinator fails over before making the participant list durable, the + // transaction will abort even if all participants could have committed. This is a + // property of the coordinator only, and would be true even if a participant's + // in-progress transaction could survive failover. + testCommitProtocol(false /* all participants can commit */, + "hangBeforeWritingParticipantList", + true /* expect abort decision */); + + testCommitProtocol(false /* all participants can commit */, + "hangBeforeWritingDecision", + false /* expect commit decision */); + testCommitProtocol( + false /* all participants can commit */, "hangBeforeDeletingCoordinatorDoc", false + /* expect commit decision */); + } st.stop(); }; - // Same node *always* steps back up after stepping down. - runTest(true); + // + // Coordinator is co-located with a participant + // + + runTest(true /* same node always steps up after stepping down */, false); + runTest(false /* same node always steps up after stepping down */, false); + + // + // Override coordinator to be config server + // - // Same or different node can step back up after stepping down (but most likely a different node - // will). - runTest(false); + runTest(true /* same node always steps up after stepping down */, true); + runTest(false /* same node always steps up after stepping down */, true); })(); |