summaryrefslogtreecommitdiff
path: root/jstests/sharding/txn_two_phase_commit_failure.js
blob: 0d5bb5b0831b560a350b3f5940289a844231aea1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/**
 * Exercises two-phase commit failures.
 *
 * @tags: [uses_transactions, uses_multi_shard_transaction, multiversion_incompatible]
 */

(function() {
'use strict';

load('jstests/sharding/libs/sharded_transactions_helpers.js');

// Database, collection and full namespace of the sharded collection used by every scenario.
const dbName = "test";
const collName = "foo";
const ns = `${dbName}.${collName}`;

// No scenario relies on this exact value; a shorter transaction lifetime just makes the test
// finish faster.
TestData.transactionLifetimeLimitSeconds = 10;

// One logical session is shared by all scenarios; txnNumber is incremented before each scenario
// starts its transaction.
const lsid = {id: UUID()};
let txnNumber = 0;

(function() {
// Three-shard cluster. rs0 is given an explicit single-entry node list, and mongos logging is
// turned up to verbosity 3.
const st = new ShardingTest({
    shards: 3,
    rs0: {nodes: [{}]},
    causallyConsistent: true,
    other: {mongosOptions: {verbose: 3}},
});

// rs0 is the replica set whose primary gets the coordinator failpoints; shard0..shard2 are the
// three shards the transaction writes to.
const {
    rs0: coordinatorReplSetTest,
    shard0: participant0,
    shard1: participant1,
    shard2: participant2,
} = st;

/**
 * Starts a parallel shell against the mongos port that sends commitTransaction for the current
 * lsid/txnNumber and asserts that the command fails with the given error code.
 *
 * The lsid and txnNumber are serialized into the shell snippet at call time, so later changes to
 * txnNumber do not affect a shell that was already started.
 *
 * @param errorText name of the ErrorCodes member the commit is expected to fail with.
 * @returns the value returned by startParallelShell (callers invoke it to wait for the shell).
 */
const runCommitThroughMongosInParallelShellExpectAbort = function(errorText) {
    // Text of the command document as it will appear inside the parallel shell's code.
    const commitCmdText = [
        "{commitTransaction: 1,",
        `lsid: ${tojson(lsid)},`,
        `txnNumber: NumberLong(${txnNumber}),`,
        "stmtId: NumberInt(0),",
        "autocommit: false,",
        "}",
    ].join("");

    const shellCode =
        `assert.commandFailedWithCode(db.adminCommand(${commitCmdText}),ErrorCodes.${errorText});`;

    return startParallelShell(shellCode, st.s.port);
};

/**
 * Shards the test collection so that each shard owns one chunk, then starts a transaction (using
 * the current lsid/txnNumber) that inserts one document into each chunk. The transaction is left
 * open for the caller to commit.
 */
const setUp = function() {
    // Runs an admin command through the mongos and asserts that it succeeded.
    const runOnMongosAdmin = (cmd) => assert.commandWorked(st.s.adminCommand(cmd));

    // Create a sharded collection with a chunk on each shard:
    // shard0: [-inf, 0)
    // shard1: [0, 10)
    // shard2: [10, +inf)
    runOnMongosAdmin({enableSharding: dbName});
    runOnMongosAdmin({movePrimary: dbName, to: participant0.shardName});
    runOnMongosAdmin({shardCollection: ns, key: {_id: 1}});
    runOnMongosAdmin({split: ns, middle: {_id: 0}});
    runOnMongosAdmin({split: ns, middle: {_id: 10}});
    runOnMongosAdmin({moveChunk: ns, find: {_id: 0}, to: participant1.shardName});
    runOnMongosAdmin({moveChunk: ns, find: {_id: 10}, to: participant2.shardName});

    flushRoutersAndRefreshShardMetadata(st, {ns});

    // Start a new transaction by inserting a document onto each shard (one _id per chunk range).
    const testDB = st.s.getDB(dbName);
    const startTxnInsert = {
        insert: collName,
        documents: [{_id: -5}, {_id: 5}, {_id: 15}],
        lsid: lsid,
        txnNumber: NumberLong(txnNumber),
        stmtId: NumberInt(0),
        startTransaction: true,
        autocommit: false,
    };
    assert.commandWorked(testDB.runCommand(startTxnInsert));
};

/**
 * Runs one commit scenario: starts a fresh transaction, enables the given failpoint on the
 * coordinator replica set's primary, commits through mongos in a parallel shell expecting
 * 'expectError', then checks that no documents are visible and cleans up.
 *
 * @param failpointData object with 'failpoint' (failpoint name) and optional 'skip' (value for
 *     the failpoint's skip mode, defaults to 0) and 'options' (failpoint data, defaults to {}).
 * @param expectError name of the ErrorCodes member the commit must fail with.
 */
const testCommitProtocol = function(failpointData, expectError = "NoSuchTransaction") {
    jsTest.log(`Testing commit protocol with failpointData: ${tojson(failpointData)}`);

    // Every scenario uses the next transaction number on the shared session.
    txnNumber++;
    setUp();

    const coordinatorPrimary = coordinatorReplSetTest.getPrimary();
    const {failpoint, skip, options} = failpointData;

    assert.commandWorked(coordinatorPrimary.adminCommand({
        configureFailPoint: failpoint,
        mode: {skip: skip || 0},
        data: options || {},
    }));

    // Run commitTransaction through a parallel shell and wait for that shell to finish; the
    // expected-failure assertion itself runs inside the parallel shell.
    const awaitCommitShell = runCommitThroughMongosInParallelShellExpectAbort(expectError);
    awaitCommitShell();

    // Check that the transaction aborted as expected: none of the inserted documents is visible.
    jsTest.log("Verify that the transaction was aborted on all shards.");
    const testColl = st.s.getDB(dbName).getCollection(collName);
    assert.eq(0, testColl.find().itcount());

    // Reset state for the next scenario.
    testColl.drop();
    clearRawMongoProgramOutput();

    assert.commandWorked(
        coordinatorPrimary.adminCommand({configureFailPoint: failpoint, mode: "off"}));
};

//
// Run through all the failpoints. Each failpoint targets a different error-handling block.
//

// Builds the failpoint description for "failRemoteTransactionCommand" applied to the
// prepareTransaction command with the given error code.
const failPrepareWith = (code) => ({
    failpoint: "failRemoteTransactionCommand",
    numTimesShouldBeHit: 2,
    options: {command: "prepareTransaction", code},
});

// This triggers a timeout in the chain registered in the TransactionCoordinator constructor, which
// throws TransactionCoordinatorReachedAbortDecision that is caught by the onError() in the same
// chain.
const hangBeforeWritingParticipantList = getCoordinatorFailpoints().find(
    ({failpoint}) => failpoint === 'hangBeforeWritingParticipantList');
testCommitProtocol(hangBeforeWritingParticipantList);

// NoSuchTransaction is one of the standard error codes that a transaction shard can generate and
// is supported by the transaction coordinator directly.
testCommitProtocol(failPrepareWith(ErrorCodes.NoSuchTransaction));

// Similar to the above, except with a different error code, which is expected to be propagated to
// the abort response.
testCommitProtocol(failPrepareWith(ErrorCodes.TransactionTooOld), "TransactionTooOld");

// CommandNotFound is one of the non-standard error codes from a transaction shard: it is retried
// by the per-shard retry logic, eventually converted into
// TransactionCoordinatorReachedAbortDecision, and caught by the onError block in the
// sendPrepareToShard() helper method.
testCommitProtocol(failPrepareWith(ErrorCodes.CommandNotFound));

st.stop();
})();
})();