// jstests/sharding/libs/sharded_transactions_helpers.js
const kSnapshotErrors =
    [ErrorCodes.SnapshotTooOld, ErrorCodes.SnapshotUnavailable, ErrorCodes.StaleChunkHistory];

// List of failpoints in the coordinator's two-phase commit code. The associated data describes how
// many times each failpoint would be hit assuming a 3-participant transaction where one of the
// participants is co-located with the coordinator:
// - numTimesShouldBeHit: N means the failpoint should be hit N times during that phase; for
//   example, if there are two remote participants, the hangWhileTargetingRemoteHost failpoint would
//   be hit two times in the prepare phase.
// - skip: N means the failpoint only takes effect after it has already been hit N times; this is
//   used to activate the remote and local targeting failpoints separately for the prepare and
//   decision phases.
function getCoordinatorFailpoints() {
    const coordinatorFailpointDataArr = [
        {failpoint: "hangBeforeWritingParticipantList", numTimesShouldBeHit: 1},
        {
            // Test targeting remote nodes for prepare
            failpoint: "hangWhileTargetingRemoteHost",
            numTimesShouldBeHit: 2 /* once per remote participant */
        },
        {
            // Test targeting local node for prepare
            failpoint: "hangWhileTargetingLocalHost",
            numTimesShouldBeHit: 1
        },
        {failpoint: "hangBeforeWritingDecision", numTimesShouldBeHit: 1},
        {
            // Test targeting remote nodes for decision
            failpoint: "hangWhileTargetingRemoteHost",
            numTimesShouldBeHit: 2, /* once per remote participant */
            skip: 2                 /* skip the two hits from the prepare phase */
        },
        {
            // Test targeting local node for decision
            failpoint: "hangWhileTargetingLocalHost",
            numTimesShouldBeHit: 1,
            skip: 1 /* skip the hit from the prepare phase */
        },
        {failpoint: "hangBeforeDeletingCoordinatorDoc", numTimesShouldBeHit: 1},
    ];

    // Return a copy of the array with each element shallow-copied. The elements only contain
    // primitives, so this is enough to let the caller modify the returned contents freely.
    return coordinatorFailpointDataArr.map(failpoint => Object.assign({}, failpoint));
}
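
// Example (an illustrative sketch, not part of the original helpers): a test can iterate the
// returned data, enable each failpoint (honoring `skip`), kick off a commit, and wait for the
// expected number of hits before releasing it. `coordinatorPrimary` is a hypothetical
// connection to the coordinator shard's primary.
//
// getCoordinatorFailpoints().forEach((fpData) => {
//     assert.commandWorked(coordinatorPrimary.adminCommand({
//         configureFailPoint: fpData.failpoint,
//         mode: fpData.skip ? {skip: fpData.skip} : "alwaysOn",
//     }));
//     // ... run a transaction and initiate commit here ...
//     waitForFailpoint("Hit " + fpData.failpoint + " failpoint", fpData.numTimesShouldBeHit);
//     assert.commandWorked(coordinatorPrimary.adminCommand(
//         {configureFailPoint: fpData.failpoint, mode: "off"}));
// });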

function setFailCommandOnShards(st, mode, commands, code, numShards, ns) {
    for (let i = 0; i < numShards; i++) {
        const shardConn = st["rs" + i].getPrimary();
        // Sharding tests require failInternalCommands: true, since the mongos appears to mongod to
        // be an internal client.
        const data = {errorCode: code, failCommands: commands, failInternalCommands: true};
        if (ns) {
            data.namespace = ns;
        }
        assert.commandWorked(
            shardConn.adminCommand({configureFailPoint: "failCommand", mode: mode, data: data}));
    }
}

function unsetFailCommandOnEachShard(st, numShards) {
    for (let i = 0; i < numShards; i++) {
        const shardConn = st["rs" + i].getPrimary();
        assert.commandWorked(
            shardConn.adminCommand({configureFailPoint: "failCommand", mode: "off"}));
    }
}
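
// Example (an illustrative sketch): fail "insert" on every shard with a snapshot error from
// kSnapshotErrors above, run the workload, then clean up. Assumes `st` has two shards.
//
// setFailCommandOnShards(st, {times: 1}, ["insert"], kSnapshotErrors[0], 2);
// // ... run a transaction statement that should hit the simulated snapshot error ...
// unsetFailCommandOnEachShard(st, 2);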

function assertNoSuchTransactionOnAllShards(st, lsid, txnNumber) {
    st._rs.forEach(function(rs) {
        assertNoSuchTransactionOnConn(rs.test.getPrimary(), lsid, txnNumber);
    });
}

function assertNoSuchTransactionOnConn(conn, lsid, txnNumber) {
    assert.commandFailedWithCode(
        conn.getDB("foo").runCommand({
            find: "bar",
            lsid: lsid,
            txnNumber: NumberLong(txnNumber),
            autocommit: false,
        }),
        ErrorCodes.NoSuchTransaction,
        "expected there to be no active transaction on shard, lsid: " + tojson(lsid) +
            ", txnNumber: " + tojson(txnNumber) + ", connection: " + tojson(conn));
}
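
// Example (an illustrative sketch): after aborting a transaction through mongos, verify that no
// shard still has it open. Assumes `session` is a shell session that just ran the transaction
// and that the shell exposes getTxnNumber_forTesting() on it.
//
// assertNoSuchTransactionOnAllShards(
//     st, session.getSessionId(), session.getTxnNumber_forTesting());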

function waitForFailpoint(hitFailpointStr, numTimes, timeout) {
    // Don't run the hang analyzer because we don't expect waitForFailpoint() to always succeed.
    assert.soon(
        function() {
            const re = new RegExp(hitFailpointStr, 'g' /* find all occurrences */);
            return (rawMongoProgramOutput().match(re) || []).length == numTimes;
        },
        'Failed to find "' + hitFailpointStr + '" logged ' + numTimes + ' times',
        timeout,
        undefined,
        {runHangAnalyzer: false});
}
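
// Example (an illustrative sketch): block until the coordinator has logged that a failpoint was
// hit. The server logs a line like "Hit <name> failpoint" when a configured failpoint fires, but
// treat the exact log text as an assumption.
//
// waitForFailpoint("Hit hangBeforeWritingDecision failpoint", 1 /* numTimes */);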

// Enables the transaction router to retry on stale version (db or shard version) and snapshot
// errors within a transaction.
//
// TODO SERVER-39704: Remove this function.
function enableStaleVersionAndSnapshotRetriesWithinTransactions(st) {
    assert.commandWorked(st.s.adminCommand({
        configureFailPoint: "enableStaleVersionAndSnapshotRetriesWithinTransactions",
        mode: "alwaysOn"
    }));

    st._rs.forEach(function(replTest) {
        replTest.nodes.forEach(function(node) {
            assert.commandWorked(node.getDB('admin').runCommand(
                {configureFailPoint: "dontRemoveTxnCoordinatorOnAbort", mode: "alwaysOn"}));
        });
    });
}

// TODO SERVER-39704: Remove this function.
function disableStaleVersionAndSnapshotRetriesWithinTransactions(st) {
    assert.commandWorked(st.s.adminCommand({
        configureFailPoint: "enableStaleVersionAndSnapshotRetriesWithinTransactions",
        mode: "off"
    }));

    st._rs.forEach(function(replTest) {
        replTest.nodes.forEach(function(node) {
            assert.commandWorked(node.getDB('admin').runCommand(
                {configureFailPoint: "dontRemoveTxnCoordinatorOnAbort", mode: "off"}));
        });
    });
}
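
// Example (an illustrative sketch): wrap a workload in the enable/disable pair above so the
// router transparently retries on stale version and snapshot errors inside transactions.
//
// enableStaleVersionAndSnapshotRetriesWithinTransactions(st);
// // ... run transactions that may race with chunk migrations or metadata refreshes ...
// disableStaleVersionAndSnapshotRetriesWithinTransactions(st);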

// Flush each router's metadata and force refreshes on each shard for the given namespace and/or
// database names.
//
// TODO SERVER-39704: Remove this function.
function flushRoutersAndRefreshShardMetadata(st, {ns, dbNames = []} = {}) {
    st._mongos.forEach((s) => {
        assert.commandWorked(s.adminCommand({flushRouterConfig: 1}));
    });

    st._rs.forEach((rs) => {
        if (ns) {
            assert.commandWorked(
                rs.test.getPrimary().adminCommand({_flushRoutingTableCacheUpdates: ns}));
        }

        dbNames.forEach((dbName) => {
            assert.commandWorked(
                rs.test.getPrimary().adminCommand({_flushDatabaseCacheUpdates: dbName}));
        });
    });
}
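
// Example (an illustrative sketch; the namespace and database name are hypothetical): refresh
// routing metadata everywhere, e.g. after a chunk migration, so the next transaction starts
// from fresh routing info.
//
// flushRoutersAndRefreshShardMetadata(st, {ns: "test.foo", dbNames: ["test"]});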