summaryrefslogtreecommitdiff
path: root/jstests/concurrency/fsm_workload_helpers/auto_retry_transaction.js
blob: 30fdaa89e5f4aed881dc03ef7ce000d62d612199 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
'use strict';

var {withTxnAndAutoRetry, isKilledSessionCode} = (function() {
    /**
     * Runs 'func' while the global print() is temporarily replaced by a function that does
     * nothing, then puts the original print() back -- whether 'func' returned or threw.
     *
     * The point is to keep the JavaScript backtrace that would otherwise be logged when
     * doassert() is called out of the output in situations where the resulting exception is
     * expected and will be caught and handled by the caller anyway.
     *
     * Any exception raised by 'func' propagates to the caller; the return value of 'func' is
     * discarded.
     */
    function quietly(func) {
        const savedPrint = print;

        // Function.prototype is itself a callable that accepts any arguments and returns
        // undefined, so it serves as a ready-made no-op.
        print = Function.prototype;
        try {
            func();
        } finally {
            print = savedPrint;
        }
    }

    // Returns true if 'code' is one of the error codes that could be the result of a session
    // being killed (Interrupted, CursorKilled or CursorNotFound), and false otherwise.
    function isKilledSessionCode(code) {
        // 'switch' compares with strict equality, so no type coercion takes place.
        switch (code) {
            case ErrorCodes.Interrupted:
            case ErrorCodes.CursorKilled:
            case ErrorCodes.CursorNotFound:
                return true;
            default:
                return false;
        }
    }

    // Returns true if the whole transaction can be retried with a higher transaction number
    // after it failed with error 'e', and false otherwise.
    //
    //   e                     - the caught error; 'errorLabels', 'code' and 'writeErrors' are
    //                           consulted when present.
    //   hasCommitTxnError     - true if 'e' was raised while committing rather than while running
    //                           the transaction's operations.
    //   retryOnKilledSession  - true if error codes that may come from a killed session should
    //                           also be treated as retryable.
    function shouldRetryEntireTxnOnError(e, hasCommitTxnError, retryOnKilledSession) {
        // An error labeled as a transient transaction error is always retryable, including when
        // it was raised by the commit.
        if (e.hasOwnProperty('errorLabels') &&
            e.errorLabels.includes('TransientTransactionError')) {
            return true;
        }

        // Don't retry the entire transaction on commit errors that aren't labeled as transient
        // transaction errors because it's unknown if the commit succeeded. commitTransaction is
        // individually retryable and should be retried at a lower level (e.g.
        // network_error_and_txn_override.js or commitTransactionWithKilledSessionRetries()), so any
        // error that reached here must not be transient.
        if (hasCommitTxnError) {
            print("-=-=-=- Cannot retry entire transaction on commit transaction error without" +
                  " transient transaction error label, error: " + tojsononeline(e));
            return false;
        }

        // A network error before commit is considered a transient txn error. Network errors during
        // commit should be handled at the same level as retries of retryable writes.
        if (isNetworkError(e)) {
            return true;
        }

        if (!retryOnKilledSession) {
            return false;
        }

        if (isKilledSessionCode(e.code)) {
            return true;
        }

        // A batch of write errors is only attributed to a killed session when there is at least
        // one write error and every one of them carries a killed session code. The explicit length
        // check matters because every() is vacuously true for an empty array, which would
        // otherwise classify an error with no write errors at all as retryable.
        return Array.isArray(e.writeErrors) && e.writeErrors.length > 0 &&
            e.writeErrors.every(writeError => isKilledSessionCode(writeError.code));
    }

    // Commits the transaction that is active on 'session'. When 'retryOnKilledSession' is set, a
    // commit that fails with a killed session error code is re-sent until it stops failing that
    // way. Returns nothing on success and throws if the final commit response is a failure.
    function commitTransactionWithKilledSessionRetries(session, retryOnKilledSession) {
        // A commit interrupted by a killed session may or may not have taken effect, so it has to
        // be sent again. Doing so is safe because commitTransaction is a retryable write.
        const interruptedByKill = (res) =>
            !res.ok && retryOnKilledSession && isKilledSessionCode(res.code);

        let response = session.commitTransaction_forTesting();
        while (interruptedByKill(response)) {
            print("-=-=-=- Retrying commit after killed session code, sessionId: " +
                  tojsononeline(session.getSessionId()) +
                  ", txnNumber: " + tojsononeline(session.getTxnNumber_forTesting()) +
                  ", res: " + tojsononeline(response));
            response = session.commitTransaction_forTesting();
        }

        // assert.commandWorked() is used because the exception it throws on failure has the format
        // the caller of this function expects. The failure may be a transient error that gets
        // retried at a higher level, so the logging it would normally produce is suppressed.
        quietly(() => {
            assert.commandWorked(response);
        });
    }

    // Default wtimeout in milliseconds: five minutes plus an odd 789ms tail. The tail acts as a
    // "signature" that won't typically match a value assigned in normal use, which makes the
    // wtimeout set by this override distinguishable in the server logs.
    const kDefaultWtimeout = (5 * 60 * 1000) + 789;

    /**
     * Starts a transaction on 'session' using 'txnOptions', calls 'func' inside of it and then
     * commits. If any of that fails with an error that is considered retryable, the whole sequence
     * is started over; this repeats until the transaction commits or an error that is not
     * retryable is raised, in which case that error is rethrown to the caller.
     *
     * Options (all optional):
     *   - txnOptions: passed to startTransaction_forTesting(). By default the read concern level
     *     is TestData.defaultTransactionReadConcernLevel (falling back to 'snapshot'), and a write
     *     concern is only included when TestData has a 'defaultTransactionWriteConcernW' property.
     *   - retryOnKilledSession: when true, error codes that may come from a killed session are
     *     also treated as retryable. Defaults to false.
     *   - prepareProbability: there is a probability of 'prepareProbability' that the transaction
     *     is prepared before committing. Defaults to 0.0 and must stay 0.0 when
     *     retryOnKilledSession is true.
     *
     * Because 'func' may run several times, the caller should make sure it doesn't speculatively
     * modify captured variables in a way that would misbehave when repeated. The transaction is
     * only known to have committed once withTxnAndAutoRetry() has returned.
     */
    function withTxnAndAutoRetry(session, func, {
        txnOptions = {
            readConcern: {level: TestData.defaultTransactionReadConcernLevel || 'snapshot'},
            writeConcern: TestData.hasOwnProperty("defaultTransactionWriteConcernW")
                ? {w: TestData.defaultTransactionWriteConcernW, wtimeout: kDefaultWtimeout}
                : undefined
        },
        retryOnKilledSession = false,
        prepareProbability = 0.0
    } = {}) {
        // Committing a manually prepared transaction isn't currently supported when sessions might
        // be killed.
        assert(!retryOnKilledSession || prepareProbability === 0.0,
               "retrying on killed session error codes isn't supported with prepareProbability");

        for (let attempt = 1;; attempt += 1) {
            session.startTransaction_forTesting(txnOptions, {ignoreActiveTxn: true});

            if (attempt % 10 === 0) {
                print("withTxnAndAutoRetry has iterated " + attempt + " times.");
            }

            // Flipped to true once 'func' has returned, so that the error handler below can tell
            // a failure of the commit step apart from a failure of the transaction's operations.
            let inCommitPhase = false;
            try {
                func();

                inCommitPhase = true;
                if (Random.rand() < prepareProbability) {
                    const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
                    PrepareHelpers.commitTransaction(session, prepareTimestamp);
                } else {
                    commitTransactionWithKilledSessionRetries(session, retryOnKilledSession);
                }

                // Committed; nothing left to retry.
                return;
            } catch (e) {
                if (!inCommitPhase) {
                    // abortTransaction_forTesting() has to be called so that the mongo shell's
                    // state agrees that no transaction is in progress on this session any more.
                    // The server may already have aborted the transaction implicitly, or the
                    // kill_session helper may have killed it, so a NoSuchTransaction/Interrupted
                    // error code from the abort is acceptable.
                    assert.commandWorkedOrFailedWithCode(
                        session.abortTransaction_forTesting(),
                        [ErrorCodes.NoSuchTransaction, ErrorCodes.Interrupted]);
                }

                if (shouldRetryEntireTxnOnError(e, inCommitPhase, retryOnKilledSession)) {
                    print("Retrying transaction due to transient error.");
                } else if (e.code == ErrorCodes.FailedToSatisfyReadPreference) {
                    // FailedToSatisfyReadPreference errors are not retryable.
                    // However, they should be because if there is no primary, there should be one
                    // soon.
                    // TODO SERVER-60706: Make FailedToSatisfyReadPreference a transient error
                    print("Retrying transaction due to a FailedToSatisfyReadPreference error.");
                } else {
                    throw e;
                }
            }
        }
    }

    return {withTxnAndAutoRetry, isKilledSessionCode};
})();