summaryrefslogtreecommitdiff
path: root/jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js
blob: 6018fba57559efe074fb1d35884553c3c1458d5a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/**
 * Overrides runCommand so operations that encounter the BackgroundOperationInProgressForNs/Db error
 * codes automatically retry.
 */
(function() {
    "use strict";

    load("jstests/libs/override_methods/override_helpers.js");

    // These are all commands that can return BackgroundOperationInProgress error codes. Only
    // commands named here are ever retried; any other command is run once and its response is
    // returned as-is.
    const commandWhitelist = new Set([
        "cloneCollectionAsCapped",
        "collMod",
        "compact",
        "convertToCapped",
        "createIndexes",
        "drop",
        "dropDatabase",
        "dropIndexes",
        "reIndex",
        "renameCollection",
    ]);

    // Whitelisted errors commands may encounter when retried on a sharded cluster. Shards may
    // return different responses, so errors associated with repeated executions of a command may be
    // ignored. Keyed by command name; each value lists the per-shard error codes that are tolerated
    // on a retry (they are never tolerated on the first attempt).
    const acceptableCommandErrors = {
        "drop": [ErrorCodes.NamespaceNotFound],
        "dropIndexes": [ErrorCodes.IndexNotFound],
        "renameCollection": [ErrorCodes.NamespaceNotFound],
    };

    // Timeout and polling interval handed to assert.soon; presumably milliseconds (10 minutes and
    // 200 ms respectively) -- confirm against the shell's assert.soon.
    const kTimeout = 10 * 60 * 1000;
    const kInterval = 200;

    // Make it easier to understand whether or not returns from the assert.soon are being retried.
    // The assert.soon callback returns kNoRetry (true) to stop polling and kRetry (false) to ask
    // for another attempt.
    const kNoRetry = true;
    const kRetry = false;

    /**
     * Reports whether a command response failed because a background operation is still running.
     *
     * @param {Object} res - a command response (or a single shard's response) carrying a `code`.
     * @returns {boolean} true only when `res.code` is one of the two
     *     BackgroundOperationInProgress error codes, which are the only retryable ones.
     */
    function hasBackgroundOpInProgress(res) {
        // A switch compares with strict equality, exactly like `===`.
        switch (res.code) {
            case ErrorCodes.BackgroundOperationInProgressForNamespace:
            case ErrorCodes.BackgroundOperationInProgressForDatabase:
                return true;
            default:
                return false;
        }
    }

    /**
     * Runs a command through 'func' and retries it for as long as the response reports that a
     * background operation is in progress, polling via assert.soon with kTimeout / kInterval.
     *
     * Only commands listed in 'commandWhitelist' are retried. Against mongos, the per-shard
     * responses in 'res.raw' are inspected as well: the command is retried while any shard still
     * has a background operation in progress, and errors listed in 'acceptableCommandErrors' are
     * tolerated on shards once the command has been attempted more than once.
     *
     * @param {Object} conn - connection the command runs on; used as 'this' for 'func' and queried
     *     with isMongos().
     * @param {string} dbName - not used by this override; kept for the override signature.
     * @param {string} commandName - name of the command, checked against the whitelist.
     * @param {Object} commandObj - the command; non-object values are passed through untouched.
     * @param {Function} func - the function that actually runs the command.
     * @param {Function} makeFuncArgs - builds the argument array for 'func' from 'commandObj'.
     * @returns {Object} the response of the last attempt. Its 'ok' field is forced to 1 when the
     *     only remaining shard failures are acceptable errors caused by re-running the command.
     */
    function runCommandWithRetries(conn, dbName, commandName, commandObj, func, makeFuncArgs) {
        // Nothing to inspect or retry for a non-object command; run it once and return.
        if (typeof commandObj !== "object" || commandObj === null) {
            return func.apply(conn, makeFuncArgs(commandObj));
        }

        let res;
        let attempt = 0;

        assert.soon(
            () => {
                attempt++;

                res = func.apply(conn, makeFuncArgs(commandObj));
                if (res.ok === 1) {
                    return kNoRetry;
                }

                // Commands that are not in the whitelist should never fail with this error code.
                if (!commandWhitelist.has(commandName)) {
                    return kNoRetry;
                }

                const message = "Retrying the " + commandName +
                    " command because a background operation is in progress (attempt " + attempt +
                    ")";

                // This handles the retry case when run against a standalone, replica set, or mongos
                // where both shards returned the same response.
                if (hasBackgroundOpInProgress(res)) {
                    print(message);
                    return kRetry;
                }

                // The following logic only applies to sharded clusters.
                if (!conn.isMongos() || !res.raw) {
                    // We don't attempt to retry commands for which mongos doesn't expose the raw
                    // responses from the shards.
                    return kNoRetry;
                }

                // In certain cases, retrying a command on a sharded cluster may result in a
                // scenario where one shard has executed the command and another still has a
                // background operation in progress. Retry, ignoring whitelisted errors on a
                // command-by-command basis.
                const acceptableErrors = acceptableCommandErrors[commandName] || [];
                const shardsWithBackgroundOps = [];
                let sawAcceptableError = false;

                for (let shard in res.raw) {
                    const shardRes = res.raw[shard];
                    if (shardRes.ok) {
                        continue;
                    }

                    if (hasBackgroundOpInProgress(shardRes)) {
                        shardsWithBackgroundOps.push(shard);
                        continue;
                    }

                    // Any shard error that is not whitelisted for this command is a real failure.
                    if (!acceptableErrors.includes(shardRes.code)) {
                        return kNoRetry;
                    }
                    // Whitelisted errors can only occur from running a command more than once, so
                    // it would be unexpected to receive an error on the first attempt.
                    if (attempt === 1) {
                        return kNoRetry;
                    }
                    sawAcceptableError = true;
                }

                // At least one shard is still busy: retry the entire command.
                if (shardsWithBackgroundOps.length > 0) {
                    print(message + " on shards: " + tojson(shardsWithBackgroundOps));
                    return kRetry;
                }

                // No shard reported a background operation or an acceptable error (e.g. 'raw' is
                // empty or every shard entry is ok), so the top-level failure has another cause.
                // Surface it unchanged rather than faking success.
                if (!sawAcceptableError) {
                    return kNoRetry;
                }

                // Every failing shard returned a whitelisted error caused by re-running the
                // command. Fake a successful response.
                print("done retrying " + commandName +
                      " command because all shards have responded with acceptable errors");
                res.ok = 1;
                return kNoRetry;
            },
            () => "Timed out while retrying command '" + tojson(commandObj) + "', response: " +
                tojson(res),
            kTimeout,
            kInterval);
        return res;
    }

    // Install the retry wrapper as the runCommand override so that every command issued after
    // this file is loaded goes through runCommandWithRetries.
    OverrideHelpers.overrideRunCommand(runCommandWithRetries);
})();