summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Louis Williams <louis.williams@mongodb.com> 2020-10-16 11:45:45 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-10-16 16:02:58 +0000
commit: 3f68a848c68e993769589dc18e657728921d8367 (patch)
tree: 0847ca775a4a5d99effa226311c8e3ee2fd1371a
parent: 08e22cf1dbc7f28db4ff0711b2feb166d776c2d8 (diff)
download: mongo-3f68a848c68e993769589dc18e657728921d8367.tar.gz
SERVER-51608 Backport implicitly_retry_on_background_op_in_progress.js (tags: r4.0.21-rc1, r4.0.21)
-rw-r--r-- jstests/concurrency/fsm_libs/worker_thread.js 4
-rw-r--r-- jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js 138
2 files changed, 142 insertions, 0 deletions
diff --git a/jstests/concurrency/fsm_libs/worker_thread.js b/jstests/concurrency/fsm_libs/worker_thread.js
index 0d70add109d..465a8d9b52e 100644
--- a/jstests/concurrency/fsm_libs/worker_thread.js
+++ b/jstests/concurrency/fsm_libs/worker_thread.js
@@ -52,6 +52,10 @@ var workerThread = (function() {
mongo = new Mongo(connectionString);
}
+ // Retry operations that fail due to in-progress background operations. Load this early
+ // so that later overrides can be retried.
+ load('jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js');
+
if (typeof args.sessionOptions !== 'undefined') {
let initialClusterTime;
let initialOperationTime;
diff --git a/jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js b/jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js
new file mode 100644
index 00000000000..6018fba5755
--- /dev/null
+++ b/jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js
@@ -0,0 +1,138 @@
+/**
+ * Overrides runCommand so operations that encounter the BackgroundOperationInProgressForNs/Db error
+ * codes automatically retry.
+ */
+(function() {
+ "use strict";
+
+ load("jstests/libs/override_methods/override_helpers.js");
+
+ // Commands that can fail with a BackgroundOperationInProgress error code; only these are retried.
+ const commandWhitelist = new Set([
+ "cloneCollectionAsCapped",
+ "collMod",
+ "compact",
+ "convertToCapped",
+ "createIndexes",
+ "drop",
+ "dropDatabase",
+ "dropIndexes",
+ "reIndex",
+ "renameCollection",
+ ]);
+
+ // Per-command error codes tolerated from individual shards when a command is retried on a
+ // sharded cluster. Shards may return different responses, so errors caused by repeated
+ // executions of a command may be ignored.
+ const acceptableCommandErrors = {
+ "drop": [ErrorCodes.NamespaceNotFound],
+ "dropIndexes": [ErrorCodes.IndexNotFound],
+ "renameCollection": [ErrorCodes.NamespaceNotFound],
+ };
+ // Timeout and polling interval passed to assert.soon (presumably milliseconds: 10 min, 200 ms).
+ const kTimeout = 10 * 60 * 1000;
+ const kInterval = 200;
+
+ // Named return values for the assert.soon callback: true stops the polling, false retries.
+ const kNoRetry = true;
+ const kRetry = false;
+
+ function hasBackgroundOpInProgress(res) {
+ // True only for the two BackgroundOperationInProgress codes in res.code; only these are retryable.
+ return res.code === ErrorCodes.BackgroundOperationInProgressForNamespace ||
+ res.code === ErrorCodes.BackgroundOperationInProgressForDatabase;
+ }
+ // Runs the command via func, re-running it while a background operation is in progress; returns the last response.
+ function runCommandWithRetries(conn, dbName, commandName, commandObj, func, makeFuncArgs) {
+ if (typeof commandObj !== "object" || commandObj === null) {
+ return func.apply(conn, makeFuncArgs(commandObj));
+ }
+
+ let res;
+ let attempt = 0;
+
+ assert.soon(
+ () => {
+ attempt++;
+
+ res = func.apply(conn, makeFuncArgs(commandObj));
+ if (res.ok === 1) {
+ return kNoRetry;
+ }
+
+ // Commands outside the whitelist should never fail with this error code; return as is.
+ if (!commandWhitelist.has(commandName)) {
+ return kNoRetry;
+ }
+
+ let message = "Retrying the " + commandName +
+ " command because a background operation is in progress (attempt " + attempt +
+ ")";
+
+ // This handles the retry case when run against a standalone, replica set, or mongos
+ // where the top-level response itself carries the background-operation error code.
+ if (hasBackgroundOpInProgress(res)) {
+ print(message);
+ return kRetry;
+ }
+
+ // The following logic only applies to sharded clusters.
+ if (!conn.isMongos() || !res.raw) {
+ // We don't attempt to retry commands for which mongos doesn't expose the raw
+ // responses from the shards.
+ return kNoRetry;
+ }
+
+ // In certain cases, retrying a command on a sharded cluster may result in a
+ // scenario where one shard has executed the command and another still has a
+ // background operation in progress. Retry, ignoring whitelisted errors on a
+ // command-by-command basis.
+ let shardsWithBackgroundOps = [];
+
+ // Inspect each shard's raw response: successful shards are skipped, shards with a
+ // background operation in progress are collected, anything else must be whitelisted.
+ for (let shard in res.raw) {
+ let shardRes = res.raw[shard];
+ if (shardRes.ok) {
+ continue;
+ }
+
+ if (hasBackgroundOpInProgress(shardRes)) {
+ shardsWithBackgroundOps.push(shard);
+ continue;
+ }
+
+ // If any of the shards return an error that is not whitelisted or even if a
+ // whitelisted error is received on the first attempt, do not retry.
+ let acceptableErrors = acceptableCommandErrors[commandName] || [];
+ if (!acceptableErrors.includes(shardRes.code)) {
+ return kNoRetry;
+ }
+ // Whitelisted errors can only occur from running a command more than once, so
+ // it would be unexpected to receive an error on the first attempt.
+ if (attempt === 1) {
+ return kNoRetry;
+ }
+ }
+
+ // At this point every failing shard returned a whitelisted error. If none of them still
+ // has a background operation, overwrite res.ok to fake a successful response.
+ if (shardsWithBackgroundOps.length === 0) {
+ print("done retrying " + commandName +
+ " command because all shards have responded with acceptable errors");
+ res.ok = 1;
+ return kNoRetry;
+ }
+
+ print(message + " on shards: " + tojson(shardsWithBackgroundOps));
+ return kRetry;
+ },
+ () => "Timed out while retrying command '" + tojson(commandObj) + "', response: " +
+ tojson(res),
+ kTimeout,
+ kInterval);
+ return res;
+ }
+
+ OverrideHelpers.overrideRunCommand(runCommandWithRetries);
+})();