diff options
4 files changed, 457 insertions, 332 deletions
diff --git a/jstests/libs/override_methods/continuous_stepdown.js b/jstests/libs/override_methods/continuous_stepdown.js new file mode 100644 index 00000000000..fcaa2b5f59d --- /dev/null +++ b/jstests/libs/override_methods/continuous_stepdown.js @@ -0,0 +1,341 @@ +/** + * Loading this file exposes ContinuousStepdown, which contains the "configure" function that + * extends the prototype for ReplSetTest to spawn a thread that continuously steps down its primary + * node. + * + * ContinuousStepdown#configure takes a configuration object with the following options: + * + * configStepdown: boolean (default true) + * True if a stepdown thread should be started for the CSRS. + * + * electionTimeoutMS: number (default 5 seconds) + * The election timeout for the replica set. + * + * shardStepdown: boolean (default true) + * True if a stepdown thread should be started for each shard replica set. + * + * stepdownDurationSecs: number (default 10 seconds) + * Number of seconds after stepping down as primary for which the node is not re-electable. + * + * stepdownIntervalMS: number (default 8 seconds) + * Number of milliseconds to wait after issuing a step down command and discovering the new + * primary before attempting the next step down. + */ + +let ContinuousStepdown; + +(function() { + "use strict"; + + load("jstests/libs/parallelTester.js"); // ScopedThread and CountDownLatch + load("jstests/libs/retry_on_network_error.js"); // retryOnNetworkError + load("jstests/replsets/rslib.js"); // reconfig + + /** + * Helper class to manage the ScopedThread instance that will continuously step down the primary + * node. + */ + const StepdownThread = function() { + let _counter = null; + let _thread = null; + + /** + * This function is intended to be called in a separate thread and it continuously + * steps down the current primary until stopCounter reaches zero. + * + * @param {CountDownLatch} stopCounter Object, which can be used to stop the thread. 
+ * + * @param {string} seedNode The connection string of a node from which to discover + * the primary of the replica set. + * + * @param {Object} options Configuration object with the following fields: + * stepdownDurationSecs {integer}: The number of seconds after stepping down the + * primary for which the node is not re-electable. + * stepdownIntervalMS {integer}: The number of milliseconds to wait after + * issuing a step down command. + * + * @return Object with the following fields: + * ok {integer}: 0 if it failed, 1 if it succeeded. + * error {string}: Only present if ok == 0. Contains the cause for the error. + * stack {string}: Only present if ok == 0. Contains the stack at the time of + * the error. + */ + function _continuousPrimaryStepdownFn(stopCounter, seedNode, options) { + "use strict"; + + load("jstests/libs/retry_on_network_error.js"); + + print("*** Continuous stepdown thread running with seed node " + seedNode); + + try { + // The config primary may unexpectedly step down during startup if under heavy + // load and too slowly processing heartbeats. When it steps down, it closes all of + // its connections. This can happen during the call to new ReplSetTest, so in order + // to account for this and make the tests stable, retry discovery of the replica + // set's configuration once (SERVER-22794). + const replSet = retryOnNetworkError(function() { + return new ReplSetTest(seedNode); + }); + + let primary = replSet.getPrimary(); + + while (stopCounter.getCount() > 0) { + print("*** Stepping down " + primary); + + assert.throws(function() { + let result = primary.adminCommand( + {replSetStepDown: options.stepdownDurationSecs, force: true}); + print("replSetStepDown command did not throw and returned: " + + tojson(result)); + + // The call to replSetStepDown should never succeed. + assert.commandWorked(result); + }); + + // Wait for primary to get elected and allow the test to make some progress + // before attempting another stepdown. 
+ if (stopCounter.getCount() > 0) { + primary = replSet.getPrimary(); + } + + if (stopCounter.getCount() > 0) { + sleep(options.stepdownIntervalMS); + } + } + + print("*** Continuous stepdown thread completed successfully"); + return {ok: 1}; + } catch (e) { + print("*** Continuous stepdown thread caught exception: " + tojson(e)); + return {ok: 0, error: e.toString(), stack: e.stack}; + } + } + + /** + * Returns true if the stepdown thread has been created and started. + */ + this.hasStarted = function() { + return !!_thread; + }; + + /** + * Spawns a ScopedThread using the given seedNode to discover the replica set. + */ + this.start = function(seedNode, options) { + if (_thread) { + throw new Error("Continuous stepdown thread is already active"); + } + + _counter = new CountDownLatch(1); + _thread = new ScopedThread(_continuousPrimaryStepdownFn, _counter, seedNode, options); + _thread.start(); + }; + + /** + * Sets the stepdown thread's counter to 0, and waits for it to finish. Returns the stepdown + * thread's return value. + */ + this.stop = function() { + if (!_thread) { + throw new Error("Continuous stepdown thread is not active"); + } + + _counter.countDown(); + _counter = null; + + _thread.join(); + + const retVal = _thread.returnData(); + _thread = null; + + return assert.commandWorked(retVal); + }; + }; + + ContinuousStepdown = {}; + + /** + * Defines two methods on ReplSetTest, startContinuousFailover and stopContinuousFailover, that + * allow starting and stopping a separate thread that will periodically step down the replica + * set's primary node. Also defines these methods on ShardingTest, which allow starting and + * stopping a stepdown thread for the test's config server replica set and each of the shard + * replica sets, as specified by the given stepdownOptions object. 
+ */ + ContinuousStepdown.configure = function(stepdownOptions) { + const defaultOptions = { + configStepdown: true, + electionTimeoutMS: 5 * 1000, + shardStepdown: true, + stepdownDurationSecs: 10, + stepdownIntervalMS: 8 * 1000 + }; + stepdownOptions = Object.merge(defaultOptions, stepdownOptions); + + // Preserve the original ReplSetTest and ShardingTest constructors, because they are being + // overridden. + const originalReplSetTest = ReplSetTest; + const originalShardingTest = ShardingTest; + + const verbositySetting = tojson({ + verbosity: 0, + command: {verbosity: 1}, + network: {verbosity: 1, asio: {verbosity: 2}}, + tracking: {verbosity: 0} + }); + + /** + * Overrides the ReplSetTest constructor to start the continuous primary stepdown thread. + */ + ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() { + // Construct the original object + originalReplSetTest.apply(this, arguments); + + // Preserve the original versions of functions that are overridden below. + const _originalStartSetFn = this.startSet; + const _originalStopSetFn = this.stopSet; + const _originalAwaitLastOpCommitted = this.awaitLastOpCommitted; + + /** + * Overrides startSet call to increase logging verbosity. + */ + this.startSet = function(options) { + options = options || {}; + options.setParameter = options.setParameter || {}; + options.setParameter.logComponentVerbosity = verbositySetting; + return _originalStartSetFn.call(this, options); + }; + + /** + * Overrides stopSet to terminate the failover thread. + */ + this.stopSet = function() { + this.stopContinuousFailover(); + _originalStopSetFn.apply(this, arguments); + }; + + /** + * Overrides awaitLastOpCommitted to retry on network errors. + */ + this.awaitLastOpCommitted = function() { + return retryOnNetworkError(_originalAwaitLastOpCommitted.bind(this)); + }; + + // Handle for the continuous stepdown thread. 
+ const _stepdownThread = new StepdownThread(); + + /** + * Reconfigures the replica set to change its election timeout to + * stepdownOptions.electionTimeoutMS so a new primary can get elected before the + * stepdownOptions.stepdownIntervalMS period would cause one to step down again, then + * starts the primary stepdown thread. + */ + this.startContinuousFailover = function() { + if (_stepdownThread.hasStarted()) { + throw new Error("Continuous failover thread is already active"); + } + + const rsconfig = this.getReplSetConfigFromNode(); + if (rsconfig.settings.electionTimeoutMillis !== stepdownOptions.electionTimeoutMS) { + rsconfig.settings.electionTimeoutMillis = stepdownOptions.electionTimeoutMS; + rsconfig.version += 1; + reconfig(this, rsconfig); + assert.eq(this.getReplSetConfigFromNode().settings.electionTimeoutMillis, + stepdownOptions.electionTimeoutMS, + "Failed to set the electionTimeoutMillis to " + + stepdownOptions.electionTimeoutMS + " milliseconds."); + } + + _stepdownThread.start(this.nodes[0].host, stepdownOptions); + }; + + /** + * Blocking method, which tells the thread running continuousPrimaryStepdownFn to stop + * and waits for it to terminate. + */ + this.stopContinuousFailover = function() { + if (!_stepdownThread.hasStarted()) { + return; + } + + return _stepdownThread.stop(); + }; + }; + + Object.extend(ReplSetTest, originalReplSetTest); + + /** + * Overrides the ShardingTest constructor to start the continuous primary stepdown thread. 
+ */ + ShardingTest = function ShardingTestWithContinuousPrimaryStepdown(params) { + params.other = params.other || {}; + + if (stepdownOptions.configStepdown) { + params.other.configOptions = params.other.configOptions || {}; + params.other.configOptions.setParameter = + params.other.configOptions.setParameter || {}; + params.other.configOptions.setParameter.logComponentVerbosity = verbositySetting; + } + + if (stepdownOptions.shardStepdown) { + params.other.shardOptions = params.other.shardOptions || {}; + params.other.shardOptions.setParameter = + params.other.shardOptions.setParameter || {}; + params.other.shardOptions.setParameter.logComponentVerbosity = verbositySetting; + } + + // Construct the original object. + originalShardingTest.apply(this, arguments); + + // Validate the stepdown options. + if (stepdownOptions.configStepdown && !this.configRS) { + throw new Error( + "Continuous config server primary step down only available with CSRS"); + } + + if (stepdownOptions.shardStepdown && this._rs.some(rst => !rst)) { + throw new Error( + "Continuous shard primary step down only available with replica set shards"); + } + + /** + * Calls startContinuousFailover on the config server and/or each shard replica set as + * specified by the stepdownOptions object. + */ + this.startContinuousFailover = function() { + if (stepdownOptions.configStepdown) { + this.configRS.startContinuousFailover(); + } + + if (stepdownOptions.shardStepdown) { + this._rs.forEach(function(rst) { + rst.test.startContinuousFailover(); + }); + } + }; + + /** + * Calls stopContinuousFailover on the config server and/or each shard replica set as + * specified by the stepdownOptions object. 
+ */ + this.stopContinuousFailover = function() { + if (stepdownOptions.configStepdown) { + this.configRS.stopContinuousFailover(); + } + + if (stepdownOptions.shardStepdown) { + this._rs.forEach(function(rst) { + rst.test.stopContinuousFailover(); + }); + } + }; + + /** + * This method is disabled because it runs aggregation, which doesn't handle config + * server stepdown correctly. + */ + this.printShardingStatus = function() {}; + }; + + Object.extend(ShardingTest, originalShardingTest); + }; +})(); diff --git a/jstests/libs/override_methods/mongos_manual_intervention_actions.js b/jstests/libs/override_methods/mongos_manual_intervention_actions.js new file mode 100644 index 00000000000..6715ce081f2 --- /dev/null +++ b/jstests/libs/override_methods/mongos_manual_intervention_actions.js @@ -0,0 +1,79 @@ +/** + * If the config primary steps down during a metadata command, mongos will internally retry the + * command. On the retry, the command may fail with the error "ManualInterventionRequired" if + * the earlier try left the config database in an inconsistent state. + * + * This override allows for automating the manual cleanup by catching the + * "ManualInterventionRequired" error, performing the cleanup, and transparently retrying the + * command. + */ +(function() { + let manualInterventionActions = { + removePartiallyWrittenChunks: function(mongosConn, ns, cmdObj, numAttempts) { + print("command " + tojson(cmdObj) + " failed after " + numAttempts + + " attempts due to seeing partially written chunks for collection " + ns + + ", probably due to a previous failed shardCollection attempt. 
Manually" + + " deleting chunks for " + ns + " from config.chunks and retrying the command."); + assert.writeOK(mongosConn.getDB("config").chunks.remove( + {ns: ns}, {writeConcern: {w: "majority"}})); + } + }; + + const mongoRunCommandOriginal = Mongo.prototype.runCommand; + + Mongo.prototype.runCommand = function runCommand(dbName, cmdObj, options) { + const cmdName = Object.keys(cmdObj)[0]; + const commandsToRetry = + new Set(["mapReduce", "mapreduce", "shardCollection", "shardcollection"]); + + if (!commandsToRetry.has(cmdName)) { + return mongoRunCommandOriginal.apply(this, arguments); + } + + const maxAttempts = 10; + let numAttempts = 0; + let res; + + while (numAttempts < maxAttempts) { + res = mongoRunCommandOriginal.apply(this, arguments); + ++numAttempts; + + if (res.ok === 1 || res.code !== ErrorCodes.ManualInterventionRequired || + numAttempts === maxAttempts) { + break; + } + + if (cmdName === "shardCollection" || cmdName === "shardcollection") { + const ns = cmdObj[cmdName]; + manualInterventionActions.removePartiallyWrittenChunks( + this, ns, cmdObj, numAttempts); + } else if (cmdName === "mapReduce" || cmdName === "mapreduce") { + const out = cmdObj.out; + + // The output collection can be specified as a string argument to the mapReduce + // command's 'out' option, or nested under 'out.replace', 'out.merge', or + // 'out.reduce'. + let outCollName; + if (typeof out === "string") { + outCollName = out; + } else if (typeof out === "object") { + outCollName = out.replace || out.merge || out.reduce; + } else { + print("Could not parse the output collection's name from 'out' option in " + + tojson(cmdObj) + "; not retrying on ManualInterventionRequired error " + + tojson(res)); + break; + } + + // The output collection's database can optionally be specified under 'out.db', + // else it defaults to the input collection's database. + const outDbName = out.db || dbName; + + const ns = outDbName + "." 
+ outCollName; + manualInterventionActions.removePartiallyWrittenChunks( + this, ns, cmdObj, numAttempts); + } + } + return res; + }; +})(); diff --git a/jstests/libs/override_methods/sharding_continuous_config_stepdown.js b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js index dd5c9b5e7ef..b96c9726708 100644 --- a/jstests/libs/override_methods/sharding_continuous_config_stepdown.js +++ b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js @@ -1,340 +1,22 @@ -/** - * Loading this file extends the prototype for ReplSetTest to spawn a thread, which continuously - * step down the primary. - */ - -// Contains the declaration for ScopedThread and CountDownLatch -load('jstests/libs/parallelTester.js'); -load("jstests/replsets/rslib.js"); - -/** - * Executes the specified function and if it fails due to exception, which is related to network - * error retries the call once. If the second attempt also fails, simply throws the last - * exception. - * - * Returns the return value of the input call. - */ -function retryOnNetworkError(func) { - var networkErrorRetriesLeft = 1; - - while (true) { - try { - return func(); - } catch (e) { - if (e.toString().indexOf("network error") > -1 && networkErrorRetriesLeft > 0) { - print("Network error occurred and the call will be retried: " + - tojson({error: e.toString(), stack: e.stack})); - networkErrorRetriesLeft--; - } else { - throw e; - } - } - } -} - (function() { - 'use strict'; - - // Preserve the original ReplSetTest and ShardingTest constructors, because we are overriding - // them - var originalReplSetTest = ReplSetTest; - var originalShardingTest = ShardingTest; - - const stepdownDelaySeconds = 10; - const verbositySetting = - "{ verbosity: 0, command: {verbosity: 1}, network: {verbosity: 1, asio: {verbosity: 2}}, \ -tracking: {verbosity: 0} }"; - - /** - * Overrides the ReplSetTest constructor to start the continuous config server stepdown - * thread. 
- */ - ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() { - // Construct the original object - originalReplSetTest.apply(this, arguments); - - /** - * This function is intended to be called in a separate thread and it continuously steps - * down the current primary for a number of attempts. - * - * @param {string} seedNode The connection string of a node from which to discover the - * primary of the replica set. - * @param {CountDownLatch} stopCounter Object, which can be used to stop the thread. - * - * @param {integer} stepdownDelaySeconds The number of seconds after stepping down the - * primary for which the node is not re-electable. - * - * @return Object with the following fields: - * ok {integer}: 0 if it failed, 1 if it succeeded. - * error {string}: Only present if ok == 0. Contains the cause for the error. - * stack {string}: Only present if ok == 0. Contains the stack at the time of the - * error. - */ - function _continuousPrimaryStepdownFn(seedNode, stopCounter, stepdownDelaySeconds) { - 'use strict'; - - load('jstests/libs/override_methods/sharding_continuous_config_stepdown.js'); - - print('*** Continuous stepdown thread running with seed node ' + seedNode); - - try { - // The config primary may unexpectedly step down during startup if under heavy - // load and too slowly processing heartbeats. When it steps down, it closes all of - // its connections. This can happen during the call to new ReplSetTest, so in order - // to account for this and make the tests stable, retry discovery of the replica - // set's configuration once (SERVER-22794). 
- var replSet = retryOnNetworkError(function() { - return new ReplSetTest(seedNode); - }); - - var primary = replSet.getPrimary(); - - while (stopCounter.getCount() > 0) { - print('*** Stepping down ' + primary); - - assert.throws(function() { - var result = primary.adminCommand( - {replSetStepDown: stepdownDelaySeconds, force: true}); - print('replSetStepDown command did not throw and returned: ' + - tojson(result)); - - // The call to replSetStepDown should never succeed - assert.commandWorked(result); - }); - - // Wait for primary to get elected and allow the test to make some progress - // before attempting another stepdown. - if (stopCounter.getCount() > 0) - primary = replSet.getPrimary(); - - if (stopCounter.getCount() > 0) - sleep(8000); - } - - print('*** Continuous stepdown thread completed successfully'); - return {ok: 1}; - } catch (e) { - print('*** Continuous stepdown thread caught exception: ' + tojson(e)); - return {ok: 0, error: e.toString(), stack: e.stack}; - } - } - - // Preserve the original stopSet method, because we are overriding it to stop the - // continuous - // stepdown thread. - var _originalStartSetFn = this.startSet; - var _originalStopSetFn = this.stopSet; - - // We override these methods to retry on network errors - var _originalAwaitLastOpCommitted = this.awaitLastOpCommitted; - - // These two manage the scoped failover thread - var _scopedPrimaryStepdownThread; - var _scopedPrimaryStepdownThreadStopCounter; - - /** - * Overrides the startSet call so we can increase the logging verbosity - */ - this.startSet = function(options) { - if (!options) { - options = {}; - } - if ('setParameter' in options) { - options.setParameter.logComponentVerbosity = verbositySetting; - } else { - options.setParameter = {logComponentVerbosity: verbositySetting}; - } - return _originalStartSetFn.call(this, options); - }; - - /** - * Overrides the stopSet call so it terminates the failover thread. 
- */ - this.stopSet = function() { - this.stopContinuousFailover(); - _originalStopSetFn.apply(this, arguments); - }; - - /** - * Overrides the awaitLastOpCommitted to retry on network errors. - */ - this.awaitLastOpCommitted = function() { - return retryOnNetworkError(_originalAwaitLastOpCommitted.bind(this)); - }; - - /** - * Spawns a thread to invoke continuousPrimaryStepdownFn. See its comments for more - * information. - */ - this.startContinuousFailover = function() { - if (_scopedPrimaryStepdownThread) { - throw new Error('Continuous failover thread is already active'); - } + "use strict"; - // This suite will step down the config primary every 10 seconds, and - // electionTimeoutMillis defaults to 10 seconds. Set electionTimeoutMillis to 5 seconds, - // so config operations have some time to run before being interrupted by stepdown. - // - // Note: this is done after ShardingTest runs because ShardingTest operations are not - // resilient to stepdowns, which a shorter election timeout can cause to happen on - // slow machines. 
- var rsconfig = this.getReplSetConfigFromNode(); - rsconfig.settings.electionTimeoutMillis = stepdownDelaySeconds * 1000 / 2; - rsconfig.version++; - reconfig(this, rsconfig); - assert.eq(this.getReplSetConfigFromNode().settings.electionTimeoutMillis, - 5000, - "Failed to lower the electionTimeoutMillis to 5000 milliseconds."); + load("jstests/libs/override_methods/continuous_stepdown.js"); + load("jstests/libs/override_methods/mongos_manual_intervention_actions.js"); - _scopedPrimaryStepdownThreadStopCounter = new CountDownLatch(1); - _scopedPrimaryStepdownThread = new ScopedThread(_continuousPrimaryStepdownFn, - this.nodes[0].host, - _scopedPrimaryStepdownThreadStopCounter, - stepdownDelaySeconds); - _scopedPrimaryStepdownThread.start(); - }; + ContinuousStepdown.configure({ + configStepdown: true, + electionTimeoutMS: 5 * 1000, + shardStepdown: false, + stepdownDurationSecs: 10, + stepdownIntervalMS: 8 * 1000, + }); - /** - * Blocking method, which tells the thread running continuousPrimaryStepdownFn to stop - * and - * waits - * for it to terminate. - */ - this.stopContinuousFailover = function() { - if (!_scopedPrimaryStepdownThread) { - return; - } - - _scopedPrimaryStepdownThreadStopCounter.countDown(); - _scopedPrimaryStepdownThreadStopCounter = null; - - _scopedPrimaryStepdownThread.join(); - - var retVal = _scopedPrimaryStepdownThread.returnData(); - _scopedPrimaryStepdownThread = null; - - return assert.commandWorked(retVal); - }; - }; - - Object.extend(ReplSetTest, originalReplSetTest); - - /** - * Overrides the ShardingTest constructor to start the continuous config server stepdown thread. 
- */ - ShardingTest = function ShardingTestWithContinuousConfigPrimaryStepdown() { - if (!arguments[0].other) { - arguments[0].other = {}; - } - if ('configOptions' in arguments[0].other && - 'setParameter' in arguments[0].other.configOptions) { - arguments[0].other.configOptions.setParameter.logComponentVerbosity = verbositySetting; - } - - if ('setParameter' in arguments[0].other) { - arguments[0].other.setParameter.logComponentVerbosity = verbositySetting; - } else { - arguments[0].other.setParameter = {logComponentVerbosity: verbositySetting}; - } - - // Construct the original object + const originalShardingTest = ShardingTest; + ShardingTest = function() { originalShardingTest.apply(this, arguments); - if (!this.configRS) { - throw new Error('Continuous config server step down only available with CSRS'); - } - - /** - * This method is disabled because it runs aggregation, which doesn't handle config server - * stepdown correctly. - */ - this.printShardingStatus = function() { - - }; - - // Start the continuous config server stepdown thread - this.configRS.startContinuousFailover(); + // Automatically start the continuous stepdown thread on the config server replica set. + this.startContinuousFailover(); }; - - Object.extend(ShardingTest, originalShardingTest); - - /** - * If the config primary steps down during a metadata command, mongos will internally retry the - * command. On the retry, the command may fail with the error "ManualInterventionRequired" if - * the earlier try left the config database in an inconsistent state. - * - * This override allows for automating the manual cleanup by catching the - * "ManualInterventionRequired" error, performing the cleanup, and transparently retrying the - * command. 
- */ - (function(original) { - let manualInterventionActions = { - removePartiallyWrittenChunks: function(mongosConn, ns, cmdObj, numAttempts) { - print("command " + tojson(cmdObj) + " failed after " + numAttempts + - " attempts due to seeing partially written chunks for collection " + ns + - ", probably due to a previous failed shardCollection attempt. Manually" + - " deleting chunks for " + ns + - " from config.chunks and retrying the command."); - assert.writeOK(mongosConn.getDB("config").chunks.remove( - {ns: ns}, {writeConcern: {w: "majority"}})); - } - }; - - Mongo.prototype.runCommand = function runCommand(dbName, cmdObj, options) { - const cmdName = Object.keys(cmdObj)[0]; - const commandsToRetry = - new Set(["mapReduce", "mapreduce", "shardCollection", "shardcollection"]); - - if (!commandsToRetry.has(cmdName)) { - return original.apply(this, arguments); - } - - const maxAttempts = 10; - let numAttempts = 0; - let res; - - while (numAttempts < maxAttempts) { - res = original.apply(this, arguments); - ++numAttempts; - - if (res.ok === 1 || res.code !== ErrorCodes.ManualInterventionRequired || - numAttempts === maxAttempts) { - break; - } - - if (cmdName === "shardCollection" || cmdName === "shardcollection") { - const ns = cmdObj[cmdName]; - manualInterventionActions.removePartiallyWrittenChunks( - this, ns, cmdObj, numAttempts); - } else if (cmdName === "mapReduce" || cmdName === "mapreduce") { - const out = cmdObj.out; - - // The output collection can be specified as a string argument to the mapReduce - // command's 'out' option, or nested under 'out.replace', 'out.merge', or - // 'out.reduce'. 
- let outCollName; - if (typeof out === "string") { - outCollName = out; - } else if (typeof out === "object") { - outCollName = out.replace || out.merge || out.reduce; - } else { - print("Could not parse the output collection's name from 'out' option in " + - tojson(cmdObj) + - "; not retrying on ManualInterventionRequired error " + tojson(res)); - break; - } - - // The output collection's database can optionally be specified under 'out.db', - // else it defaults to the input collection's database. - const outDbName = out.db || dbName; - - const ns = outDbName + "." + outCollName; - manualInterventionActions.removePartiallyWrittenChunks( - this, ns, cmdObj, numAttempts); - } - } - return res; - }; - })(Mongo.prototype.runCommand); - })(); diff --git a/jstests/libs/retry_on_network_error.js b/jstests/libs/retry_on_network_error.js new file mode 100644 index 00000000000..f47921f66cd --- /dev/null +++ b/jstests/libs/retry_on_network_error.js @@ -0,0 +1,23 @@ +/** + * Executes the specified function and, if it throws an exception related to a network error, + * retries the call up to 'numRetries' times (once by default). If the final attempt also fails, + * throws the last exception. + * + * Returns the return value of the input call. + */ +function retryOnNetworkError(func, numRetries = 1) { + while (true) { + try { + return func(); + } catch (e) { + if ((isNetworkError(e) || e.toString().indexOf("network error") > -1) && + numRetries > 0) { + print("Network error occurred and the call will be retried: " + + tojson({error: e.toString(), stack: e.stack})); + numRetries--; + } else { + throw e; + } + } + } +} |