summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jack Mulrow <jack.mulrow@mongodb.com> 2017-08-21 15:36:27 -0400
committer Jack Mulrow <jack.mulrow@mongodb.com> 2017-08-30 13:20:09 -0400
commit 4582f2e1c51f5277488c15732374560ee8ade96f (patch)
tree f0ccf4ee679c2d2cd82e220a93c4dbe9d0bf6be5
parent bfbeb0cbabd9ae85f34df430474c9e524b274862 (diff)
download mongo-4582f2e1c51f5277488c15732374560ee8ade96f.tar.gz
SERVER-30675 SERVER-30678 Add configuration options to JavaScript stepdown thread
-rw-r--r-- jstests/libs/override_methods/continuous_stepdown.js 341
-rw-r--r-- jstests/libs/override_methods/mongos_manual_intervention_actions.js 79
-rw-r--r-- jstests/libs/override_methods/sharding_continuous_config_stepdown.js 346
-rw-r--r-- jstests/libs/retry_on_network_error.js 23
4 files changed, 457 insertions, 332 deletions
diff --git a/jstests/libs/override_methods/continuous_stepdown.js b/jstests/libs/override_methods/continuous_stepdown.js
new file mode 100644
index 00000000000..fcaa2b5f59d
--- /dev/null
+++ b/jstests/libs/override_methods/continuous_stepdown.js
@@ -0,0 +1,341 @@
+/**
+ * Loading this file exposes ContinuousStepdown, which contains the "configure" function that
+ * extends the prototype for ReplSetTest to spawn a thread that continuously steps down its primary
+ * node.
+ *
+ * ContinuousStepdown#configure takes a configuration object with the following options:
+ *
+ * configStepdown: boolean (default true)
+ * True if a stepdown thread should be started for the CSRS.
+ *
+ * electionTimeoutMS: number (default 5 seconds)
+ * The election timeout for the replica set.
+ *
+ * shardStepdown: boolean (default true)
+ * True if a stepdown thread should be started for each shard replica set.
+ *
+ * stepdownDurationSecs: number (default 10 seconds)
+ * Number of seconds after stepping down as primary for which the node is not re-electable.
+ *
+ * stepdownIntervalMS: number (default 8 seconds)
+ * Number of milliseconds to wait after issuing a step down command and discovering the new
+ * primary, before issuing the next step down command.
+ */
+
+let ContinuousStepdown;
+
+(function() {
+ "use strict";
+
+ load("jstests/libs/parallelTester.js"); // ScopedThread and CountDownLatch
+ load("jstests/libs/retry_on_network_error.js"); // retryOnNetworkError
+ load("jstests/replsets/rslib.js"); // reconfig
+
+ /**
+ * Helper class to manage the ScopedThread instance that will continuously step down the primary
+ * node.
+ */
+ const StepdownThread = function() {
+ let _counter = null;
+ let _thread = null;
+
+ /**
+ * This function is intended to be called in a separate thread and it continuously
+ * steps down the current primary until stopCounter is counted down to zero.
+ *
+ * @param {CountDownLatch} stopCounter Object, which can be used to stop the thread.
+ *
+ * @param {string} seedNode The connection string of a node from which to discover
+ * the primary of the replica set.
+ *
+ * @param {Object} options Configuration object with the following fields:
+ * stepdownDurationSecs {integer}: The number of seconds after stepping down the
+ * primary for which the node is not re-electable.
+ * stepdownIntervalMS {integer}: The number of milliseconds to wait after
+ * issuing a step down command.
+ *
+ * @return Object with the following fields:
+ * ok {integer}: 0 if it failed, 1 if it succeeded.
+ * error {string}: Only present if ok == 0. Contains the cause for the error.
+ * stack {string}: Only present if ok == 0. Contains the stack at the time of
+ * the error.
+ */
+ function _continuousPrimaryStepdownFn(stopCounter, seedNode, options) {
+ "use strict";
+
+ load("jstests/libs/retry_on_network_error.js");
+
+ print("*** Continuous stepdown thread running with seed node " + seedNode);
+
+ try {
+ // The config primary may unexpectedly step down during startup if under heavy
+ // load and too slowly processing heartbeats. When it steps down, it closes all of
+ // its connections. This can happen during the call to new ReplSetTest, so in order
+ // to account for this and make the tests stable, retry discovery of the replica
+ // set's configuration once (SERVER-22794).
+ const replSet = retryOnNetworkError(function() {
+ return new ReplSetTest(seedNode);
+ });
+
+ let primary = replSet.getPrimary();
+
+ while (stopCounter.getCount() > 0) {
+ print("*** Stepping down " + primary);
+
+ assert.throws(function() {
+ let result = primary.adminCommand(
+ {replSetStepDown: options.stepdownDurationSecs, force: true});
+ print("replSetStepDown command did not throw and returned: " +
+ tojson(result));
+
+ // The call to replSetStepDown should never succeed.
+ assert.commandWorked(result);
+ });
+
+ // Wait for primary to get elected and allow the test to make some progress
+ // before attempting another stepdown.
+ if (stopCounter.getCount() > 0) {
+ primary = replSet.getPrimary();
+ }
+
+ if (stopCounter.getCount() > 0) {
+ sleep(options.stepdownIntervalMS);
+ }
+ }
+
+ print("*** Continuous stepdown thread completed successfully");
+ return {ok: 1};
+ } catch (e) {
+ print("*** Continuous stepdown thread caught exception: " + tojson(e));
+ return {ok: 0, error: e.toString(), stack: e.stack};
+ }
+ }
+
+ /**
+ * Returns true if the stepdown thread has been created and started.
+ */
+ this.hasStarted = function() {
+ return !!_thread;
+ };
+
+ /**
+ * Spawns a ScopedThread using the given seedNode to discover the replica set.
+ */
+ this.start = function(seedNode, options) {
+ if (_thread) {
+ throw new Error("Continuous stepdown thread is already active");
+ }
+
+ _counter = new CountDownLatch(1);
+ _thread = new ScopedThread(_continuousPrimaryStepdownFn, _counter, seedNode, options);
+ _thread.start();
+ };
+
+ /**
+ * Sets the stepdown thread's counter to 0, and waits for it to finish. Returns the stepdown
+ * thread's return value.
+ */
+ this.stop = function() {
+ if (!_thread) {
+ throw new Error("Continuous stepdown thread is not active");
+ }
+
+ _counter.countDown();
+ _counter = null;
+
+ _thread.join();
+
+ const retVal = _thread.returnData();
+ _thread = null;
+
+ return assert.commandWorked(retVal);
+ };
+ };
+
+ ContinuousStepdown = {};
+
+ /**
+ * Defines two methods on ReplSetTest, startContinuousFailover and stopContinuousFailover, that
+ * allow starting and stopping a separate thread that will periodically step down the replica
+ * set's primary node. Also defines these methods on ShardingTest, which allow starting and
+ * stopping a stepdown thread for the test's config server replica set and each of the shard
+ * replica sets, as specified by the given stepdownOptions object.
+ */
+ ContinuousStepdown.configure = function(stepdownOptions) {
+ const defaultOptions = {
+ configStepdown: true,
+ electionTimeoutMS: 5 * 1000,
+ shardStepdown: true,
+ stepdownDurationSecs: 10,
+ stepdownIntervalMS: 8 * 1000
+ };
+ stepdownOptions = Object.merge(defaultOptions, stepdownOptions);
+
+ // Preserve the original ReplSetTest and ShardingTest constructors, because they are being
+ // overridden.
+ const originalReplSetTest = ReplSetTest;
+ const originalShardingTest = ShardingTest;
+
+ const verbositySetting = tojson({
+ verbosity: 0,
+ command: {verbosity: 1},
+ network: {verbosity: 1, asio: {verbosity: 2}},
+ tracking: {verbosity: 0}
+ });
+
+ /**
+ * Overrides the ReplSetTest constructor to add methods for controlling a primary stepdown thread.
+ */
+ ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() {
+ // Construct the original object
+ originalReplSetTest.apply(this, arguments);
+
+ // Preserve the original versions of functions that are overridden below.
+ const _originalStartSetFn = this.startSet;
+ const _originalStopSetFn = this.stopSet;
+ const _originalAwaitLastOpCommitted = this.awaitLastOpCommitted;
+
+ /**
+ * Overrides startSet call to increase logging verbosity.
+ */
+ this.startSet = function(options) {
+ options = options || {};
+ options.setParameter = options.setParameter || {};
+ options.setParameter.logComponentVerbosity = verbositySetting;
+ return _originalStartSetFn.call(this, options);
+ };
+
+ /**
+ * Overrides stopSet to terminate the failover thread.
+ */
+ this.stopSet = function() {
+ this.stopContinuousFailover();
+ _originalStopSetFn.apply(this, arguments);
+ };
+
+ /**
+ * Overrides awaitLastOpCommitted to retry on network errors.
+ */
+ this.awaitLastOpCommitted = function() {
+ return retryOnNetworkError(_originalAwaitLastOpCommitted.bind(this));
+ };
+
+ // Handle for the continuous stepdown thread.
+ const _stepdownThread = new StepdownThread();
+
+ /**
+ * Reconfigures the replica set to change its election timeout to
+ * stepdownOptions.electionTimeoutMS so a new primary can get elected before the
+ * stepdownOptions.stepdownIntervalMS period would cause one to step down again, then
+ * starts the primary stepdown thread.
+ */
+ this.startContinuousFailover = function() {
+ if (_stepdownThread.hasStarted()) {
+ throw new Error("Continuous failover thread is already active");
+ }
+
+ const rsconfig = this.getReplSetConfigFromNode();
+ if (rsconfig.settings.electionTimeoutMillis !== stepdownOptions.electionTimeoutMS) {
+ rsconfig.settings.electionTimeoutMillis = stepdownOptions.electionTimeoutMS;
+ rsconfig.version += 1;
+ reconfig(this, rsconfig);
+ assert.eq(this.getReplSetConfigFromNode().settings.electionTimeoutMillis,
+ stepdownOptions.electionTimeoutMS,
+ "Failed to set the electionTimeoutMillis to " +
+ stepdownOptions.electionTimeoutMS + " milliseconds.");
+ }
+
+ _stepdownThread.start(this.nodes[0].host, stepdownOptions);
+ };
+
+ /**
+ * Blocking method, which tells the thread running _continuousPrimaryStepdownFn to stop
+ * and waits for it to terminate.
+ */
+ this.stopContinuousFailover = function() {
+ if (!_stepdownThread.hasStarted()) {
+ return;
+ }
+
+ return _stepdownThread.stop();
+ };
+ };
+
+ Object.extend(ReplSetTest, originalReplSetTest);
+
+ /**
+ * Overrides the ShardingTest constructor to add methods for controlling the stepdown threads.
+ */
+ ShardingTest = function ShardingTestWithContinuousPrimaryStepdown(params) {
+ params.other = params.other || {};
+
+ if (stepdownOptions.configStepdown) {
+ params.other.configOptions = params.other.configOptions || {};
+ params.other.configOptions.setParameter =
+ params.other.configOptions.setParameter || {};
+ params.other.configOptions.setParameter.logComponentVerbosity = verbositySetting;
+ }
+
+ if (stepdownOptions.shardStepdown) {
+ params.other.shardOptions = params.other.shardOptions || {};
+ params.other.shardOptions.setParameter =
+ params.other.shardOptions.setParameter || {};
+ params.other.shardOptions.setParameter.logComponentVerbosity = verbositySetting;
+ }
+
+ // Construct the original object.
+ originalShardingTest.apply(this, arguments);
+
+ // Validate the stepdown options.
+ if (stepdownOptions.configStepdown && !this.configRS) {
+ throw new Error(
+ "Continuous config server primary step down only available with CSRS");
+ }
+
+ if (stepdownOptions.shardStepdown && this._rs.some(rst => !rst)) {
+ throw new Error(
+ "Continuous shard primary step down only available with replica set shards");
+ }
+
+ /**
+ * Calls startContinuousFailover on the config server and/or each shard replica set as
+ * specified by the stepdownOptions object.
+ */
+ this.startContinuousFailover = function() {
+ if (stepdownOptions.configStepdown) {
+ this.configRS.startContinuousFailover();
+ }
+
+ if (stepdownOptions.shardStepdown) {
+ this._rs.forEach(function(rst) {
+ rst.test.startContinuousFailover();
+ });
+ }
+ };
+
+ /**
+ * Calls stopContinuousFailover on the config server and each shard replica set as
+ * specified by the stepdownOptions object.
+ */
+ this.stopContinuousFailover = function() {
+ if (stepdownOptions.configStepdown) {
+ this.configRS.stopContinuousFailover();
+ }
+
+ if (stepdownOptions.shardStepdown) {
+ this._rs.forEach(function(rst) {
+ rst.test.stopContinuousFailover();
+ });
+ }
+ };
+
+ /**
+ * This method is disabled because it runs aggregation, which doesn't handle config
+ * server stepdown correctly.
+ */
+ this.printShardingStatus = function() {};
+ };
+
+ Object.extend(ShardingTest, originalShardingTest);
+ };
+})();
diff --git a/jstests/libs/override_methods/mongos_manual_intervention_actions.js b/jstests/libs/override_methods/mongos_manual_intervention_actions.js
new file mode 100644
index 00000000000..6715ce081f2
--- /dev/null
+++ b/jstests/libs/override_methods/mongos_manual_intervention_actions.js
@@ -0,0 +1,79 @@
+/**
+ * If the config primary steps down during a metadata command, mongos will internally retry the
+ * command. On the retry, the command may fail with the error "ManualInterventionRequired" if
+ * the earlier try left the config database in an inconsistent state.
+ *
+ * This override allows for automating the manual cleanup by catching the
+ * "ManualInterventionRequired" error, performing the cleanup, and transparently retrying the
+ * command.
+ */
+(function() {
+ let manualInterventionActions = {
+ removePartiallyWrittenChunks: function(mongosConn, ns, cmdObj, numAttempts) {
+ print("command " + tojson(cmdObj) + " failed after " + numAttempts +
+ " attempts due to seeing partially written chunks for collection " + ns +
+ ", probably due to a previous failed shardCollection attempt. Manually" +
+ " deleting chunks for " + ns + " from config.chunks and retrying the command.");
+ assert.writeOK(mongosConn.getDB("config").chunks.remove(
+ {ns: ns}, {writeConcern: {w: "majority"}}));
+ }
+ };
+
+ const mongoRunCommandOriginal = Mongo.prototype.runCommand;
+
+ Mongo.prototype.runCommand = function runCommand(dbName, cmdObj, options) {
+ const cmdName = Object.keys(cmdObj)[0];
+ const commandsToRetry =
+ new Set(["mapReduce", "mapreduce", "shardCollection", "shardcollection"]);
+
+ if (!commandsToRetry.has(cmdName)) {
+ return mongoRunCommandOriginal.apply(this, arguments);
+ }
+
+ const maxAttempts = 10;
+ let numAttempts = 0;
+ let res;
+
+ while (numAttempts < maxAttempts) {
+ res = mongoRunCommandOriginal.apply(this, arguments);
+ ++numAttempts;
+
+ if (res.ok === 1 || res.code !== ErrorCodes.ManualInterventionRequired ||
+ numAttempts === maxAttempts) {
+ break;
+ }
+
+ if (cmdName === "shardCollection" || cmdName === "shardcollection") {
+ const ns = cmdObj[cmdName];
+ manualInterventionActions.removePartiallyWrittenChunks(
+ this, ns, cmdObj, numAttempts);
+ } else if (cmdName === "mapReduce" || cmdName === "mapreduce") {
+ const out = cmdObj.out;
+
+ // The output collection can be specified as a string argument to the mapReduce
+ // command's 'out' option, or nested under 'out.replace', 'out.merge', or
+ // 'out.reduce'.
+ let outCollName;
+ if (typeof out === "string") {
+ outCollName = out;
+ } else if (typeof out === "object") {
+ outCollName = out.replace || out.merge || out.reduce;
+ } else {
+ print("Could not parse the output collection's name from 'out' option in " +
+ tojson(cmdObj) + "; not retrying on ManualInterventionRequired error " +
+ tojson(res));
+ break;
+ }
+
+ // The output collection's database can optionally be specified under 'out.db',
+ // else it defaults to the input collection's database.
+ const outDbName = out.db || dbName;
+
+ const ns = outDbName + "." + outCollName;
+ manualInterventionActions.removePartiallyWrittenChunks(
+ this, ns, cmdObj, numAttempts);
+ }
+ }
+ return res;
+ };
+})();
diff --git a/jstests/libs/override_methods/sharding_continuous_config_stepdown.js b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js
index dd5c9b5e7ef..b96c9726708 100644
--- a/jstests/libs/override_methods/sharding_continuous_config_stepdown.js
+++ b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js
@@ -1,340 +1,22 @@
-/**
- * Loading this file extends the prototype for ReplSetTest to spawn a thread, which continuously
- * step down the primary.
- */
-
-// Contains the declaration for ScopedThread and CountDownLatch
-load('jstests/libs/parallelTester.js');
-load("jstests/replsets/rslib.js");
-
-/**
- * Executes the specified function and if it fails due to exception, which is related to network
- * error retries the call once. If the second attempt also fails, simply throws the last
- * exception.
- *
- * Returns the return value of the input call.
- */
-function retryOnNetworkError(func) {
- var networkErrorRetriesLeft = 1;
-
- while (true) {
- try {
- return func();
- } catch (e) {
- if (e.toString().indexOf("network error") > -1 && networkErrorRetriesLeft > 0) {
- print("Network error occurred and the call will be retried: " +
- tojson({error: e.toString(), stack: e.stack}));
- networkErrorRetriesLeft--;
- } else {
- throw e;
- }
- }
- }
-}
-
(function() {
- 'use strict';
-
- // Preserve the original ReplSetTest and ShardingTest constructors, because we are overriding
- // them
- var originalReplSetTest = ReplSetTest;
- var originalShardingTest = ShardingTest;
-
- const stepdownDelaySeconds = 10;
- const verbositySetting =
- "{ verbosity: 0, command: {verbosity: 1}, network: {verbosity: 1, asio: {verbosity: 2}}, \
-tracking: {verbosity: 0} }";
-
- /**
- * Overrides the ReplSetTest constructor to start the continuous config server stepdown
- * thread.
- */
- ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() {
- // Construct the original object
- originalReplSetTest.apply(this, arguments);
-
- /**
- * This function is intended to be called in a separate thread and it continuously steps
- * down the current primary for a number of attempts.
- *
- * @param {string} seedNode The connection string of a node from which to discover the
- * primary of the replica set.
- * @param {CountDownLatch} stopCounter Object, which can be used to stop the thread.
- *
- * @param {integer} stepdownDelaySeconds The number of seconds after stepping down the
- * primary for which the node is not re-electable.
- *
- * @return Object with the following fields:
- * ok {integer}: 0 if it failed, 1 if it succeeded.
- * error {string}: Only present if ok == 0. Contains the cause for the error.
- * stack {string}: Only present if ok == 0. Contains the stack at the time of the
- * error.
- */
- function _continuousPrimaryStepdownFn(seedNode, stopCounter, stepdownDelaySeconds) {
- 'use strict';
-
- load('jstests/libs/override_methods/sharding_continuous_config_stepdown.js');
-
- print('*** Continuous stepdown thread running with seed node ' + seedNode);
-
- try {
- // The config primary may unexpectedly step down during startup if under heavy
- // load and too slowly processing heartbeats. When it steps down, it closes all of
- // its connections. This can happen during the call to new ReplSetTest, so in order
- // to account for this and make the tests stable, retry discovery of the replica
- // set's configuration once (SERVER-22794).
- var replSet = retryOnNetworkError(function() {
- return new ReplSetTest(seedNode);
- });
-
- var primary = replSet.getPrimary();
-
- while (stopCounter.getCount() > 0) {
- print('*** Stepping down ' + primary);
-
- assert.throws(function() {
- var result = primary.adminCommand(
- {replSetStepDown: stepdownDelaySeconds, force: true});
- print('replSetStepDown command did not throw and returned: ' +
- tojson(result));
-
- // The call to replSetStepDown should never succeed
- assert.commandWorked(result);
- });
-
- // Wait for primary to get elected and allow the test to make some progress
- // before attempting another stepdown.
- if (stopCounter.getCount() > 0)
- primary = replSet.getPrimary();
-
- if (stopCounter.getCount() > 0)
- sleep(8000);
- }
-
- print('*** Continuous stepdown thread completed successfully');
- return {ok: 1};
- } catch (e) {
- print('*** Continuous stepdown thread caught exception: ' + tojson(e));
- return {ok: 0, error: e.toString(), stack: e.stack};
- }
- }
-
- // Preserve the original stopSet method, because we are overriding it to stop the
- // continuous
- // stepdown thread.
- var _originalStartSetFn = this.startSet;
- var _originalStopSetFn = this.stopSet;
-
- // We override these methods to retry on network errors
- var _originalAwaitLastOpCommitted = this.awaitLastOpCommitted;
-
- // These two manage the scoped failover thread
- var _scopedPrimaryStepdownThread;
- var _scopedPrimaryStepdownThreadStopCounter;
-
- /**
- * Overrides the startSet call so we can increase the logging verbosity
- */
- this.startSet = function(options) {
- if (!options) {
- options = {};
- }
- if ('setParameter' in options) {
- options.setParameter.logComponentVerbosity = verbositySetting;
- } else {
- options.setParameter = {logComponentVerbosity: verbositySetting};
- }
- return _originalStartSetFn.call(this, options);
- };
-
- /**
- * Overrides the stopSet call so it terminates the failover thread.
- */
- this.stopSet = function() {
- this.stopContinuousFailover();
- _originalStopSetFn.apply(this, arguments);
- };
-
- /**
- * Overrides the awaitLastOpCommitted to retry on network errors.
- */
- this.awaitLastOpCommitted = function() {
- return retryOnNetworkError(_originalAwaitLastOpCommitted.bind(this));
- };
-
- /**
- * Spawns a thread to invoke continuousPrimaryStepdownFn. See its comments for more
- * information.
- */
- this.startContinuousFailover = function() {
- if (_scopedPrimaryStepdownThread) {
- throw new Error('Continuous failover thread is already active');
- }
+ "use strict";
- // This suite will step down the config primary every 10 seconds, and
- // electionTimeoutMillis defaults to 10 seconds. Set electionTimeoutMillis to 5 seconds,
- // so config operations have some time to run before being interrupted by stepdown.
- //
- // Note: this is done after ShardingTest runs because ShardingTest operations are not
- // resilient to stepdowns, which a shorter election timeout can cause to happen on
- // slow machines.
- var rsconfig = this.getReplSetConfigFromNode();
- rsconfig.settings.electionTimeoutMillis = stepdownDelaySeconds * 1000 / 2;
- rsconfig.version++;
- reconfig(this, rsconfig);
- assert.eq(this.getReplSetConfigFromNode().settings.electionTimeoutMillis,
- 5000,
- "Failed to lower the electionTimeoutMillis to 5000 milliseconds.");
+ load("jstests/libs/override_methods/continuous_stepdown.js");
+ load("jstests/libs/override_methods/mongos_manual_intervention_actions.js");
- _scopedPrimaryStepdownThreadStopCounter = new CountDownLatch(1);
- _scopedPrimaryStepdownThread = new ScopedThread(_continuousPrimaryStepdownFn,
- this.nodes[0].host,
- _scopedPrimaryStepdownThreadStopCounter,
- stepdownDelaySeconds);
- _scopedPrimaryStepdownThread.start();
- };
+ ContinuousStepdown.configure({
+ configStepdown: true,
+ electionTimeoutMS: 5 * 1000,
+ shardStepdown: false,
+ stepdownDurationSecs: 10,
+ stepdownIntervalMS: 8 * 1000,
+ });
- /**
- * Blocking method, which tells the thread running continuousPrimaryStepdownFn to stop
- * and
- * waits
- * for it to terminate.
- */
- this.stopContinuousFailover = function() {
- if (!_scopedPrimaryStepdownThread) {
- return;
- }
-
- _scopedPrimaryStepdownThreadStopCounter.countDown();
- _scopedPrimaryStepdownThreadStopCounter = null;
-
- _scopedPrimaryStepdownThread.join();
-
- var retVal = _scopedPrimaryStepdownThread.returnData();
- _scopedPrimaryStepdownThread = null;
-
- return assert.commandWorked(retVal);
- };
- };
-
- Object.extend(ReplSetTest, originalReplSetTest);
-
- /**
- * Overrides the ShardingTest constructor to start the continuous config server stepdown thread.
- */
- ShardingTest = function ShardingTestWithContinuousConfigPrimaryStepdown() {
- if (!arguments[0].other) {
- arguments[0].other = {};
- }
- if ('configOptions' in arguments[0].other &&
- 'setParameter' in arguments[0].other.configOptions) {
- arguments[0].other.configOptions.setParameter.logComponentVerbosity = verbositySetting;
- }
-
- if ('setParameter' in arguments[0].other) {
- arguments[0].other.setParameter.logComponentVerbosity = verbositySetting;
- } else {
- arguments[0].other.setParameter = {logComponentVerbosity: verbositySetting};
- }
-
- // Construct the original object
+ const originalShardingTest = ShardingTest;
+ ShardingTest = function() {
originalShardingTest.apply(this, arguments);
- if (!this.configRS) {
- throw new Error('Continuous config server step down only available with CSRS');
- }
-
- /**
- * This method is disabled because it runs aggregation, which doesn't handle config server
- * stepdown correctly.
- */
- this.printShardingStatus = function() {
-
- };
-
- // Start the continuous config server stepdown thread
- this.configRS.startContinuousFailover();
+ // Automatically start the continuous stepdown thread on the config server replica set.
+ this.startContinuousFailover();
};
-
- Object.extend(ShardingTest, originalShardingTest);
-
- /**
- * If the config primary steps down during a metadata command, mongos will internally retry the
- * command. On the retry, the command may fail with the error "ManualInterventionRequired" if
- * the earlier try left the config database in an inconsistent state.
- *
- * This override allows for automating the manual cleanup by catching the
- * "ManualInterventionRequired" error, performing the cleanup, and transparently retrying the
- * command.
- */
- (function(original) {
- let manualInterventionActions = {
- removePartiallyWrittenChunks: function(mongosConn, ns, cmdObj, numAttempts) {
- print("command " + tojson(cmdObj) + " failed after " + numAttempts +
- " attempts due to seeing partially written chunks for collection " + ns +
- ", probably due to a previous failed shardCollection attempt. Manually" +
- " deleting chunks for " + ns +
- " from config.chunks and retrying the command.");
- assert.writeOK(mongosConn.getDB("config").chunks.remove(
- {ns: ns}, {writeConcern: {w: "majority"}}));
- }
- };
-
- Mongo.prototype.runCommand = function runCommand(dbName, cmdObj, options) {
- const cmdName = Object.keys(cmdObj)[0];
- const commandsToRetry =
- new Set(["mapReduce", "mapreduce", "shardCollection", "shardcollection"]);
-
- if (!commandsToRetry.has(cmdName)) {
- return original.apply(this, arguments);
- }
-
- const maxAttempts = 10;
- let numAttempts = 0;
- let res;
-
- while (numAttempts < maxAttempts) {
- res = original.apply(this, arguments);
- ++numAttempts;
-
- if (res.ok === 1 || res.code !== ErrorCodes.ManualInterventionRequired ||
- numAttempts === maxAttempts) {
- break;
- }
-
- if (cmdName === "shardCollection" || cmdName === "shardcollection") {
- const ns = cmdObj[cmdName];
- manualInterventionActions.removePartiallyWrittenChunks(
- this, ns, cmdObj, numAttempts);
- } else if (cmdName === "mapReduce" || cmdName === "mapreduce") {
- const out = cmdObj.out;
-
- // The output collection can be specified as a string argument to the mapReduce
- // command's 'out' option, or nested under 'out.replace', 'out.merge', or
- // 'out.reduce'.
- let outCollName;
- if (typeof out === "string") {
- outCollName = out;
- } else if (typeof out === "object") {
- outCollName = out.replace || out.merge || out.reduce;
- } else {
- print("Could not parse the output collection's name from 'out' option in " +
- tojson(cmdObj) +
- "; not retrying on ManualInterventionRequired error " + tojson(res));
- break;
- }
-
- // The output collection's database can optionally be specified under 'out.db',
- // else it defaults to the input collection's database.
- const outDbName = out.db || dbName;
-
- const ns = outDbName + "." + outCollName;
- manualInterventionActions.removePartiallyWrittenChunks(
- this, ns, cmdObj, numAttempts);
- }
- }
- return res;
- };
- })(Mongo.prototype.runCommand);
-
})();
diff --git a/jstests/libs/retry_on_network_error.js b/jstests/libs/retry_on_network_error.js
new file mode 100644
index 00000000000..f47921f66cd
--- /dev/null
+++ b/jstests/libs/retry_on_network_error.js
@@ -0,0 +1,23 @@
+/**
+ * Executes the specified function and, if it fails due to an exception related to a network
+ * error, retries the call up to 'numRetries' times (once by default). If the final attempt also
+ * fails, simply throws the last exception.
+ *
+ * Returns the return value of the input call.
+ */
+function retryOnNetworkError(func, numRetries = 1) {
+ while (true) {
+ try {
+ return func();
+ } catch (e) {
+ if ((isNetworkError(e) || e.toString().indexOf("network error") > -1) &&
+ numRetries > 0) {
+ print("Network error occurred and the call will be retried: " +
+ tojson({error: e.toString(), stack: e.stack}));
+ numRetries--;
+ } else {
+ throw e;
+ }
+ }
+ }
+}