summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2015-12-21 11:30:31 -0500
committerKaloian Manassiev <kaloian.manassiev@mongodb.com>2015-12-23 10:39:04 -0500
commita7e6d919ec602af002c61797d6c5ae5dfc6e6217 (patch)
tree52868563464ce8e04fe19bf1871f1a48a74a3f31
parent551e33cd86e8fcb6c87050d0249bac6fc8342534 (diff)
downloadmongo-a7e6d919ec602af002c61797d6c5ae5dfc6e6217.tar.gz
SERVER-21050 Continuous config server primary step down
This change adds a continuous primary stepdown capability to the ReplSetTest and hooks it into ShardingTest.
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml92
-rw-r--r--etc/evergreen.yml10
-rw-r--r--jstests/libs/override_methods/sharding_continuous_config_stepdown.js176
-rw-r--r--jstests/sharding/disable_autosplit.js38
-rw-r--r--jstests/sharding/max_time_ms_sharded.js8
-rw-r--r--src/mongo/shell/shardingtest.js12
6 files changed, 309 insertions, 27 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
new file mode 100644
index 00000000000..921e8dc5186
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -0,0 +1,92 @@
+selector:
+ js_test:
+ roots:
+ - jstests/sharding/*.js
+ - jstests/sharding/replset_config/*.js
+ exclude_files:
+ # SERVER-20694
+ - jstests/sharding/csrs_upgrade.js
+ # Test is flaky - SERVER-20580
+ - jstests/sharding/csrs_upgrade_during_migrate.js
+ # Auth tests require authentication on the stepdown thread's connection
+ - jstests/sharding/auth*.js
+ - jstests/sharding/cleanup_orphaned_auth.js
+ - jstests/sharding/localhostAuthBypass.js
+ - jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
+ - jstests/sharding/mrShardedOutputAuth.js
+ # Count/write/aggregate/group commands against the config shard do not support retries yet
+ - jstests/sharding/addshard1.js
+ - jstests/sharding/addshard2.js
+ - jstests/sharding/auto1.js
+ - jstests/sharding/auto_rebalance.js
+ - jstests/sharding/autosplit_heuristics.js
+ - jstests/sharding/balance_tags1.js
+ - jstests/sharding/balance_tags2.js
+ - jstests/sharding/basic_drop_coll.js
+ - jstests/sharding/bulk_shard_insert.js
+ - jstests/sharding/count1.js
+ - jstests/sharding/count2.js
+ - jstests/sharding/cursor1.js
+ - jstests/sharding/diffservers1.js
+ - jstests/sharding/disable_autosplit.js
+ - jstests/sharding/drop_sharded_db.js
+ - jstests/sharding/fair_balancer_round.js
+ - jstests/sharding/findandmodify1.js
+ - jstests/sharding/findandmodify2.js
+ - jstests/sharding/geo_shardedgeonear.js
+ - jstests/sharding/hash_basic.js
+ - jstests/sharding/hash_shard1.js
+ - jstests/sharding/hash_shard_non_empty.js
+ - jstests/sharding/hash_shard_num_chunks.js
+ - jstests/sharding/hash_single_shard.js
+ - jstests/sharding/key_many.js
+ - jstests/sharding/key_string.js
+ - jstests/sharding/large_chunk.js
+ - jstests/sharding/limit_push.js
+ - jstests/sharding/merge_chunks_basic.js
+ - jstests/sharding/migrateBig_balancer.js
+ - jstests/sharding/move_chunk_basic.js
+ - jstests/sharding/movePrimary1.js
+ - jstests/sharding/mrShardedOutput.js
+ - jstests/sharding/names.js
+ - jstests/sharding/prefix_shard_key.js
+ - jstests/sharding/presplit.js
+ - jstests/sharding/query_config.js
+ - jstests/sharding/remove1.js
+ - jstests/sharding/rename_across_mongos.js
+ - jstests/sharding/shard1.js
+ - jstests/sharding/shard2.js
+ - jstests/sharding/shard3.js
+ - jstests/sharding/shard_collection_basic.js
+ - jstests/sharding/sharding_balance1.js
+ - jstests/sharding/sharding_balance2.js
+ - jstests/sharding/sharding_balance3.js
+ - jstests/sharding/sharding_balance4.js
+ - jstests/sharding/sharding_migrate_cursor1.js
+ - jstests/sharding/tag_auto_split.js
+ - jstests/sharding/tag_range.js
+ - jstests/sharding/top_chunk_autosplit.js
+ - jstests/sharding/count_config_servers.js
+ # Awaits replication directly against the config server primary
+ - jstests/sharding/moveprimary_ignore_sharded.js
+ # Runs setShardVersion/getShardVersion against the config server and we don't support retries
+ # for this command
+ - jstests/sharding/major_version_check.js
+ - jstests/sharding/replset_config/ssv_config_check.js
+ # Already stops or blackholes the primary of the CSRS config shard
+ - jstests/sharding/all_config_hosts_down.js
+ - jstests/sharding/all_config_servers_blackholed_from_mongos.js
+ - jstests/sharding/replset_config/batch_write_command_sharded.js
+ - jstests/sharding/replset_config/config_rs_no_primary.js
+ - jstests/sharding/startup_with_all_configs_down.js
+ # Test runs really slow with the primary continuously stepping down
+ - jstests/sharding/zbigMapReduce.js
+ # Unknown balancer errors - need to be investigated
+ - jstests/sharding/in_memory_sort_limit.js
+executor:
+ js_test:
+ config:
+ shell_options:
+ eval: "load('jstests/libs/override_methods/sharding_continuous_config_stepdown.js');"
+ nodb: ''
+ readMode: commands
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 10f8e90a18b..375e344ee1a 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -1524,6 +1524,15 @@ tasks:
run_multiple_jobs: true
- <<: *task_template
+ name: sharding_csrs_continuous_config_stepdown_WT
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+ vars:
+ resmoke_args: --suites=sharding_continuous_config_stepdown --storageEngine=wiredTiger
+ run_multiple_jobs: true
+
+- <<: *task_template
name: sharding_WT_ese
commands:
- func: "do setup"
@@ -3848,6 +3857,7 @@ buildvariants:
- name: sharding_legacy_op_query_WT
- name: sharding_op_query_WT
- name: sharding_legacy_multiversion
+ - name: sharding_csrs_continuous_config_stepdown_WT
- name: slow1
- name: slow1_WT
- name: slow2
diff --git a/jstests/libs/override_methods/sharding_continuous_config_stepdown.js b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js
new file mode 100644
index 00000000000..6edf2cbc79a
--- /dev/null
+++ b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js
@@ -0,0 +1,176 @@
+/**
+ * Loading this file overrides the ReplSetTest and ShardingTest constructors so that a background
+ * thread continuously steps down the primary of the config server replica set.
+ */
+
+// Contains the declaration for ScopedThread and CountDownLatch
+load('jstests/libs/parallelTester.js');
+
+(function() {
+'use strict';
+
+// Preserve the original ReplSetTest and ShardingTest constructors, because we are overriding them
+var originalReplSetTest = ReplSetTest;
+var originalShardingTest = ShardingTest;
+
+/**
+ * Overrides the ReplSetTest constructor to add support for a continuous primary stepdown thread.
+ */
+ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() {
+ // Construct the original object
+ originalReplSetTest.apply(this, arguments);
+
+ /**
+ * This function is intended to be called in a separate thread and it continuously steps down
+ * the current primary until the stopCounter latch has been counted down.
+ *
+ * @param {string} seedNode The connection string of a node from which to discover the primary
+ * of the replica set.
+ * @param {CountDownLatch} stopCounter Object, which can be used to stop the thread.
+ *
+ * @return Object with the following fields:
+ * ok {integer}: 0 if it failed, 1 if it succeeded.
+ * error {string}: Only present if ok == 0. Contains the cause for the error.
+ * stack {string}: Only present if ok == 0. Contains the stack at the time of the error.
+ */
+ function _continuousPrimaryStepdownFn(seedNode, stopCounter) {
+ 'use strict';
+
+ var stepdownDelaySeconds = 10;
+
+ print('*** Continuous stepdown thread running with seed node ' + seedNode);
+
+ try {
+ var replSet = new ReplSetTest(seedNode);
+ var primary = replSet.getPrimary();
+
+ while (stopCounter.getCount() > 0) {
+ print('*** Stepping down ' + primary);
+
+ assert.throws(function() {
+ var result = primary.adminCommand({
+ replSetStepDown: stepdownDelaySeconds,
+ secondaryCatchUpPeriodSecs: stepdownDelaySeconds });
+ print('replSetStepDown command did not throw and returned: ' + tojson(result));
+
+ // Reached only if replSetStepDown did not throw; an ok response fails the assert.throws
+ assert.commandWorked(result);
+ });
+
+ // Wait for primary to get elected and allow the test to make some progress before
+ // attempting another stepdown.
+ primary = replSet.getPrimary();
+ sleep(7000);
+ }
+
+ print('*** Continuous stepdown thread completed successfully');
+ return { ok: 1 };
+ }
+ catch (e) {
+ print('*** Continuous stepdown thread caught exception: ' + tojson(e));
+ return { ok: 0, error: e.toString(), stack: e.stack };
+ }
+ }
+
+ // Preserve the original startSet and stopSet methods, because we are overriding them to increase
+ // the logging verbosity and to stop the continuous stepdown thread, respectively.
+ var _originalStartSetFn = this.startSet;
+ var _originalStopSetFn = this.stopSet;
+
+ // These two manage the scoped failover thread
+ var _scopedPrimaryStepdownThread;
+ var _scopedPrimaryStepdownThreadStopCounter;
+
+ /**
+ * Overrides the startSet call so we can increase the logging verbosity
+ */
+ this.startSet = function(options) {
+ if (!options) {
+ options = {};
+ }
+ options.verbose = 2;
+ return _originalStartSetFn.call(this, options);
+ }
+
+ /**
+ * Overrides the stopSet call so it terminates the failover thread.
+ */
+ this.stopSet = function() {
+ this.stopContinuousFailover();
+ _originalStopSetFn.apply(this, arguments);
+ };
+
+ /**
+ * Spawns a thread to invoke _continuousPrimaryStepdownFn. See its comments for more information.
+ */
+ this.startContinuousFailover = function() {
+ if (_scopedPrimaryStepdownThread) {
+ throw new Error('Continuous failover thread is already active');
+ }
+
+ _scopedPrimaryStepdownThreadStopCounter = new CountDownLatch(1);
+ _scopedPrimaryStepdownThread = new ScopedThread(_continuousPrimaryStepdownFn,
+ this.nodes[0].host,
+ _scopedPrimaryStepdownThreadStopCounter);
+ _scopedPrimaryStepdownThread.start();
+ };
+
+ /**
+ * Blocking method, which tells the thread running _continuousPrimaryStepdownFn to stop and waits
+ * for it to terminate.
+ */
+ this.stopContinuousFailover = function() {
+ if (!_scopedPrimaryStepdownThread) {
+ return;
+ }
+
+ _scopedPrimaryStepdownThreadStopCounter.countDown();
+ _scopedPrimaryStepdownThreadStopCounter = null;
+
+ _scopedPrimaryStepdownThread.join();
+
+ var retVal = _scopedPrimaryStepdownThread.returnData();
+ _scopedPrimaryStepdownThread = null;
+
+ return assert.commandWorked(retVal);
+ };
+};
+
+Object.extend(ReplSetTest, originalReplSetTest);
+
+/**
+ * Overrides the ShardingTest constructor to start the continuous config server stepdown thread.
+ */
+ShardingTest = function ShardingTestWithContinuousConfigPrimaryStepdown() {
+ if (!arguments[0].other) {
+ arguments[0].other = {};
+ }
+ arguments[0].verbose = 2;
+
+ if (!arguments[0].other.shardOptions) {
+ arguments[0].other.shardOptions = {};
+ }
+ arguments[0].other.shardOptions.verbose = 2;
+
+ // Construct the original object
+ originalShardingTest.apply(this, arguments);
+
+ if (!this.configRS) {
+ throw new Error('Continuous config server step down only available with CSRS');
+ }
+
+ /**
+ * This method is disabled because it runs aggregation, which doesn't handle config server
+ * stepdown correctly.
+ */
+ this.printShardingStatus = function() {
+
+ }
+
+ // Start the continuous config server stepdown thread
+ this.configRS.startContinuousFailover();
+};
+
+Object.extend(ShardingTest, originalShardingTest);
+
+})();
diff --git a/jstests/sharding/disable_autosplit.js b/jstests/sharding/disable_autosplit.js
index a489aa9f605..ec3b593ea24 100644
--- a/jstests/sharding/disable_autosplit.js
+++ b/jstests/sharding/disable_autosplit.js
@@ -1,39 +1,33 @@
-//
// Tests disabling of autosplit from mongos
-//
(function() {
'use strict';
-var chunkSize = 1 //MB
+var chunkSize = 1; // In MB
-var st = new ShardingTest({ shards : 1,
- mongos : 1,
- other : {
-
- chunksize : chunkSize,
- mongosOptions : { noAutoSplit : "" }
-
- } })
+var st = new ShardingTest({ shards: 1,
+ mongos: 1,
+ other: { chunksize: chunkSize,
+ mongosOptions: { noAutoSplit: "" } } });
var data = "x";
-while( data.length < chunkSize * 1024 * 1024 ){
- data += data
+while(data.length < chunkSize * 1024 * 1024) {
+ data += data;
}
-var mongos = st.s0
-var admin = mongos.getDB( "admin" )
-var config = mongos.getDB( "config" )
-var coll = mongos.getCollection( "foo.bar" )
+var mongos = st.s0;
+var admin = mongos.getDB("admin");
+var config = mongos.getDB("config");
+var coll = mongos.getCollection("foo.bar");
-assert.commandWorked(admin.runCommand({ enableSharding : coll.getDB() + "" }));
-assert.commandWorked(admin.runCommand({ shardCollection : coll + "", key : { _id : 1 } }));
+assert.commandWorked(admin.runCommand({ enableSharding: coll.getDB() + "" }));
+assert.commandWorked(admin.runCommand({ shardCollection: coll + "", key: { _id: 1 } }));
-for( var i = 0; i < 20; i++ ) {
- coll.insert({ data : data });
+for(var i = 0; i < 20; i++) {
+ coll.insert({ data: data });
}
// Make sure we haven't split
-assert.eq(1, config.chunks.find({ ns : coll + "" }).count());
+assert.eq(1, config.chunks.find({ ns: coll + "" }).count());
st.stop();
diff --git a/jstests/sharding/max_time_ms_sharded.js b/jstests/sharding/max_time_ms_sharded.js
index cc3d87505ec..02a6d215c58 100644
--- a/jstests/sharding/max_time_ms_sharded.js
+++ b/jstests/sharding/max_time_ms_sharded.js
@@ -4,9 +4,11 @@
//
// Note that mongos does not time out commands or query ops (which remains responsibility of mongod,
// pending development of an interrupt framework for mongos).
+(function() {
+'use strict';
var st = new ShardingTest({shards: 2});
-st.stopBalancer();
+
var mongos = st.s0;
var shards = [st.shard0, st.shard1];
var coll = mongos.getCollection("foo.bar");
@@ -51,7 +53,7 @@ assert.commandWorked(admin.runCommand({moveChunk: coll.getFullName(),
// Insert 100 documents into sharded collection, such that each shard owns 50.
//
var bulk = coll.initializeUnorderedBulkOp();
-for (i=-50; i<50; i++) {
+for (var i = -50; i < 50; i++) {
bulk.insert({ _id: i });
}
assert.writeOK(bulk.execute());
@@ -211,3 +213,5 @@ assert.commandWorked(admin.runCommand({moveChunk: coll.getFullName(),
// TODO Test additional commmands.
st.stop();
+
+})();
diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js
index 03e273c0753..b99f425a899 100644
--- a/src/mongo/shell/shardingtest.js
+++ b/src/mongo/shell/shardingtest.js
@@ -180,10 +180,16 @@ var ShardingTest = function(params) {
this.getServerName = function(dbname) {
var x = this.config.databases.findOne({ _id : "" + dbname });
- if (x)
+ if (x) {
return x.primary;
- this.config.databases.find().forEach(printjson);
- throw Error("couldn't find dbname: " + dbname + " total: " + this.config.databases.count());
+ }
+
+ var countDBsFound = 0;
+ this.config.databases.find().forEach(function(db) {
+ countDBsFound++;
+ printjson(db);
+ });
+ throw Error("couldn't find dbname: " + dbname + " total: " + countDBsFound);
};
this.getNonPrimaries = function(dbname) {