diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2015-12-21 11:30:31 -0500 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2015-12-23 10:39:04 -0500 |
commit | a7e6d919ec602af002c61797d6c5ae5dfc6e6217 (patch) | |
tree | 52868563464ce8e04fe19bf1871f1a48a74a3f31 | |
parent | 551e33cd86e8fcb6c87050d0249bac6fc8342534 (diff) | |
download | mongo-a7e6d919ec602af002c61797d6c5ae5dfc6e6217.tar.gz |
SERVER-21050 Continuous config server primary step down
This change adds a continuous primary stepdown capability to the
ReplicaSetTest and hooks it into ShardingTest.
-rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml | 92 | ||||
-rw-r--r-- | etc/evergreen.yml | 10 | ||||
-rw-r--r-- | jstests/libs/override_methods/sharding_continuous_config_stepdown.js | 176 | ||||
-rw-r--r-- | jstests/sharding/disable_autosplit.js | 38 | ||||
-rw-r--r-- | jstests/sharding/max_time_ms_sharded.js | 8 | ||||
-rw-r--r-- | src/mongo/shell/shardingtest.js | 12 |
6 files changed, 309 insertions, 27 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml new file mode 100644 index 00000000000..921e8dc5186 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml @@ -0,0 +1,92 @@ +selector: + js_test: + roots: + - jstests/sharding/*.js + - jstests/sharding/replset_config/*.js + exclude_files: + # SERVER-20694 + - jstests/sharding/csrs_upgrade.js + # Test is flaky - SERVER-20580 + - jstests/sharding/csrs_upgrade_during_migrate.js + # Auth tests require authentication on the stepdown thread's connection + - jstests/sharding/auth*.js + - jstests/sharding/cleanup_orphaned_auth.js + - jstests/sharding/localhostAuthBypass.js + - jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js + - jstests/sharding/mrShardedOutputAuth.js + # Count/write/aggregate/group commands against the config shard do not support retries yet + - jstests/sharding/addshard1.js + - jstests/sharding/addshard2.js + - jstests/sharding/auto1.js + - jstests/sharding/auto_rebalance.js + - jstests/sharding/autosplit_heuristics.js + - jstests/sharding/balance_tags1.js + - jstests/sharding/balance_tags2.js + - jstests/sharding/basic_drop_coll.js + - jstests/sharding/bulk_shard_insert.js + - jstests/sharding/count1.js + - jstests/sharding/count2.js + - jstests/sharding/cursor1.js + - jstests/sharding/diffservers1.js + - jstests/sharding/disable_autosplit.js + - jstests/sharding/drop_sharded_db.js + - jstests/sharding/fair_balancer_round.js + - jstests/sharding/findandmodify1.js + - jstests/sharding/findandmodify2.js + - jstests/sharding/geo_shardedgeonear.js + - jstests/sharding/hash_basic.js + - jstests/sharding/hash_shard1.js + - jstests/sharding/hash_shard_non_empty.js + - jstests/sharding/hash_shard_num_chunks.js + - jstests/sharding/hash_single_shard.js + - jstests/sharding/key_many.js + - jstests/sharding/key_string.js + - jstests/sharding/large_chunk.js 
+ - jstests/sharding/limit_push.js + - jstests/sharding/merge_chunks_basic.js + - jstests/sharding/migrateBig_balancer.js + - jstests/sharding/move_chunk_basic.js + - jstests/sharding/movePrimary1.js + - jstests/sharding/mrShardedOutput.js + - jstests/sharding/names.js + - jstests/sharding/prefix_shard_key.js + - jstests/sharding/presplit.js + - jstests/sharding/query_config.js + - jstests/sharding/remove1.js + - jstests/sharding/rename_across_mongos.js + - jstests/sharding/shard1.js + - jstests/sharding/shard2.js + - jstests/sharding/shard3.js + - jstests/sharding/shard_collection_basic.js + - jstests/sharding/sharding_balance1.js + - jstests/sharding/sharding_balance2.js + - jstests/sharding/sharding_balance3.js + - jstests/sharding/sharding_balance4.js + - jstests/sharding/sharding_migrate_cursor1.js + - jstests/sharding/tag_auto_split.js + - jstests/sharding/tag_range.js + - jstests/sharding/top_chunk_autosplit.js + - jstests/sharding/count_config_servers.js + # Awaits replication directly against the config server primary + - jstests/sharding/moveprimary_ignore_sharded.js + # Runs setShardVersion/getShardVersion against the config server and we don't support retries + # for this command + - jstests/sharding/major_version_check.js + - jstests/sharding/replset_config/ssv_config_check.js + # Already stop or blackholes the primary of the CSRS config shard + - jstests/sharding/all_config_hosts_down.js + - jstests/sharding/all_config_servers_blackholed_from_mongos.js + - jstests/sharding/replset_config/batch_write_command_sharded.js + - jstests/sharding/replset_config/config_rs_no_primary.js + - jstests/sharding/startup_with_all_configs_down.js + # Test runs really slow with the primary continuously stepping down + - jstests/sharding/zbigMapReduce.js + # Unknown balancer errors - need to be investigated + - jstests/sharding/in_memory_sort_limit.js +executor: + js_test: + config: + shell_options: + eval: 
"load('jstests/libs/override_methods/sharding_continuous_config_stepdown.js');" + nodb: '' + readMode: commands diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 10f8e90a18b..375e344ee1a 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -1524,6 +1524,15 @@ tasks: run_multiple_jobs: true - <<: *task_template + name: sharding_csrs_continuous_config_stepdown_WT + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=sharding_continuous_config_stepdown --storageEngine=wiredTiger + run_multiple_jobs: true + +- <<: *task_template name: sharding_WT_ese commands: - func: "do setup" @@ -3848,6 +3857,7 @@ buildvariants: - name: sharding_legacy_op_query_WT - name: sharding_op_query_WT - name: sharding_legacy_multiversion + - name: sharding_csrs_continuous_config_stepdown_WT - name: slow1 - name: slow1_WT - name: slow2 diff --git a/jstests/libs/override_methods/sharding_continuous_config_stepdown.js b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js new file mode 100644 index 00000000000..6edf2cbc79a --- /dev/null +++ b/jstests/libs/override_methods/sharding_continuous_config_stepdown.js @@ -0,0 +1,176 @@ +/**
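+ * Loading this file overrides the ReplSetTest constructor, so that a replica set can spawn a
+ * thread which continuously steps down the primary, and the ShardingTest constructor, so that
+ * this thread is started for the config server replica set.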
+ */
+
+// Contains the declaration for ScopedThread and CountDownLatch
+load('jstests/libs/parallelTester.js');
+
+(function() {
+'use strict';
+
+// Preserve the original ReplSetTest and ShardingTest constructors, because we are overriding them
+var originalReplSetTest = ReplSetTest;
+var originalShardingTest = ShardingTest;
+
+/**
+ * Overrides the ReplSetTest constructor to start the continuous config server stepdown thread.
+ */
+ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() {
+ // Construct the original object
+ originalReplSetTest.apply(this, arguments);
+
+ /**
+ * Body of the continuous stepdown thread (it is the function handed to ScopedThread by
+ * startContinuousFailover). It repeatedly steps down the current primary of the replica set
+ * until stopCounter has been counted down to zero.
+ *
+ * NOTE(review): since this runs on another thread, it presumably cannot rely on variables
+ * captured from the enclosing scope, which would explain the repeated 'use strict' - confirm
+ * against ScopedThread.
+ *
+ * @param {string} seedNode The connection string of a node from which to discover the primary
+ * of the replica set.
+ * @param {CountDownLatch} stopCounter Latch polled before every stepdown; the loop exits once
+ * its count is no longer greater than zero.
+ *
+ * @return Object with the following fields:
+ * ok {integer}: 0 if it failed, 1 if it succeeded.
+ * error {string}: Only present if ok == 0. Contains the cause for the error.
+ * stack {string}: Only present if ok == 0. Contains the stack at the time of the error.
+ */
+ function _continuousPrimaryStepdownFn(seedNode, stopCounter) {
+ 'use strict';
+
+ var stepdownDelaySeconds = 10; // Sent as both the stepdown and the secondary catch-up period
+
+ print('*** Continuous stepdown thread running with seed node ' + seedNode);
+
+ try {
+ var replSet = new ReplSetTest(seedNode);
+ var primary = replSet.getPrimary();
+
+ while (stopCounter.getCount() > 0) {
+ print('*** Stepping down ' + primary);
+
+ assert.throws(function() {
+ var result = primary.adminCommand({
+ replSetStepDown: stepdownDelaySeconds,
+ secondaryCatchUpPeriodSecs: stepdownDelaySeconds });
+ print('replSetStepDown command did not throw and returned: ' + tojson(result));
+
+ // Turn a failed reply into an exception; assert.throws then only fails on an ok reply
+ assert.commandWorked(result);
+ });
+
+ // Wait for primary to get elected and allow the test to make some progress before
+ // attempting another stepdown.
+ primary = replSet.getPrimary();
+ sleep(7000);
+ }
+
+ print('*** Continuous stepdown thread completed successfully');
+ return { ok: 1 };
+ }
+ catch (e) {
+ print('*** Continuous stepdown thread caught exception: ' + tojson(e));
+ return { ok: 0, error: e.toString(), stack: e.stack };
+ }
+ }
+
+ // Preserve the original startSet and stopSet methods, because we are overriding them to
+ // increase the logging verbosity and to stop the continuous stepdown thread, respectively.
+ var _originalStartSetFn = this.startSet;
+ var _originalStopSetFn = this.stopSet;
+
+ // These two manage the scoped failover thread
+ var _scopedPrimaryStepdownThread;
+ var _scopedPrimaryStepdownThreadStopCounter;
+
+ /**
+ * Overrides the startSet call so we can increase the logging verbosity
+ */
+ this.startSet = function(options) {
+ if (!options) {
+ options = {};
+ }
+ options.verbose = 2;
+ return _originalStartSetFn.call(this, options);
+ }
+
+ /**
+ * Overrides the stopSet call so it terminates the failover thread.
+ */
+ this.stopSet = function() {
+ this.stopContinuousFailover();
+ _originalStopSetFn.apply(this, arguments);
+ };
+
+ /**
+ * Spawns a thread to invoke continuousPrimaryStepdownFn. See its comments for more information.
+ */
+ this.startContinuousFailover = function() {
+ if (_scopedPrimaryStepdownThread) {
+ throw new Error('Continuous failover thread is already active');
+ }
+
+ _scopedPrimaryStepdownThreadStopCounter = new CountDownLatch(1);
+ _scopedPrimaryStepdownThread = new ScopedThread(_continuousPrimaryStepdownFn,
+ this.nodes[0].host,
+ _scopedPrimaryStepdownThreadStopCounter);
+ _scopedPrimaryStepdownThread.start();
+ };
+
+ /**
+ * Blocking method, which tells the thread running continuousPrimaryStepdownFn to stop and waits
+ * for it to terminate.
+ */
+ this.stopContinuousFailover = function() {
+ if (!_scopedPrimaryStepdownThread) {
+ return;
+ }
+
+ _scopedPrimaryStepdownThreadStopCounter.countDown();
+ _scopedPrimaryStepdownThreadStopCounter = null;
+
+ _scopedPrimaryStepdownThread.join();
+
+ var retVal = _scopedPrimaryStepdownThread.returnData();
+ _scopedPrimaryStepdownThread = null;
+
+ return assert.commandWorked(retVal);
+ };
+};
+
+Object.extend(ReplSetTest, originalReplSetTest);
+
+/**
+ * Overrides the ShardingTest constructor to start the continuous config server stepdown thread.
+ */
+ShardingTest = function ShardingTestWithContinuousConfigPrimaryStepdown() {
+ if (!arguments[0].other) {
+ arguments[0].other = {};
+ }
+ arguments[0].verbose = 2;
+
+ if (!arguments[0].other.shardOptions) {
+ arguments[0].other.shardOptions = {};
+ }
+ arguments[0].other.shardOptions.verbose = 2;
+
+ // Construct the original object
+ originalShardingTest.apply(this, arguments);
+
+ if (!this.configRS) {
+ throw new Error('Continuous config server step down only available with CSRS');
+ }
+
+ /**
+ * This method is disabled because it runs aggregation, which doesn't handle config server
+ * stepdown correctly.
+ */
+ this.printShardingStatus = function() {
+
+ }
+
+ // Start the continuous config server stepdown thread
+ this.configRS.startContinuousFailover();
+};
+
+Object.extend(ShardingTest, originalShardingTest);
+
+})();
diff --git a/jstests/sharding/disable_autosplit.js b/jstests/sharding/disable_autosplit.js index a489aa9f605..ec3b593ea24 100644 --- a/jstests/sharding/disable_autosplit.js +++ b/jstests/sharding/disable_autosplit.js @@ -1,39 +1,33 @@ -// // Tests disabling of autosplit from mongos -// (function() { 'use strict'; -var chunkSize = 1 //MB +var chunkSize = 1; // In MB -var st = new ShardingTest({ shards : 1, - mongos : 1, - other : { - - chunksize : chunkSize, - mongosOptions : { noAutoSplit : "" } - - } }) +var st = new ShardingTest({ shards: 1, + mongos: 1, + other: { chunksize: chunkSize, + mongosOptions: { noAutoSplit: "" } } }); var data = "x"; -while( data.length < chunkSize * 1024 * 1024 ){ - data += data +while(data.length < chunkSize * 1024 * 1024) { + data += data; } -var mongos = st.s0 -var admin = mongos.getDB( "admin" ) -var config = mongos.getDB( "config" ) -var coll = mongos.getCollection( "foo.bar" ) +var mongos = st.s0; +var admin = mongos.getDB("admin"); +var config = mongos.getDB("config"); +var coll = mongos.getCollection("foo.bar"); -assert.commandWorked(admin.runCommand({ enableSharding : coll.getDB() + "" })); -assert.commandWorked(admin.runCommand({ shardCollection : coll + "", key : { _id : 1 } })); +assert.commandWorked(admin.runCommand({ enableSharding: coll.getDB() + "" })); +assert.commandWorked(admin.runCommand({ shardCollection: coll + "", key: { _id: 1 } })); -for( var i = 0; i < 20; i++ ) { - coll.insert({ data : data }); +for(var i = 0; i < 20; i++) { + coll.insert({ data: data }); } // Make sure we haven't split -assert.eq(1, config.chunks.find({ ns : coll + "" }).count()); +assert.eq(1, config.chunks.find({ ns: coll + "" }).count()); st.stop(); diff --git a/jstests/sharding/max_time_ms_sharded.js b/jstests/sharding/max_time_ms_sharded.js index cc3d87505ec..02a6d215c58 100644 --- a/jstests/sharding/max_time_ms_sharded.js +++ b/jstests/sharding/max_time_ms_sharded.js @@ -4,9 +4,11 @@ // // Note that mongos does not time out commands 
or query ops (which remains responsibility of mongod, // pending development of an interrupt framework for mongos). +(function() { +'use strict'; var st = new ShardingTest({shards: 2}); -st.stopBalancer(); + var mongos = st.s0; var shards = [st.shard0, st.shard1]; var coll = mongos.getCollection("foo.bar"); @@ -51,7 +53,7 @@ assert.commandWorked(admin.runCommand({moveChunk: coll.getFullName(), // Insert 100 documents into sharded collection, such that each shard owns 50. // var bulk = coll.initializeUnorderedBulkOp(); -for (i=-50; i<50; i++) { +for (var i = -50; i < 50; i++) { bulk.insert({ _id: i }); } assert.writeOK(bulk.execute()); @@ -211,3 +213,5 @@ assert.commandWorked(admin.runCommand({moveChunk: coll.getFullName(), // TODO Test additional commmands. st.stop(); + +})(); diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js index 03e273c0753..b99f425a899 100644 --- a/src/mongo/shell/shardingtest.js +++ b/src/mongo/shell/shardingtest.js @@ -180,10 +180,16 @@ var ShardingTest = function(params) { this.getServerName = function(dbname) { var x = this.config.databases.findOne({ _id : "" + dbname }); - if (x) + if (x) { return x.primary; - this.config.databases.find().forEach(printjson); - throw Error("couldn't find dbname: " + dbname + " total: " + this.config.databases.count()); + } + + var countDBsFound = 0; + this.config.databases.find().forEach(function(db) { + countDBsFound++; + printjson(db); + }); + throw Error("couldn't find dbname: " + dbname + " total: " + countDBsFound); }; this.getNonPrimaries = function(dbname) { |