author     Bernard Gorman <bernard.gorman@mongodb.com>  2019-09-13 10:12:39 +0000
committer  evergreen <evergreen@mongodb.com>  2019-09-13 10:12:39 +0000
commit     89d5c8b10e40648a403f12a55bcb66f2f5bef384 (patch)
tree       3674111f238de4c3bbd7b0ab717cac8bccbb864d /jstests
parent     11eaee8aace14a3b8ba1d7c3ab462e8badc8ffee (diff)
download   mongo-89d5c8b10e40648a403f12a55bcb66f2f5bef384.tar.gz
SERVER-41758 Verify that at least 1 shard exists after hard-reload in aggregation routing path
(cherry picked from commit 1fa4766c621bd4cfd74319094469eff3a5de3b79)
Diffstat (limited to 'jstests')
 -rw-r--r--  jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js    |  28
 -rw-r--r--  jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js  | 110
 2 files changed, 138 insertions(+), 0 deletions(-)
diff --git a/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js
new file mode 100644
index 00000000000..54e6ce913bf
--- /dev/null
+++ b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js
@@ -0,0 +1,28 @@
+/**
+ * Verify that operations which must run on all shards, such as $currentOp and $changeStream, do not
+ * crash when shards are added to the cluster mid-operation, or when config.shards is dropped.
+ *
+ * This test inherits from 'clusterwide_ops_with_add_remove_shards.js' but is kept separate
+ * from it, because (1) we may remove the ability to write to config.shards in the future, at which
+ * point this test can simply be removed; and (2) running a single FSM test with both removeShard
+ * and config.shards.remove({}) can cause the former to hang indefinitely while waiting for the
+ * removed shard to drain.
+ *
+ * @tags: [requires_sharding, requires_non_retryable_writes, catches_command_failures,
+ * uses_change_streams, uses_curop_agg_stage]
+ */
+
+"use strict";
+
+// For base $config setup.
+const baseDir = 'jstests/concurrency/fsm_workloads_add_remove_shards/';
+load(baseDir + 'clusterwide_ops_with_add_remove_shards.js');
+
+// After loading the base file, $config has been populated with states and transitions. We simply
+// overwrite 'states.removeShard' such that it instantly wipes all shards from the cluster rather
+// than removing a single shard via the removeShard command. This is the only way to test that
+// mongoS is resilient to the sudden absence of shards in the middle of an operation, as the
+// removeShard command is not permitted to remove the last existing shard in the cluster.
+$config.states.removeShard = function(db, collName) {
+    assert.commandWorked(db.getSiblingDB("config").shards.remove({}));
+};
diff --git a/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js
new file mode 100644
index 00000000000..b16ddcd319b
--- /dev/null
+++ b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js
@@ -0,0 +1,110 @@
+/**
+ * Verify that operations which must run on all shards, such as $currentOp and $changeStream, do not
+ * crash when shards are added to or removed from the cluster mid-operation.
+ *
+ * @tags: [requires_sharding, requires_non_retryable_writes, catches_command_failures,
+ * uses_change_streams, uses_curop_agg_stage]
+ */
+
+"use strict";
+
+var $config = (function() {
+    // The 'setup' function is run once by the parent thread after the cluster has been initialized,
+    // before the worker threads have been spawned. The 'this' argument is bound as '$config.data'.
+    function setup(db, collName, cluster) {
+        // Obtain the list of shards present in the cluster. Used to remove and restore shards.
+        this.shardList = db.getSiblingDB("config").shards.find().toArray();
+        // Drop the test database. It's not needed and will complicate re-adding shards.
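+        // With no user data on the shards there is nothing to drain, so shard removal and
+        // subsequent re-addition in this workload can proceed cleanly.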
+        assert.commandWorked(db.dropDatabase());
+    }
+
+    // Returns a random integer in the range [0, max).
+    function randomInt(max) {
+        return Math.floor(Math.random() * Math.floor(max));
+    }
+
+    // Helper to close a clusterwide cursor, given a command result.
+    function closeClusterWideCursor(db, res) {
+        if (res.ok) {
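+            // Cursors opened by these cluster-wide aggregations are registered under the
+            // special '$cmd.aggregate' namespace, which is why killCursors targets it.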
+            db.adminCommand({
+                killCursors: "$cmd.aggregate",
+                cursors: [res.cursor.id],
+            });
+        }
+    }
+
+    var states = {
+        runChangeStream: function(db, collName) {
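+            // A whole-cluster change stream must establish a cursor on every shard, so it
+            // exercises the mongoS shard-registry reload path targeted by SERVER-41758.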
+            const res = db.adminCommand({
+                aggregate: 1,
+                pipeline: [{$changeStream: {allChangesForCluster: true}}],
+                cursor: {}
+            });
+            closeClusterWideCursor(db, res);
+        },
+
+        runCurrentOp: function(db, collName) {
+            const res = db.adminCommand({aggregate: 1, pipeline: [{$currentOp: {}}], cursor: {}});
+            closeClusterWideCursor(db, res);
+        },
+
+        removeShard: function(db, collName) {
+            // Make sure that only a single removeShard operation is running at any time.
+            const testLocksColl = db.getSiblingDB("config").testLocks;
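+            // The insert doubles as a mutex: every collection has a unique index on _id, so
+            // a concurrent insert of the same lock document fails and reports nInserted: 0.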
+            if (!testLocksColl.insert({_id: "removeShard"}).nInserted) {
+                return;
+            }
+            // Iterate until we successfully remove a shard or run out of shards.
+            for (let shardIdx = 0; shardIdx < this.shardList.length; ++shardIdx) {
+                const shardName = this.shardList[shardIdx]._id;
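+                // removeShard reports state "started" the first time it is invoked for a
+                // given shard, and "ongoing" or "completed" on subsequent invocations.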
+                if (db.adminCommand({removeShard: shardName}).state === "started") {
+                    break;
+                }
+            }
+            // Remove the lock document so that other threads can call removeShard.
+            assert.commandWorked(testLocksColl.remove({_id: "removeShard"}));
+        },
+
+        addShard: function addShard(db, collName) {
+            const shardIdx = randomInt(this.shardList.length);
+            const shardEntry = this.shardList[shardIdx];
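+            // The result is deliberately not asserted: addShard may fail benignly here, for
+            // example if the chosen shard is still draining from a concurrent removeShard.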
+            db.adminCommand({addShard: shardEntry.host, name: shardEntry._id});
+        },
+
+        init: function(db, collName) {
+            // Do nothing. This is only used to randomize the first action taken by each worker.
+        }
+    };
+
+    const transitionProbabilities =
+        {runChangeStream: 0.25, runCurrentOp: 0.25, removeShard: 0.25, addShard: 0.25};
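+    // Every state moves to each of the four active states with equal probability, so the
+    // cluster-wide reads interleave randomly with concurrent shard membership changes.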
+    var transitions = {
+        init: transitionProbabilities,
+        runChangeStream: transitionProbabilities,
+        runCurrentOp: transitionProbabilities,
+        removeShard: transitionProbabilities,
+        addShard: transitionProbabilities
+    };
+
+    // The 'teardown' function is run once by the parent thread before the cluster is destroyed, but
+    // after the worker threads have been reaped. The 'this' argument is bound as '$config.data'.
+    function teardown(db, collName, cluster) {
+        // If any shards are draining, unset them so we don't impact subsequent tests.
+        db.getSiblingDB("config").shards.update({}, {$unset: {draining: 1}}, {multi: true});
+        // Ensure that all shards are present in the cluster before shutting down the ShardingTest.
+        for (let shardEntry of this.shardList) {
+            assert.soon(() => db.adminCommand({addShard: shardEntry.host, name: shardEntry._id}).ok,
+                        `failed to add shard ${shardEntry._id} back into cluster at end of test`);
+        }
+    }
+
+    return {
+        threadCount: 100,
+        iterations: 1000,
+        startState: "init",
+        states: states,
+        transitions: transitions,
+        setup: setup,
+        teardown: teardown
+    };
+})();
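
A note on running these workloads standalone: the concurrency framework's runner in
jstests/concurrency/fsm_libs/runner.js can execute a workload file directly. The sketch
below is a minimal example under two assumptions, namely that the runWorkloadsSerially()
entry point and the {sharded: {enabled: true}} cluster-options shape match the framework
on this branch.

// Minimal sketch (assumptions noted above): run the add/remove-shards workload
// serially against a runner-provisioned sharded cluster.
load('jstests/concurrency/fsm_libs/runner.js');
runWorkloadsSerially(
    ['jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js'],
    {sharded: {enabled: true}});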