summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2020-02-19 14:05:09 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-27 14:58:40 +0000
commitf67a13326a90013aeb28c09f82d4ea2bea49c494 (patch)
tree1797ee436813e4814db23f0de78d314498dc0b0c
parentcc18cf86a9af09110974f2dc1ae5c78505b71aea (diff)
downloadmongo-f67a13326a90013aeb28c09f82d4ea2bea49c494.tar.gz
SERVER-45541 Test interrupting $unionWith.
(cherry picked from commit 54488c22e2ce672a8bdbb2dac68941b958e69b5c)
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml1
-rw-r--r--jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js32
-rw-r--r--jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js112
9 files changed, 140 insertions, 11 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml
index 778501e17ce..ab7878a2753 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml
@@ -116,6 +116,7 @@ selector:
# Uses getmores.
- jstests/concurrency/fsm_workloads/agg_base.js
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/create_capped_collection.js
- jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js
- jstests/concurrency/fsm_workloads/create_index_background.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml
index bc95236fb7c..311380b8e89 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml
@@ -185,6 +185,7 @@ selector:
# Uses getMore. If a kill node happens between the time of creation of cursor (usually by calling
# find or aggregate) and calling getMore(), server will throw CursortNotFound exception.
# We currently do not retry the transaction on this exception.
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/agg_union_with_chunk_migrations.js
- jstests/concurrency/fsm_workloads/create_capped_collection.js
- jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml
index 5c229b91bf6..ded60306b90 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml
@@ -185,6 +185,7 @@ selector:
# Uses getMore. If a terminate happens between the time of creation of cursor (usually by calling
# find or aggregate) and calling getMore(), server will throw CursortNotFound exception.
# We currently do not retry the transaction on this exception.
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/agg_union_with_chunk_migrations.js
- jstests/concurrency/fsm_workloads/create_capped_collection.js
- jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml
index fb2ffb35b1c..c4648343ebb 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml
@@ -166,6 +166,7 @@ selector:
##
# Uses getMore in the same state function as a command not supported in a transaction.
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/list_indexes.js
- jstests/concurrency/fsm_workloads/agg_union_with_chunk_migrations.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml
index efb809269ee..0267727c9d3 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml
@@ -116,6 +116,7 @@ selector:
# Uses getmores.
- jstests/concurrency/fsm_workloads/agg_base.js
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/create_capped_collection.js
- jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js
- jstests/concurrency/fsm_workloads/create_index_background.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
index cf1a0e9eb75..9c373d43505 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
@@ -105,6 +105,7 @@ selector:
# Uses getmores.
- jstests/concurrency/fsm_workloads/agg_base.js
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/create_index_background.js
- jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js
- jstests/concurrency/fsm_workloads/create_index_background_wildcard.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
index 80cc94184fd..9f35ff0d7ee 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
@@ -111,6 +111,7 @@ selector:
# Uses getmores.
- jstests/concurrency/fsm_workloads/agg_base.js
+ - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
- jstests/concurrency/fsm_workloads/create_index_background.js
- jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js
- jstests/concurrency/fsm_workloads/create_index_background_wildcard.js
diff --git a/jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js b/jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js
index bc6c9b48515..d6bdebb5d57 100644
--- a/jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js
+++ b/jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js
@@ -1,6 +1,12 @@
/**
* Tests $out stage of aggregate command concurrently with killOp. Ensures that all the temporary
- * collections created during aggreate command are deleted.
+ * collections created during aggregate command are deleted. If extending this workload, consider
+ * overriding the following:
+ * - $config.states.aggregate: The function to execute the aggregation.
+ * - $config.states.killOp: The function to find the aggregation and kill it. Consider reusing
+ * $config.data.killOpsMatchingFilter to do the deed.
+ * - $config.teardown: If you want any assertion to make sure nothing got leaked or left behind by
+ * the interrupted aggregation.
*
* @tags: [uses_curop_agg_stage]
*/
@@ -15,11 +21,23 @@ var $config = extendWorkload($config, function($config, $super) {
{aggregate: collName, pipeline: [{$out: "interrupt_temp_out"}], cursor: {}});
};
+ // This test sets up aggregations just to tear them down. There's no benefit to using large
+ // documents here, and doing so can increase memory pressure on the test host, so we lower the
+ // document size to 1KB.
+ $config.data.docSize = 1024;
+ $config.data.killOpsMatchingFilter = function killOpsMatchingFilter(db, filter) {
+ const currentOpOutput =
+ db.getSiblingDB('admin').aggregate([{$currentOp: {}}, {$match: filter}]).toArray();
+ for (let op of currentOpOutput) {
+ assert(op.hasOwnProperty('opid'));
+ assertAlways.commandWorked(db.getSiblingDB('admin').killOp(op.opid));
+ }
+ };
$config.states.killOp = function killOp(db, collName) {
// The aggregate command could be running different commands internally (renameCollection,
// insertDocument, etc.) depending on which stage of execution it is in. So, get all the
// operations that are running against the input, output or temp collections.
- const activeCurOpsFilter = {
+ this.killOpsMatchingFilter(db, {
op: "command",
active: true,
$or: [
@@ -31,15 +49,7 @@ var $config = extendWorkload($config, function($config, $super) {
$exists: false
} // Exclude 'drop' command from the filter to make sure that we don't kill the the
// drop command which is responsible for dropping the temporary collection.
- };
-
- const currentOpOutput = db.getSiblingDB('admin')
- .aggregate([{$currentOp: {}}, {$match: activeCurOpsFilter}])
- .toArray();
- for (let op of currentOpOutput) {
- assert(op.hasOwnProperty('opid'));
- assertAlways.commandWorked(db.getSiblingDB('admin').killOp(op.opid));
- }
+ });
};
$config.teardown = function teardown(db, collName, cluster) {
diff --git a/jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js b/jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
new file mode 100644
index 00000000000..912e26e80c0
--- /dev/null
+++ b/jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
@@ -0,0 +1,112 @@
+/**
+ * Tests $unionWith stage of aggregate command concurrently with killOp. Ensures that all cursors
+ * opened on behalf of the $unionWith are killed when interrupted.
+ *
+ * @tags: [
+ * uses_curop_agg_stage,
+ * requires_fcv_44, # Uses $unionWith
+ * ]
+ */
+'use strict';
+load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
+load('jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js'); // for $config
+
+var $config = extendWorkload($config, function($config, $super) {
+ $config.data.commentStr = "agg_unionWith_interrupt_cleanup";
+
+ $config.states.aggregate = function aggregate(db, collName) {
+ // Here we consistently union with the same namespace to benefit from the sharded collection
+ // setup that may have been done in sharded passthroughs.
+ // TODO SERVER-46251 use multiple namespaces.
+ let response = db[collName].runCommand({
+ aggregate: collName,
+ pipeline: [{$unionWith: {coll: collName, pipeline: [{$unionWith: collName}]}}],
+ comment: this.commentStr,
+ // Use a small batch size to ensure these operations open up a cursor and use multiple
+ // getMores. We want to give coverage to interrupting the getMores as well.
+ cursor: {batchSize: this.numDocs / 4}
+ });
+ // Keep iterating the cursor until we exhaust it or we are interrupted.
+ while (response.ok && response.cursor.id != 0) {
+ response = db[collName].runCommand({getMore: response.cursor.id, collection: collName});
+ }
+ if (!response.ok) {
+ // If the interrupt happens just as the cursor is being checked back in, the cursor will
+ // be killed without failing the operation. When this happens, the next getMore will
+ // fail with CursorNotFound.
+ assertWhenOwnColl.contains(
+ response.code, [ErrorCodes.Interrupted, ErrorCodes.CursorNotFound], response);
+ }
+ };
+
+ $config.states.killOp = function killOp(db, collName) {
+ // The aggregate command could be running different sub-aggregates internally depending on
+ // which stage of execution it is in. So we rely on the comment to detect which operations
+ // are eligible to be interrupted, and interrupt those.
+ this.killOpsMatchingFilter(db, {
+ $and: [
+ {active: true},
+ {
+ $or: [
+ {"command.comment": this.commentStr},
+ {"cursor.originatingCommand.comment": this.commentStr},
+ ]
+ }
+ ]
+ });
+ };
+
+ $config.teardown = function teardown(db, collName, cluster) {
+ // Ensure that no operations, cursors, or sub-operations are left active. After
+ // SERVER-46255, we normally expect all operations to be cleaned up safely, but there are
+ // race conditions or possible network blips where the kill won't arrive as expected. We
+ // don't want to block the interrupt thread or the operation itself to wait around to make
+ // sure everything dies correctly, so we just rely on cursor timeouts or session reaps to
+ // cover these rare cases. Here we make sure everything is cleaned up so we avoid hogging
+ // resources for future tests.
+ this.killOpsMatchingFilter(db, {
+ $and: [
+ {active: true},
+ {
+ $or: [
+ {"command.comment": this.commentStr},
+ {"cursor.originatingCommand.comment": this.commentStr},
+ ]
+ }
+ ]
+ });
+ const curOpCursor = db.getSiblingDB("admin").aggregate([
+ {$currentOp: {idleCursors: true}},
+ {$match: {"cursor.originatingCommand.comment": this.commentStr}},
+ {$project: {shard: 1, host: 1, "cursor.cursorId": 1}},
+ ]);
+ while (curOpCursor.hasNext()) {
+ let result = curOpCursor.next();
+ assertAlways.commandWorked(
+ new Mongo(`${result.shard}/${result.host}`).getDB(db.getName()).runCommand({
+ killCursors: collName,
+ cursors: [result.cursor.cursorId]
+ }));
+ }
+ const remainingOps =
+ db.getSiblingDB("admin")
+ .aggregate([
+ {$currentOp: {idleCursors: true}},
+ // Look for any trace of state that wasn't cleaned up.
+ {
+ $match: {
+ $or: [
+ // The originating aggregation or a sub-aggregation still active.
+ {"command.comment": this.commentStr},
+ // An idle cursor left around.
+ {"cursor.originatingCommand.comment": this.commentStr}
+ ]
+ }
+ }
+ ])
+ .toArray();
+ assertAlways.eq(remainingOps.length, 0, remainingOps);
+ };
+
+ return $config;
+});