From c9349a22f68fac52f6056fb08ea3ce0993dd8cbe Mon Sep 17 00:00:00 2001 From: Vesselina Ratcheva Date: Wed, 16 Oct 2019 18:45:05 +0000 Subject: SERVER-39993 Add kill and terminate versions of concurrency stepdown suites (cherry picked from commit fae9a778eac1ee6ce082c82bf8d9eddac16eb3d9) --- ...currency_sharded_kill_primary_with_balancer.yml | 245 +++++++++++++++++ ...urrency_sharded_multi_stmt_txn_kill_primary.yml | 291 +++++++++++++++++++++ ...cy_sharded_multi_stmt_txn_terminate_primary.yml | 291 +++++++++++++++++++++ ...ncy_sharded_terminate_primary_with_balancer.yml | 245 +++++++++++++++++ etc/evergreen.yml | 60 ++++- 5 files changed, 1122 insertions(+), 10 deletions(-) create mode 100644 buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml create mode 100644 buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml create mode 100644 buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml create mode 100644 buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml new file mode 100644 index 00000000000..0fe1e5084fe --- /dev/null +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml @@ -0,0 +1,245 @@ +test_kind: fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_files: + # SERVER-13116 distinct isn't sharding aware + - jstests/concurrency/fsm_workloads/distinct.js + - jstests/concurrency/fsm_workloads/distinct_noindex.js + - jstests/concurrency/fsm_workloads/distinct_projection.js + + # SERVER-17397 Drops of sharded namespaces may not fully succeed + - jstests/concurrency/fsm_workloads/create_database.js + - jstests/concurrency/fsm_workloads/drop_database.js + + # 
SERVER-14669 Multi-removes that use $where miscount removed documents + - jstests/concurrency/fsm_workloads/remove_where.js + + # Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded + # collections'. This bug is problematic for these workloads because they assert on count() + # values: + - jstests/concurrency/fsm_workloads/agg_match.js + + # $lookup and $graphLookup are not supported on sharded collections. + - jstests/concurrency/fsm_workloads/agg_graph_lookup.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_lookup.js + + # Disabled due to SERVER-20057, 'Concurrent, sharded mapReduces can fail when temporary + # namespaces collide across mongos processes' + - jstests/concurrency/fsm_workloads/map_reduce_drop.js + - jstests/concurrency/fsm_workloads/map_reduce_inline.js + - jstests/concurrency/fsm_workloads/map_reduce_interrupt.js + - jstests/concurrency/fsm_workloads/map_reduce_merge.js + - jstests/concurrency/fsm_workloads/map_reduce_merge_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_replace.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_nonexistent.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_remove.js + + # Disabled due to MongoDB restrictions and/or workload restrictions + + # These workloads sometimes trigger 'Could not lock auth data update lock' + # errors because the AuthorizationManager currently waits for only five + # seconds to acquire the lock for authorization documents + - jstests/concurrency/fsm_workloads/auth_create_role.js + - jstests/concurrency/fsm_workloads/auth_create_user.js + - jstests/concurrency/fsm_workloads/auth_drop_role.js + - jstests/concurrency/fsm_workloads/auth_drop_user.js + + # uses >100MB of data, which can overwhelm test hosts + - jstests/concurrency/fsm_workloads/agg_group_external.js + - 
jstests/concurrency/fsm_workloads/agg_sort_external.js + + # compact can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/compact.js + - jstests/concurrency/fsm_workloads/compact_simultaneous_padding_bytes.js + + # convertToCapped can't be run on mongos processes + - jstests/concurrency/fsm_workloads/convert_to_capped_collection.js + - jstests/concurrency/fsm_workloads/convert_to_capped_collection_index.js + + # findAndModify requires a shard key + - jstests/concurrency/fsm_workloads/findAndModify_mixed_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_update_collscan.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue_unindexed.js + + # remove cannot be {} for findAndModify + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js + + # can cause OOM kills on test hosts + - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js + + + # cannot ensureIndex after dropDatabase without sharding first + - jstests/concurrency/fsm_workloads/plan_cache_drop_database.js + + # our .remove(query, {justOne: true}) calls lack shard keys + - jstests/concurrency/fsm_workloads/remove_single_document.js + + # The rename_* workloads are disabled since renameCollection doesn't work with sharded + # collections + - jstests/concurrency/fsm_workloads/rename_capped_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_droptarget.js + - 
jstests/concurrency/fsm_workloads/rename_collection_droptarget.js + + # cannot use upsert command with $where with sharded collections + - jstests/concurrency/fsm_workloads/upsert_where.js + + # stagedebug can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/yield_and_hashed.js + - jstests/concurrency/fsm_workloads/yield_and_sorted.js + + # ChunkHelper directly talks to the config servers and doesn't support retries for network errors + - jstests/concurrency/fsm_workloads/sharded_base_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_mergeChunks_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_drop_shard_key_index.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_splitChunk_partitioned.js + + # These workloads frequently time out waiting for the distributed lock to drop a sharded + # collection. + - jstests/concurrency/fsm_workloads/kill_aggregation.js + - jstests/concurrency/fsm_workloads/kill_rooted_or.js + + # Uses getMore commands. 
+ - jstests/concurrency/fsm_workloads/agg_base.js + - jstests/concurrency/fsm_workloads/create_capped_collection.js + - jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js + - jstests/concurrency/fsm_workloads/create_index_background.js + - jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js + - jstests/concurrency/fsm_workloads/create_index_background_wildcard.js + - jstests/concurrency/fsm_workloads/globally_managed_cursors.js + - jstests/concurrency/fsm_workloads/indexed_insert_ordered_bulk.js + - jstests/concurrency/fsm_workloads/indexed_insert_text.js + - jstests/concurrency/fsm_workloads/indexed_insert_unordered_bulk.js + - jstests/concurrency/fsm_workloads/indexed_insert_upsert.js + - jstests/concurrency/fsm_workloads/indexed_insert_where.js + - jstests/concurrency/fsm_workloads/list_indexes.js + - jstests/concurrency/fsm_workloads/reindex.js + - jstests/concurrency/fsm_workloads/reindex_background.js + - jstests/concurrency/fsm_workloads/remove_multiple_documents.js + - jstests/concurrency/fsm_workloads/server_status_with_time_out_cursors.js + - jstests/concurrency/fsm_workloads/touch_base.js + - jstests/concurrency/fsm_workloads/touch_data.js + - jstests/concurrency/fsm_workloads/touch_index.js + - jstests/concurrency/fsm_workloads/touch_no_data_no_index.js + - jstests/concurrency/fsm_workloads/update_where.js + - jstests/concurrency/fsm_workloads/yield.js + - jstests/concurrency/fsm_workloads/yield_fetch.js + - jstests/concurrency/fsm_workloads/yield_rooted_or.js + - jstests/concurrency/fsm_workloads/yield_sort.js + - jstests/concurrency/fsm_workloads/yield_sort_merge.js + - jstests/concurrency/fsm_workloads/yield_text.js + + # Uses non-retryable writes. 
+ - jstests/concurrency/fsm_workloads/remove_and_bulk_insert.js + - jstests/concurrency/fsm_workloads/update_and_bulk_insert.js + - jstests/concurrency/fsm_workloads/update_check_index.js + - jstests/concurrency/fsm_workloads/update_multifield_multiupdate.js + - jstests/concurrency/fsm_workloads/update_multifield_multiupdate_noindex.js + - jstests/concurrency/fsm_workloads/update_ordered_bulk_inc.js + - jstests/concurrency/fsm_workloads/yield_id_hack.js + + # Uses non-retryable commands. + - jstests/concurrency/fsm_workloads/agg_out.js + - jstests/concurrency/fsm_workloads/agg_sort.js + - jstests/concurrency/fsm_workloads/collmod.js + - jstests/concurrency/fsm_workloads/collmod_separate_collections.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod_capped.js + - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js + - jstests/concurrency/fsm_workloads/invalidated_cursors.js + - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js + - jstests/concurrency/fsm_workloads/view_catalog.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_with_drop.js + - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js + + # The aggregation stage $currentOp cannot run with a readConcern other than 'local'. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_current_op.js + + # The auto_retry_on_network_error.js override needs to overwrite the response from drop on + # NamespaceNotFound, and since this workload only creates and drops collections there isn't + # much value in running it. + - jstests/concurrency/fsm_workloads/drop_collection.js + + # TODO (SERVER-35534) Unblacklist this workload from the concurrency stepdown suites. It fails + # with PooledConnectionsDropped when setFCV is run concurrently with movePrimary, which seems like + # it's due to a race condition in the NetworkInterfaceTL. 
+ - jstests/concurrency/fsm_workloads/database_versioning.js + + # serverStatus does not include transaction metrics on mongos. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_metrics_test.js + + # Uses the same transaction id across different routers, which is not allowed because when either + # router tries to commit, it may not know the full participant list. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_all_commands_same_session.js + + # Use non-retryable commands not allowed by the network retry helper. + - jstests/concurrency/fsm_workloads/snapshot_read_kill_op_only.js + - jstests/concurrency/fsm_workloads/snapshot_read_kill_operations.js + + exclude_with_any_tags: + - requires_replication + - requires_non_retryable_writes + +executor: + archive: + hooks: + - CheckReplDBHash + - ValidateCollections + tests: true + config: + shell_options: + readMode: commands + global_vars: + TestData: + runningWithConfigStepdowns: true + runningWithShardStepdowns: true + useStepdownPermittedFile: true + usingReplicaSetShards: true + hooks: + - class: ContinuousStepdown + config_stepdown: true + shard_stepdown: true + use_stepdown_permitted_file: true + wait_for_mongos_retarget: true + kill: true + - class: CheckReplDBHash + - class: ValidateCollections + - class: CleanupConcurrencyWorkloads + fixture: + class: ShardedClusterFixture + mongos_options: + set_parameters: + enableTestCommands: 1 + configsvr_options: + num_nodes: 3 + all_nodes_electable: true + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + shard_options: + all_nodes_electable: true + mongod_options: + oplogSize: 1024 + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + mongod_options: + set_parameters: + enableTestCommands: 1 + enableElectionHandoff: 0 + num_rs_nodes_per_shard: 3 + num_shards: 2 + num_mongos: 2 diff --git 
a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml new file mode 100644 index 00000000000..eaec2f94d69 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml @@ -0,0 +1,291 @@ +# This suite is intended to exercise transaction behavior with failovers by running existing +# concurrency workloads with state functions wrapped in transactions while shard and config +# primaries are being killed. State functions that use operations not supported in a transaction +# are not wrapped in one. + +test_kind: fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_files: + ## + # Blacklists from concurrency_sharded_replication + ## + + # SERVER-13116 distinct isn't sharding aware + - jstests/concurrency/fsm_workloads/distinct.js + - jstests/concurrency/fsm_workloads/distinct_noindex.js + - jstests/concurrency/fsm_workloads/distinct_projection.js + + # SERVER-17397 Drops of sharded namespaces may not fully succeed + - jstests/concurrency/fsm_workloads/create_database.js + - jstests/concurrency/fsm_workloads/drop_database.js + + # Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded + # collections'. This bug is problematic for these workloads because they assert on count() + # values: + - jstests/concurrency/fsm_workloads/agg_match.js + + # $lookup and $graphLookup are not supported on sharded collections. 
+ - jstests/concurrency/fsm_workloads/agg_graph_lookup.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_lookup.js + + # Disabled due to SERVER-20057, 'Concurrent, sharded mapReduces can fail when temporary + # namespaces collide across mongos processes' + - jstests/concurrency/fsm_workloads/map_reduce_drop.js + - jstests/concurrency/fsm_workloads/map_reduce_inline.js + - jstests/concurrency/fsm_workloads/map_reduce_interrupt.js + - jstests/concurrency/fsm_workloads/map_reduce_merge.js + - jstests/concurrency/fsm_workloads/map_reduce_merge_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_replace.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_nonexistent.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_remove.js + + # Disabled due to MongoDB restrictions and/or workload restrictions + + # These workloads sometimes trigger 'Could not lock auth data update lock' + # errors because the AuthorizationManager currently waits for only five + # seconds to acquire the lock for authorization documents + - jstests/concurrency/fsm_workloads/auth_create_role.js + - jstests/concurrency/fsm_workloads/auth_create_user.js + - jstests/concurrency/fsm_workloads/auth_drop_role.js + - jstests/concurrency/fsm_workloads/auth_drop_user.js + + # uses >100MB of data, which can overwhelm test hosts + - jstests/concurrency/fsm_workloads/agg_group_external.js + - jstests/concurrency/fsm_workloads/agg_sort_external.js + + # compact can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/compact.js + - jstests/concurrency/fsm_workloads/compact_simultaneous_padding_bytes.js + + # convertToCapped can't be run on mongos processes + - jstests/concurrency/fsm_workloads/convert_to_capped_collection.js + - jstests/concurrency/fsm_workloads/convert_to_capped_collection_index.js + + # findAndModify requires 
a shard key + - jstests/concurrency/fsm_workloads/findAndModify_mixed_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_update_collscan.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue_unindexed.js + + # remove cannot be {} for findAndModify + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js + + # can cause OOM kills on test hosts + - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js + + # cannot ensureIndex after dropDatabase without sharding first + - jstests/concurrency/fsm_workloads/plan_cache_drop_database.js + + # reIndex is not supported in mongos. + - jstests/concurrency/fsm_workloads/reindex.js + - jstests/concurrency/fsm_workloads/reindex_background.js + + # our .remove(query, {justOne: true}) calls lack shard keys + - jstests/concurrency/fsm_workloads/remove_single_document.js + + # The rename_* workloads are disabled since renameCollection doesn't work with sharded + # collections + - jstests/concurrency/fsm_workloads/rename_capped_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_droptarget.js + + # cannot use upsert command with $where with sharded collections + - jstests/concurrency/fsm_workloads/upsert_where.js + + # stagedebug can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/yield_and_hashed.js + - 
jstests/concurrency/fsm_workloads/yield_and_sorted.js + + # TODO Unblacklist (SERVER-35538). + - jstests/concurrency/fsm_workloads/database_versioning.js + + # TODO Unblacklist (SERVER-38852). + - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js + + # serverStatus does not include transaction metrics on mongos. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_metrics_test.js + + # Uses the same transaction id across different routers, which is not allowed because when either + # router tries to commit, it may not know the full participant list. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_all_commands_same_session.js + + ## + # Blacklists from concurrency_replication_multi_stmt_txn + ## + + # Relies on having one thread observe writes from the other threads, which won't become visible + # once a transaction in the thread is started because it'll keep reading from the same snapshot. + - jstests/concurrency/fsm_workloads/create_index_background.js + - jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js + - jstests/concurrency/fsm_workloads/create_index_background_wildcard.js + + # Expects reads to die with a particular error, but other errors are possible if the read is part + # of a transaction (e.g. ErrorCodes.LockTimeout). + - jstests/concurrency/fsm_workloads/drop_index_during_replan.js + + # Performs direct writes to system.views + - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js + + ## + # Blacklists from concurrency_sharded_multi_stmt_txn + ## + + # Use updates that do not contain the shard key, so they are rejected before any commands are + # sent to participant shards, but these workloads do not fail, so the auto retry transaction logic + # attempts to commit, which fails because no participants have been contacted. 
+ - jstests/concurrency/fsm_workloads/update_rename.js + - jstests/concurrency/fsm_workloads/update_rename_noindex.js + + # Expects DocumentValidationFailure, which causes the transaction to abort and retry indefinitely. + - jstests/concurrency/fsm_workloads/schema_validator_with_expr_variables.js + + ## + # Blacklists from concurrency_sharded_with_stepdowns + ## + + # ChunkHelper directly talks to the config servers and doesn't support retries for network errors + - jstests/concurrency/fsm_workloads/sharded_base_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_mergeChunks_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_drop_shard_key_index.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_splitChunk_partitioned.js + + # These workloads frequently time out waiting for the distributed lock to drop a sharded + # collection. + - jstests/concurrency/fsm_workloads/kill_aggregation.js + - jstests/concurrency/fsm_workloads/kill_rooted_or.js + + # Uses non-retryable commands. 
+ - jstests/concurrency/fsm_workloads/agg_out.js + - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js + - jstests/concurrency/fsm_workloads/agg_sort.js + - jstests/concurrency/fsm_workloads/collmod.js + - jstests/concurrency/fsm_workloads/collmod_separate_collections.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod_capped.js + - jstests/concurrency/fsm_workloads/invalidated_cursors.js + - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js + - jstests/concurrency/fsm_workloads/view_catalog.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_with_drop.js + - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js + + ## + # Blacklists specific to this suite + ## + + # Uses getMore in the same state function as a command not supported in a transaction. + - jstests/concurrency/fsm_workloads/list_indexes.js + - jstests/concurrency/fsm_workloads/create_capped_collection.js + - jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js + + # Uses non-retryable commands in the same state function as a command not supported in a + # transaction. + - jstests/concurrency/fsm_workloads/agg_merge_when_not_matched_insert.js + - jstests/concurrency/fsm_workloads/agg_merge_when_matched_replace_with_new.js + + # TODO SERVER-40713 moveChunk is not considered retryable by the network retry override. + - jstests/concurrency/fsm_workloads/agg_with_chunk_migrations.js + - jstests/concurrency/fsm_workloads/random_moveChunk_broadcast_delete_transaction.js + - jstests/concurrency/fsm_workloads/random_moveChunk_broadcast_update_transaction.js + + # TODO SERVER-40493: JS engine interruptions on mongos return ErrorCodes::Interrupted, which isn't + # considered a retryable or transient transaction error. 
+ - jstests/concurrency/fsm_workloads/indexed_insert_where.js + - jstests/concurrency/fsm_workloads/remove_where.js + - jstests/concurrency/fsm_workloads/update_where.js + + # TODO SERVER-40705: Stepdown with prepared transaction that performed text search triggers + # invariant + - jstests/concurrency/fsm_workloads/indexed_insert_text.js + - jstests/concurrency/fsm_workloads/indexed_insert_text_multikey.js + + exclude_with_any_tags: + - requires_replication + # The touch command does not exist on mongos, which is enforced before session information is + # parsed, causing state functions in these workloads to fail before starting a transaction despite + # sending a command with startTransaction=true, leading to an infinite loop in the auto + # transaction retry override. + - requires_touch + # Sharing cursors between state functions will fail in this suite because it will attempt to use + # the same cursor in multiple transactions. + - state_functions_share_cursor + # These start a transaction in one state function and use it in other state functions. This suite + # would instead execute each state function as its own transaction. + - state_functions_share_transaction + # Note that "requires_non_retryable_writes" does not need to be blacklisted because some writes + # that are not individually retryable can be retried if they are part of a transaction. 
+ +executor: + archive: + hooks: + - CheckReplDBHash + - ValidateCollections + tests: true + config: + shell_options: + readMode: commands + global_vars: + TestData: + runInsideTransaction: true + runningWithAutoSplit: false + runningWithBalancer: false + runningWithConfigStepdowns: true + runningWithSessions: true + runningWithShardStepdowns: true + traceExceptions: false + useStepdownPermittedFile: true + usingReplicaSetShards: true + hooks: + # We use a stepdown interval of 15 seconds because we will retry all commands in a transaction + # so we need to allow time for at most 10 operations to be re-run and then re-committed. If + # too many network errors occur when re-running a transaction we will run out of retries. + - class: ContinuousStepdown + config_stepdown: true + kill: true + shard_stepdown: true + stepdown_interval_ms: 15000 + use_stepdown_permitted_file: true + wait_for_mongos_retarget: true + - class: CheckReplDBHash + - class: ValidateCollections + - class: CleanupConcurrencyWorkloads + fixture: + class: ShardedClusterFixture + enable_balancer: false + enable_autosplit: false + mongos_options: + set_parameters: + enableTestCommands: 1 + configsvr_options: + num_nodes: 3 + all_nodes_electable: true + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + shard_options: + all_nodes_electable: true + mongod_options: + oplogSize: 1024 + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + mongod_options: + set_parameters: + enableTestCommands: 1 + enableElectionHandoff: 0 + num_rs_nodes_per_shard: 3 + num_shards: 2 + num_mongos: 2 diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml new file mode 100644 index 00000000000..be56c7f0336 --- /dev/null +++ 
b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml @@ -0,0 +1,291 @@ +# This suite is intended to exercise transaction behavior with failovers by running existing +# concurrency workloads with state functions wrapped in transactions while shard and config +# primaries are being terminated. State functions that use operations not supported in a +# transaction are not wrapped in one. + +test_kind: fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_files: + ## + # Blacklists from concurrency_sharded_replication + ## + + # SERVER-13116 distinct isn't sharding aware + - jstests/concurrency/fsm_workloads/distinct.js + - jstests/concurrency/fsm_workloads/distinct_noindex.js + - jstests/concurrency/fsm_workloads/distinct_projection.js + + # SERVER-17397 Drops of sharded namespaces may not fully succeed + - jstests/concurrency/fsm_workloads/create_database.js + - jstests/concurrency/fsm_workloads/drop_database.js + + # Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded + # collections'. This bug is problematic for these workloads because they assert on count() + # values: + - jstests/concurrency/fsm_workloads/agg_match.js + + # $lookup and $graphLookup are not supported on sharded collections. 
+ - jstests/concurrency/fsm_workloads/agg_graph_lookup.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_lookup.js + + # Disabled due to SERVER-20057, 'Concurrent, sharded mapReduces can fail when temporary + # namespaces collide across mongos processes' + - jstests/concurrency/fsm_workloads/map_reduce_drop.js + - jstests/concurrency/fsm_workloads/map_reduce_inline.js + - jstests/concurrency/fsm_workloads/map_reduce_interrupt.js + - jstests/concurrency/fsm_workloads/map_reduce_merge.js + - jstests/concurrency/fsm_workloads/map_reduce_merge_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_replace.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_nonexistent.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_remove.js + + # Disabled due to MongoDB restrictions and/or workload restrictions + + # These workloads sometimes trigger 'Could not lock auth data update lock' + # errors because the AuthorizationManager currently waits for only five + # seconds to acquire the lock for authorization documents + - jstests/concurrency/fsm_workloads/auth_create_role.js + - jstests/concurrency/fsm_workloads/auth_create_user.js + - jstests/concurrency/fsm_workloads/auth_drop_role.js + - jstests/concurrency/fsm_workloads/auth_drop_user.js + + # uses >100MB of data, which can overwhelm test hosts + - jstests/concurrency/fsm_workloads/agg_group_external.js + - jstests/concurrency/fsm_workloads/agg_sort_external.js + + # compact can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/compact.js + - jstests/concurrency/fsm_workloads/compact_simultaneous_padding_bytes.js + + # convertToCapped can't be run on mongos processes + - jstests/concurrency/fsm_workloads/convert_to_capped_collection.js + - jstests/concurrency/fsm_workloads/convert_to_capped_collection_index.js + + # findAndModify requires 
a shard key + - jstests/concurrency/fsm_workloads/findAndModify_mixed_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_update_collscan.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue_unindexed.js + + # remove cannot be {} for findAndModify + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js + + # can cause OOM kills on test hosts + - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js + + # cannot ensureIndex after dropDatabase without sharding first + - jstests/concurrency/fsm_workloads/plan_cache_drop_database.js + + # reIndex is not supported in mongos. + - jstests/concurrency/fsm_workloads/reindex.js + - jstests/concurrency/fsm_workloads/reindex_background.js + + # our .remove(query, {justOne: true}) calls lack shard keys + - jstests/concurrency/fsm_workloads/remove_single_document.js + + # The rename_* workloads are disabled since renameCollection doesn't work with sharded + # collections + - jstests/concurrency/fsm_workloads/rename_capped_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_droptarget.js + + # cannot use upsert command with $where with sharded collections + - jstests/concurrency/fsm_workloads/upsert_where.js + + # stagedebug can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/yield_and_hashed.js + - 
jstests/concurrency/fsm_workloads/yield_and_sorted.js + + # TODO Unblacklist (SERVER-35538). + - jstests/concurrency/fsm_workloads/database_versioning.js + + # TODO Unblacklist (SERVER-38852). + - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js + + # serverStatus does not include transaction metrics on mongos. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_metrics_test.js + + # Uses the same transaction id across different routers, which is not allowed because when either + # router tries to commit, it may not know the full participant list. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_all_commands_same_session.js + + ## + # Blacklists from concurrency_replication_multi_stmt_txn + ## + + # Relies on having one thread observe writes from the other threads, which won't become visible + # once a transaction in the thread is started because it'll keep reading from the same snapshot. + - jstests/concurrency/fsm_workloads/create_index_background.js + - jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js + - jstests/concurrency/fsm_workloads/create_index_background_wildcard.js + + # Expects reads to die with a particular error, but other errors are possible if the read is part + # of a transaction (e.g. ErrorCodes.LockTimeout). + - jstests/concurrency/fsm_workloads/drop_index_during_replan.js + + # Performs direct writes to system.views + - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js + + ## + # Blacklists from concurrency_sharded_multi_stmt_txn + ## + + # Use updates that do not contain the shard key, so they are rejected before any commands are + # sent to participant shards, but these workloads do not fail, so the auto retry transaction logic + # attempts to commit, which fails because no participants have been contacted. 
+ - jstests/concurrency/fsm_workloads/update_rename.js + - jstests/concurrency/fsm_workloads/update_rename_noindex.js + + # Expects DocumentValidationFailure which causes transaction to abort and retry indefinitely. + - jstests/concurrency/fsm_workloads/schema_validator_with_expr_variables.js + + ## + # Blacklists from concurrency_sharded_with_stepdowns + ## + + # ChunkHelper directly talks to the config servers and doesn't support retries for network errors + - jstests/concurrency/fsm_workloads/sharded_base_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_mergeChunks_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_drop_shard_key_index.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_splitChunk_partitioned.js + + # These workloads frequently time out waiting for the distributed lock to drop a sharded + # collection. + - jstests/concurrency/fsm_workloads/kill_aggregation.js + - jstests/concurrency/fsm_workloads/kill_rooted_or.js + + # Uses non retryable commands. 
+ - jstests/concurrency/fsm_workloads/agg_out.js + - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js + - jstests/concurrency/fsm_workloads/agg_sort.js + - jstests/concurrency/fsm_workloads/collmod.js + - jstests/concurrency/fsm_workloads/collmod_separate_collections.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod_capped.js + - jstests/concurrency/fsm_workloads/invalidated_cursors.js + - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js + - jstests/concurrency/fsm_workloads/view_catalog.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_with_drop.js + - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js + + ## + # Blacklists specific to this suite + ## + + # Uses getMore in the same state function as a command not supported in a transaction. + - jstests/concurrency/fsm_workloads/list_indexes.js + - jstests/concurrency/fsm_workloads/create_capped_collection.js + - jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js + + # Uses non-retryable commands in the same state function as a command not supported in a + # transaction. + - jstests/concurrency/fsm_workloads/agg_merge_when_not_matched_insert.js + - jstests/concurrency/fsm_workloads/agg_merge_when_matched_replace_with_new.js + + # TODO SERVER-40713 moveChunk is not considered retryable by the network retry override. + - jstests/concurrency/fsm_workloads/agg_with_chunk_migrations.js + - jstests/concurrency/fsm_workloads/random_moveChunk_broadcast_delete_transaction.js + - jstests/concurrency/fsm_workloads/random_moveChunk_broadcast_update_transaction.js + + # TODO SERVER-40493: JS engine interruptions on mongos return ErrorCodes::Interrupted, which isn't + # considered a retryable or transient transaction error. 
+ - jstests/concurrency/fsm_workloads/indexed_insert_where.js + - jstests/concurrency/fsm_workloads/remove_where.js + - jstests/concurrency/fsm_workloads/update_where.js + + # TODO SERVER-40705: Stepdown with prepared transaction that performed text search triggers + # invariant + - jstests/concurrency/fsm_workloads/indexed_insert_text.js + - jstests/concurrency/fsm_workloads/indexed_insert_text_multikey.js + + exclude_with_any_tags: + - requires_replication + # The touch command does not exist on mongos, which is enforced before session information is + # parsed, causing state functions in these workloads to fail before starting a transaction despite + # sending a command with startTransaction=true, leading to an infinite loop in the auto + # transaction retry override. + - requires_touch + # Sharing cursors between state functions will fail in this suite because it will attempt to use + # the same cursor in multiple transactions. + - state_functions_share_cursor + # These start a transaction in one state function and use it in other state functions. This suite + # would instead execute each state function as its own transaction. + - state_functions_share_transaction + # Note that "requires_non_retryable_writes" does not need to be blacklisted because some writes + # that are not individually retryable can be retried if they are part of a transaction. 
+ +executor: + archive: + hooks: + - CheckReplDBHash + - ValidateCollections + tests: true + config: + shell_options: + readMode: commands + global_vars: + TestData: + runInsideTransaction: true + runningWithAutoSplit: false + runningWithBalancer: false + runningWithConfigStepdowns: true + runningWithSessions: true + runningWithShardStepdowns: true + traceExceptions: false + useStepdownPermittedFile: true + usingReplicaSetShards: true + hooks: + # We use a stepdown interval of 15 seconds because we will retry all commands in a transaction + # so we need to allow time for at most 10 operations to be re-run and then re-committed. If + # too many network errors occur when re-running a transaction we will run out of retries. + - class: ContinuousStepdown + config_stepdown: true + shard_stepdown: true + stepdown_interval_ms: 15000 + terminate: true + use_stepdown_permitted_file: true + wait_for_mongos_retarget: true + - class: CheckReplDBHash + - class: ValidateCollections + - class: CleanupConcurrencyWorkloads + fixture: + class: ShardedClusterFixture + enable_balancer: false + enable_autosplit: false + mongos_options: + set_parameters: + enableTestCommands: 1 + configsvr_options: + num_nodes: 3 + all_nodes_electable: true + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + shard_options: + all_nodes_electable: true + mongod_options: + oplogSize: 1024 + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + mongod_options: + set_parameters: + enableTestCommands: 1 + enableElectionHandoff: 0 + num_rs_nodes_per_shard: 3 + num_shards: 2 + num_mongos: 2 diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml new file mode 100644 index 00000000000..68b55ab429c --- /dev/null +++ 
b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml @@ -0,0 +1,245 @@ +test_kind: fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_files: + # SERVER-13116 distinct isn't sharding aware + - jstests/concurrency/fsm_workloads/distinct.js + - jstests/concurrency/fsm_workloads/distinct_noindex.js + - jstests/concurrency/fsm_workloads/distinct_projection.js + + # SERVER-17397 Drops of sharded namespaces may not fully succeed + - jstests/concurrency/fsm_workloads/create_database.js + - jstests/concurrency/fsm_workloads/drop_database.js + + # SERVER-14669 Multi-removes that use $where miscount removed documents + - jstests/concurrency/fsm_workloads/remove_where.js + + # Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded + # collections'. This bug is problematic for these workloads because they assert on count() + # values: + - jstests/concurrency/fsm_workloads/agg_match.js + + # $lookup and $graphLookup are not supported on sharded collections. 
+ - jstests/concurrency/fsm_workloads/agg_graph_lookup.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_lookup.js + + # Disabled due to SERVER-20057, 'Concurrent, sharded mapReduces can fail when temporary + # namespaces collide across mongos processes' + - jstests/concurrency/fsm_workloads/map_reduce_drop.js + - jstests/concurrency/fsm_workloads/map_reduce_inline.js + - jstests/concurrency/fsm_workloads/map_reduce_interrupt.js + - jstests/concurrency/fsm_workloads/map_reduce_merge.js + - jstests/concurrency/fsm_workloads/map_reduce_merge_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce.js + - jstests/concurrency/fsm_workloads/map_reduce_reduce_nonatomic.js + - jstests/concurrency/fsm_workloads/map_reduce_replace.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_nonexistent.js + - jstests/concurrency/fsm_workloads/map_reduce_replace_remove.js + + # Disabled due to MongoDB restrictions and/or workload restrictions + + # These workloads sometimes trigger 'Could not lock auth data update lock' + # errors because the AuthorizationManager currently waits for only five + # seconds to acquire the lock for authorization documents + - jstests/concurrency/fsm_workloads/auth_create_role.js + - jstests/concurrency/fsm_workloads/auth_create_user.js + - jstests/concurrency/fsm_workloads/auth_drop_role.js + - jstests/concurrency/fsm_workloads/auth_drop_user.js + + # uses >100MB of data, which can overwhelm test hosts + - jstests/concurrency/fsm_workloads/agg_group_external.js + - jstests/concurrency/fsm_workloads/agg_sort_external.js + + # compact can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/compact.js + - jstests/concurrency/fsm_workloads/compact_simultaneous_padding_bytes.js + + # convertToCapped can't be run on mongos processes + - jstests/concurrency/fsm_workloads/convert_to_capped_collection.js + - jstests/concurrency/fsm_workloads/convert_to_capped_collection_index.js + + # findAndModify requires 
a shard key + - jstests/concurrency/fsm_workloads/findAndModify_mixed_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue_unindexed.js + - jstests/concurrency/fsm_workloads/findAndModify_update_collscan.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue.js + - jstests/concurrency/fsm_workloads/findAndModify_update_queue_unindexed.js + + # remove cannot be {} for findAndModify + - jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js + + # can cause OOM kills on test hosts + - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js + + + # cannot ensureIndex after dropDatabase without sharding first + - jstests/concurrency/fsm_workloads/plan_cache_drop_database.js + + # our .remove(query, {justOne: true}) calls lack shard keys + - jstests/concurrency/fsm_workloads/remove_single_document.js + + # The rename_* workloads are disabled since renameCollection doesn't work with sharded + # collections + - jstests/concurrency/fsm_workloads/rename_capped_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_capped_collection_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_chain.js + - jstests/concurrency/fsm_workloads/rename_collection_dbname_droptarget.js + - jstests/concurrency/fsm_workloads/rename_collection_droptarget.js + + # cannot use upsert command with $where with sharded collections + - jstests/concurrency/fsm_workloads/upsert_where.js + + # stagedebug can only be run against a standalone mongod + - jstests/concurrency/fsm_workloads/yield_and_hashed.js + - jstests/concurrency/fsm_workloads/yield_and_sorted.js + + # ChunkHelper directly talks to the config servers and doesn't support retries for network errors + - 
jstests/concurrency/fsm_workloads/sharded_base_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_mergeChunks_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_drop_shard_key_index.js + - jstests/concurrency/fsm_workloads/sharded_moveChunk_partitioned.js + - jstests/concurrency/fsm_workloads/sharded_splitChunk_partitioned.js + + # These workloads frequently time out waiting for the distributed lock to drop a sharded + # collection. + - jstests/concurrency/fsm_workloads/kill_aggregation.js + - jstests/concurrency/fsm_workloads/kill_rooted_or.js + + # Uses getmores. + - jstests/concurrency/fsm_workloads/agg_base.js + - jstests/concurrency/fsm_workloads/create_capped_collection.js + - jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js + - jstests/concurrency/fsm_workloads/create_index_background.js + - jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js + - jstests/concurrency/fsm_workloads/create_index_background_wildcard.js + - jstests/concurrency/fsm_workloads/globally_managed_cursors.js + - jstests/concurrency/fsm_workloads/indexed_insert_ordered_bulk.js + - jstests/concurrency/fsm_workloads/indexed_insert_text.js + - jstests/concurrency/fsm_workloads/indexed_insert_unordered_bulk.js + - jstests/concurrency/fsm_workloads/indexed_insert_upsert.js + - jstests/concurrency/fsm_workloads/indexed_insert_where.js + - jstests/concurrency/fsm_workloads/list_indexes.js + - jstests/concurrency/fsm_workloads/reindex.js + - jstests/concurrency/fsm_workloads/reindex_background.js + - jstests/concurrency/fsm_workloads/remove_multiple_documents.js + - jstests/concurrency/fsm_workloads/server_status_with_time_out_cursors.js + - jstests/concurrency/fsm_workloads/touch_base.js + - jstests/concurrency/fsm_workloads/touch_data.js + - jstests/concurrency/fsm_workloads/touch_index.js + - jstests/concurrency/fsm_workloads/touch_no_data_no_index.js + - jstests/concurrency/fsm_workloads/update_where.js + - 
jstests/concurrency/fsm_workloads/yield.js + - jstests/concurrency/fsm_workloads/yield_fetch.js + - jstests/concurrency/fsm_workloads/yield_rooted_or.js + - jstests/concurrency/fsm_workloads/yield_sort.js + - jstests/concurrency/fsm_workloads/yield_sort_merge.js + - jstests/concurrency/fsm_workloads/yield_text.js + + # Uses non retryable writes. + - jstests/concurrency/fsm_workloads/remove_and_bulk_insert.js + - jstests/concurrency/fsm_workloads/update_and_bulk_insert.js + - jstests/concurrency/fsm_workloads/update_check_index.js + - jstests/concurrency/fsm_workloads/update_multifield_multiupdate.js + - jstests/concurrency/fsm_workloads/update_multifield_multiupdate_noindex.js + - jstests/concurrency/fsm_workloads/update_ordered_bulk_inc.js + - jstests/concurrency/fsm_workloads/yield_id_hack.js + + # Uses non retryable commands. + - jstests/concurrency/fsm_workloads/agg_out.js + - jstests/concurrency/fsm_workloads/agg_sort.js + - jstests/concurrency/fsm_workloads/collmod.js + - jstests/concurrency/fsm_workloads/collmod_separate_collections.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod.js + - jstests/concurrency/fsm_workloads/create_index_background_unique_collmod_capped.js + - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js + - jstests/concurrency/fsm_workloads/invalidated_cursors.js + - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js + - jstests/concurrency/fsm_workloads/view_catalog.js + - jstests/concurrency/fsm_workloads/view_catalog_cycle_with_drop.js + - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js + + # The aggregation stage $currentOp cannot run with a readConcern other than 'local'. 
+ - jstests/concurrency/fsm_workloads/multi_statement_transaction_current_op.js + + # The auto_retry_on_network_error.js override needs to overwrite the response from drop on + # NamespaceNotFound, and since this workload only creates and drops collections there isn't + # much value in running it. + - jstests/concurrency/fsm_workloads/drop_collection.js + + # TODO (SERVER-35534) Unblacklist this workload from the concurrency stepdown suites. It fails + # with PooledConnectionsDropped when setFCV is run concurrently with movePrimary, which seems like + # it's due to a race condition in the NetworkInterfaceTL. + - jstests/concurrency/fsm_workloads/database_versioning.js + + # serverStatus does not include transaction metrics on mongos. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_metrics_test.js + + # Uses the same transaction id across different routers, which is not allowed because when either + # router tries to commit, it may not know the full participant list. + - jstests/concurrency/fsm_workloads/multi_statement_transaction_all_commands_same_session.js + + # Use non-retryable commands not allowed by the network retry helper. 
+ - jstests/concurrency/fsm_workloads/snapshot_read_kill_op_only.js + - jstests/concurrency/fsm_workloads/snapshot_read_kill_operations.js + + exclude_with_any_tags: + - requires_replication + - requires_non_retryable_writes + +executor: + archive: + hooks: + - CheckReplDBHash + - ValidateCollections + tests: true + config: + shell_options: + readMode: commands + global_vars: + TestData: + runningWithConfigStepdowns: true + runningWithShardStepdowns: true + useStepdownPermittedFile: true + usingReplicaSetShards: true + hooks: + - class: ContinuousStepdown + config_stepdown: true + shard_stepdown: true + use_stepdown_permitted_file: true + wait_for_mongos_retarget: true + terminate: true + - class: CheckReplDBHash + - class: ValidateCollections + - class: CleanupConcurrencyWorkloads + fixture: + class: ShardedClusterFixture + mongos_options: + set_parameters: + enableTestCommands: 1 + configsvr_options: + num_nodes: 3 + all_nodes_electable: true + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + shard_options: + all_nodes_electable: true + mongod_options: + oplogSize: 1024 + replset_config_options: + settings: + catchUpTimeoutMillis: 0 + voting_secondaries: true + mongod_options: + set_parameters: + enableTestCommands: 1 + enableElectionHandoff: 0 + num_rs_nodes_per_shard: 3 + num_shards: 2 + num_mongos: 2 diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 1a82afcb996..33a3cbed636 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -6656,7 +6656,7 @@ tasks: - <<: *task_template name: concurrency_sharded_with_stepdowns - tags: ["concurrency"] + tags: ["concurrency", "stepdowns"] commands: - func: "do setup" - func: "run tests" @@ -6666,7 +6666,7 @@ tasks: - <<: *task_template name: concurrency_sharded_with_stepdowns_and_balancer - tags: ["concurrency"] + tags: ["concurrency", "stepdowns"] commands: - func: "do setup" - func: "run tests" @@ -6674,6 +6674,26 @@ tasks: resmoke_args: 
--suites=concurrency_sharded_with_stepdowns_and_balancer --storageEngine=wiredTiger resmoke_jobs_max: 1 +- <<: *task_template + name: concurrency_sharded_terminate_primary_with_balancer + tags: ["concurrency", "stepdowns", "kill_terminate"] + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=concurrency_sharded_terminate_primary_with_balancer --storageEngine=wiredTiger + resmoke_jobs_max: 1 + +- <<: *task_template + name: concurrency_sharded_kill_primary_with_balancer + tags: ["concurrency", "stepdowns", "kill_terminate"] + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=concurrency_sharded_kill_primary_with_balancer --storageEngine=wiredTiger + resmoke_jobs_max: 1 + - <<: *task_template name: concurrency_sharded_multi_stmt_txn tags: ["concurrency"] @@ -6724,6 +6744,26 @@ tasks: resmoke_args: --suites=concurrency_sharded_multi_stmt_txn_with_stepdowns --storageEngine=wiredTiger resmoke_jobs_max: 1 +- <<: *task_template + name: concurrency_sharded_multi_stmt_txn_terminate_primary + tags: ["concurrency", "stepdowns", "kill_terminate"] + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=concurrency_sharded_multi_stmt_txn_terminate_primary --storageEngine=wiredTiger + resmoke_jobs_max: 1 + +- <<: *task_template + name: concurrency_sharded_multi_stmt_txn_kill_primary + tags: ["concurrency", "stepdowns", "kill_terminate"] + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=concurrency_sharded_multi_stmt_txn_kill_primary --storageEngine=wiredTiger + resmoke_jobs_max: 1 + - <<: *task_template name: concurrency_simultaneous tags: ["concurrency", "common"] @@ -8246,7 +8286,7 @@ buildvariants: - name: jsCore_txns - name: .logical_session_cache - name: parallel_gen - - name: .concurrency .common + - name: .concurrency .common !.kill_terminate distros: - rhel62-large - name: concurrency_replication_causal_consistency @@ -9225,7 +9265,7 
@@ buildvariants: - name: .misc_js !.non_win_dbg # Some concurrency workloads require a lot of memory, so we use machines # with more RAM for these suites. - - name: .concurrency !.ubsan !.no_txns + - name: .concurrency !.ubsan !.no_txns !.kill_terminate distros: - windows-64-vs2017-compile - name: disk_wiredtiger @@ -9551,7 +9591,7 @@ buildvariants: - name: .causally_consistent !.sharding - name: .change_streams !.secondary_reads - name: .misc_js - - name: .concurrency !.ubsan !.no_txns + - name: .concurrency !.ubsan !.no_txns !.kill_terminate - name: disk_wiredtiger - name: free_monitoring - name: initial_sync_fuzzer_gen @@ -10542,7 +10582,7 @@ buildvariants: - name: causally_consistent_jscore_txns_passthrough - name: .change_streams - name: .misc_js !.tool - - name: .concurrency !.ubsan !.no_txns !.stepdowns + - name: .concurrency !.ubsan !.no_txns !.stepdowns !.kill_terminate - name: disk_wiredtiger - name: .encrypt - name: initial_sync_fuzzer_gen @@ -11829,7 +11869,7 @@ buildvariants: - name: .causally_consistent !.wo_snapshot - name: .change_streams !.secondary_reads - name: .misc_js - - name: .concurrency !.ubsan !.no_txns + - name: .concurrency !.ubsan !.no_txns !.kill_terminate distros: - rhel62-large # Some workloads require a lot of memory, use a bigger machine for this suite. 
- name: .integration !.audit @@ -12252,7 +12292,7 @@ buildvariants: - name: .causally_consistent !.wo_snapshot - name: .change_streams - name: .misc_js - - name: .concurrency !.ubsan !.no_txns + - name: .concurrency !.ubsan !.no_txns !.kill_terminate - name: disk_wiredtiger - name: .encrypt - name: free_monitoring @@ -12342,7 +12382,7 @@ buildvariants: - name: .causally_consistent !.wo_snapshot - name: .change_streams - name: .misc_js - - name: .concurrency !.no_txns !.repl + - name: .concurrency !.no_txns !.repl !.kill_terminate - name: disk_wiredtiger - name: .encrypt - name: free_monitoring @@ -12435,7 +12475,7 @@ buildvariants: - name: audit - name: .causally_consistent !.wo_snapshot - name: .misc_js - - name: .concurrency .common + - name: .concurrency .common !.kill_terminate - name: concurrency_replication_causal_consistency - name: disk_wiredtiger - name: .encrypt -- cgit v1.2.1