# This suite is intended to exercise transaction behavior with failovers by running existing
# concurrency workloads with state functions wrapped in transactions while shard and config
# primaries are being killed. State functions that use operations not supported in a transaction
# are not wrapped in one.

test_kind: fsm_workload_test

# Workload selection: start from every FSM workload under `roots`, then drop the files listed in
# `exclude_files` and any workload carrying one of the tags in `exclude_with_any_tags`. The
# exclusion list is grouped by the suite each group of denylist entries was taken from.
selector:
  roots:
    - jstests/concurrency/fsm_workloads/**/*.js
  exclude_files:
    ##
    # Denylists from concurrency_sharded_replication
    ##

    # SERVER-13116 distinct isn't sharding aware
    - jstests/concurrency/fsm_workloads/distinct.js
    - jstests/concurrency/fsm_workloads/distinct_noindex.js
    - jstests/concurrency/fsm_workloads/distinct_projection.js

    # Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
    # collections'. This bug is problematic for these workloads because they assert on count()
    # values:
    - jstests/concurrency/fsm_workloads/agg_match.js

    # $lookup and $graphLookup are not supported on sharded collections with transactions.
    - jstests/concurrency/fsm_workloads/agg_graph_lookup.js
    - jstests/concurrency/fsm_workloads/agg_lookup.js
    - jstests/concurrency/fsm_workloads/view_catalog_cycle_lookup.js

    # Disabled because MapReduce can lose cursors if the primary goes down during the operation.
    - jstests/concurrency/fsm_workloads/map_reduce_drop.js
    - jstests/concurrency/fsm_workloads/map_reduce_inline.js
    - jstests/concurrency/fsm_workloads/map_reduce_interrupt.js
    - jstests/concurrency/fsm_workloads/map_reduce_merge.js
    - jstests/concurrency/fsm_workloads/map_reduce_reduce.js
    - jstests/concurrency/fsm_workloads/map_reduce_replace.js
    - jstests/concurrency/fsm_workloads/map_reduce_replace_nonexistent.js
    - jstests/concurrency/fsm_workloads/map_reduce_replace_remove.js
    - jstests/concurrency/fsm_workloads/map_reduce_with_chunk_migrations.js

    # Disabled due to MongoDB restrictions and/or workload restrictions

    # These workloads sometimes trigger 'Could not lock auth data update lock'
    # errors because the AuthorizationManager currently waits for only five
    # seconds to acquire the lock for authorization documents
    - jstests/concurrency/fsm_workloads/auth_create_role.js
    - jstests/concurrency/fsm_workloads/auth_create_user.js
    - jstests/concurrency/fsm_workloads/auth_drop_role.js
    - jstests/concurrency/fsm_workloads/auth_drop_user.js

    # uses >100MB of data, which can overwhelm test hosts
    - jstests/concurrency/fsm_workloads/agg_group_external.js
    - jstests/concurrency/fsm_workloads/agg_sort_external.js

    # compact can only be run against a standalone mongod
    - jstests/concurrency/fsm_workloads/compact.js
    - jstests/concurrency/fsm_workloads/compact_while_creating_indexes.js

    # convertToCapped can't be run on mongos processes
    - jstests/concurrency/fsm_workloads/convert_to_capped_collection.js
    - jstests/concurrency/fsm_workloads/convert_to_capped_collection_index.js

    # findAndModify requires a shard key
    - jstests/concurrency/fsm_workloads/findAndModify_mixed_queue_unindexed.js
    - jstests/concurrency/fsm_workloads/findAndModify_remove_queue_unindexed.js
    - jstests/concurrency/fsm_workloads/findAndModify_update_collscan.js
    - jstests/concurrency/fsm_workloads/findAndModify_update_queue.js
    - jstests/concurrency/fsm_workloads/findAndModify_update_queue_unindexed.js

    # remove cannot be {} for findAndModify
    - jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js

    # can cause OOM kills on test hosts
    - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js

    # cannot createIndex after dropDatabase without sharding first
    - jstests/concurrency/fsm_workloads/plan_cache_drop_database.js

    # reIndex is not supported in mongos.
    - jstests/concurrency/fsm_workloads/reindex.js
    - jstests/concurrency/fsm_workloads/reindex_background.js

    # The WTWriteConflictException failpoint is not supported on mongos.
    - jstests/concurrency/fsm_workloads/collmod_writeconflict.js
    - jstests/concurrency/fsm_workloads/reindex_writeconflict.js

    # our .remove(query, {justOne: true}) calls lack shard keys
    - jstests/concurrency/fsm_workloads/remove_single_document.js

    # cannot use upsert command with $where with sharded collections
    - jstests/concurrency/fsm_workloads/upsert_where.js

    # stagedebug can only be run against a standalone mongod
    - jstests/concurrency/fsm_workloads/yield_and_hashed.js
    - jstests/concurrency/fsm_workloads/yield_and_sorted.js

    # TODO Undenylist (SERVER-38852).
    # NOTE(review): both files are listed again below under "Uses non retryable commands", so
    # removing them here alone will not re-enable them.
    - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js
    - jstests/concurrency/fsm_workloads/timeseries_agg_out_interrupt_cleanup.js

    # serverStatus does not include transaction metrics on mongos.
    - jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_metrics_test.js

    # Uses the same transaction id across different routers, which is not allowed because when
    # either router tries to commit, it may not know the full participant list.
    - jstests/concurrency/fsm_workloads/multi_statement_transaction_all_commands_same_session.js

    ##
    # Denylists from concurrency_replication_multi_stmt_txn
    ##

    # Relies on having one thread observe writes from the other threads, which won't become
    # visible once a transaction in the thread is started because it'll keep reading from the same
    # snapshot.
    - jstests/concurrency/fsm_workloads/create_index_background.js
    - jstests/concurrency/fsm_workloads/create_index_background_partial_filter.js
    - jstests/concurrency/fsm_workloads/create_index_background_wildcard.js

    # Expects reads to die with a particular error, but other errors are possible if the read is
    # part of a transaction (e.g. ErrorCodes.LockTimeout).
    - jstests/concurrency/fsm_workloads/drop_index_during_replan.js
    - jstests/concurrency/fsm_workloads/drop_index_during_lookup.js

    # Performs direct writes to system.views
    - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js

    ##
    # Denylists from concurrency_sharded_multi_stmt_txn
    ##

    # Use updates that do not contain the shard key, so they are rejected before any commands are
    # sent to participant shards, but these workloads do not fail, so the auto retry transaction
    # logic attempts to commit, which fails because no participants have been contacted.
    - jstests/concurrency/fsm_workloads/update_rename.js
    - jstests/concurrency/fsm_workloads/update_rename_noindex.js

    # Expects DocumentValidationFailure which causes transaction to abort and retry indefinitely.
    - jstests/concurrency/fsm_workloads/schema_validator_with_expr_variables.js

    ##
    # Denylists from concurrency_sharded_with_stepdowns
    ##

    # ChunkHelper directly talks to the config servers and doesn't support retries for network
    # errors
    - jstests/concurrency/fsm_workloads/cleanupOrphanedWhileMigrating.js
    - jstests/concurrency/fsm_workloads/sharded_mergeChunks_partitioned.js
    - jstests/concurrency/fsm_workloads/sharded_splitChunk_partitioned.js

    # These workloads frequently time out waiting for the distributed lock to drop a sharded
    # collection.
    - jstests/concurrency/fsm_workloads/kill_aggregation.js
    - jstests/concurrency/fsm_workloads/kill_rooted_or.js

    # Uses non retryable commands.
    # NOTE(review): agg_out_interrupt_cleanup.js, timeseries_agg_out_interrupt_cleanup.js and
    # view_catalog_direct_system_writes.js also appear in earlier sections of this list under
    # other reasons; they are repeated here so that each reason stays recorded.
    - jstests/concurrency/fsm_workloads/agg_out.js
    - jstests/concurrency/fsm_workloads/agg_out_interrupt_cleanup.js
    - jstests/concurrency/fsm_workloads/timeseries_agg_out_interrupt_cleanup.js
    - jstests/concurrency/fsm_workloads/agg_sort.js
    - jstests/concurrency/fsm_workloads/collmod.js
    - jstests/concurrency/fsm_workloads/collmod_separate_collections.js
    - jstests/concurrency/fsm_workloads/invalidated_cursors.js
    - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js
    - jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_updates.js
    - jstests/concurrency/fsm_workloads/view_catalog.js
    - jstests/concurrency/fsm_workloads/view_catalog_cycle_with_drop.js
    - jstests/concurrency/fsm_workloads/view_catalog_direct_system_writes.js
    - jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_deletes.js
    - jstests/concurrency/fsm_workloads/timeseries_deletes_and_inserts.js

    ##
    # Denylists specific to this suite
    ##

    # Uses getMore. If a node is killed between the creation of a cursor (usually by calling find
    # or aggregate) and the call to getMore(), the server will throw a CursorNotFound exception.
    # We currently do not retry the transaction on this exception.
    - jstests/concurrency/fsm_workloads/agg_unionWith_interrupt_cleanup.js
    - jstests/concurrency/fsm_workloads/agg_union_with_chunk_migrations.js
    - jstests/concurrency/fsm_workloads/create_capped_collection.js
    - jstests/concurrency/fsm_workloads/create_capped_collection_maxdocs.js
    - jstests/concurrency/fsm_workloads/indexed_insert_ordered_bulk.js
    - jstests/concurrency/fsm_workloads/indexed_insert_unordered_bulk.js
    - jstests/concurrency/fsm_workloads/list_indexes.js

    # Uses non-retryable commands in the same state function as a command not supported in a
    # transaction.
    - jstests/concurrency/fsm_workloads/agg_merge_when_not_matched_insert.js
    - jstests/concurrency/fsm_workloads/agg_merge_when_matched_replace_with_new.js

    # JS engine interruptions on mongos return ErrorCodes::Interrupted, which isn't
    # considered a retryable or transient transaction error.
    - jstests/concurrency/fsm_workloads/indexed_insert_where.js
    - jstests/concurrency/fsm_workloads/remove_where.js
    - jstests/concurrency/fsm_workloads/update_where.js

    # Time-series collections are not supported on mongos.
    - jstests/concurrency/fsm_workloads/create_timeseries_collection.js

  exclude_with_any_tags:
    - requires_replication
    - assumes_balancer_on
    # Snapshot reads in transactions are banned on capped collections.
    - requires_capped
    # Sharing cursors between state functions will fail in this suite because it will attempt to
    # use the same cursor in multiple transactions.
    - state_functions_share_cursor
    # These start a transaction in one state function and use it in other state functions. This
    # suite would instead execute each state function as its own transaction.
    - state_functions_share_transaction
    # Tests which expect commands to fail and catch the error can cause transactions to abort and
    # retry indefinitely.
    - catches_command_failures
    # This tag corresponds to operations which are run long enough to exceed the stepdown
    # interval when grouped into transactions.
    - operations_longer_than_stepdown_interval_in_txns
    # mongos has no system.profile collection.
    - requires_profiling
    - does_not_support_transactions
    - does_not_support_stepdowns
    - assumes_unsharded_collection
    # Note that "requires_non_retryable_writes" does not need to be denylisted because some
    # writes that are not individually retryable can be retried if they are part of a
    # transaction.
# Execution settings for the suite: archival selection, shell TestData flags, the hooks that run
# alongside the workloads, and the sharded-cluster fixture they run against.
executor:
  # NOTE(review): presumably the hooks (and tests) whose failures trigger data archival; confirm
  # against the test runner's archival documentation.
  archive:
    hooks:
      - CheckReplDBHash
      - CheckMetadataConsistencyInBackground
      - ValidateCollections
    tests: true
  config:
    shell_options:
      global_vars:
        # Suite-wide TestData flags: state functions run inside transactions with sessions, the
        # balancer is off, and both config-server and shard stepdowns are in effect.
        TestData:
          runInsideTransaction: true
          runningWithBalancer: false
          runningWithConfigStepdowns: true
          runningWithSessions: true
          runningWithShardStepdowns: true
          traceExceptions: false
          useActionPermittedFile: true
  hooks:
    # We use a stepdown interval of 15 seconds because we will retry all commands in a
    # transaction so we need to allow time for at most 10 operations to be re-run and then
    # re-committed. If too many network errors occur when re-running a transaction we will run
    # out of retries.
    - class: ContinuousStepdown
      config_stepdown: true
      kill: true
      shard_stepdown: true
      stepdown_interval_ms: 15000
      use_action_permitted_file: true
    - class: CheckShardFilteringMetadata
    - class: CheckReplDBHash
    - class: CheckMetadataConsistencyInBackground
    - class: CheckOrphansDeleted
    - class: CheckRoutingTableConsistency
    # Validation can interfere with other operations, so this goes last.
    - class: ValidateCollections
      shell_options:
        global_vars:
          TestData:
            skipEnforceFastCountOnValidate: true
    - class: CleanupConcurrencyWorkloads
  # Two shards of three nodes each, a three-node config server replica set, and two mongos
  # routers, with the balancer disabled.
  fixture:
    class: ShardedClusterFixture
    enable_balancer: false
    mongos_options:
      set_parameters:
        enableTestCommands: 1
        queryAnalysisSamplerConfigurationRefreshSecs: 1
    configsvr_options:
      num_nodes: 3
      all_nodes_electable: true
      replset_config_options:
        settings:
          catchUpTimeoutMillis: 0
      mongod_options:
        set_parameters:
          reshardingMinimumOperationDurationMillis: 30000  # 30 seconds
    shard_options:
      all_nodes_electable: true
      mongod_options:
        oplogSize: 1024
      replset_config_options:
        settings:
          catchUpTimeoutMillis: 0
    mongod_options:
      wiredTigerEngineConfigString: "debug_mode=(table_logging=true)"
      set_parameters:
        enableTestCommands: 1
        enableElectionHandoff: 0
        roleGraphInvalidationIsFatal: 1
        queryAnalysisWriterIntervalSecs: 1
    num_rs_nodes_per_shard: 3
    num_shards: 2
    num_mongos: 2