diff options
author | Jason Chan <jason.chan@10gen.com> | 2020-11-10 12:07:35 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-12 07:02:09 +0000 |
commit | e399bf8689f592129c9655933bdb6a0e551a47b8 (patch) | |
tree | 0609ae3f7617076eb13c6d7f38453f44a74cfdac /buildscripts | |
parent | 0ed9f1bf7d27e43174ce806291a597947b1f98ae (diff) | |
download | mongo-e399bf8689f592129c9655933bdb6a0e551a47b8.tar.gz |
SERVER-51598 Add new abort_multi_stmt_txn_test suites
Diffstat (limited to 'buildscripts')
5 files changed, 453 insertions, 0 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml new file mode 100644 index 00000000000..6b24de0bbf3 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml @@ -0,0 +1,77 @@ +# A test suite intended to attempt to reproduce the crash in HELP-19266. This test suite will abort +# transactions by setting a smaller expiration time. This test suite will ignore all assertion +# and command failure errors and should only fail on a system crash, a hang, or a data consistency +# error. + +test_kind: abort_txns_fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_files: + ## + # Disabled due to MongoDB restrictions and/or workload restrictions + ## + # These workloads use >100MB of data, which can overwhelm test hosts. + - jstests/concurrency/fsm_workloads/agg_group_external.js + - jstests/concurrency/fsm_workloads/agg_sort_external.js + # The findAndModify_update_grow.js workload can cause OOM kills on test hosts. + - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js + + # Creates a cursor in one state function and uses it in a different state function, which means + # that in this suite it attempts to use the same cursor in multiple transactions. + - jstests/concurrency/fsm_workloads/invalidated_cursors.js + - jstests/concurrency/fsm_workloads/globally_managed_cursors.js + - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js + + # Relies on having one thread observe writes from the other threads, which won't become visible + # once a transaction in the thread is started because it'll keep reading from the same snapshot. 
+ - jstests/concurrency/fsm_workloads/create_index_background.js + + exclude_with_any_tags: + - requires_sharding + + # Tests which expect commands to fail and catch the error can cause transactions to abort and + # retry indefinitely. + - catches_command_failures + +executor: + archive: + hooks: + - CheckReplDBHashInBackground + - CheckReplDBHash + - ValidateCollections + tests: true + config: + shell_options: + # Ignore assertion failures from the shell in this test suite. + eval: doassert = Function.prototype; + readMode: commands + global_vars: + TestData: + runInsideTransaction: true + runningWithSessions: true + hooks: + # The CheckReplDBHash hook waits until all operations have replicated to and have been applied + # on the secondaries, so we run the ValidateCollections hook after it to ensure we're + # validating the entire contents of the collection. + # + # We don't run the CheckReplDBHashInBackground in this suite as it uses transactions to get the + # db hashes. The hook will throw if the transaction is aborted due to the shortened expiration + # lifetime in this suite. + # TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite. 
+ - class: CheckReplDBHash + - class: ValidateCollections + - class: CleanupConcurrencyWorkloads + fixture: + class: ReplicaSetFixture + mongod_options: + oplogSize: 1024 + set_parameters: + enableTestCommands: 1 + numInitialSyncAttempts: 1 + failpoint.setTransactionLifetimeToRandomMillis: + mode: alwaysOn + failpoint.increaseFrequencyOfPeriodicThreadToExpireTransactions: + mode: alwaysOn + num_nodes: 3 diff --git a/buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml new file mode 100644 index 00000000000..edc974bb5a8 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml @@ -0,0 +1,320 @@ +# A test suite intended to attempt to reproduce the crash in HELP-19266. This test suite will abort +# transactions by setting a smaller expiration time. This test suite will ignore all assertion +# and command failure errors and should only fail on a system crash, a hang, or a data consistency +# error. + +test_kind: abort_multi_stmt_txn_passthrough + +selector: + roots: + - jstests/core/**/*.js + # TODO: SERVER-35089 + # - jstests/libs/txns/txn_passthrough_runner_selftest.js + exclude_files: + # TODO: SERVER-35089 + - jstests/core/geo_allowedcomparisons.js + - jstests/core/geo_big_polygon2.js + - jstests/core/in.js + - jstests/core/orj.js + - jstests/core/insert1.js + + # These tests already run with transactions. + - jstests/core/txns/*.js + + # These tests are not expected to pass with replica-sets: + - jstests/core/capped_update.js + - jstests/core/dbadmin.js + - jstests/core/opcounters_write_cmd.js + - jstests/core/read_after_optime.js + + ## + ## Limitations with the way the runner file injects transactions. + ## + + # These tests expect some statements to error, which will cause txns to abort entirely. 
+ - jstests/core/bulk_api_ordered.js + - jstests/core/bulk_api_unordered.js + - jstests/core/bulk_legacy_enforce_gle.js + - jstests/core/capped5.js + - jstests/core/commands_with_uuid.js + - jstests/core/explain_execution_error.js + - jstests/core/expr.js + - jstests/core/find_and_modify_invalid_query_params.js + - jstests/core/find_getmore_bsonsize.js + - jstests/core/find_getmore_cmd.js + - jstests/core/find9.js + - jstests/core/index_big1.js + - jstests/core/index_bigkeys.js + - jstests/core/index_decimal.js + - jstests/core/index_multiple_compatibility.js + - jstests/core/index_partial_write_ops.js + - jstests/core/index8.js # No explicit check for failed command. + - jstests/core/indexa.js # No explicit check for failed command. + - jstests/core/indexes_multiple_commands.js + - jstests/core/insert_long_index_key.js + - jstests/core/js2.js + - jstests/core/json_schema/json_schema.js + - jstests/core/mr_bigobject.js + - jstests/core/not2.js + - jstests/core/notablescan.js + - jstests/core/or1.js + - jstests/core/or2.js + - jstests/core/or3.js + - jstests/core/ork.js + - jstests/core/ref4.js + - jstests/core/regex_limit.js + - jstests/core/remove_undefined.js + - jstests/core/set7.js + - jstests/core/sortb.js + - jstests/core/sortf.js + - jstests/core/sortg.js + - jstests/core/sortj.js + - jstests/core/tailable_skip_limit.js + - jstests/core/type_array.js + - jstests/core/uniqueness.js + - jstests/core/unset2.js + - jstests/core/update_addToSet.js + - jstests/core/update_arrayFilters.js + - jstests/core/update_find_and_modify_id.js + - jstests/core/update_modifier_pop.js + - jstests/core/updateh.js + - jstests/core/updatej.js + - jstests/core/ref.js + + # Consecutive writes totalling more than 16MB will cause the txn to abort with + # a TransactionTooLarge error. + - jstests/core/batch_size.js + - jstests/core/single_batch.js + + ## + ## Some aggregation stages don't support snapshot readconcern. 
+ ## + + # $explain (requires read concern local) + - jstests/core/agg_hint.js + - jstests/core/and.js + - jstests/core/collation.js + - jstests/core/explain_shell_helpers.js + - jstests/core/index_partial_read_ops.js + - jstests/core/optimized_match_explain.js + - jstests/core/sort_array.js + - jstests/core/views/views_collation.js + + # $out + - jstests/core/bypass_doc_validation.js + - jstests/core/views/views_aggregation.js + + # $listSessions + - jstests/core/list_all_local_sessions.js + - jstests/core/list_all_sessions.js + - jstests/core/list_local_sessions.js + - jstests/core/list_sessions.js + + # $indexStats + - jstests/core/index_stats.js + + # $collStats + - jstests/core/operation_latency_histogram.js + - jstests/core/views/views_coll_stats.js + - jstests/core/views/views_stats.js + + ## + ## WriteErrors get converted to WriteCommandErrors if part of a txn. + ## + + # The same error code, but with ok:0. + - jstests/core/json_schema/additional_items.js + - jstests/core/json_schema/additional_properties.js + - jstests/core/json_schema/bsontype.js + - jstests/core/json_schema/dependencies.js + - jstests/core/json_schema/items.js + - jstests/core/json_schema/logical_keywords.js + - jstests/core/json_schema/min_max_items.js + - jstests/core/json_schema/min_max_properties.js + - jstests/core/json_schema/pattern_properties.js + - jstests/core/json_schema/required.js + - jstests/core/json_schema/unique_items.js + + - jstests/core/field_name_validation.js + - jstests/core/fts_array.js + - jstests/core/inc-SERVER-7446.js + - jstests/core/invalid_db_name.js + - jstests/core/push_sort.js + + # Checks for "WriteErrors" explicitly from the response of db.runCommand() + - jstests/core/max_doc_size.js + + # Calls res.getWriteError() or res.hasWriteError(). 
+ - jstests/core/bulk_api_ordered.js + - jstests/core/bulk_api_unordered.js + - jstests/core/bulk_legacy_enforce_gle.js + - jstests/core/cappeda.js + - jstests/core/doc_validation.js + - jstests/core/doc_validation_options.js + - jstests/core/geo_multinest0.js + - jstests/core/insert_illegal_doc.js + - jstests/core/ns_length.js + - jstests/core/push2.js + - jstests/core/remove6.js + - jstests/core/removeb.js + - jstests/core/rename4.js + - jstests/core/shell_writeconcern.js + - jstests/core/storefunc.js + - jstests/core/update_arrayFilters.js + - jstests/core/update_dbref.js + - jstests/core/updatel.js + - jstests/core/write_result.js + + # Multiple writes in a txn, some of which fail because the collection doesn't exist. + # We create the collection and retry the last write, but previous writes would have + # still failed. + - jstests/core/dbref1.js + - jstests/core/dbref2.js + - jstests/core/ref3.js + - jstests/core/repair_database.js + - jstests/core/update3.js + - jstests/core/rename3.js + + ## + ## Error: Unable to acquire lock within a max lock request timeout of '0ms' milliseconds + ## + + # Collection drops done through applyOps are not converted to w:majority + - jstests/core/views/invalid_system_views.js + + # Operations run on the "out" collection of a MapReduce call, which is not always + # immediately available to a transaction as it is still being replicated. Transactions + # fail with "Unable to acquire lock" errors. + - jstests/core/function_string_representations.js + - jstests/core/mr_errorhandling.js + - jstests/core/mr_merge.js + - jstests/core/mr_merge2.js + - jstests/core/mr_replaceIntoDB.js + - jstests/core/mr_outreduce.js + - jstests/core/mr_outreduce2.js + + ## + ## Misc. reasons. + ## + + # SERVER-34868 Cannot run a legacy query on a session. + - jstests/core/exhaust.js + - jstests/core/validate_cmd_ns.js + + # SERVER-34772 Tailable Cursors are not allowed with snapshot readconcern. 
+ - jstests/core/awaitdata_getmore_cmd.js + - jstests/core/getmore_cmd_maxtimems.js + - jstests/core/tailable_cursor_invalidation.js + - jstests/core/tailable_getmore_batch_size.js + + # SERVER-34918 The "max" option of a capped collection can be exceeded until the next insert. + # The reason is that we don't update the count of a collection until a transaction commits, + # by which point it is too late to complain that "max" has been exceeded. + - jstests/core/capped_max1.js + + # The "max" option of a capped collection can be temporarily exceeded before a + # txn is committed. + - jstests/core/bulk_insert_capped.js + + # Wrong count for top info (WriteLock) + - jstests/core/top.js + + # Expects collection to not have been created + - jstests/core/insert_id_undefined.js + + # Creates sessions explicitly, resulting in txns being run through different sessions + # using a single txnNumber. + - jstests/core/list_all_local_cursors.js + - jstests/core/json_schema/misc_validation.js + - jstests/core/views/views_all_commands.js + + # Fails with implicit sessions because it will use multiple sessions on the same Mongo connection. + - jstests/core/dropdb.js + + # Committing a transaction when the server is fsync locked fails. + - jstests/core/fsync.js + + # Expects legacy errors ($err). + - jstests/core/constructors.js + + # txn interrupted by command outside of txn before getMore runs. + - jstests/core/commands_namespace_parsing.js + - jstests/core/drop3.js + - jstests/core/ensure_sorted.js + - jstests/core/geo_s2cursorlimitskip.js + - jstests/core/getmore_invalidated_cursors.js + - jstests/core/getmore_invalidated_documents.js + - jstests/core/kill_cursors.js + - jstests/core/list_collections1.js + - jstests/core/list_indexes.js + - jstests/core/oro.js + + # Expects certain number of operations in the system.profile collection. + - jstests/core/profile*.js + + # Parallel Shell - we do not signal the override to end a txn when a parallel shell closes. 
+ - jstests/core/awaitdata_getmore_cmd.js + - jstests/core/compact_keeps_indexes.js + - jstests/core/count10.js + - jstests/core/count_plan_summary.js + - jstests/core/coveredIndex3.js + - jstests/core/currentop.js + - jstests/core/distinct3.js + - jstests/core/evald.js + - jstests/core/find_and_modify_concurrent_update.js + - jstests/core/fsync.js + - jstests/core/geo_update_btree.js + - jstests/core/killop_drop_collection.js + - jstests/core/loadserverscripts.js + - jstests/core/mr_killop.js + - jstests/core/remove9.js + - jstests/core/removeb.js + - jstests/core/removec.js + - jstests/core/shellstartparallel.js + - jstests/core/updatef.js + + # Command expects to see result from parallel operation. + # E.g. Suppose the following sequence of events: op1, join() op2 in parallel shell, op3. + # op3 will still be using the snapshot from op1, and not see op2 at all. + - jstests/core/cursora.js + - jstests/core/bench_test1.js + + # It is illegal to open a tailable cursor in a transaction + - jstests/core/geo_near_tailable.js + + exclude_with_any_tags: + # "Cowardly refusing to override read concern of command: ..." + - assumes_read_concern_unchanged + # "writeConcern is not allowed within a multi-statement transaction" + - assumes_write_concern_unchanged + +executor: + config: + shell_options: + # Ignore assertion failures from the shell in this test suite. + eval: var testingReplication = true; doassert = Function.prototype; + global_vars: + TestData: + sessionOptions: + causalConsistency: false + readMode: commands + hooks: + # The CheckReplDBHash hook waits until all operations have replicated to and have been applied + # on the secondaries, so we run the ValidateCollections hook after it to ensure we're + # validating the entire contents of the collection. 
+ - class: CheckReplOplogs + - class: CheckReplDBHash + - class: ValidateCollections + - class: CleanEveryN + n: 20 + fixture: + class: ReplicaSetFixture + mongod_options: + set_parameters: + enableTestCommands: 1 + numInitialSyncAttempts: 1 + failpoint.setTransactionLifetimeToRandomMillis: + mode: alwaysOn + failpoint.increaseFrequencyOfPeriodicThreadToExpireTransactions: + mode: alwaysOn + num_nodes: 2
\ No newline at end of file diff --git a/buildscripts/resmokelib/selector.py b/buildscripts/resmokelib/selector.py index 0ed27403cbc..cbd1eb86bb3 100644 --- a/buildscripts/resmokelib/selector.py +++ b/buildscripts/resmokelib/selector.py @@ -679,11 +679,13 @@ _SELECTOR_REGISTRY = { "db_test": (_DbTestSelectorConfig, _DbTestSelector), "fsm_workload_test": (_JSTestSelectorConfig, _JSTestSelector), "parallel_fsm_workload_test": (_MultiJSTestSelectorConfig, _MultiJSTestSelector), + "abort_txns_fsm_workload_test": (_JSTestSelectorConfig, _JSTestSelector), "json_schema_test": (_JsonTestSelectorConfig, _Selector), "js_test": (_JSTestSelectorConfig, _JSTestSelector), "multi_stmt_txn_passthrough": (_JSTestSelectorConfig, _JSTestSelector), "py_test": (_PyTestCaseSelectorConfig, _Selector), "sleep_test": (_SleepTestCaseSelectorConfig, _SleepTestCaseSelector), + "abort_multi_stmt_txn_passthrough": (_JSTestSelectorConfig, _JSTestSelector), } diff --git a/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py b/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py index 6cbda33abe9..f2cf17dd9c3 100644 --- a/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py +++ b/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py @@ -99,3 +99,26 @@ class ParallelFSMWorkloadTestCase(FSMWorkloadTestCase): for workload_name in sorted(selected_tests): uid.update(workload_name) return uid.hexdigest() + + +class AbortTxnsFSMWorkloadTestCase(FSMWorkloadTestCase): + """An FSM workload intended to be used by test suites that test transaction expiration logic.""" + + REGISTERED_NAME = "abort_txns_fsm_workload_test" + + def _execute(self, process): + """Run the specified process.""" + self.logger.info("Starting %s...\n%s", self.short_description(), process.as_command()) + + process.start() + self.logger.info("%s started with pid %s.", self.short_description(), process.pid) + + self.return_code = process.wait() + # This test case is intended to randomly abort 
transactions in the core passthrough. We only + # expect to return a failure when the system crashes. This is different from the base + # implementation where we will throw on a non-zero return code. + if self.return_code != 0: + self.logger.info("Returning quietly instead of throwing failure: %s" % + (self.short_description())) + + self.logger.info("%s finished.", self.short_description()) diff --git a/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py b/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py index 1e790612153..83c658557a6 100644 --- a/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py +++ b/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py @@ -27,3 +27,34 @@ class MultiStmtTxnTestCase(jsrunnerfile.JSRunnerFileTestCase): def _populate_test_data(self, test_data): test_data["multiStmtTxnTestFile"] = self.multi_stmt_txn_test_file + + +class AbortMultiStmtTxnTestCase(MultiStmtTxnTestCase): + """Test case for aborting multi statement transactions.""" + + REGISTERED_NAME = "abort_multi_stmt_txn_passthrough" + + def __init__(self, logger, multi_stmt_txn_test_file, shell_executable=None, shell_options=None): + """Initialize AbortMultiStmtTxnTestCase to be used to test transaction expiration logic.""" + # pylint: disable=non-parent-init-called,super-init-not-called + jsrunnerfile.JSRunnerFileTestCase.__init__( + self, logger, "Abort Multi-statement Transaction Passthrough", multi_stmt_txn_test_file, + test_runner_file="jstests/libs/txns/txn_passthrough_runner.js", + shell_executable=shell_executable, shell_options=shell_options) + + def _execute(self, process): + """Run the specified process.""" + self.logger.info("Starting %s...\n%s", self.short_description(), process.as_command()) + + process.start() + self.logger.info("%s started with pid %s.", self.short_description(), process.pid) + + self.return_code = process.wait() + # This test case is intended to randomly abort transactions in the core 
passthrough. We only + # expect to return a failure when the system crashes. This is different from the base + # implementation where we will throw on a non-zero return code. + if self.return_code != 0: + self.logger.info("Returning quietly instead of throwing failure: %s" % + (self.short_description())) + + self.logger.info("%s finished.", self.short_description()) |