summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jason Chan <jason.chan@10gen.com> 2020-11-10 12:07:35 -0500
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-12 07:02:09 +0000
commit: e399bf8689f592129c9655933bdb6a0e551a47b8 (patch)
tree: 0609ae3f7617076eb13c6d7f38453f44a74cfdac
parent: 0ed9f1bf7d27e43174ce806291a597947b1f98ae (diff)
download: mongo-e399bf8689f592129c9655933bdb6a0e551a47b8.tar.gz
SERVER-51598 Add new abort_multi_stmt_txn_test suites
-rw-r--r-- buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml 77
-rw-r--r-- buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml 320
-rw-r--r-- buildscripts/resmokelib/selector.py 2
-rw-r--r-- buildscripts/resmokelib/testing/testcases/fsm_workload_test.py 23
-rw-r--r-- buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py 31
-rw-r--r-- etc/evergreen.yml 25
-rw-r--r-- src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp 16
-rw-r--r-- src/mongo/db/session.cpp 15
8 files changed, 508 insertions, 1 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml
new file mode 100644
index 00000000000..6b24de0bbf3
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/concurrency_replication_abort_multi_stmt_txn.yml
@@ -0,0 +1,77 @@
+# A test suite intended to attempt to reproduce the crash in HELP-19266. This test suite will abort
+# transactions by setting a smaller expiration time. This test suite will ignore all assertion
+# and command failure errors and should only fail on a system crash, a hang, or a data consistency
+# error.
+
+test_kind: abort_txns_fsm_workload_test
+
+selector:
+ roots:
+ - jstests/concurrency/fsm_workloads/**/*.js
+ exclude_files:
+ ##
+ # Disabled due to MongoDB restrictions and/or workload restrictions
+ ##
+ # These workloads use >100MB of data, which can overwhelm test hosts.
+ - jstests/concurrency/fsm_workloads/agg_group_external.js
+ - jstests/concurrency/fsm_workloads/agg_sort_external.js
+ # The findAndModify_update_grow.js workload can cause OOM kills on test hosts.
+ - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js
+
+ # Creates a cursor in one state function and uses it in a different state function, which means
+ # that in this suite it attempts to use the same cursor in multiple transactions.
+ - jstests/concurrency/fsm_workloads/invalidated_cursors.js
+ - jstests/concurrency/fsm_workloads/globally_managed_cursors.js
+ - jstests/concurrency/fsm_workloads/kill_multicollection_aggregation.js
+
+ # Relies on having one thread observe writes from the other threads, which won't become visible
+ # once a transaction in the thread is started because it'll keep reading from the same snapshot.
+ - jstests/concurrency/fsm_workloads/create_index_background.js
+
+ exclude_with_any_tags:
+ - requires_sharding
+
+ # Tests which expect commands to fail and catch the error can cause transactions to abort and
+ # retry indefinitely.
+ - catches_command_failures
+
+executor:
+ archive:
+ hooks:
+ - CheckReplDBHashInBackground
+ - CheckReplDBHash
+ - ValidateCollections
+ tests: true
+ config:
+ shell_options:
+ # Ignore assertion failures from the shell in this test suite.
+ eval: doassert = Function.prototype;
+ readMode: commands
+ global_vars:
+ TestData:
+ runInsideTransaction: true
+ runningWithSessions: true
+ hooks:
+ # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
+ # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
+ # validating the entire contents of the collection.
+ #
+ # We don't run the CheckReplDBHashInBackground in this suite as it uses transactions to get the
+ # db hashes. The hook will throw if the transaction is aborted due to the shortened expiration
+ # lifetime in this suite.
+ # TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite.
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanupConcurrencyWorkloads
+ fixture:
+ class: ReplicaSetFixture
+ mongod_options:
+ oplogSize: 1024
+ set_parameters:
+ enableTestCommands: 1
+ numInitialSyncAttempts: 1
+ failpoint.setTransactionLifetimeToRandomMillis:
+ mode: alwaysOn
+ failpoint.increaseFrequencyOfPeriodicThreadToExpireTransactions:
+ mode: alwaysOn
+ num_nodes: 3
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml
new file mode 100644
index 00000000000..edc974bb5a8
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/replica_sets_abort_multi_stmt_txn_jscore_passthrough.yml
@@ -0,0 +1,320 @@
+# A test suite intended to attempt to reproduce the crash in HELP-19266. This test suite will abort
+# transactions by setting a smaller expiration time. This test suite will ignore all assertion
+# and command failure errors and should only fail on a system crash, a hang, or a data consistency
+# error.
+
+test_kind: abort_multi_stmt_txn_passthrough
+
+selector:
+ roots:
+ - jstests/core/**/*.js
+ # TODO: SERVER-35089
+ # - jstests/libs/txns/txn_passthrough_runner_selftest.js
+ exclude_files:
+ # TODO: SERVER-35089
+ - jstests/core/geo_allowedcomparisons.js
+ - jstests/core/geo_big_polygon2.js
+ - jstests/core/in.js
+ - jstests/core/orj.js
+ - jstests/core/insert1.js
+
+ # These tests already run with transactions.
+ - jstests/core/txns/*.js
+
+ # These tests are not expected to pass with replica-sets:
+ - jstests/core/capped_update.js
+ - jstests/core/dbadmin.js
+ - jstests/core/opcounters_write_cmd.js
+ - jstests/core/read_after_optime.js
+
+ ##
+ ## Limitations with the way the runner file injects transactions.
+ ##
+
+ # These tests expect some statements to error, which will cause txns to abort entirely.
+ - jstests/core/bulk_api_ordered.js
+ - jstests/core/bulk_api_unordered.js
+ - jstests/core/bulk_legacy_enforce_gle.js
+ - jstests/core/capped5.js
+ - jstests/core/commands_with_uuid.js
+ - jstests/core/explain_execution_error.js
+ - jstests/core/expr.js
+ - jstests/core/find_and_modify_invalid_query_params.js
+ - jstests/core/find_getmore_bsonsize.js
+ - jstests/core/find_getmore_cmd.js
+ - jstests/core/find9.js
+ - jstests/core/index_big1.js
+ - jstests/core/index_bigkeys.js
+ - jstests/core/index_decimal.js
+ - jstests/core/index_multiple_compatibility.js
+ - jstests/core/index_partial_write_ops.js
+ - jstests/core/index8.js # No explicit check for failed command.
+ - jstests/core/indexa.js # No explicit check for failed command.
+ - jstests/core/indexes_multiple_commands.js
+ - jstests/core/insert_long_index_key.js
+ - jstests/core/js2.js
+ - jstests/core/json_schema/json_schema.js
+ - jstests/core/mr_bigobject.js
+ - jstests/core/not2.js
+ - jstests/core/notablescan.js
+ - jstests/core/or1.js
+ - jstests/core/or2.js
+ - jstests/core/or3.js
+ - jstests/core/ork.js
+ - jstests/core/ref4.js
+ - jstests/core/regex_limit.js
+ - jstests/core/remove_undefined.js
+ - jstests/core/set7.js
+ - jstests/core/sortb.js
+ - jstests/core/sortf.js
+ - jstests/core/sortg.js
+ - jstests/core/sortj.js
+ - jstests/core/tailable_skip_limit.js
+ - jstests/core/type_array.js
+ - jstests/core/uniqueness.js
+ - jstests/core/unset2.js
+ - jstests/core/update_addToSet.js
+ - jstests/core/update_arrayFilters.js
+ - jstests/core/update_find_and_modify_id.js
+ - jstests/core/update_modifier_pop.js
+ - jstests/core/updateh.js
+ - jstests/core/updatej.js
+ - jstests/core/ref.js
+
+ # Consecutive writes totalling more than 16MB will cause the txn to abort with
+ # a TransactionTooLarge error.
+ - jstests/core/batch_size.js
+ - jstests/core/single_batch.js
+
+ ##
+ ## Some aggregation stages don't support snapshot readconcern.
+ ##
+
+ # $explain (requires read concern local)
+ - jstests/core/agg_hint.js
+ - jstests/core/and.js
+ - jstests/core/collation.js
+ - jstests/core/explain_shell_helpers.js
+ - jstests/core/index_partial_read_ops.js
+ - jstests/core/optimized_match_explain.js
+ - jstests/core/sort_array.js
+ - jstests/core/views/views_collation.js
+
+ # $out
+ - jstests/core/bypass_doc_validation.js
+ - jstests/core/views/views_aggregation.js
+
+ # $listSessions
+ - jstests/core/list_all_local_sessions.js
+ - jstests/core/list_all_sessions.js
+ - jstests/core/list_local_sessions.js
+ - jstests/core/list_sessions.js
+
+ # $indexStats
+ - jstests/core/index_stats.js
+
+ # $collStats
+ - jstests/core/operation_latency_histogram.js
+ - jstests/core/views/views_coll_stats.js
+ - jstests/core/views/views_stats.js
+
+ ##
+ ## WriteErrors get converted to WriteCommandErrors if part of a txn.
+ ##
+
+ # The same error code, but with ok:0.
+ - jstests/core/json_schema/additional_items.js
+ - jstests/core/json_schema/additional_properties.js
+ - jstests/core/json_schema/bsontype.js
+ - jstests/core/json_schema/dependencies.js
+ - jstests/core/json_schema/items.js
+ - jstests/core/json_schema/logical_keywords.js
+ - jstests/core/json_schema/min_max_items.js
+ - jstests/core/json_schema/min_max_properties.js
+ - jstests/core/json_schema/pattern_properties.js
+ - jstests/core/json_schema/required.js
+ - jstests/core/json_schema/unique_items.js
+
+ - jstests/core/field_name_validation.js
+ - jstests/core/fts_array.js
+ - jstests/core/inc-SERVER-7446.js
+ - jstests/core/invalid_db_name.js
+ - jstests/core/push_sort.js
+
+ # Checks for "WriteErrors" explicitly from the response of db.runCommand()
+ - jstests/core/max_doc_size.js
+
+ # Calls res.getWriteError() or res.hasWriteError().
+ - jstests/core/bulk_api_ordered.js
+ - jstests/core/bulk_api_unordered.js
+ - jstests/core/bulk_legacy_enforce_gle.js
+ - jstests/core/cappeda.js
+ - jstests/core/doc_validation.js
+ - jstests/core/doc_validation_options.js
+ - jstests/core/geo_multinest0.js
+ - jstests/core/insert_illegal_doc.js
+ - jstests/core/ns_length.js
+ - jstests/core/push2.js
+ - jstests/core/remove6.js
+ - jstests/core/removeb.js
+ - jstests/core/rename4.js
+ - jstests/core/shell_writeconcern.js
+ - jstests/core/storefunc.js
+ - jstests/core/update_arrayFilters.js
+ - jstests/core/update_dbref.js
+ - jstests/core/updatel.js
+ - jstests/core/write_result.js
+
+ # Multiple writes in a txn, some of which fail because the collection doesn't exist.
+ # We create the collection and retry the last write, but previous writes would have
+ # still failed.
+ - jstests/core/dbref1.js
+ - jstests/core/dbref2.js
+ - jstests/core/ref3.js
+ - jstests/core/repair_database.js
+ - jstests/core/update3.js
+ - jstests/core/rename3.js
+
+ ##
+ ## Error: Unable to acquire lock within a max lock request timeout of '0ms' milliseconds
+ ##
+
+ # Collection drops done through applyOps are not converted to w:majority
+ - jstests/core/views/invalid_system_views.js
+
+ # Operations run on the "out" collection of a MapReduce call, which is not always
+ # immediately available to a transaction as it is still being replicated. Transactions
+ # fail with "Unable to acquire lock" errors.
+ - jstests/core/function_string_representations.js
+ - jstests/core/mr_errorhandling.js
+ - jstests/core/mr_merge.js
+ - jstests/core/mr_merge2.js
+ - jstests/core/mr_replaceIntoDB.js
+ - jstests/core/mr_outreduce.js
+ - jstests/core/mr_outreduce2.js
+
+ ##
+ ## Misc. reasons.
+ ##
+
+ # SERVER-34868 Cannot run a legacy query on a session.
+ - jstests/core/exhaust.js
+ - jstests/core/validate_cmd_ns.js
+
+ # SERVER-34772 Tailable Cursors are not allowed with snapshot readconcern.
+ - jstests/core/awaitdata_getmore_cmd.js
+ - jstests/core/getmore_cmd_maxtimems.js
+ - jstests/core/tailable_cursor_invalidation.js
+ - jstests/core/tailable_getmore_batch_size.js
+
+ # SERVER-34918 The "max" option of a capped collection can be exceeded until the next insert.
+ # The reason is that we don't update the count of a collection until a transaction commits,
+ # by which point it is too late to complain that "max" has been exceeded.
+ - jstests/core/capped_max1.js
+
+ # The "max" option of a capped collection can be temporarily exceeded before a
+ # txn is committed.
+ - jstests/core/bulk_insert_capped.js
+
+ # Wrong count for top info (WriteLock)
+ - jstests/core/top.js
+
+ # Expects collection to not have been created
+ - jstests/core/insert_id_undefined.js
+
+ # Creates sessions explicitly, resulting in txns being run through different sessions
+ # using a single txnNumber.
+ - jstests/core/list_all_local_cursors.js
+ - jstests/core/json_schema/misc_validation.js
+ - jstests/core/views/views_all_commands.js
+
+ # Fails with implicit sessions because it will use multiple sessions on the same Mongo connection.
+ - jstests/core/dropdb.js
+
+ # Committing a transaction when the server is fsync locked fails.
+ - jstests/core/fsync.js
+
+ # Expects legacy errors ($err).
+ - jstests/core/constructors.js
+
+ # txn interrupted by command outside of txn before getMore runs.
+ - jstests/core/commands_namespace_parsing.js
+ - jstests/core/drop3.js
+ - jstests/core/ensure_sorted.js
+ - jstests/core/geo_s2cursorlimitskip.js
+ - jstests/core/getmore_invalidated_cursors.js
+ - jstests/core/getmore_invalidated_documents.js
+ - jstests/core/kill_cursors.js
+ - jstests/core/list_collections1.js
+ - jstests/core/list_indexes.js
+ - jstests/core/oro.js
+
+ # Expects certain number of operations in the system.profile collection.
+ - jstests/core/profile*.js
+
+ # Parallel Shell - we do not signal the override to end a txn when a parallel shell closes.
+ - jstests/core/awaitdata_getmore_cmd.js
+ - jstests/core/compact_keeps_indexes.js
+ - jstests/core/count10.js
+ - jstests/core/count_plan_summary.js
+ - jstests/core/coveredIndex3.js
+ - jstests/core/currentop.js
+ - jstests/core/distinct3.js
+ - jstests/core/evald.js
+ - jstests/core/find_and_modify_concurrent_update.js
+ - jstests/core/fsync.js
+ - jstests/core/geo_update_btree.js
+ - jstests/core/killop_drop_collection.js
+ - jstests/core/loadserverscripts.js
+ - jstests/core/mr_killop.js
+ - jstests/core/remove9.js
+ - jstests/core/removeb.js
+ - jstests/core/removec.js
+ - jstests/core/shellstartparallel.js
+ - jstests/core/updatef.js
+
+ # Command expects to see result from parallel operation.
+ # E.g. Suppose the following sequence of events: op1, join() op2 in parallel shell, op3.
+ # op3 will still be using the snapshot from op1, and not see op2 at all.
+ - jstests/core/cursora.js
+ - jstests/core/bench_test1.js
+
+ # It is illegal to open a tailable cursor in a transaction
+ - jstests/core/geo_near_tailable.js
+
+ exclude_with_any_tags:
+ # "Cowardly refusing to override read concern of command: ..."
+ - assumes_read_concern_unchanged
+ # "writeConcern is not allowed within a multi-statement transaction"
+ - assumes_write_concern_unchanged
+
+executor:
+ config:
+ shell_options:
+ # Ignore assertion failures from the shell in this test suite.
+ eval: var testingReplication = true; doassert = Function.prototype;
+ global_vars:
+ TestData:
+ sessionOptions:
+ causalConsistency: false
+ readMode: commands
+ hooks:
+ # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
+ # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
+ # validating the entire contents of the collection.
+ - class: CheckReplOplogs
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: ReplicaSetFixture
+ mongod_options:
+ set_parameters:
+ enableTestCommands: 1
+ numInitialSyncAttempts: 1
+ failpoint.setTransactionLifetimeToRandomMillis:
+ mode: alwaysOn
+ failpoint.increaseFrequencyOfPeriodicThreadToExpireTransactions:
+ mode: alwaysOn
+ num_nodes: 2
\ No newline at end of file
diff --git a/buildscripts/resmokelib/selector.py b/buildscripts/resmokelib/selector.py
index 0ed27403cbc..cbd1eb86bb3 100644
--- a/buildscripts/resmokelib/selector.py
+++ b/buildscripts/resmokelib/selector.py
@@ -679,11 +679,13 @@ _SELECTOR_REGISTRY = {
"db_test": (_DbTestSelectorConfig, _DbTestSelector),
"fsm_workload_test": (_JSTestSelectorConfig, _JSTestSelector),
"parallel_fsm_workload_test": (_MultiJSTestSelectorConfig, _MultiJSTestSelector),
+ "abort_txns_fsm_workload_test": (_JSTestSelectorConfig, _JSTestSelector),
"json_schema_test": (_JsonTestSelectorConfig, _Selector),
"js_test": (_JSTestSelectorConfig, _JSTestSelector),
"multi_stmt_txn_passthrough": (_JSTestSelectorConfig, _JSTestSelector),
"py_test": (_PyTestCaseSelectorConfig, _Selector),
"sleep_test": (_SleepTestCaseSelectorConfig, _SleepTestCaseSelector),
+ "abort_multi_stmt_txn_passthrough": (_JSTestSelectorConfig, _JSTestSelector),
}
diff --git a/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py b/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py
index 6cbda33abe9..f2cf17dd9c3 100644
--- a/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py
+++ b/buildscripts/resmokelib/testing/testcases/fsm_workload_test.py
@@ -99,3 +99,26 @@ class ParallelFSMWorkloadTestCase(FSMWorkloadTestCase):
for workload_name in sorted(selected_tests):
uid.update(workload_name)
return uid.hexdigest()
+
+
+class AbortTxnsFSMWorkloadTestCase(FSMWorkloadTestCase):
+ """An FSM workload intended to be used by test suites that test transaction expiration logic."""
+
+ REGISTERED_NAME = "abort_txns_fsm_workload_test"
+
+ def _execute(self, process):
+ """Run the specified process."""
+ self.logger.info("Starting %s...\n%s", self.short_description(), process.as_command())
+
+ process.start()
+ self.logger.info("%s started with pid %s.", self.short_description(), process.pid)
+
+ self.return_code = process.wait()
+ # This test case is intended to randomly abort transactions in the FSM concurrency
+ # workloads. We only expect to return a failure when the system crashes. This differs from
+ # the base implementation, which throws on a non-zero return code.
+ if self.return_code != 0:
+ self.logger.info("Returning quietly instead of throwing failure: %s" %
+ (self.short_description()))
+
+ self.logger.info("%s finished.", self.short_description())
diff --git a/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py b/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py
index 1e790612153..83c658557a6 100644
--- a/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py
+++ b/buildscripts/resmokelib/testing/testcases/multi_stmt_txn_test.py
@@ -27,3 +27,34 @@ class MultiStmtTxnTestCase(jsrunnerfile.JSRunnerFileTestCase):
def _populate_test_data(self, test_data):
test_data["multiStmtTxnTestFile"] = self.multi_stmt_txn_test_file
+
+
+class AbortMultiStmtTxnTestCase(MultiStmtTxnTestCase):
+ """Test case for aborting multi statement transactions."""
+
+ REGISTERED_NAME = "abort_multi_stmt_txn_passthrough"
+
+ def __init__(self, logger, multi_stmt_txn_test_file, shell_executable=None, shell_options=None):
+ """Initialize AbortMultiStmtTxnTestCase to be used to test transaction expiration logic."""
+ # pylint: disable=non-parent-init-called,super-init-not-called
+ jsrunnerfile.JSRunnerFileTestCase.__init__(
+ self, logger, "Abort Multi-statement Transaction Passthrough", multi_stmt_txn_test_file,
+ test_runner_file="jstests/libs/txns/txn_passthrough_runner.js",
+ shell_executable=shell_executable, shell_options=shell_options)
+
+ def _execute(self, process):
+ """Run the specified process."""
+ self.logger.info("Starting %s...\n%s", self.short_description(), process.as_command())
+
+ process.start()
+ self.logger.info("%s started with pid %s.", self.short_description(), process.pid)
+
+ self.return_code = process.wait()
+ # This test case is intended to randomly abort transactions in the core passthrough. We only
+ # expect to return a failure when the system crashes. This differs from the base
+ # implementation, which throws on a non-zero return code.
+ if self.return_code != 0:
+ self.logger.info("Returning quietly instead of throwing failure: %s" %
+ (self.short_description()))
+
+ self.logger.info("%s finished.", self.short_description())
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 868aed31edd..36fdc262709 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -4583,6 +4583,16 @@ tasks:
resmoke_args: --suites=replica_sets_multi_stmt_txn_jscore_passthrough --storageEngine=wiredTiger
- <<: *task_template
+ name: replica_sets_abort_multi_stmt_txn_jscore_passthrough
+ depends_on:
+ - name: jsCore
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+ vars:
+ resmoke_args: --suites=replica_sets_abort_multi_stmt_txn_jscore_passthrough --storageEngine=wiredTiger
+
+- <<: *task_template
name: replica_sets_initsync_jscore_passthrough
depends_on:
- name: jsCore
@@ -4835,6 +4845,15 @@ tasks:
resmoke_jobs_max: 1
- <<: *task_template
+ name: concurrency_replication_abort_multi_stmt_txn
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+ vars:
+ resmoke_args: --suites=concurrency_replication_abort_multi_stmt_txn --storageEngine=wiredTiger
+ resmoke_jobs_max: 1
+
+- <<: *task_template
name: concurrency_replication_ubsan
commands:
- func: "do setup"
@@ -6416,6 +6435,7 @@ buildvariants:
- name: concurrency_replication
- name: concurrency_replication_causal_consistency
- name: concurrency_replication_multi_stmt_txn
+ - name: concurrency_replication_abort_multi_stmt_txn
- name: concurrency_sharded_replication
- name: concurrency_sharded_replication_with_balancer
- name: concurrency_sharded_causal_consistency_gen
@@ -6463,6 +6483,7 @@ buildvariants:
- name: replica_sets_initsync_static_jscore_passthrough
- name: replica_sets_jscore_passthrough
- name: replica_sets_multi_stmt_txn_jscore_passthrough
+ - name: replica_sets_abort_multi_stmt_txn_jscore_passthrough
- name: replica_sets_kill_primary_jscore_passthrough
- name: replica_sets_terminate_primary_jscore_passthrough
- name: replica_sets_kill_secondaries_jscore_passthrough
@@ -8305,6 +8326,7 @@ buildvariants:
distros:
- rhel62-large
- name: concurrency_replication_multi_stmt_txn
+ - name: concurrency_replication_abort_multi_stmt_txn
- name: concurrency_sharded_replication
- name: concurrency_sharded_replication_with_balancer
- name: concurrency_sharded_causal_consistency_gen
@@ -8392,6 +8414,9 @@ buildvariants:
- name: replica_sets_multi_stmt_txn_jscore_passthrough
distros:
- rhel62-large
+ - name: replica_sets_abort_multi_stmt_txn_jscore_passthrough
+ distros:
+ - rhel62-large
- name: replica_sets_kill_primary_jscore_passthrough
distros:
- rhel62-large
diff --git a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp
index e14f93d4d24..5eab0381527 100644
--- a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp
+++ b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp
@@ -38,10 +38,13 @@
#include "mongo/db/kill_sessions_local.h"
#include "mongo/db/service_context.h"
#include "mongo/db/session.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
#include "mongo/util/periodic_runner.h"
namespace mongo {
+// Used in our core transaction passthrough suites to test the abort logic for expired transactions.
+MONGO_FAIL_POINT_DEFINE(increaseFrequencyOfPeriodicThreadToExpireTransactions);
namespace {
const auto gServiceDecoration =
@@ -74,6 +77,17 @@ void PeriodicThreadToAbortExpiredTransactions::_init(ServiceContext* serviceCont
auto periodicRunner = serviceContext->getPeriodicRunner();
invariant(periodicRunner);
+ auto jobPeriodMillis = Milliseconds(1000);
+
+ if (MONGO_FAIL_POINT(increaseFrequencyOfPeriodicThreadToExpireTransactions)) {
+ // This failpoint is used in test suites in conjunction with the
+ // setTransactionLifetimeToRandomMillis failpoint, which sets the transaction lifetime
+ // to expire in milliseconds.
+ jobPeriodMillis = Milliseconds(5);
+ log() << "increaseFrequencyOfPeriodicThreadToExpireTransactions failpoint enabled -- "
+ "setting frequency of periodic thread to "
+ << jobPeriodMillis << " ms.";
+ }
// We want this job period to be dynamic, to run every (transactionLifetimeLimitSeconds/2)
// seconds, where transactionLifetimeLimitSeconds is an adjustable server parameter, or within
@@ -115,7 +129,7 @@ void PeriodicThreadToAbortExpiredTransactions::_init(ServiceContext* serviceCont
killAllExpiredTransactions(opCtx.get());
},
- Seconds(1));
+ jobPeriodMillis);
_anchor = std::make_shared<PeriodicJobAnchor>(periodicRunner->makeJob(std::move(job)));
}
diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp
index edcc5be1294..c087ae93ce6 100644
--- a/src/mongo/db/session.cpp
+++ b/src/mongo/db/session.cpp
@@ -58,6 +58,7 @@
#include "mongo/db/stats/fill_locker_info.h"
#include "mongo/db/stats/top.h"
#include "mongo/db/transaction_history_iterator.h"
+#include "mongo/platform/random.h"
#include "mongo/stdx/memory.h"
#include "mongo/transport/transport_layer.h"
#include "mongo/util/fail_point_service.h"
@@ -88,6 +89,9 @@ MONGO_EXPORT_SERVER_PARAMETER(transactionLifetimeLimitSeconds, std::int32_t, 60)
return Status::OK();
});
+// Used in our core transaction passthrough suites to test the abort logic for expired transactions.
+MONGO_FAIL_POINT_DEFINE(setTransactionLifetimeToRandomMillis);
+
namespace {
@@ -588,6 +592,17 @@ void Session::_beginOrContinueTxn(WithLock wl,
const auto now = curTimeMicros64();
_transactionExpireDate = Date_t::fromMillisSinceEpoch(now / 1000) +
Seconds{transactionLifetimeLimitSeconds.load()};
+
+ if (MONGO_FAIL_POINT(setTransactionLifetimeToRandomMillis)) {
+ PseudoRandom prng(SecureRandom::create()->nextInt64());
+ // Override the transaction lifetime to expire in the next 0-19 ms (upper bound is exclusive).
+ const auto expireMillis = prng.nextInt32(20);
+ log() << "setTransactionLifetimeToRandomMillis failpoint enabled -- "
+ << "setting transaction to expire in " << expireMillis << "ms.";
+ _transactionExpireDate =
+ Date_t::fromMillisSinceEpoch(now / 1000) + Milliseconds{expireMillis};
+ }
+
// Tracks various transactions metrics.
{
stdx::lock_guard<stdx::mutex> ls(_statsMutex);