summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXueruiFa <xuerui.fa@mongodb.com>2021-03-15 21:29:33 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-08 14:46:19 +0000
commit8cc71b9b34bb6f647993163c766d387b6dc08ae0 (patch)
tree0c4acc743e20db8d91c5fe2f1c9eb9c4e3b58f30
parentb493b9b63c6847ca7482da01d39871920a08c165 (diff)
downloadmongo-8cc71b9b34bb6f647993163c766d387b6dc08ae0.tar.gz
SERVER-53807: Add tenant migrations passthrough coverage for transactions
(cherry picked from commit 310744312aa7554f69ea531fa478acfd991d1a5a)
-rw-r--r--buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml407
-rw-r--r--etc/evergreen.yml11
-rw-r--r--jstests/libs/override_methods/inject_tenant_prefix.js45
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp3
4 files changed, 448 insertions, 18 deletions
diff --git a/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml
new file mode 100644
index 00000000000..d87b8bc0695
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml
@@ -0,0 +1,407 @@
+test_kind: js_test
+# This suite starts two 3-node replica sets and uses the ContinuousTenantMigration hook to run
+# background tenant migrations. It also wraps all CRUD commands in transactions and asserts that
+# the transactions are committed/aborted correctly while tenant migrations are running.
+
+selector:
+ roots:
+ - jstests/core/**/*.js
+
+ exclude_files:
+ #
+ # Excluded from tenant_migration_jscore_passthrough.yml
+ #
+
+ # These tests already run with transactions.
+ - jstests/core/txns/**/*.js
+ # These tests depend on hardcoded database name equality.
+ - jstests/core/json_schema/misc_validation.js
+ - jstests/core/list_databases.js
+ - jstests/core/profile1.js
+ - jstests/core/profile3.js
+ - jstests/core/views/views_stats.js
+ # These tests contain assertions on the number of executed operations and this suite retries
+ # operations on TenantMigrationAborted errors.
+ - jstests/core/find_and_modify_metrics.js
+ - jstests/core/update_metrics.js
+ - jstests/core/operation_latency_histogram.js
+ - jstests/core/top.js
+ - jstests/core/profile_sampling.js
+ - jstests/core/profile_hide_index.js
+ # The override cannot deep copy very large or small dates.
+ - jstests/core/index_large_and_small_dates.js
+ # This test expects that the connection (i.e. 'threadName') does not change throughout each test
+ # case. That is not always true when there is a background tenant migration.
+ - jstests/core/failcommand_failpoint.js
+ # The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
+ # command multiple times, which may observe the change to the failpoint enabled by the migration
+ # hook.
+ - jstests/core/set_param1.js
+ # This test does not support tojson of command objects so the override cannot deep copy the
+ # command objects correctly.
+ - jstests/core/SERVER-23626.js
+ # These tests write with {w: 0} which doesn't wait for the storage transaction writing the
+ # document and the oplog entry to commit so the TenantMigrationConflict will not be caught.
+ - jstests/core/batch_write_command_w0.js
+ - jstests/core/crud_api.js
+ # These tests use benchRun which does not use runCommand.
+ - jstests/core/bench_test1.js
+ - jstests/core/bench_test3.js
+ - jstests/core/benchrun_pipeline_updates.js
+ # This test uses exhaust which does not use runCommand.
+ - jstests/core/exhaust.js
+ # These tests use db._authOrThrow which does not use runCommand.
+ - jstests/core/auth1.js
+ - jstests/core/connection_status.js
+ - jstests/core/user_management_helpers.js
+ # These tests use legacy read mode which does not use runCommand.
+ - jstests/core/comment_field.js
+ - jstests/core/invalidated_legacy_cursors.js
+ # TODO (SERVER-52727): Synchronize cloneCollectionAsCapped with tenant migrations.
+ - jstests/core/capped_convertToCapped1.js
+ # TODO (SERVER-52866): Synchronize getLastError with tenant migrations.
+ - jstests/core/bulk_legacy_enforce_gle.js
+ # This test contains assertions for the hostname that operations run on.
+ - jstests/core/currentop_cursors.js
+ # Server parameters are stored in-memory only so are not transferred onto the recipient. These
+ # tests set the server parameter "notablescan" to force the node to not execute queries that
+ # require a collection scan and return an error.
+ - jstests/core/notablescan.js
+ - jstests/core/notablescan_capped.js
+ # captrunc command is not blocked during tenant migration.
+ - jstests/core/capped6.js
+ # Multi-updates that conflict with tenant migration are not retried by inject_tenant_prefix.js.
+ - jstests/core/batch_write_collation_estsize.js
+ - jstests/core/bulk_api_ordered.js
+ - jstests/core/bulk_api_unordered.js
+ - jstests/core/fts_querylang.js
+ - jstests/core/idhack.js
+ - jstests/core/role_management_helpers.js
+ - jstests/core/roles_info.js
+ - jstests/core/server1470.js
+ - jstests/core/update_arrayFilters.js
+ - jstests/core/update_arraymatch2.js
+ - jstests/core/update_arraymatch3.js
+ - jstests/core/update_arraymatch5.js
+ - jstests/core/update_hint.js
+ - jstests/core/update_multi3.js
+ - jstests/core/update_multi4.js
+ - jstests/core/update_multi5.js
+ - jstests/core/update_pipeline_shell_helpers.js
+ - jstests/core/update_with_pipeline.js
+ - jstests/core/update7.js
+ - jstests/core/updatei.js
+ - jstests/core/updatej.js
+ - jstests/core/updatel.js
+ - jstests/core/where_system_js.js
+ - jstests/core/write_result.js
+
+ #
+ # Excluded from replica_sets_multi_stmt_txn_jscore_passthrough.yml
+ #
+
+ ##
+ ## Limitations with the way the runner file injects transactions.
+ ##
+
+ # These tests expect some statements to error, which will cause txns to abort entirely.
+ - jstests/core/capped5.js
+ - jstests/core/commands_with_uuid.js
+ - jstests/core/dbcase.js
+ - jstests/core/dbcase2.js
+ - jstests/core/explain_execution_error.js
+ - jstests/core/expr.js
+ - jstests/core/find9.js
+ - jstests/core/find_and_modify_invalid_query_params.js
+ - jstests/core/find_getmore_bsonsize.js
+ - jstests/core/find_getmore_cmd.js
+ - jstests/core/geo_allowedcomparisons.js
+ - jstests/core/geo_big_polygon2.js
+ - jstests/core/geonear_key.js
+ - jstests/core/get_more_cmd_refuses_api_params.js
+ - jstests/core/in.js
+ - jstests/core/index8.js # No explicit check for failed command.
+ - jstests/core/index_decimal.js
+ - jstests/core/index_multiple_compatibility.js
+ - jstests/core/index_partial_write_ops.js
+ - jstests/core/indexa.js # No explicit check for failed command.
+ - jstests/core/indexes_multiple_commands.js
+ - jstests/core/js2.js
+ - jstests/core/json_schema/json_schema.js
+ - jstests/core/mr_bigobject.js
+ - jstests/core/not2.js
+ - jstests/core/null_query_semantics.js
+ - jstests/core/or1.js
+ - jstests/core/or2.js
+ - jstests/core/or3.js
+ - jstests/core/ord.js
+ - jstests/core/orj.js
+ - jstests/core/projection_expr_mod.js
+ - jstests/core/ref.js
+ - jstests/core/ref4.js
+ - jstests/core/regex_limit.js
+ - jstests/core/remove_undefined.js
+ - jstests/core/set7.js
+ - jstests/core/sortb.js
+ - jstests/core/sortf.js
+ - jstests/core/sortg.js
+ - jstests/core/sortj.js
+ - jstests/core/sort_with_meta_operator.js
+ - jstests/core/tailable_skip_limit.js
+ - jstests/core/type_array.js
+ - jstests/core/uniqueness.js
+ - jstests/core/unset2.js
+ - jstests/core/update_addToSet.js
+ - jstests/core/update_array_offset_positional.js
+ - jstests/core/update_find_and_modify_id.js
+ - jstests/core/update_modifier_pop.js
+ - jstests/core/updateh.js
+
+ # Reads from system.views.
+ - jstests/core/views/views_drop.js
+
+ ##
+ ## Some aggregation stages don't support snapshot readconcern.
+ ##
+
+ # $explain (requires read concern local)
+ - jstests/core/agg_hint.js
+ - jstests/core/and.js
+ - jstests/core/collation.js
+ - jstests/core/distinct_multikey_dotted_path.js
+ - jstests/core/distinct_with_hashed_index.js
+ - jstests/core/explain_shell_helpers.js
+ - jstests/core/index_partial_read_ops.js
+ - jstests/core/optimized_match_explain.js
+ - jstests/core/sort_array.js
+ - jstests/core/views/views_collation.js
+ - jstests/core/wildcard_index_count.js
+ - jstests/core/explain_server_params.js
+
+ # $listSessions
+ - jstests/core/list_all_local_sessions.js
+ - jstests/core/list_all_sessions.js
+ - jstests/core/list_local_sessions.js
+ - jstests/core/list_sessions.js
+
+ # $indexStats
+ - jstests/core/index_stats.js
+
+ # $collStats
+ - jstests/core/views/views_coll_stats.js
+
+ # Errors expected to happen in tests, which can cause transactions to get aborted.
+ # So when the test tries to inspect the documents it can be out of sync (relative
+ # to test run without multi statement transactions).
+ - jstests/core/cappeda.js
+ - jstests/core/doc_validation.js
+ - jstests/core/doc_validation_options.js
+ - jstests/core/field_name_validation.js
+ - jstests/core/insert_illegal_doc.js
+ - jstests/core/positional_projection.js
+ - jstests/core/push_sort.js
+ - jstests/core/rename4.js
+ - jstests/core/update_dbref.js
+
+ # Trick for bypassing mongo shell validation in the test doesn't work because txn_override
+ # retry logic will hit the shell validation.
+ - jstests/core/invalid_db_name.js
+
+ # Multiple writes in a txn, some of which fail because the collection doesn't exist.
+ # We create the collection and retry the last write, but previous writes would have
+ # still failed.
+ - jstests/core/dbref1.js
+ - jstests/core/dbref2.js
+ - jstests/core/ref3.js
+ - jstests/core/update3.js
+ - jstests/core/rename3.js
+
+ ##
+ ## Error: Unable to acquire lock within a max lock request timeout of '0ms' milliseconds
+ ##
+
+ # Collection drops done through applyOps are not converted to w:majority
+ - jstests/core/views/invalid_system_views.js
+
+ ##
+ ## Misc. reasons.
+ ##
+
+ # SERVER-34868 Cannot run a legacy query on a session.
+ - jstests/core/validate_cmd_ns.js
+
+ # SERVER-34772 Tailable Cursors are not allowed with snapshot readconcern.
+ - jstests/core/awaitdata_getmore_cmd.js
+ - jstests/core/getmore_cmd_maxtimems.js
+ - jstests/core/tailable_cursor_invalidation.js
+ - jstests/core/tailable_getmore_batch_size.js
+
+ # SERVER-34918 The "max" option of a capped collection can be exceeded until the next insert.
+ # The reason is that we don't update the count of a collection until a transaction commits,
+ # by which point it is too late to complain that "max" has been exceeded.
+ - jstests/core/capped_max1.js
+
+ # The "max" option of a capped collection can be temporarily exceeded before a
+ # txn is committed.
+ - jstests/core/bulk_insert_capped.js
+
+ # Expects collection to not have been created
+ - jstests/core/insert_id_undefined.js
+
+ # Creates sessions explicitly, resulting in txns being run through different sessions
+ # using a single txnNumber.
+ - jstests/core/views/views_all_commands.js
+
+ # Fails with implicit sessions because it will use multiple sessions on the same Mongo connection.
+ - jstests/core/dropdb.js
+
+ # Committing a transaction when the server is fsync locked fails.
+ - jstests/core/fsync.js
+
+ # Expects legacy errors ($err).
+ - jstests/core/constructors.js
+
+ # txn interrupted by command outside of txn before getMore runs.
+ - jstests/core/commands_namespace_parsing.js
+ - jstests/core/drop3.js
+ - jstests/core/ensure_sorted.js
+ - jstests/core/geo_s2cursorlimitskip.js
+ - jstests/core/getmore_invalidated_cursors.js
+ - jstests/core/getmore_invalidated_documents.js
+ - jstests/core/kill_cursors.js
+ - jstests/core/list_collections1.js
+ - jstests/core/list_indexes.js
+ - jstests/core/list_indexes_invalidation.js
+ - jstests/core/list_namespaces_invalidation.js
+ - jstests/core/oro.js
+
+ # Parallel Shell - we do not signal the override to end a txn when a parallel shell closes.
+ - jstests/core/compact_keeps_indexes.js
+ - jstests/core/count10.js
+ - jstests/core/count_plan_summary.js
+ - jstests/core/coveredIndex3.js
+ - jstests/core/crud_ops_do_not_throw_locktimeout.js
+ - jstests/core/distinct3.js
+ - jstests/core/find_and_modify_concurrent_update.js
+ - jstests/core/geo_update_btree.js
+ - jstests/core/killop_drop_collection.js
+ - jstests/core/loadserverscripts.js
+ - jstests/core/mr_killop.js
+ - jstests/core/remove9.js
+ - jstests/core/removeb.js
+ - jstests/core/removec.js
+ - jstests/core/shellstartparallel.js
+ - jstests/core/updatef.js
+
+ # Command expects to see result from parallel operation.
+ # E.g. Suppose the following sequence of events: op1, join() op2 in parallel shell, op3.
+ # op3 will still be using the snapshot from op1, and not see op2 at all.
+ - jstests/core/bench_test1.js
+ - jstests/core/benchrun_pipeline_updates.js
+ - jstests/core/cursora.js
+
+ # Does not support tojson of command objects.
+ - jstests/core/SERVER-23626.js
+
+ # TODO(SERVER-55882): Investigate why this test is failing.
+ - jstests/core/wildcard_index_multikey.js
+
+ exclude_with_any_tags:
+ - assumes_standalone_mongod
+ # These tests run getMore commands which are not supported in the tenant migration passthrough.
+ - requires_getmore
+ # Due to background tenant migrations, operations in the main test shell are not guaranteed to
+ # be causally consistent with operations in a parallel shell. The reason is that
+ # TenantMigrationCommitted error is only thrown when the client does a write or an atClusterTime/
+ # afterClusterTime or linearizable read. Therefore, one of the shells may not be aware that the
+ # migration has occurred and would not forward the read/write command to the right replica set.
+ - uses_parallel_shell
+ # Profile settings are stored in-memory only so are not transferred to the recipient.
+ - requires_profiling
+ # emptycapped command is blocked during tenant migration.
+ - requires_emptycapped
+ - tenant_migration_incompatible
+ # "Cowardly refusing to override read concern of command: ..."
+ - assumes_read_concern_unchanged
+ # "writeConcern is not allowed within a multi-statement transaction"
+ - assumes_write_concern_unchanged
+ # Transactions are not allowed to operate on capped collections.
+ - requires_capped
+ # Retrying a query can change whether a plan cache entry is active.
+ - inspects_whether_plan_cache_entry_is_active
+ # $out is not supported in transactions
+ - uses_$out
+ # Transaction-continuing commands cannot specify API parameters, so tests that use API parameters
+ # cannot be run with transactions.
+ - uses_api_parameters
+ - does_not_support_transactions
+
+executor:
+ archive:
+ hooks:
+ - CheckReplOplogs
+ - CheckReplDBHash
+ - ValidateCollections
+ config:
+ shell_options:
+ eval: >-
+ testingReplication = true;
+ load('jstests/libs/override_methods/inject_tenant_prefix.js');
+ load('jstests/libs/override_methods/enable_sessions.js');
+ load('jstests/libs/override_methods/txn_passthrough_cmd_massage.js');
+ load('jstests/libs/override_methods/network_error_and_txn_override.js');
+ global_vars:
+ TestData: &TestData
+ tenantId: "tenantMigrationTenantId"
+ networkErrorAndTxnOverrideConfig:
+ wrapCRUDinTransactions: true
+ sessionOptions:
+ # Tests in this suite only read from primaries and only one node is electable, so causal
+ # consistency is not required to read your own writes.
+ causalConsistency: false
+ readMode: commands
+ hooks:
+ - class: ContinuousTenantMigration
+ shell_options:
+ global_vars:
+ TestData: *TestData
+ # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
+ # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
+ # validating the entire contents of the collection.
+ - class: CheckReplOplogs
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 1
+ fixture:
+ class: TenantMigrationFixture
+ common_mongod_options:
+ set_parameters:
+ enableTestCommands: 1
+ failpoint.abortTenantMigrationBeforeLeavingBlockingState:
+ mode:
+ activationProbability: 0.5
+ failpoint.pauseTenantMigrationBeforeLeavingBlockingState:
+ mode: alwaysOn
+ data:
+ blockTimeMS: 250
+ # To avoid routing commands in each test incorrectly, the ContinuousTenantMigration hook
+ # only runs donorForgetMigration against the donor of each migration when it is safe to do
+ # so. Therefore, the garbage collection delay doesn't need to be large.
+ tenantMigrationGarbageCollectionDelayMS: 1
+ ttlMonitorSleepSecs: 1
+ # The donor replica set may have active transactions while a migration is in progress. If
+ # the migration is committed, those transactions may never receive 'commitTransaction' or
+ # 'abortTransaction', since all writes are automatically rerouted to the recipient. We set
+ # a low 'transactionLifetimeLimitSeconds' value to ensure that they are cleaned up quickly.
+ transactionLifetimeLimitSeconds: 10
+ tlsMode: allowTLS
+ tlsCAFile: jstests/libs/ca.pem
+ tlsAllowInvalidHostnames: ''
+ per_mongod_options:
+ # Each entry is for a different replica set's extra mongod options.
+ - tlsCertificateKeyFile: jstests/libs/rs0.pem
+ - tlsCertificateKeyFile: jstests/libs/rs1.pem
+ num_replica_sets: 2
+ num_nodes_per_replica_set: 3
+ use_replica_set_connection_string: true
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 7c4bf03e7e4..ca87d9d847e 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -6305,6 +6305,16 @@ tasks:
resmoke_args: --storageEngine=wiredTiger
fallback_num_sub_suites: 10
+- name: tenant_migration_multi_stmt_txn_jscore_passthrough_gen
+ tags: ["txn"]
+ commands:
+ - func: "generate resmoke tasks"
+ vars:
+ depends_on: jsCore
+ use_large_distro: "true"
+ resmoke_args: --storageEngine=wiredTiger
+ fallback_num_sub_suites: 10
+
- name: parallel_gen
tags: ["misc_js"]
commands:
@@ -10336,6 +10346,7 @@ buildvariants:
- name: multi_stmt_txn_jscore_passthrough_with_migration_gen
- name: tenant_migration_jscore_passthrough_gen
- name: tenant_migration_causally_consistent_jscore_passthrough_gen
+ - name: tenant_migration_multi_stmt_txn_jscore_passthrough_gen
- name: multiversion_gen
- name: .query_fuzzer
- name: .random_multiversion_ds
diff --git a/jstests/libs/override_methods/inject_tenant_prefix.js b/jstests/libs/override_methods/inject_tenant_prefix.js
index f7d805b9d95..95e927162af 100644
--- a/jstests/libs/override_methods/inject_tenant_prefix.js
+++ b/jstests/libs/override_methods/inject_tenant_prefix.js
@@ -475,30 +475,41 @@ Mongo.prototype.runCommandRetryOnTenantMigrationErrors = function(
}
if (migrationCommittedErr || migrationAbortedErr) {
- // Update the command for reroute/retry.
- modifyCmdObjForRetry(cmdObjWithTenantId, resObj, true);
- // It is safe to reformat this resObj since it will not be returned to the caller of
- // runCommand.
- reformatResObjForLogging(resObj);
-
- // Build a new indexMap where the keys are the index that each write that needs to be
- // retried will have in the next attempt's cmdObj.
- indexMap = resetIndices(indexMap);
+ // If the command was inside a transaction, skip modifying any objects or fields, since
+ // we will retry the entire transaction outside of this file.
+ if (!TransactionsUtil.isTransientTransactionError(resObj)) {
+ // Update the command for reroute/retry.
+ modifyCmdObjForRetry(cmdObjWithTenantId, resObj, true);
+
+ // It is safe to reformat this resObj since it will not be returned to the caller of
+ // runCommand.
+ reformatResObjForLogging(resObj);
+
+ // Build a new indexMap where the keys are the index that each write that needs to
+ // be retried will have in the next attempt's cmdObj.
+ indexMap = resetIndices(indexMap);
+ }
if (migrationCommittedErr) {
+ jsTestLog(`Got TenantMigrationCommitted for command against database ${
+ dbNameWithTenantId} after trying ${numAttempts} times: ${tojson(resObj)}`);
// Store the connection to the recipient so the next commands can be rerouted.
this.migrationStateDoc = this.getTenantMigrationStateDoc();
this.reroutingMongo =
connect(this.migrationStateDoc.recipientConnectionString).getMongo();
-
- jsTest.log(`Got TenantMigrationCommitted for command against database ` +
- `"${dbNameWithTenantId}" after trying ${numAttempts} times, rerouting ` +
- `the command: ${tojson(resObj)}`);
} else if (migrationAbortedErr) {
- jsTest.log(
- `Got TenantMigrationAborted for command against database ` +
- `"${dbNameWithTenantId}" after trying ${numAttempts} times, retrying the ` +
- `command: ${tojson(resObj)}`);
+ jsTestLog(`Got TenantMigrationAborted for command against database ${
+ dbNameWithTenantId} after trying ${numAttempts} times: ${tojson(resObj)}`);
+ }
+
+ // If the result has a TransientTransactionError label, the entire transaction must be
+ // retried. Return immediately to let the retry be handled by
+ // 'network_error_and_txn_override.js'.
+ if (TransactionsUtil.isTransientTransactionError(resObj)) {
+ jsTestLog(`Got error for transaction against database ` +
+ `${dbNameWithTenantId} with TransientTransactionError, retrying ` +
+ `transaction against recipient: ${tojson(resObj)}`);
+ return resObj;
}
} else {
// Modify the resObj before returning the result.
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index 236fc2ee5f8..b56dc8bd1b5 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -932,7 +932,8 @@ void TenantMigrationRecipientService::Instance::_processCommittedTransactionEntr
"sessionId"_attr = sessionId,
"txnNumber"_attr = txnNumber,
"tenantId"_attr = getTenantId(),
- "migrationId"_attr = getMigrationUUID());
+ "migrationId"_attr = getMigrationUUID(),
+ "entry"_attr = entry.toString());
auto txnParticipant = TransactionParticipant::get(opCtx);
uassert(5351300,