author | Tess Avitabile <tess.avitabile@mongodb.com> | 2018-09-26 11:26:36 -0400
committer | Tess Avitabile <tess.avitabile@mongodb.com> | 2018-10-02 10:58:05 -0400
commit | aaa64733625433f31a5e212f4652a58816385388 (patch)
tree | 538d19b094a41d780b56ca1f7f065ad8cc08acf9
parent | e74ff7028c06686611eb4652a80a212bf14e3757 (diff)
download | mongo-aaa64733625433f31a5e212f4652a58816385388.tar.gz
SERVER-37227 Reintroduce enableMajorityReadConcern:false server parameter
123 files changed, 1213 insertions, 200 deletions
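
For orientation, this is the behavior the reintroduced parameter restores: with enableMajorityReadConcern:false the server refuses majority reads instead of maintaining a majority-committed snapshot. A minimal mongo shell sketch, modeled on the new jstests/noPassthrough/wt_disable_majority_reads.js test included below (the single-node replica set here is illustrative, not part of the commit):

    (function() {
        "use strict";

        const rst = new ReplSetTest({nodes: [{"enableMajorityReadConcern": "false"}]});
        rst.startSet();
        rst.initiate();

        const testDB = rst.getPrimary().getDB("test");
        assert.commandWorked(testDB.coll.insert({_id: 0}));

        // With enableMajorityReadConcern:false, a majority read is rejected
        // rather than served from a committed snapshot.
        assert.commandFailedWithCode(
            testDB.runCommand({find: "coll", readConcern: {level: "majority"}}),
            ErrorCodes.ReadConcernMajorityNotEnabled);

        rst.stopSet();
    })();
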
diff --git a/buildscripts/resmokeconfig/suites/sharded_core_txns.yml b/buildscripts/resmokeconfig/suites/sharded_core_txns.yml
index 87f14bfbbe0..c81e4535b02 100644
--- a/buildscripts/resmokeconfig/suites/sharded_core_txns.yml
+++ b/buildscripts/resmokeconfig/suites/sharded_core_txns.yml
@@ -87,7 +87,6 @@ executor:
       set_parameters:
         enableTestCommands: 1
     mongod_options:
-      enableMajorityReadConcern: ''
       set_parameters:
         enableTestCommands: 1
         numInitialSyncAttempts: 1
diff --git a/buildscripts/resmokelib/config.py b/buildscripts/resmokelib/config.py
index c2f092a8154..d797785ecab 100644
--- a/buildscripts/resmokelib/config.py
+++ b/buildscripts/resmokelib/config.py
@@ -75,6 +75,7 @@ DEFAULTS = {
     "shell_write_mode": None,
     "shuffle": None,
     "stagger_jobs": None,
+    "majority_read_concern": None,  # Default is set on the commandline.
     "storage_engine": None,
     "storage_engine_cache_size_gb": None,
     "tag_file": None,
@@ -333,6 +334,9 @@ SHUFFLE = None
 # If true, the launching of jobs is staggered in resmoke.py.
 STAGGER_JOBS = None
 
+# If set to true, it enables read concern majority. Else, read concern majority is disabled.
+MAJORITY_READ_CONCERN = None
+
 # If set, then all mongod's started by resmoke.py and by the mongo shell will use the specified
 # storage engine.
 STORAGE_ENGINE = None
diff --git a/buildscripts/resmokelib/core/programs.py b/buildscripts/resmokelib/core/programs.py
index 4eb10c6c234..4ccddecad46 100644
--- a/buildscripts/resmokelib/core/programs.py
+++ b/buildscripts/resmokelib/core/programs.py
@@ -88,6 +88,7 @@ def mongod_program(  # pylint: disable=too-many-branches
     _apply_set_parameters(args, suite_set_parameters)
 
     shortcut_opts = {
+        "enableMajorityReadConcern": config.MAJORITY_READ_CONCERN,
         "nojournal": config.NO_JOURNAL,
         "serviceExecutor": config.SERVICE_EXECUTOR,
         "storageEngine": config.STORAGE_ENGINE,
@@ -182,6 +183,7 @@ def mongo_shell_program(  # pylint: disable=too-many-branches,too-many-locals,to
     global_vars = kwargs.pop("global_vars", {}).copy()
 
     shortcut_opts = {
+        "enableMajorityReadConcern": (config.MAJORITY_READ_CONCERN, True),
         "noJournal": (config.NO_JOURNAL, False),
         "serviceExecutor": (config.SERVICE_EXECUTOR, ""),
         "storageEngine": (config.STORAGE_ENGINE, ""),
@@ -322,6 +324,7 @@ def dbtest_program(logger, executable=None, suites=None, process_kwargs=None, **
     if suites is not None:
         args.extend(suites)
 
+    kwargs["enableMajorityReadConcern"] = config.MAJORITY_READ_CONCERN
     if config.STORAGE_ENGINE is not None:
         kwargs["storageEngine"] = config.STORAGE_ENGINE
diff --git a/buildscripts/resmokelib/parser.py b/buildscripts/resmokelib/parser.py
index 48d8ab89bde..fd92055c1da 100644
--- a/buildscripts/resmokelib/parser.py
+++ b/buildscripts/resmokelib/parser.py
@@ -196,6 +196,11 @@ def _make_parser():  # pylint: disable=too-many-statements
                       help=("Enables or disables the stagger of launching resmoke jobs."
                             " Defaults to %default."))
 
+    parser.add_option("--majorityReadConcern", type="choice", action="store",
+                      dest="majority_read_concern", choices=("on", "off"), metavar="ON|OFF",
+                      help=("Enable or disable majority read concern support."
+                            " Defaults to %default."))
+
     parser.add_option("--storageEngine", dest="storage_engine", metavar="ENGINE",
                       help="The storage engine used by dbtests and jstests.")
@@ -310,7 +315,7 @@ def _make_parser():  # pylint: disable=too-many-statements
     parser.set_defaults(benchrun_device="Desktop", dry_run="off", find_suites=False,
                         list_suites=False, logger_file="console", shuffle="auto",
-                        stagger_jobs="off", suite_files="with_server")
+                        stagger_jobs="off", suite_files="with_server", majority_read_concern="on")
 
     return parser
@@ -386,6 +391,7 @@ def _update_config_vars(values):  # pylint: disable=too-many-statements
     _config.FAIL_FAST = not config.pop("continue_on_failure")
     _config.INCLUDE_WITH_ANY_TAGS = _tags_from_list(config.pop("include_with_any_tags"))
     _config.JOBS = config.pop("jobs")
+    _config.MAJORITY_READ_CONCERN = config.pop("majority_read_concern") == "on"
     _config.MONGO_EXECUTABLE = _expand_user(config.pop("mongo_executable"))
     _config.MONGOD_EXECUTABLE = _expand_user(config.pop("mongod_executable"))
     _config.MONGOD_SET_PARAMETERS = config.pop("mongod_set_parameters")
diff --git a/buildscripts/resmokelib/testing/hooks/dbhash_background.py b/buildscripts/resmokelib/testing/hooks/dbhash_background.py
index b8122f458b8..7d4790fe1fd 100644
--- a/buildscripts/resmokelib/testing/hooks/dbhash_background.py
+++ b/buildscripts/resmokelib/testing/hooks/dbhash_background.py
@@ -36,6 +36,11 @@ class CheckReplDBHashInBackground(jsfile.JSHook):
                              " doesn't support snapshot reads.",
                              server_status["storageEngine"]["name"])
             return
 
+        if not server_status["storageEngine"].get("supportsCommittedReads", False):
+            self.logger.info("Not enabling the background thread because '%s' storage engine"
+                             " doesn't support committed reads.",
+                             server_status["storageEngine"]["name"])
+            return
+
         if not server_status["storageEngine"].get("persistent", False):
             self.logger.info("Not enabling the background thread because '%s' storage engine"
                              " is not persistent.",
                              server_status["storageEngine"]["name"])
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index b17a65f31c7..aeef3f38c8c 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -11502,6 +11502,563 @@ buildvariants:
   - ubuntu1604-packer
 - name: push
 
+- name: enterprise-rhel-62-64-bit-majority-read-concern-off
+  display_name: "Enterprise RHEL 6.2 (majority read concern off)"
+  modules:
+  - enterprise
+  run_on:
+  - rhel62-small
+  expansions: &enterprise-rhel-62-64-bit-majority-read-concern-off-expansions
+    test_flags: >-
+      --majorityReadConcern=off
+      --excludeWithAnyTags=requires_majority_read_concern,uses_change_streams,uses_prepare_transaction,uses_multi_shard_transaction,uses_single_shard_transaction
+    compile_flags: >-
+      -j$(grep -c ^processor /proc/cpuinfo)
+      --ssl
+      --release
+      --variables-files=etc/scons/mongodbtoolchain_gcc.vars
+      MONGO_DISTMOD=rhel62
+    multiversion_platform: rhel62
+    multiversion_edition: enterprise
+    num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
+    repo_edition: enterprise
+    scons_cache_scope: shared
+    gorootvars: 'PATH="/opt/golang/go1.10/bin:/opt/mongodbtoolchain/v2/bin/:$PATH" GOROOT=/opt/golang/go1.10'
+    tooltags: "-tags 'ssl sasl'"
+    build_mongoreplay: true
+  display_tasks:
+  - *dbtest
+  - *replica_sets_auth
+  - *replica_sets_ese
+  - *sharding
+  - *sharding_auth
+  - *sharding_auth_audit
+  - *sharding_ese
+  - *sharding_last_stable_mongos_and_mixed_shards
+  - *sharding_op_query
+  - *unittests
+  tasks:
+  - name: compile_all_run_unittests_TG
+    distros:
+    - rhel62-large
+  - name: rollback_fuzzer
+  - name: rollback_fuzzer_clean_shutdowns
+  - name: rollback_fuzzer_unclean_shutdowns
+  - name: aggregation
+  - name: aggregation_ese
+  - name: aggregation_auth
+  - name: aggregation_facet_unwind_passthrough
+  - name: aggregation_mongos_passthrough
+  - name: aggregation_one_shard_sharded_collections
+  - name: aggregation_sharded_collections_passthrough
+  - name: audit
+  - name: auth
+  - name: auth_audit
+  - name: buildscripts_test
+  - name: bulk_gle_passthrough
+  - name: causally_consistent_jscore_passthrough
+  - name: causally_consistent_jscore_passthrough_auth
+  - name: sharded_causally_consistent_jscore_passthrough
+  - name: concurrency
+  - name: concurrency_replication
+  - name: concurrency_replication_causal_consistency
+    distros:
+    - rhel62-large
+  - name: concurrency_replication_multi_stmt_txn
+  - name: concurrency_sharded_replication
+  - name: concurrency_sharded_replication_with_balancer
+  - name: concurrency_sharded_causal_consistency
+  - name: concurrency_sharded_causal_consistency_and_balancer
+  - name: concurrency_simultaneous
+  - name: concurrency_simultaneous_replication
+  - name: disk_wiredtiger
+  - name: ese
+  - name: failpoints
+  - name: failpoints_auth
+  - name: gle_auth
+  - name: gle_auth_basics_passthrough
+  - name: gle_auth_basics_passthrough_write_cmd
+  - name: gle_auth_write_cmd
+  - name: integration_tests_replset
+  - name: integration_tests_sharded
+  - name: integration_tests_standalone
+  - name: integration_tests_standalone_audit
+  - name: jsCore
+  - name: jsCore_ese
+  - name: jsCore_auth
+  - name: jsCore_compatibility
+  - name: jsCore_decimal
+  - name: jsCore_minimum_batch_size
+  - name: jsCore_op_query
+  - name: jsCore_txns
+  - name: causally_consistent_jscore_txns_passthrough
+  - name: jsonSchema
+  - name: aggregation_multiversion_fuzzer
+  - name: aggregation_expression_multiversion_fuzzer
+  - name: jstestfuzz
+  - name: jstestfuzz_concurrent
+  - name: jstestfuzz_concurrent_replication
+  - name: jstestfuzz_concurrent_replication_session
+  - name: jstestfuzz_concurrent_sharded
+  - name: jstestfuzz_concurrent_sharded_causal_consistency
+  - name: jstestfuzz_concurrent_sharded_continuous_stepdown
+  - name: jstestfuzz_concurrent_sharded_session
+  - name: jstestfuzz_interrupt
+  - name: jstestfuzz_interrupt_replication
+  - name: jstestfuzz_replication
+  - name: jstestfuzz_replication_initsync
+  - name: jstestfuzz_replication_session
+  - name: jstestfuzz_sharded
+  - name: jstestfuzz_sharded_causal_consistency
+  - name: jstestfuzz_sharded_continuous_stepdown
+  - name: jstestfuzz_sharded_session
+  - name: logical_session_cache_replication_100ms_refresh_jscore_passthrough
+  - name: logical_session_cache_replication_1sec_refresh_jscore_passthrough
+  - name: logical_session_cache_replication_10sec_refresh_jscore_passthrough
+  - name: logical_session_cache_replication_default_refresh_jscore_passthrough
+  - name: multiversion
+  - name: noPassthroughWithMongod
+  - name: noPassthrough
+  - name: parallel
+  - name: parallel_compatibility
+  - name: read_concern_linearizable_passthrough
+    distros:
+    - rhel62-large
+  - name: read_only
+  - name: read_only_sharded
+  - name: replica_sets
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_0
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_1
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_2
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_3
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_4
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_5
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_6
+    distros:
+    - rhel62-large
+  - name: replica_sets_auth_misc
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_0
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_1
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_2
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_3
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_4
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_5
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_6
+    distros:
+    - rhel62-large
+  - name: replica_sets_ese_misc
+    distros:
+    - rhel62-large
+  - name: replica_sets_jscore_passthrough
+    distros:
+    - rhel62-large
+  - name: replica_sets_initsync_jscore_passthrough
+    distros:
+    - rhel62-large
+  - name: replica_sets_initsync_static_jscore_passthrough
+    distros:
+    - rhel62-large
+  - name: replica_sets_multi_stmt_txn_jscore_passthrough
+    distros:
+    - rhel62-large
+  - name: replica_sets_kill_secondaries_jscore_passthrough
+    distros:
+    - rhel62-large
+  - name: retryable_writes_jscore_passthrough
+    distros:
+    - rhel62-large
+  - name: sasl
+  - name: session_jscore_passthrough
+  - name: sharded_collections_jscore_passthrough
+  - name: sharding_0
+    distros:
+    - rhel62-large
+  - name: sharding_1
+    distros:
+    - rhel62-large
+  - name: sharding_2
+    distros:
+    - rhel62-large
+  - name: sharding_3
+    distros:
+    - rhel62-large
+  - name: sharding_4
+    distros:
+    - rhel62-large
+  - name: sharding_5
+    distros:
+    - rhel62-large
+  - name: sharding_6
+    distros:
+    - rhel62-large
+  - name: sharding_7
+    distros:
+    - rhel62-large
+  - name: sharding_8
+    distros:
+    - rhel62-large
+  - name: sharding_9
+    distros:
+    - rhel62-large
+  - name: sharding_10
+    distros:
+    - rhel62-large
+  - name: sharding_11
+    distros:
+    - rhel62-large
+  - name: sharding_12
+    distros:
+    - rhel62-large
+  - name: sharding_13
+    distros:
+    - rhel62-large
+  - name: sharding_14
+    distros:
+    - rhel62-large
+  - name: sharding_misc
+    distros:
+    - rhel62-large
+  - name: sharding_auth_0
+    distros:
+    - rhel62-large
+  - name: sharding_auth_1
+    distros:
+    - rhel62-large
+  - name: sharding_auth_2
+    distros:
+    - rhel62-large
+  - name: sharding_auth_3
+    distros:
+    - rhel62-large
+  - name: sharding_auth_4
+    distros:
+    - rhel62-large
+  - name: sharding_auth_5
+    distros:
+    - rhel62-large
+  - name: sharding_auth_6
+    distros:
+    - rhel62-large
+  - name: sharding_auth_7
+    distros:
+    - rhel62-large
+  - name: sharding_auth_8
+    distros:
+    - rhel62-large
+  - name: sharding_auth_9
+    distros:
+    - rhel62-large
+  - name: sharding_auth_10
+    distros:
+    - rhel62-large
+  - name: sharding_auth_11
+    distros:
+    - rhel62-large
+  - name: sharding_auth_12
+    distros:
+    - rhel62-large
+  - name: sharding_auth_13
+    distros:
+    - rhel62-large
+  - name: sharding_auth_14
+    distros:
+    - rhel62-large
+  - name: sharding_auth_15
+    distros:
+    - rhel62-large
+  - name: sharding_auth_16
+    distros:
+    - rhel62-large
+  - name: sharding_auth_17
+    distros:
+    - rhel62-large
+  - name: sharding_auth_18
+    distros:
+    - rhel62-large
+  - name: sharding_auth_19
+    distros:
+    - rhel62-large
+  - name: sharding_auth_misc
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_0
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_1
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_2
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_3
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_4
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_5
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_6
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_7
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_8
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_9
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_10
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_11
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_12
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_13
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_14
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_15
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_16
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_17
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_18
+    distros:
+    - rhel62-large
+  - name: sharding_auth_audit_misc
+    distros:
+    - rhel62-large
+  - name: sharding_ese_0
+    distros:
+    - rhel62-large
+  - name: sharding_ese_1
+    distros:
+    - rhel62-large
+  - name: sharding_ese_2
+    distros:
+    - rhel62-large
+  - name: sharding_ese_3
+    distros:
+    - rhel62-large
+  - name: sharding_ese_4
+    distros:
+    - rhel62-large
+  - name: sharding_ese_5
+    distros:
+    - rhel62-large
+  - name: sharding_ese_6
+    distros:
+    - rhel62-large
+  - name: sharding_ese_7
+    distros:
+    - rhel62-large
+  - name: sharding_ese_8
+    distros:
+    - rhel62-large
+  - name: sharding_ese_9
+    distros:
+    - rhel62-large
+  - name: sharding_ese_10
+    distros:
+    - rhel62-large
+  - name: sharding_ese_11
+    distros:
+    - rhel62-large
+  - name: sharding_ese_12
+    distros:
+    - rhel62-large
+  - name: sharding_ese_13
+    distros:
+    - rhel62-large
+  - name: sharding_ese_14
+    distros:
+    - rhel62-large
+  - name: sharding_ese_15
+    distros:
+    - rhel62-large
+  - name: sharding_ese_16
+    distros:
+    - rhel62-large
+  - name: sharding_ese_17
+    distros:
+    - rhel62-large
+  - name: sharding_ese_18
+    distros:
+    - rhel62-large
+  - name: sharding_ese_19
+    distros:
+    - rhel62-large
+  - name: sharding_ese_20
+    distros:
+    - rhel62-large
+  - name: sharding_ese_misc
+    distros:
+    - rhel62-large
+  - name: sharding_gle_auth_basics_passthrough
+  - name: sharding_gle_auth_basics_passthrough_write_cmd
+  - name: sharding_jscore_passthrough
+  - name: sharding_jscore_op_query_passthrough
+  - name: sharding_jscore_passthrough_wire_ops
+  - name: sharding_last_stable_mongos_and_mixed_shards_0
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_1
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_2
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_3
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_4
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_5
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_6
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_7
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_8
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_9
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_10
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_11
+    distros:
+    - rhel62-large
+  - name: sharding_last_stable_mongos_and_mixed_shards_misc
+    distros:
+    - rhel62-large
+  - name: sharding_csrs_continuous_config_stepdown
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_0
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_1
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_2
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_3
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_4
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_5
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_6
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_7
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_8
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_9
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_10
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_11
+    distros:
+    - rhel62-large
+  - name: sharding_op_query_misc
+    distros:
+    - rhel62-large
+  - name: slow1
+  - name: serial_run
+  - name: snmp
+  - name: ssl
+  - name: sslSpecial
+  - name: tool
+  - name: update_fuzzer
+  - name: update_fuzzer_replication
+  - name: write_concern_majority_passthrough
+    distros:
+    - rhel62-large
+  - name: secondary_reads_passthrough
+    distros:
+    - rhel62-large
+
+- name: enterprise-rhel-62-64-bit-required-majority-read-concern-off
+  display_name: "! Enterprise RHEL 6.2 (majority read concern off)"
+  modules:
+  - enterprise
+  run_on:
+  - rhel62-small
+  expansions:
+    <<: *enterprise-rhel-62-64-bit-majority-read-concern-off-expansions
+    burn_in_tests_build_variant: enterprise-rhel-62-64-bit-majority-read-concern-off
+  tasks:
+  # This build variant exists specifically to test that newly added or modified JavaScript tests
+  # are correctly tagged with "requires_majority_read_concern", "uses_change_streams",
+  # "uses_prepare_transaction", "uses_multi_shard_transaction", and "uses_single_shard_transaction"
+  # prior to the changes being pushed. It uses the task list from the
+  # enterprise-rhel-62-64-bit-majority-read-concern-off build variant to determine the resmoke.py
+  # YAML suite configurations to run the tests under. Do not add more tasks to this list.
+  - name: compile_TG
+    requires:
+    - name: burn_in_tests
+    distros:
+    - rhel62-large
+  - name: burn_in_tests
+
 - name: enterprise-rhel-62-64-bit-coverage
   display_name: "~ Enterprise RHEL 6.2 DEBUG Code Coverage"
   modules:
diff --git a/jstests/core/restart_catalog.js b/jstests/core/restart_catalog.js
index 245c30e0469..b5ed4421acf 100644
--- a/jstests/core/restart_catalog.js
+++ b/jstests/core/restart_catalog.js
@@ -2,7 +2,7 @@
  * Forces the server to restart the catalog and rebuild its in-memory catalog data structures, then
  * asserts that the server works normally.
  * @tags: [
- *   assumes_read_concern_unchanged,
+ *   assumes_read_concern_unchanged, requires_majority_read_concern,
 *
 *   # restartCatalog command is not available on embedded
 *   incompatible_with_embedded
diff --git a/jstests/core/txns/abort_prepared_transaction.js b/jstests/core/txns/abort_prepared_transaction.js
index 0f0cbc7289a..f89c7e3c686 100644
--- a/jstests/core/txns/abort_prepared_transaction.js
+++ b/jstests/core/txns/abort_prepared_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Tests prepared transaction abort support.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
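
The uses_prepare_transaction tag added throughout jstests/core/txns (here and in the files that follow) is what lets the new variant's --excludeWithAnyTags skip prepare-dependent tests. A rough shell sketch of the pattern these tests share, assuming a replica-set connection with test commands enabled and FCV 4.2 ('db' is the usual shell handle):

    const session = db.getMongo().startSession({causalConsistency: false});
    const sessionDB = session.getDatabase("test");

    session.startTransaction();
    assert.commandWorked(sessionDB.coll.insert({_id: 1}));

    // prepareTransaction is issued against the admin database on the same
    // session; the response carries the prepareTimestamp.
    assert.commandWorked(session.getDatabase("admin").adminCommand({prepareTransaction: 1}));

    // A prepared transaction must then be committed or aborted.
    assert.commandWorked(session.abortTransaction_forTesting());
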
diff --git a/jstests/core/txns/commit_prepared_transaction.js b/jstests/core/txns/commit_prepared_transaction.js
index 51240440564..c14c9c607bf 100644
--- a/jstests/core/txns/commit_prepared_transaction.js
+++ b/jstests/core/txns/commit_prepared_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Tests prepared transaction commit support.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/commit_prepared_transaction_errors.js b/jstests/core/txns/commit_prepared_transaction_errors.js
index 4b24752e511..52d3ec35e04 100644
--- a/jstests/core/txns/commit_prepared_transaction_errors.js
+++ b/jstests/core/txns/commit_prepared_transaction_errors.js
@@ -1,7 +1,7 @@
 /**
  * Test error cases for committing prepared transactions.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/disallow_operations_on_prepared_transaction.js b/jstests/core/txns/disallow_operations_on_prepared_transaction.js
index f44b069cee5..9e02f4ddbb9 100644
--- a/jstests/core/txns/disallow_operations_on_prepared_transaction.js
+++ b/jstests/core/txns/disallow_operations_on_prepared_transaction.js
@@ -3,7 +3,7 @@
  * be allowed to be called on a prepared transaction. All other cases should fail with
  * PreparedTransactionInProgress.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
diff --git a/jstests/core/txns/empty_prepare.js b/jstests/core/txns/empty_prepare.js
index f2e8ae392b3..1c8b8aa323f 100644
--- a/jstests/core/txns/empty_prepare.js
+++ b/jstests/core/txns/empty_prepare.js
@@ -1,7 +1,7 @@
 /**
  * Tests transactions that are prepared after no writes.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/ensure_active_txn_for_prepare_transaction.js b/jstests/core/txns/ensure_active_txn_for_prepare_transaction.js
index 3695f7a9955..52e756f6b45 100644
--- a/jstests/core/txns/ensure_active_txn_for_prepare_transaction.js
+++ b/jstests/core/txns/ensure_active_txn_for_prepare_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Test that we can't call prepareTransaction if there isn't an active transaction on the session.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
diff --git a/jstests/core/txns/no_new_transactions_when_prepared_transaction_in_progress.js b/jstests/core/txns/no_new_transactions_when_prepared_transaction_in_progress.js
index 0ac0dbd23bd..0da96c8d8cb 100644
--- a/jstests/core/txns/no_new_transactions_when_prepared_transaction_in_progress.js
+++ b/jstests/core/txns/no_new_transactions_when_prepared_transaction_in_progress.js
@@ -1,6 +1,6 @@
 /**
  * Tests that we cannot start a new transaction when a prepared transaction exists on the session.
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  *
  */
diff --git a/jstests/core/txns/no_writes_to_config_transactions_with_prepared_transaction.js b/jstests/core/txns/no_writes_to_config_transactions_with_prepared_transaction.js
index c21f189cfac..b7edeade302 100644
--- a/jstests/core/txns/no_writes_to_config_transactions_with_prepared_transaction.js
+++ b/jstests/core/txns/no_writes_to_config_transactions_with_prepared_transaction.js
@@ -2,7 +2,7 @@
  * Tests that other than insertions, it is illegal to modify config.transactions while the session
  * has a prepared transaction.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
diff --git a/jstests/core/txns/prepare_committed_transaction.js b/jstests/core/txns/prepare_committed_transaction.js
index cec363471e1..57b3c719075 100644
--- a/jstests/core/txns/prepare_committed_transaction.js
+++ b/jstests/core/txns/prepare_committed_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Test error cases when calling prepare on a committed transaction.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/prepare_conflict.js b/jstests/core/txns/prepare_conflict.js
index a06ddd296f0..f41fa802d43 100644
--- a/jstests/core/txns/prepare_conflict.js
+++ b/jstests/core/txns/prepare_conflict.js
@@ -1,7 +1,7 @@
 /**
  * Tests that prepare conflicts for prepared transactions are retried.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/prepare_conflict_read_concern_behavior.js b/jstests/core/txns/prepare_conflict_read_concern_behavior.js
index db7776673a0..ce5a506c82f 100644
--- a/jstests/core/txns/prepare_conflict_read_concern_behavior.js
+++ b/jstests/core/txns/prepare_conflict_read_concern_behavior.js
@@ -2,7 +2,7 @@
  * Test calling reads with various read concerns on a prepared transaction. Snapshot, linearizable
  * and afterClusterTime reads are the only reads that should block on a prepared transaction.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
diff --git a/jstests/core/txns/prepare_nonexistent_transaction.js b/jstests/core/txns/prepare_nonexistent_transaction.js
index 8b4c256c8ed..e688f92af56 100644
--- a/jstests/core/txns/prepare_nonexistent_transaction.js
+++ b/jstests/core/txns/prepare_nonexistent_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Test error cases when calling prepare on a non-existent transaction.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/prepare_prepared_transaction.js b/jstests/core/txns/prepare_prepared_transaction.js
index 7ba6ab7bf2b..c436d342fba 100644
--- a/jstests/core/txns/prepare_prepared_transaction.js
+++ b/jstests/core/txns/prepare_prepared_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Tests that we can successfully prepare a prepared transaction.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/prepare_requires_fcv42.js b/jstests/core/txns/prepare_requires_fcv42.js
index b7cffaa441f..1328405701a 100644
--- a/jstests/core/txns/prepare_requires_fcv42.js
+++ b/jstests/core/txns/prepare_requires_fcv42.js
@@ -1,7 +1,7 @@
 /**
  * Tests that 'prepareTransaction' only succeeds in FCV 4.2.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     "use strict";
diff --git a/jstests/core/txns/statement_ids_accepted.js b/jstests/core/txns/statement_ids_accepted.js
index e42c5e0dced..689fff75013 100644
--- a/jstests/core/txns/statement_ids_accepted.js
+++ b/jstests/core/txns/statement_ids_accepted.js
@@ -1,6 +1,6 @@
 // Makes sure all commands which are supposed to take statement ids do. This should test the
 // commands in the sessionCheckOutWhiteList in service_entry_point_common.cpp.
-// @tags: [uses_transactions]
+// @tags: [uses_transactions, uses_prepare_transaction]
 
 (function() {
     "use strict";
diff --git a/jstests/core/txns/timestamped_reads_wait_for_prepare_oplog_visibility.js b/jstests/core/txns/timestamped_reads_wait_for_prepare_oplog_visibility.js
index 7899a1c7a14..2cc6b26deb0 100644
--- a/jstests/core/txns/timestamped_reads_wait_for_prepare_oplog_visibility.js
+++ b/jstests/core/txns/timestamped_reads_wait_for_prepare_oplog_visibility.js
@@ -2,7 +2,7 @@
  * Tests that timestamped reads, reads with snapshot and afterClusterTime, wait for the prepare
  * transaction oplog entry to be visible before choosing a read timestamp.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
  */
 (function() {
     'use strict';
diff --git a/jstests/core_standalone/read_concern.js b/jstests/core_standalone/read_concern.js
index 17db9b689cd..1646ac71d11 100644
--- a/jstests/core_standalone/read_concern.js
+++ b/jstests/core_standalone/read_concern.js
@@ -1,5 +1,5 @@
 // This test verifies readConcern behavior on a standalone mongod or embedded
-
+// @tags: [requires_majority_read_concern]
 (function() {
     'use strict';
diff --git a/jstests/multiVersion/change_streams_feature_compatibility_version.js b/jstests/multiVersion/change_streams_feature_compatibility_version.js
index 69615ff0a30..23c489893e8 100644
--- a/jstests/multiVersion/change_streams_feature_compatibility_version.js
+++ b/jstests/multiVersion/change_streams_feature_compatibility_version.js
@@ -1,6 +1,7 @@
 // Test that a change stream is able to survive an upgrade. This is the most basic test to
 // demonstrate the survival of a stream, presuming the driver will attempt to retry and resume the
 // stream after network errors.
+// @tags: [uses_change_streams]
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/after_cluster_time.js b/jstests/noPassthrough/after_cluster_time.js
index 718a9633a0c..1137e8495f2 100644
--- a/jstests/noPassthrough/after_cluster_time.js
+++ b/jstests/noPassthrough/after_cluster_time.js
@@ -1,5 +1,5 @@
 // This test verifies readConcern:afterClusterTime behavior on a standalone mongod.
-// @tags: [requires_replication]
+// @tags: [requires_replication, requires_majority_read_concern]
 (function() {
     "use strict";
     var standalone =
diff --git a/jstests/noPassthrough/agg_explain_read_concern.js b/jstests/noPassthrough/agg_explain_read_concern.js
index 0bac93f2eef..e3f0d7b8d94 100644
--- a/jstests/noPassthrough/agg_explain_read_concern.js
+++ b/jstests/noPassthrough/agg_explain_read_concern.js
@@ -1,5 +1,6 @@
 /**
  * Test that explained aggregation commands behave correctly with the readConcern option.
+ * @tags: [requires_majority_read_concern]
  */
 (function() {
     "use strict";
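
The uses_change_streams tag serves the same purpose for streams: $changeStream reads from the majority-committed snapshot, so these tests cannot run when majority read concern is off. A minimal sketch of the shell pattern such tests exercise, assuming a replica-set connection (collection name illustrative):

    const csCursor = db.coll.watch();  // opens a $changeStream aggregation
    assert.commandWorked(db.coll.insert({_id: 1}));
    // Events surface once the write is majority-committed.
    assert.soon(() => csCursor.hasNext());
    assert.eq(csCursor.next().operationType, "insert");
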
diff --git a/jstests/noPassthrough/change_streams_require_majority_read_concern.js b/jstests/noPassthrough/change_streams_require_majority_read_concern.js
index f9dbe2f3b9f..8481ba586f1 100644
--- a/jstests/noPassthrough/change_streams_require_majority_read_concern.js
+++ b/jstests/noPassthrough/change_streams_require_majority_read_concern.js
@@ -1,4 +1,5 @@
 // Tests that the $changeStream requires read concern majority.
+// @tags: [uses_change_streams]
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/change_streams_update_lookup_collation.js b/jstests/noPassthrough/change_streams_update_lookup_collation.js
index 55df63bbc6a..b82ad384cde 100644
--- a/jstests/noPassthrough/change_streams_update_lookup_collation.js
+++ b/jstests/noPassthrough/change_streams_update_lookup_collation.js
@@ -2,7 +2,7 @@
 // collation, regardless of the collation on the change stream.
 //
 // Collation is only supported with the find command, not with op query.
-// @tags: [requires_find_command]
+// @tags: [requires_find_command, uses_change_streams]
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/command_line_parsing.js b/jstests/noPassthrough/command_line_parsing.js
index bd1b1ff0e8b..c68b050a676 100644
--- a/jstests/noPassthrough/command_line_parsing.js
+++ b/jstests/noPassthrough/command_line_parsing.js
@@ -35,6 +35,7 @@ delete m2result.parsed.storage.engine;
 delete m2result.parsed.storage.journal;
 delete m2result.parsed.storage.rocksdb;
 delete m2result.parsed.storage.wiredTiger;
+delete m2result.parsed.replication;  // Removes enableMajorityReadConcern setting.
 assert.docEq(m2expected.parsed, m2result.parsed);
 
 // test JSON config file
@@ -62,4 +63,5 @@ delete m3result.parsed.storage.engine;
 delete m3result.parsed.storage.journal;
 delete m3result.parsed.storage.rocksdb;
 delete m3result.parsed.storage.wiredTiger;
+delete m3result.parsed.replication;  // Removes enableMajorityReadConcern setting.
 assert.docEq(m3expected.parsed, m3result.parsed);
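
The command_line_parsing.js tweak follows from resmoke now always passing the option through to mongod: the parsed server options gain a replication section that the test's expected documents lack. A sketch of the probe involved (the exact output shape shown in the comment is an assumption):

    // getCmdLineOpts reports the server's parsed configuration; with resmoke
    // passing --enableMajorityReadConcern, a 'replication' section appears,
    // e.g. {enableMajorityReadConcern: true}, so the test deletes it before docEq.
    const parsed = db.adminCommand({getCmdLineOpts: 1}).parsed;
    delete parsed.replication;  // removes the enableMajorityReadConcern setting
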
diff --git a/jstests/noPassthrough/disable_majority_reads_restart.js b/jstests/noPassthrough/disable_majority_reads_restart.js
new file mode 100644
index 00000000000..596eabad052
--- /dev/null
+++ b/jstests/noPassthrough/disable_majority_reads_restart.js
@@ -0,0 +1,82 @@
+/**
+ * Tests restarting mongod with 'enableMajorityReadConcern' varying between true and false.
+ *
+ * @tags: [requires_persistence, requires_replication, requires_majority_read_concern,
+ * requires_wiredtiger]
+ */
+(function() {
+    "use strict";
+
+    const dbName = "test";
+    const collName = "coll";
+
+    const rst = new ReplSetTest({nodes: 1});
+    rst.startSet();
+    rst.initiate();
+
+    // Insert a document and ensure it is in the stable checkpoint by restarting.
+    let coll = rst.getPrimary().getDB(dbName)[collName];
+    assert.commandWorked(coll.insert({_id: 0}, {writeConcern: {w: "majority"}}));
+    rst.stopSet(undefined, true);
+    rst.startSet(undefined, true);
+
+    // Disable snapshotting on all members of the replica set so that further operations do not
+    // enter the majority snapshot.
+    assert.commandWorked(rst.getPrimary().adminCommand(
+        {configureFailPoint: "disableSnapshotting", mode: "alwaysOn"}));
+
+    // Insert a document that will not be in a stable checkpoint.
+    coll = rst.getPrimary().getDB(dbName)[collName];
+    assert.commandWorked(coll.insert({_id: 1}));
+
+    // Restart the node with enableMajorityReadConcern:false.
+    rst.stopSet(undefined, true);
+    rst.startSet({noCleanData: true, enableMajorityReadConcern: "false"});
+
+    // Both inserts should be reflected in the data and the oplog.
+    coll = rst.getPrimary().getDB(dbName)[collName];
+    assert.eq([{_id: 0}, {_id: 1}], coll.find().sort({_id: 1}).toArray());
+    let oplog = rst.getPrimary().getDB("local").oplog.rs;
+    assert.eq(1, oplog.find({o: {_id: 0}}).itcount());
+    assert.eq(1, oplog.find({o: {_id: 1}}).itcount());
+
+    // Restart the node with enableMajorityReadConcern:false without adding any documents.
+    rst.stopSet(undefined, true);
+    rst.startSet({noCleanData: true, enableMajorityReadConcern: "false"});
+
+    // Both inserts should still be reflected in the data and the oplog.
+    coll = rst.getPrimary().getDB(dbName)[collName];
+    assert.eq([{_id: 0}, {_id: 1}], coll.find().sort({_id: 1}).toArray());
+    oplog = rst.getPrimary().getDB("local").oplog.rs;
+    assert.eq(1, oplog.find({o: {_id: 0}}).itcount());
+    assert.eq(1, oplog.find({o: {_id: 1}}).itcount());
+
+    // Insert another document.
+    assert.commandWorked(coll.insert({_id: 2}, {writeConcern: {w: "majority"}}));
+
+    // Restart the node with enableMajorityReadConcern:false.
+    rst.stopSet(undefined, true);
+    rst.startSet({noCleanData: true, enableMajorityReadConcern: "false"});
+
+    // All three inserts should be reflected in the data and the oplog.
+    coll = rst.getPrimary().getDB(dbName)[collName];
+    assert.eq([{_id: 0}, {_id: 1}, {_id: 2}], coll.find().sort({_id: 1}).toArray());
+    oplog = rst.getPrimary().getDB("local").oplog.rs;
+    assert.eq(1, oplog.find({o: {_id: 0}}).itcount());
+    assert.eq(1, oplog.find({o: {_id: 1}}).itcount());
+    assert.eq(1, oplog.find({o: {_id: 2}}).itcount());
+
+    // Restart the node with enableMajorityReadConcern:true.
+    rst.stopSet(undefined, true);
+    rst.startSet({noCleanData: true, enableMajorityReadConcern: "true"});
+
+    // All three inserts should still be reflected in the data and the oplog.
+    coll = rst.getPrimary().getDB(dbName)[collName];
+    assert.eq([{_id: 0}, {_id: 1}, {_id: 2}], coll.find().sort({_id: 1}).toArray());
+    oplog = rst.getPrimary().getDB("local").oplog.rs;
+    assert.eq(1, oplog.find({o: {_id: 0}}).itcount());
+    assert.eq(1, oplog.find({o: {_id: 1}}).itcount());
+    assert.eq(1, oplog.find({o: {_id: 2}}).itcount());
+
+    rst.stopSet();
+})();
diff --git a/jstests/noPassthrough/do_not_rebuild_indexes_before_repair.js b/jstests/noPassthrough/do_not_rebuild_indexes_before_repair.js
index 265bc478c46..12fd52a09f2 100644
--- a/jstests/noPassthrough/do_not_rebuild_indexes_before_repair.js
+++ b/jstests/noPassthrough/do_not_rebuild_indexes_before_repair.js
@@ -3,7 +3,7 @@
 * indexes before repairing the instance. Replication is used to get the database into a state where
 * an index has been dropped on disk, but still exists in the catalog.
 *
- * @tags: [requires_persistence, requires_replication]
+ * @tags: [requires_persistence, requires_replication, requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js b/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js
index 85206f15650..a9a5bc022b7 100644
--- a/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js
+++ b/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js
@@ -2,7 +2,7 @@
 // to be majority committed. If 'atClusterTime' is older than the oldest available snapshot, the
 // error code SnapshotTooOld is returned.
 //
-// @tags: [uses_transactions]
+// @tags: [uses_transactions, requires_majority_read_concern]
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/readConcern_snapshot.js b/jstests/noPassthrough/readConcern_snapshot.js
index 7c3800ca689..3798cb8efe9 100644
--- a/jstests/noPassthrough/readConcern_snapshot.js
+++ b/jstests/noPassthrough/readConcern_snapshot.js
@@ -24,14 +24,14 @@
         assert.commandFailedWithCode(sessionDb.runCommand({find: collName}),
                                      ErrorCodes.IllegalOperation);
         assert.commandFailedWithCode(session.abortTransaction_forTesting(),
-                                     ErrorCodes.NoSuchTransaction);
+                                     [ErrorCodes.NoSuchTransaction, ErrorCodes.IllegalOperation]);
 
         // Transactions without readConcern snapshot fail.
         session.startTransaction();
         assert.commandFailedWithCode(sessionDb.runCommand({find: collName}),
                                      ErrorCodes.IllegalOperation);
         assert.commandFailedWithCode(session.abortTransaction_forTesting(),
-                                     ErrorCodes.NoSuchTransaction);
+                                     [ErrorCodes.NoSuchTransaction, ErrorCodes.IllegalOperation]);
 
         rst.stopSet();
         return;
diff --git a/jstests/noPassthrough/readConcern_snapshot_mongos.js b/jstests/noPassthrough/readConcern_snapshot_mongos.js
index 1fb9a2631c8..a624a14c235 100644
--- a/jstests/noPassthrough/readConcern_snapshot_mongos.js
+++ b/jstests/noPassthrough/readConcern_snapshot_mongos.js
@@ -1,5 +1,5 @@
 // Test parsing of readConcern level 'snapshot' on mongos.
-// @tags: [requires_replication,requires_sharding, uses_transactions]
+// @tags: [requires_replication,requires_sharding, uses_transactions, uses_single_shard_transaction]
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/read_concern_helper.js b/jstests/noPassthrough/read_concern_helper.js
index 2f9e2c4807b..b83b48bdf34 100644
--- a/jstests/noPassthrough/read_concern_helper.js
+++ b/jstests/noPassthrough/read_concern_helper.js
@@ -1,4 +1,5 @@
 // This tests readConcern handling for the find/findOne shell helpers.
+// @tags: [requires_majority_read_concern]
 (function() {
     "use strict";
     var testServer = MongoRunner.runMongod();
diff --git a/jstests/noPassthrough/read_concern_snapshot_aggregation.js b/jstests/noPassthrough/read_concern_snapshot_aggregation.js
index 9a8d4fa9863..838b3ddd9b2 100644
--- a/jstests/noPassthrough/read_concern_snapshot_aggregation.js
+++ b/jstests/noPassthrough/read_concern_snapshot_aggregation.js
@@ -38,8 +38,11 @@
     }
 
     // Test that $changeStream is disallowed with transactions.
-    testSnapshotAggFailsWithCode(
-        kCollName, [{$changeStream: {}}], ErrorCodes.OperationNotSupportedInTransaction);
+    // TODO SERVER-37221: Remove the check for 'supportsCommittedReads'.
+    if (sessionDB.serverStatus().storageEngine.supportsCommittedReads) {
+        testSnapshotAggFailsWithCode(
+            kCollName, [{$changeStream: {}}], ErrorCodes.OperationNotSupportedInTransaction);
+    }
 
     // Test that $collStats is disallowed with transactions.
     testSnapshotAggFailsWithCode(
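
The relaxed assertions in readConcern_snapshot.js account for nodes where snapshot reads are unsupported: the find inside the transaction fails with IllegalOperation, and the subsequent abort may then surface either error code. Shown in isolation, using the test's own names:

    session.startTransaction({readConcern: {level: "snapshot"}});
    assert.commandFailedWithCode(sessionDb.runCommand({find: collName}),
                                 ErrorCodes.IllegalOperation);
    // The abort may report that the transaction never existed, or echo the
    // IllegalOperation, depending on how far the server got.
    assert.commandFailedWithCode(session.abortTransaction_forTesting(),
                                 [ErrorCodes.NoSuchTransaction, ErrorCodes.IllegalOperation]);
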
diff --git a/jstests/noPassthrough/read_majority.js b/jstests/noPassthrough/read_majority.js
index f9ee8d01607..30e7b078ad8 100644
--- a/jstests/noPassthrough/read_majority.js
+++ b/jstests/noPassthrough/read_majority.js
@@ -12,6 +12,7 @@
  *
  * All of this requires support for committed reads, so this test will be skipped if the storage
  * engine does not support them.
+ * @tags: [requires_majority_read_concern]
  */
 
 load("jstests/libs/analyze_plan.js");
diff --git a/jstests/noPassthrough/read_majority_reads.js b/jstests/noPassthrough/read_majority_reads.js
index 5f0ea2012e8..578f17d748f 100644
--- a/jstests/noPassthrough/read_majority_reads.js
+++ b/jstests/noPassthrough/read_majority_reads.js
@@ -11,7 +11,7 @@
  * Each operation is tested on a single node, and (if supported) through mongos on both sharded and
  * unsharded collections. Mongos doesn't directly handle readConcern majority, but these tests
  * should ensure that it correctly propagates the setting to the shards when running commands.
- * @tags: [requires_sharding]
+ * @tags: [requires_sharding, requires_majority_read_concern]
 */
 (function() {
diff --git a/jstests/noPassthrough/recovery_wt_cache_full.js b/jstests/noPassthrough/recovery_wt_cache_full.js
index 2b26e475f82..dff22ad959a 100644
--- a/jstests/noPassthrough/recovery_wt_cache_full.js
+++ b/jstests/noPassthrough/recovery_wt_cache_full.js
@@ -1,6 +1,7 @@
 /**
  * Fills WiredTiger cache during recovery oplog application.
- * @tags: [requires_persistence, requires_replication, requires_wiredtiger]
+ * @tags: [requires_persistence, requires_replication, requires_wiredtiger,
+ * requires_majority_read_concern]
 */
 (function() {
     'use strict';
diff --git a/jstests/noPassthrough/restart_catalog_sharded_cluster.js b/jstests/noPassthrough/restart_catalog_sharded_cluster.js
index 04ee63ce0de..696d62c2af8 100644
--- a/jstests/noPassthrough/restart_catalog_sharded_cluster.js
+++ b/jstests/noPassthrough/restart_catalog_sharded_cluster.js
@@ -1,6 +1,6 @@
 /**
  * Tests restarting the catalog in a sharded cluster on the config server and the shards.
- * @tags: [requires_replication, requires_sharding]
+ * @tags: [requires_replication, requires_sharding, requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/skip_sharding_configuration_checks.js b/jstests/noPassthrough/skip_sharding_configuration_checks.js
index bbb0d2b868b..eb067f94a52 100644
--- a/jstests/noPassthrough/skip_sharding_configuration_checks.js
+++ b/jstests/noPassthrough/skip_sharding_configuration_checks.js
@@ -1,6 +1,7 @@
 /**
  * Starts standalone RS with skipShardingConfigurationChecks.
- * @tags: [requires_persistence, requires_replication, requires_sharding]
+ * @tags: [requires_persistence, requires_replication, requires_sharding,
+ * requires_majority_read_concern]
 */
 (function() {
     'use strict';
diff --git a/jstests/noPassthrough/standalone_replication_recovery.js b/jstests/noPassthrough/standalone_replication_recovery.js
index 67a5da61170..1def927772c 100644
--- a/jstests/noPassthrough/standalone_replication_recovery.js
+++ b/jstests/noPassthrough/standalone_replication_recovery.js
@@ -2,7 +2,8 @@
  * Tests that a standalone succeeds when passed the 'recoverFromOplogAsStandalone' parameter.
  *
  * This test only makes sense for storage engines that support recover to stable timestamp.
- * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication]
+ * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication,
+ * requires_majority_read_concern]
 */
 (function() {
diff --git a/jstests/noPassthrough/timestamp_index_builds.js b/jstests/noPassthrough/timestamp_index_builds.js
index b55b1805e00..e5ffa405d45 100644
--- a/jstests/noPassthrough/timestamp_index_builds.js
+++ b/jstests/noPassthrough/timestamp_index_builds.js
@@ -15,7 +15,7 @@
  * timestamping, merely that the catalog state is not corrupted due to the existence of background
  * index builds.
  *
- * @tags: [requires_replication, requires_persistence]
+ * @tags: [requires_replication, requires_persistence, requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/unsupported_change_stream_deployments.js b/jstests/noPassthrough/unsupported_change_stream_deployments.js
index 6bc969abca7..3f24a8b0f2c 100644
--- a/jstests/noPassthrough/unsupported_change_stream_deployments.js
+++ b/jstests/noPassthrough/unsupported_change_stream_deployments.js
@@ -1,5 +1,5 @@
 // Tests that the $changeStream stage returns an error when run against a standalone mongod.
-// @tags: [requires_sharding]
+// @tags: [requires_sharding, uses_change_streams]
 (function() {
     "use strict";
diff --git a/jstests/noPassthrough/wt_disable_majority_reads.js b/jstests/noPassthrough/wt_disable_majority_reads.js
new file mode 100644
index 00000000000..57249723d2c
--- /dev/null
+++ b/jstests/noPassthrough/wt_disable_majority_reads.js
@@ -0,0 +1,32 @@
+// @tags: [requires_wiredtiger, requires_replication]
+(function() {
+    "use strict";
+
+    var rst = new ReplSetTest({
+        nodes: [
+            {"enableMajorityReadConcern": ""},
+            {"enableMajorityReadConcern": "false"},
+            {"enableMajorityReadConcern": "true"}
+        ]
+    });
+    rst.startSet();
+    rst.initiate();
+    rst.awaitSecondaryNodes();
+
+    rst.getPrimary().getDB("test").getCollection("test").insert({});
+    rst.awaitReplication();
+
+    // Node 0 is using the default, which is `enableMajorityReadConcern: true`. Thus a majority
+    // read should succeed.
+    assert.commandWorked(rst.nodes[0].getDB("test").runCommand(
+        {"find": "test", "readConcern": {"level": "majority"}}));
+    // Node 1 disables majority reads. Check for the appropriate error code.
+    assert.commandFailedWithCode(rst.nodes[1].getDB("test").runCommand(
+                                     {"find": "test", "readConcern": {"level": "majority"}}),
+                                 ErrorCodes.ReadConcernMajorityNotEnabled);
+    // Same as Node 0.
+    assert.commandWorked(rst.nodes[2].getDB("test").runCommand(
+        {"find": "test", "readConcern": {"level": "majority"}}));
+
+    rst.stopSet();
+})();
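
One point worth drawing out of the two new tests above: disabling majority read concern does not affect majority write concern. Replication and w:"majority" acknowledgment still work; only majority-committed reads are refused. A two-line sketch against a node started with enableMajorityReadConcern:false ('coll' as in the tests above):

    assert.commandWorked(coll.insert({_id: 2}, {writeConcern: {w: "majority"}}));
    assert.commandFailedWithCode(
        coll.getDB().runCommand({find: coll.getName(), readConcern: {level: "majority"}}),
        ErrorCodes.ReadConcernMajorityNotEnabled);
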
diff --git a/jstests/replsets/clean_shutdown_oplog_state.js b/jstests/replsets/clean_shutdown_oplog_state.js
index 2bd4be04439..e7239b0f78e 100644
--- a/jstests/replsets/clean_shutdown_oplog_state.js
+++ b/jstests/replsets/clean_shutdown_oplog_state.js
@@ -3,7 +3,7 @@
 // present without this test failing. In particular if the rst.stop(1) doesn't execute mid-batch,
 // it isn't fully exercising the code. However, if the test fails there is definitely a bug.
 //
-// @tags: [requires_persistence]
+// @tags: [requires_persistence, requires_majority_read_concern]
 (function() {
     "use strict";
diff --git a/jstests/replsets/command_response_operation_time.js b/jstests/replsets/command_response_operation_time.js
index cd889e0b090..d4aecdb2248 100644
--- a/jstests/replsets/command_response_operation_time.js
+++ b/jstests/replsets/command_response_operation_time.js
@@ -2,6 +2,7 @@
  * Tests that reads and writes in a replica set return the correct operationTime for their
  * read/write concern level. Majority reads and writes return the last committed optime's timestamp
  * and local reads and writes return the last applied optime's timestamp.
+ * @tags: [requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/last_op_visible.js b/jstests/replsets/last_op_visible.js
index 471669b9853..4b8b70a24b4 100644
--- a/jstests/replsets/last_op_visible.js
+++ b/jstests/replsets/last_op_visible.js
@@ -3,6 +3,7 @@
 // lastOpVisible, and that majority read with afterOpTime of lastOpVisible will return it as well.
 // We then confirm that a writeConcern majority write will be seen as the lastVisibleOp by a
 // majority read.
+// @tags: [requires_majority_read_concern]
 
 load("jstests/replsets/rslib.js");
diff --git a/jstests/replsets/operation_time_read_and_write_concern.js b/jstests/replsets/operation_time_read_and_write_concern.js
index 235db85aab3..c1661db1d7e 100644
--- a/jstests/replsets/operation_time_read_and_write_concern.js
+++ b/jstests/replsets/operation_time_read_and_write_concern.js
@@ -1,6 +1,7 @@
 /**
  * Validates the operationTime value in the command response depends on the read/writeConcern of the
  * the read/write commmand that produced it.
+ * @tags: [requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/prepare_prepared_transaction_wc_timeout.js b/jstests/replsets/prepare_prepared_transaction_wc_timeout.js
index c7e71d2786c..09f7cf69a8c 100644
--- a/jstests/replsets/prepare_prepared_transaction_wc_timeout.js
+++ b/jstests/replsets/prepare_prepared_transaction_wc_timeout.js
@@ -2,7 +2,7 @@
  * Tests that when preparing a prepared transaction, we wait for writeConcern if the client optime
  * is behind the prepareTimestamp.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/read_concern_majority_getmore_secondaries.js b/jstests/replsets/read_concern_majority_getmore_secondaries.js
index d21560b6510..6db3658733e 100644
--- a/jstests/replsets/read_concern_majority_getmore_secondaries.js
+++ b/jstests/replsets/read_concern_majority_getmore_secondaries.js
@@ -1,4 +1,5 @@
 // Test that getMore for a majority read on a secondary only reads committed data.
+// @tags: [requires_majority_read_concern]
 (function() {
     "use strict";
     // For supportsMajorityReadConcern().
diff --git a/jstests/replsets/read_concern_uninitated_set.js b/jstests/replsets/read_concern_uninitated_set.js
index 1e59359b683..0737b88e229 100644
--- a/jstests/replsets/read_concern_uninitated_set.js
+++ b/jstests/replsets/read_concern_uninitated_set.js
@@ -2,7 +2,7 @@
 * Test to ensure that specifying non-local read concern with an uninitiated set does not crash
 * node.
 *
- * @tags: [requires_persistence]
+ * @tags: [requires_persistence, requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/rollback_waits_for_bgindex_completion.js b/jstests/replsets/rollback_waits_for_bgindex_completion.js
index 041cc9404a9..eb76dd7ddcd 100644
--- a/jstests/replsets/rollback_waits_for_bgindex_completion.js
+++ b/jstests/replsets/rollback_waits_for_bgindex_completion.js
@@ -2,7 +2,7 @@
 * Test to ensure that rollback waits for in-progress background index builds to finish before
 * starting the rollback process. Only applies to Recoverable Rollback via WiredTiger checkpoints.
 *
- * @tags: [requires_wiredtiger, requires_journaling]
+ * @tags: [requires_wiredtiger, requires_journaling, requires_majority_read_concern]
 */
 (function() {
     'use strict';
diff --git a/jstests/replsets/secondary_reads_timestamp_visibility.js b/jstests/replsets/secondary_reads_timestamp_visibility.js
index 6ebbe527cec..f91a8d56c46 100644
--- a/jstests/replsets/secondary_reads_timestamp_visibility.js
+++ b/jstests/replsets/secondary_reads_timestamp_visibility.js
@@ -58,6 +58,10 @@
     let levels = ["local", "available", "majority"];
 
+    if (!primaryDB.serverStatus().storageEngine.supportsCommittedReads) {
+        levels = ["local", "available"];
+    }
+
     // We should see the previous, un-replicated state on the secondary with every readconcern.
     for (let i in levels) {
         print("Checking that no new updates are visible yet for readConcern: " + levels[i]);
diff --git a/jstests/replsets/shutdown_with_prepared_transaction.js b/jstests/replsets/shutdown_with_prepared_transaction.js
index 387d7474ef3..f8844b21074 100644
--- a/jstests/replsets/shutdown_with_prepared_transaction.js
+++ b/jstests/replsets/shutdown_with_prepared_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Tests that a server can still be shut down while it has prepared transactions pending.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/speculative_transaction.js b/jstests/replsets/speculative_transaction.js
index d1e5f14c3cc..6bfc427369d 100644
--- a/jstests/replsets/speculative_transaction.js
+++ b/jstests/replsets/speculative_transaction.js
@@ -3,7 +3,7 @@
 * the same document can run back to back without waiting for the first transaction to
 * commit to a majority.
 *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, requires_majority_read_concern]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/stepdown_with_prepared_transaction.js b/jstests/replsets/stepdown_with_prepared_transaction.js
index f1b738af4f9..1b30041c313 100644
--- a/jstests/replsets/stepdown_with_prepared_transaction.js
+++ b/jstests/replsets/stepdown_with_prepared_transaction.js
@@ -1,7 +1,7 @@
 /**
  * Tests that it is possible to step down a primary while there are transactions in prepare.
  *
- * @tags: [uses_transactions]
+ * @tags: [uses_transactions, uses_prepare_transaction]
 */
 (function() {
     "use strict";
diff --git a/jstests/replsets/temp_namespace_restart_as_standalone.js b/jstests/replsets/temp_namespace_restart_as_standalone.js
index d6453576bb8..a566a6b1309 100644
--- a/jstests/replsets/temp_namespace_restart_as_standalone.js
+++ b/jstests/replsets/temp_namespace_restart_as_standalone.js
@@ -2,7 +2,7 @@
 * Tests that temporary collections are not dropped when a member of a replica set is started up as
 * a stand-alone mongod, i.e. without the --replSet parameter.
 *
- * @tags: [requires_persistence]
+ * @tags: [requires_persistence, requires_majority_read_concern]
 */
 (function() {
     var rst = new ReplSetTest({nodes: 2});
diff --git a/jstests/replsets/transactions_after_rollback_via_refetch.js b/jstests/replsets/transactions_after_rollback_via_refetch.js
new file mode 100644
index 00000000000..5d5ba119ecd
--- /dev/null
+++ b/jstests/replsets/transactions_after_rollback_via_refetch.js
@@ -0,0 +1,122 @@
+/**
+ * Basic test that transactions are able to run against a node immediately after it has executed a
+ * refetch based rollback of a few basic CRUD and DDL ops. Local writes done during the rollback
+ * process are not timestamped, so we want to ensure that transactions can be started against a
+ * valid snapshot post-rollback and read data correctly.
+ *
+ * @tags: [uses_transactions]
+ */
+(function() {
+    'use strict';
+
+    load("jstests/replsets/libs/rollback_test.js");
+
+    let name = "transactions_after_rollback_via_refetch";
+    let dbName = name;
+    let crudCollName = "crudColl";
+    let collToDropName = "collToDrop";
+
+    let CommonOps = (node) => {
+        // Insert a couple of documents that will initially be present on all nodes.
+        let crudColl = node.getDB(dbName)[crudCollName];
+        assert.commandWorked(crudColl.insert({_id: 0}));
+        assert.commandWorked(crudColl.insert({_id: 1}));
+
+        // Create a collection so it can be dropped on the rollback node.
+        node.getDB(dbName)[collToDropName].insert({_id: 0});
+    };
+
+    // We want to have the rollback node perform some inserts, updates, and deletes locally
+    // during the rollback process, so we can ensure that transactions will read correct data
+    // post-rollback, even though these writes will be un-timestamped.
+    let RollbackOps = (node) => {
+        let crudColl = node.getDB(dbName)[crudCollName];
+        // Roll back an update (causes refetch and local update).
+        assert.commandWorked(crudColl.update({_id: 0}, {$set: {rollbackNode: 0}}));
+        // Roll back a delete (causes refetch and local insert).
+        assert.commandWorked(crudColl.remove({_id: 1}));
+        // Roll back an insert (causes local delete).
+        assert.commandWorked(crudColl.insert({_id: 2}));
+
+        // Roll back a drop (re-creates the collection).
+        node.getDB(dbName)[collToDropName].drop();
+    };
+
+    let SyncSourceOps = (node) => {
+        let coll = node.getDB(dbName)[crudCollName];
+        // Update these docs so the rollback node will refetch them.
+        assert.commandWorked(coll.update({_id: 0}, {$set: {syncSource: 0}}));
+        assert.commandWorked(coll.update({_id: 1}, {$set: {syncSource: 1}}));
+    };
+
+    // Set up a replica set for use in RollbackTest. We disable majority reads on all nodes so that
+    // they will use the "rollbackViaRefetch" algorithm.
+    let replTest = new ReplSetTest(
+        {name, nodes: 3, useBridge: true, nodeOptions: {enableMajorityReadConcern: "false"}});
+    replTest.startSet();
+    const nodes = replTest.nodeList();
+    replTest.initiate({
+        _id: name,
+        members: [
+            {_id: 0, host: nodes[0]},
+            {_id: 1, host: nodes[1]},
+            {_id: 2, host: nodes[2], arbiterOnly: true}
+        ]
+    });
+
+    let rollbackTest = new RollbackTest(name, replTest);
+
+    CommonOps(rollbackTest.getPrimary());
+
+    let rollbackNode = rollbackTest.transitionToRollbackOperations();
+    RollbackOps(rollbackNode);
+
+    let syncSourceNode = rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
+    SyncSourceOps(syncSourceNode);
+
+    // Wait for rollback to finish.
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); + + // Make the rollback node primary so we can run transactions against it. + rollbackTest.getTestFixture().stepUp(rollbackNode); + + jsTestLog("Testing transactions against the node that just rolled back."); + const sessionOptions = {causalConsistency: false}; + let session = rollbackNode.getDB(dbName).getMongo().startSession(sessionOptions); + let sessionDb = session.getDatabase(dbName); + let sessionColl = sessionDb[crudCollName]; + + // Make sure we can do basic CRUD ops inside a transaction and read the data back correctly, pre + // and post-commit. + session.startTransaction(); + // Make sure we read from the snapshot correctly. + assert.docEq(sessionColl.find().sort({_id: 1}).toArray(), + [{_id: 0, syncSource: 0}, {_id: 1, syncSource: 1}]); + // Do some basic ops. + assert.commandWorked(sessionColl.update({_id: 0}, {$set: {inTxn: 1}})); + assert.commandWorked(sessionColl.remove({_id: 1})); + assert.commandWorked(sessionColl.insert({_id: 2})); + // Make sure we read the updated data correctly. + assert.docEq(sessionColl.find().sort({_id: 1}).toArray(), + [{_id: 0, syncSource: 0, inTxn: 1}, {_id: 2}]); + session.commitTransaction(); + + // Make sure data is visible after commit. + assert.docEq(sessionColl.find().sort({_id: 1}).toArray(), + [{_id: 0, syncSource: 0, inTxn: 1}, {_id: 2}]); + + // Run a transaction that touches the collection that was re-created during rollback. + sessionColl = sessionDb[collToDropName]; + session.startTransaction(); + assert.docEq(sessionColl.find().sort({_id: 1}).toArray(), [{_id: 0}]); + assert.commandWorked(sessionColl.update({_id: 0}, {$set: {inTxn: 1}})); + session.commitTransaction(); + + // Make sure data is visible after commit. + assert.docEq(sessionColl.find().sort({_id: 1}).toArray(), [{_id: 0, inTxn: 1}]); + + // Check the replica set. + rollbackTest.stop(); + +}()); diff --git a/jstests/sharding/after_cluster_time.js b/jstests/sharding/after_cluster_time.js index fea0a573bac..3c99680ebe8 100644 --- a/jstests/sharding/after_cluster_time.js +++ b/jstests/sharding/after_cluster_time.js @@ -1,5 +1,6 @@ /** * Tests readConcern: afterClusterTime behavior in a sharded cluster. + * @tags: [requires_majority_read_concern] */ (function() { "use strict"; diff --git a/jstests/sharding/change_stream_chunk_migration.js b/jstests/sharding/change_stream_chunk_migration.js index e5eaa0460fc..64f7d860c2a 100644 --- a/jstests/sharding/change_stream_chunk_migration.js +++ b/jstests/sharding/change_stream_chunk_migration.js @@ -1,5 +1,6 @@ // Tests that change stream returns the stream of results continuously and in the right order when // it's migrating a chunk to a new shard. +// @tags: [uses_change_streams] (function() { 'use strict'; diff --git a/jstests/sharding/change_stream_enforce_max_time_ms_on_mongos.js b/jstests/sharding/change_stream_enforce_max_time_ms_on_mongos.js index 2295086a18c..d97e88f62a1 100644 --- a/jstests/sharding/change_stream_enforce_max_time_ms_on_mongos.js +++ b/jstests/sharding/change_stream_enforce_max_time_ms_on_mongos.js @@ -3,6 +3,7 @@ // so allows the shards to regularly report their advancing optimes in the absence of any new data, // which in turn allows the AsyncResultsMerger to return sorted results retrieved from the other // shards. 
+// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_stream_lookup_single_shard_cluster.js b/jstests/sharding/change_stream_lookup_single_shard_cluster.js index f642064cd57..ac00ea33d34 100644 --- a/jstests/sharding/change_stream_lookup_single_shard_cluster.js +++ b/jstests/sharding/change_stream_lookup_single_shard_cluster.js @@ -1,6 +1,7 @@ // Tests that a $changeStream pipeline is split rather than forwarded even in the case where the // cluster only has a single shard, and that it can therefore successfully look up a document in a // sharded collection. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_stream_read_preference.js b/jstests/sharding/change_stream_read_preference.js index d60b35a84b7..25a7b5ef061 100644 --- a/jstests/sharding/change_stream_read_preference.js +++ b/jstests/sharding/change_stream_read_preference.js @@ -1,5 +1,6 @@ // Tests that change streams and their update lookups obey the read preference specified by the // user. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_stream_resume_from_different_mongos.js b/jstests/sharding/change_stream_resume_from_different_mongos.js index 0ec7ab2d00c..7efe9e06a36 100644 --- a/jstests/sharding/change_stream_resume_from_different_mongos.js +++ b/jstests/sharding/change_stream_resume_from_different_mongos.js @@ -1,4 +1,5 @@ // Test resuming a change stream on a mongos other than the one the change stream was started on. +// @tags: [uses_change_streams] (function() { "use strict"; // For supportsMajorityReadConcern(). diff --git a/jstests/sharding/change_stream_shard_failover.js b/jstests/sharding/change_stream_shard_failover.js index ade85d27086..27a141bcfed 100644 --- a/jstests/sharding/change_stream_shard_failover.js +++ b/jstests/sharding/change_stream_shard_failover.js @@ -1,6 +1,7 @@ /** * Test resuming a change stream on a node other than the one it was started on. Accomplishes this * by triggering a stepdown. + * @tags: [uses_change_streams] */ // Checking UUID consistency uses cached connections, which are not valid across restarts or diff --git a/jstests/sharding/change_stream_update_lookup_collation.js b/jstests/sharding/change_stream_update_lookup_collation.js index 0cdd59cf131..6c57aba30e0 100644 --- a/jstests/sharding/change_stream_update_lookup_collation.js +++ b/jstests/sharding/change_stream_update_lookup_collation.js @@ -2,7 +2,7 @@ // use the collection's default collation once it gets to the shards. // // Collation is only supported with the find command, not with op query. -// @tags: [requires_find_command] +// @tags: [requires_find_command, uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_stream_update_lookup_read_concern.js b/jstests/sharding/change_stream_update_lookup_read_concern.js index 58cd8904870..e537484f63c 100644 --- a/jstests/sharding/change_stream_update_lookup_read_concern.js +++ b/jstests/sharding/change_stream_update_lookup_read_concern.js @@ -1,6 +1,7 @@ // Tests that a change stream's update lookup will use the appropriate read concern. In particular, // tests that the update lookup will return a version of the document at least as recent as the // change that we're doing the lookup for, and that change will be majority-committed. 
+// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_streams.js b/jstests/sharding/change_streams.js index 1000d047cd0..cd13f86678c 100644 --- a/jstests/sharding/change_streams.js +++ b/jstests/sharding/change_streams.js @@ -1,4 +1,5 @@ // Tests the behavior of change streams on sharded collections. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_streams_establishment_finds_new_shards.js b/jstests/sharding/change_streams_establishment_finds_new_shards.js index e66e80ec6d4..8b8bd4fd5a9 100644 --- a/jstests/sharding/change_streams_establishment_finds_new_shards.js +++ b/jstests/sharding/change_streams_establishment_finds_new_shards.js @@ -1,5 +1,6 @@ // Tests that change streams is able to find and return results from new shards which are added // during cursor establishment. +// @tags: [uses_change_streams] (function() { 'use strict'; diff --git a/jstests/sharding/change_streams_primary_shard_unaware.js b/jstests/sharding/change_streams_primary_shard_unaware.js index 27d7ef9bfa3..0dade0d553c 100644 --- a/jstests/sharding/change_streams_primary_shard_unaware.js +++ b/jstests/sharding/change_streams_primary_shard_unaware.js @@ -3,7 +3,7 @@ // // This test triggers a compiler bug that causes a crash when compiling with optimizations on, see // SERVER-36321. -// @tags: [requires_persistence], [blacklist_from_rhel_67_s390x] +// @tags: [requires_persistence, blacklist_from_rhel_67_s390x, uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_streams_shards_start_in_sync.js b/jstests/sharding/change_streams_shards_start_in_sync.js index 93e26ec9e7c..c3c3fb07e73 100644 --- a/jstests/sharding/change_streams_shards_start_in_sync.js +++ b/jstests/sharding/change_streams_shards_start_in_sync.js @@ -5,6 +5,7 @@ // could occur, followed by write 'B' to shard 1, and then the change stream could be established on // shard 1, then some third write 'C' could occur. This test ensures that in that case, both 'A' // and 'B' will be seen in the changestream before 'C'. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_streams_unsharded_becomes_sharded.js b/jstests/sharding/change_streams_unsharded_becomes_sharded.js index ea5178601b1..e865fb709d5 100644 --- a/jstests/sharding/change_streams_unsharded_becomes_sharded.js +++ b/jstests/sharding/change_streams_unsharded_becomes_sharded.js @@ -3,6 +3,7 @@ // 'documentKey' to include the new shard key, and that a resume token obtained prior to the // shardCollection command can be used to resume the stream even after the collection has been // sharded. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/change_streams_whole_db.js b/jstests/sharding/change_streams_whole_db.js index bc7d559610a..4051493c04f 100644 --- a/jstests/sharding/change_streams_whole_db.js +++ b/jstests/sharding/change_streams_whole_db.js @@ -1,4 +1,5 @@ // Tests the behavior of a change stream on a whole database in a sharded cluster. 
+// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/lookup_change_stream_post_image_compound_shard_key.js b/jstests/sharding/lookup_change_stream_post_image_compound_shard_key.js index 29689156c87..a1bce25ad81 100644 --- a/jstests/sharding/lookup_change_stream_post_image_compound_shard_key.js +++ b/jstests/sharding/lookup_change_stream_post_image_compound_shard_key.js @@ -1,5 +1,6 @@ // Tests the behavior of looking up the post image for change streams on collections which are // sharded with a compound shard key. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/lookup_change_stream_post_image_hashed_shard_key.js b/jstests/sharding/lookup_change_stream_post_image_hashed_shard_key.js index db8ac5ed31d..f1e9e6da502 100644 --- a/jstests/sharding/lookup_change_stream_post_image_hashed_shard_key.js +++ b/jstests/sharding/lookup_change_stream_post_image_hashed_shard_key.js @@ -1,5 +1,6 @@ // Tests the behavior of looking up the post image for change streams on collections which are // sharded with a hashed shard key. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/lookup_change_stream_post_image_id_shard_key.js b/jstests/sharding/lookup_change_stream_post_image_id_shard_key.js index a6dd9631dac..843dda1c524 100644 --- a/jstests/sharding/lookup_change_stream_post_image_id_shard_key.js +++ b/jstests/sharding/lookup_change_stream_post_image_id_shard_key.js @@ -1,5 +1,6 @@ // Tests the behavior of looking up the post image for change streams on collections which are // sharded with a key which is just the "_id" field. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/restart_transactions.js b/jstests/sharding/restart_transactions.js index 8d4089e266a..ebf93496762 100644 --- a/jstests/sharding/restart_transactions.js +++ b/jstests/sharding/restart_transactions.js @@ -2,7 +2,7 @@ * Verify the states that a multi-statement transaction can be restarted on at the active * transaction number for servers in a sharded cluster. * - * @tags: [requires_sharding, uses_transactions] + * @tags: [requires_sharding, uses_transactions, uses_prepare_transaction] */ (function() { "use strict"; diff --git a/jstests/sharding/resume_change_stream.js b/jstests/sharding/resume_change_stream.js index 53396973a7c..9b5e33d0173 100644 --- a/jstests/sharding/resume_change_stream.js +++ b/jstests/sharding/resume_change_stream.js @@ -1,6 +1,6 @@ // Tests resuming change streams on sharded collections. // We need to use a readConcern in this test, which requires read commands. -// @tags: [requires_find_command] +// @tags: [requires_find_command, uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/resume_change_stream_from_stale_mongos.js b/jstests/sharding/resume_change_stream_from_stale_mongos.js index 68ec7799242..7b8dd9cf673 100644 --- a/jstests/sharding/resume_change_stream_from_stale_mongos.js +++ b/jstests/sharding/resume_change_stream_from_stale_mongos.js @@ -1,6 +1,7 @@ // Tests that resuming a change stream that has become sharded via a mongos that believes the // collection is still unsharded will end up targeting the change stream to all shards after getting // a stale shard version. 
+// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/resume_change_stream_on_subset_of_shards.js b/jstests/sharding/resume_change_stream_on_subset_of_shards.js index 2690c32b85b..3c51004ed3c 100644 --- a/jstests/sharding/resume_change_stream_on_subset_of_shards.js +++ b/jstests/sharding/resume_change_stream_on_subset_of_shards.js @@ -1,5 +1,6 @@ // This tests resuming a change stream on a sharded collection where not all shards have a chunk in // the collection. +// @tags: [uses_change_streams] (function() { "use strict"; diff --git a/jstests/sharding/snapshot_cursor_commands_mongos.js b/jstests/sharding/snapshot_cursor_commands_mongos.js index 30cee7728d1..6609cbc4376 100644 --- a/jstests/sharding/snapshot_cursor_commands_mongos.js +++ b/jstests/sharding/snapshot_cursor_commands_mongos.js @@ -1,5 +1,5 @@ // Tests snapshot isolation on readConcern level snapshot reads through mongos. -// @tags: [requires_sharding, uses_transactions] +// @tags: [requires_sharding, uses_transactions, uses_multi_shard_transaction] (function() { "use strict"; diff --git a/jstests/sharding/transactions_snapshot_errors_first_statement.js b/jstests/sharding/transactions_snapshot_errors_first_statement.js index 7e31480d520..7d8e3dabeb2 100644 --- a/jstests/sharding/transactions_snapshot_errors_first_statement.js +++ b/jstests/sharding/transactions_snapshot_errors_first_statement.js @@ -6,7 +6,7 @@ // Runs against an unsharded collection, a sharded collection with all chunks on one shard, and a // sharded collection with one chunk on both shards. // -// @tags: [requires_sharding, uses_transactions] +// @tags: [requires_sharding, uses_transactions, uses_multi_shard_transaction] (function() { "use strict"; diff --git a/jstests/sharding/transactions_snapshot_errors_subsequent_statements.js b/jstests/sharding/transactions_snapshot_errors_subsequent_statements.js index f346a8eea48..28a6e861fee 100644 --- a/jstests/sharding/transactions_snapshot_errors_subsequent_statements.js +++ b/jstests/sharding/transactions_snapshot_errors_subsequent_statements.js @@ -5,7 +5,7 @@ // Runs against an unsharded collection, a sharded collection with all chunks on one shard, and a // sharded collection with one chunk on both shards. // -// @tags: [requires_sharding, uses_transactions] +// @tags: [requires_sharding, uses_transactions, uses_multi_shard_transaction] (function() { "use strict"; diff --git a/jstests/sharding/transactions_stale_database_version_errors.js b/jstests/sharding/transactions_stale_database_version_errors.js index 02f1d37c113..6c3329c5400 100644 --- a/jstests/sharding/transactions_stale_database_version_errors.js +++ b/jstests/sharding/transactions_stale_database_version_errors.js @@ -1,6 +1,6 @@ // Tests mongos behavior on stale database version errors received in a transaction. // -// @tags: [requires_sharding, uses_transactions] +// @tags: [requires_sharding, uses_transactions, uses_multi_shard_transaction] (function() { "use strict"; diff --git a/jstests/sharding/transactions_stale_shard_version_errors.js b/jstests/sharding/transactions_stale_shard_version_errors.js index 4d9b7f7edf3..7e796ca80a0 100644 --- a/jstests/sharding/transactions_stale_shard_version_errors.js +++ b/jstests/sharding/transactions_stale_shard_version_errors.js @@ -1,6 +1,6 @@ // Tests mongos behavior on stale shard version errors received in a transaction. 
// -// @tags: [requires_sharding, uses_transactions] +// @tags: [requires_sharding, uses_transactions, uses_multi_shard_transaction] (function() { "use strict"; diff --git a/jstests/sharding/txn_agg.js b/jstests/sharding/txn_agg.js index 58a61ed87d9..eff1304a1cb 100644 --- a/jstests/sharding/txn_agg.js +++ b/jstests/sharding/txn_agg.js @@ -1,4 +1,4 @@ -// @tags: [uses_transactions, requires_find_command] +// @tags: [uses_transactions, requires_find_command, uses_multi_shard_transaction] (function() { "use strict"; diff --git a/jstests/sharding/txn_basic_two_phase_commit.js b/jstests/sharding/txn_basic_two_phase_commit.js index 182f653eb7e..677224c88b3 100644 --- a/jstests/sharding/txn_basic_two_phase_commit.js +++ b/jstests/sharding/txn_basic_two_phase_commit.js @@ -2,7 +2,7 @@ * Exercises the coordinator commands logic by simulating a basic two phase commit and basic two * phase abort. * - * @tags: [uses_transactions] + * @tags: [uses_transactions, uses_prepare_transaction] */ (function() { const dbName = "test"; diff --git a/src/mongo/db/initialize_operation_session_info.cpp b/src/mongo/db/initialize_operation_session_info.cpp index b0e22d31168..4f39c6293ff 100644 --- a/src/mongo/db/initialize_operation_session_info.cpp +++ b/src/mongo/db/initialize_operation_session_info.cpp @@ -43,8 +43,7 @@ boost::optional<OperationSessionInfoFromClient> initializeOperationSessionInfo( const BSONObj& requestBody, bool requiresAuth, bool isReplSetMemberOrMongos, - bool supportsDocLocking, - bool supportsRecoverToStableTimestamp) { + bool supportsDocLocking) { auto osi = OperationSessionInfoFromClient::parse("OperationSessionInfo"_sd, requestBody); if (opCtx->getClient()->isInDirectClient()) { @@ -119,11 +118,6 @@ boost::optional<OperationSessionInfoFromClient> initializeOperationSessionInfo( uassert(ErrorCodes::InvalidOptions, "Specifying autocommit=true is not allowed.", !osi.getAutocommit().value()); - - uassert(ErrorCodes::IllegalOperation, - "Multi-document transactions are only allowed on storage engines that support " - "recover to stable timestamp.", - supportsRecoverToStableTimestamp); } else { uassert(ErrorCodes::InvalidOptions, "'startTransaction' field requires 'autocommit' field to also be specified", diff --git a/src/mongo/db/initialize_operation_session_info.h b/src/mongo/db/initialize_operation_session_info.h index e67bf7531e4..8882c4e7052 100644 --- a/src/mongo/db/initialize_operation_session_info.h +++ b/src/mongo/db/initialize_operation_session_info.h @@ -46,9 +46,6 @@ namespace mongo { * Both isReplSetMemberOrMongos and supportsDocLocking need to be true if the command contains a * transaction number, otherwise this function will throw. * - * supportsRecoverToStableTimestamp needs to be true if the command contains autocommit:false, - * otherwise this function will throw. - * * On success, returns the parsed request information. Returning boost::none implies that the * proper command or session requirements were not met. 
*/ @@ -57,7 +54,6 @@ boost::optional<OperationSessionInfoFromClient> initializeOperationSessionInfo( const BSONObj& requestBody, bool requiresAuth, bool isReplSetMemberOrMongos, - bool supportsDocLocking, - bool supportsRecoverToStableTimestamp); + bool supportsDocLocking); } // namespace mongo diff --git a/src/mongo/db/logical_session_id_test.cpp b/src/mongo/db/logical_session_id_test.cpp index 128a793b12b..e32e1e88089 100644 --- a/src/mongo/db/logical_session_id_test.cpp +++ b/src/mongo/db/logical_session_id_test.cpp @@ -235,7 +235,7 @@ TEST_F(LogicalSessionIdTest, GenWithoutAuthedUser) { TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_NoSessionIdNoTransactionNumber) { addSimpleUser(UserName("simple", "test")); - initializeOperationSessionInfo(_opCtx.get(), BSON("TestCmd" << 1), true, true, true, true); + initializeOperationSessionInfo(_opCtx.get(), BSON("TestCmd" << 1), true, true, true); ASSERT(!_opCtx->getLogicalSessionId()); ASSERT(!_opCtx->getTxnNumber()); @@ -251,7 +251,6 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_SessionIdNoTransacti << "TestField"), true, true, - true, true); ASSERT(_opCtx->getLogicalSessionId()); @@ -268,7 +267,6 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_MissingSessionIdWith << "TestField"), true, true, - true, true), AssertionException, ErrorCodes::InvalidOptions); @@ -285,7 +283,6 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_SessionIdAndTransact << "TestField"), true, true, - true, true); ASSERT(_opCtx->getLogicalSessionId()); @@ -307,7 +304,6 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_IsReplSetMemberOrMon << "TestField"), true, false, - true, true), AssertionException, ErrorCodes::IllegalOperation); @@ -325,27 +321,6 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_SupportsDocLockingFa << "TestField"), true, true, - false, - true), - AssertionException, - ErrorCodes::IllegalOperation); -} - -TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_SupportsRecoverToStableTimestampFalse) { - addSimpleUser(UserName("simple", "test")); - LogicalSessionFromClient lsid; - lsid.setId(UUID::gen()); - - ASSERT_THROWS_CODE( - initializeOperationSessionInfo( - _opCtx.get(), - BSON("TestCmd" << 1 << "lsid" << lsid.toBSON() << "txnNumber" << 100LL << "OtherField" - << "TestField" - << "autocommit" - << false), - true, - true, - true, false), AssertionException, ErrorCodes::IllegalOperation); @@ -364,7 +339,6 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_IgnoresInfoIfNoCache << "TestField"), true, true, - true, true)); } @@ -383,10 +357,10 @@ TEST_F(LogicalSessionIdTest, InitializeOperationSessionInfo_SendingInfoFailsInDi << "foo"); commandBuilder.appendElements(param); - ASSERT_THROWS_CODE(initializeOperationSessionInfo( - _opCtx.get(), commandBuilder.obj(), true, true, true, true), - AssertionException, - 50891); + ASSERT_THROWS_CODE( + initializeOperationSessionInfo(_opCtx.get(), commandBuilder.obj(), true, true, true), + AssertionException, + 50891); } _opCtx->getClient()->setInDirectClient(false); diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index 0376b73cee0..98a1f8b2f68 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -301,15 +301,14 @@ Status addMongodOptions(moe::OptionSection* options) { "specify index prefetching behavior (if secondary) [none|_id_only|all]") .format("(:?none)|(:?_id_only)|(:?all)", "(none/_id_only/all)"); - // `enableMajorityReadConcern` is always 
enabled starting in 3.6, regardless of user - // settings. We're leaving the option in to not break existing deployment scripts. A warning - // will appear if explicitly set to false. + // `enableMajorityReadConcern` is enabled by default starting in 3.6. rs_options .addOptionChaining("replication.enableMajorityReadConcern", "enableMajorityReadConcern", - moe::Switch, + moe::Bool, "enables majority readConcern") - .setDefault(moe::Value(true)); + .setDefault(moe::Value(true)) + .setImplicit(moe::Value(true)); replication_options.addOptionChaining( "master", "master", moe::Switch, "Master/slave replication no longer supported"); @@ -873,11 +872,8 @@ Status storeMongodOptions(const moe::Environment& params) { } if (params.count("replication.enableMajorityReadConcern")) { - bool val = params["replication.enableMajorityReadConcern"].as<bool>(); - if (!val) { - warning() << "enableMajorityReadConcern startup parameter was supplied, but its value " - "was ignored; majority read concern cannot be disabled."; - } + serverGlobalParams.enableMajorityReadConcern = + params["replication.enableMajorityReadConcern"].as<bool>(); } if (params.count("storage.indexBuildRetry")) { @@ -934,6 +930,15 @@ Status storeMongodOptions(const moe::Environment& params) { if (clusterRoleParam == "configsvr") { serverGlobalParams.clusterRole = ClusterRole::ConfigServer; + if (params.count("replication.enableMajorityReadConcern") && + !params["replication.enableMajorityReadConcern"].as<bool>()) { + warning() + << "Config servers require majority read concern, but it was explicitly " + "disabled. The override is being ignored and the process is continuing " + "with majority read concern enabled."; + } + serverGlobalParams.enableMajorityReadConcern = true; + // If we haven't explicitly specified a journal option, default journaling to true for // the config server role if (!params.count("storage.journal.enabled")) { diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 4e7d3dd21f7..3af514b94e4 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -892,7 +892,7 @@ bool ReplicationCoordinatorExternalStateImpl::isReadCommittedSupportedByStorageE auto storageEngine = opCtx->getServiceContext()->getStorageEngine(); // This should never be called if the storage engine has not been initialized. invariant(storageEngine); - return storageEngine->getSnapshotManager(); + return storageEngine->supportsReadConcernMajority(); } bool ReplicationCoordinatorExternalStateImpl::isReadConcernSnapshotSupportedByStorageEngine( diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index 16d396b63eb..5b5bfd43a4f 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -242,13 +242,13 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx, // If we were passed in a stable timestamp, we are in rollback recovery and should recover from // that stable timestamp. Otherwise, we're recovering at startup. If this storage engine - // supports recover to stable timestamp, we ask it for the recovery timestamp. If the storage - // engine returns a timestamp, we recover from that point. However, if the storage engine - // returns "none", the storage engine does not have a stable checkpoint and we must recover from - // an unstable checkpoint instead. 
- const bool supportsRecoverToStableTimestamp = - _storageInterface->supportsRecoverToStableTimestamp(opCtx->getServiceContext()); - if (!stableTimestamp && supportsRecoverToStableTimestamp) { + // supports recover to stable timestamp, or if enableMajorityReadConcern is false, we ask it + // for the recovery timestamp. If the storage engine returns a timestamp, we recover from that point. + // However, if the storage engine returns "none", the storage engine does not have a stable + // checkpoint and we must recover from an unstable checkpoint instead. + const bool supportsRecoveryTimestamp = + _storageInterface->supportsRecoveryTimestamp(opCtx->getServiceContext()); + if (!stableTimestamp && supportsRecoveryTimestamp) { stableTimestamp = _storageInterface->getRecoveryTimestamp(opCtx->getServiceContext()); } @@ -260,7 +260,7 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx, << appliedThrough.toString()); if (stableTimestamp) { - invariant(supportsRecoverToStableTimestamp); + invariant(supportsRecoveryTimestamp); _recoverFromStableTimestamp(opCtx, *stableTimestamp, appliedThrough, topOfOplog); } else { _recoverFromUnstableCheckpoint(opCtx, appliedThrough, topOfOplog); @@ -301,6 +301,20 @@ void ReplicationRecoveryImpl::_recoverFromUnstableCheckpoint(OperationContext* o // application and must apply from the appliedThrough to the top of the oplog. log() << "Starting recovery oplog application at the appliedThrough: " << appliedThrough << ", through the top of the oplog: " << topOfOplog; + + // When `recoverFromOplog` truncates the oplog, that also happens to set the "oldest + // timestamp" to the truncation point[1]. `_applyToEndOfOplog` will then perform writes + // before the truncation point. Doing so violates the constraint that all updates must be + // timestamped newer than the "oldest timestamp". This call will move the "oldest + // timestamp" back to the `startPoint`. + // + // [1] This is arguably incorrect. On rollback for nodes that are not keeping history to + // the "majority point", the "oldest timestamp" likely needs to go back in time. The + // oplog's `cappedTruncateAfter` method was a convenient location for this logic, which, + // unfortunately, conflicts with the usage above.
+ opCtx->getServiceContext()->getStorageEngine()->setOldestTimestamp( + appliedThrough.getTimestamp()); + _applyToEndOfOplog(opCtx, appliedThrough.getTimestamp(), topOfOplog.getTimestamp()); } diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index 1a7cc5f8be8..cd5e072a513 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -78,11 +78,22 @@ public: _supportsRecoverToStableTimestamp = supports; } + bool supportsRecoveryTimestamp(ServiceContext* serviceCtx) const override { + stdx::lock_guard<stdx::mutex> lock(_mutex); + return _supportsRecoveryTimestamp; + } + + void setSupportsRecoveryTimestamp(bool supports) { + stdx::lock_guard<stdx::mutex> lock(_mutex); + _supportsRecoveryTimestamp = supports; + } + private: mutable stdx::mutex _mutex; Timestamp _initialDataTimestamp = Timestamp::min(); boost::optional<Timestamp> _recoveryTimestamp = boost::none; bool _supportsRecoverToStableTimestamp = true; + bool _supportsRecoveryTimestamp = true; }; class ReplicationRecoveryTest : public ServiceContextMongoDTest { @@ -357,9 +368,9 @@ DEATH_TEST_F(ReplicationRecoveryTest, } DEATH_TEST_F(ReplicationRecoveryTest, - RecoveryInvariantsIfStableTimestampAndDoesNotSupportRTT, + RecoveryInvariantsIfStableTimestampAndDoesNotSupportRecoveryTimestamp, "Invariant failure") { - getStorageInterfaceRecovery()->setSupportsRecoverToStableTimestamp(false); + getStorageInterfaceRecovery()->setSupportsRecoveryTimestamp(false); ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); diff --git a/src/mongo/db/repl/rollback_test_fixture.h b/src/mongo/db/repl/rollback_test_fixture.h index 8d31c63564f..26c568db2c4 100644 --- a/src/mongo/db/repl/rollback_test_fixture.h +++ b/src/mongo/db/repl/rollback_test_fixture.h @@ -137,6 +137,10 @@ public: return true; } + bool supportsRecoveryTimestamp(ServiceContext* serviceCtx) const override { + return true; + } + void setRecoverToTimestampStatus(Status status) { stdx::lock_guard<stdx::mutex> lock(_mutex); _recoverToTimestampStatus = status; diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h index c49a56149dc..32728c0f950 100644 --- a/src/mongo/db/repl/storage_interface.h +++ b/src/mongo/db/repl/storage_interface.h @@ -374,6 +374,11 @@ public: virtual bool supportsRecoverToStableTimestamp(ServiceContext* serviceCtx) const = 0; /** + * Returns whether the storage engine can provide a recovery timestamp. + */ + virtual bool supportsRecoveryTimestamp(ServiceContext* serviceCtx) const = 0; + + /** * Returns the stable timestamp that the storage engine recovered to on startup. If the * recovery point was not stable, returns "none". 
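* Callers should check supportsRecoveryTimestamp() before calling this method.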
*/ diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 044ae209c1c..65540f5ac78 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -1069,6 +1069,10 @@ bool StorageInterfaceImpl::supportsRecoverToStableTimestamp(ServiceContext* serv return serviceCtx->getStorageEngine()->supportsRecoverToStableTimestamp(); } +bool StorageInterfaceImpl::supportsRecoveryTimestamp(ServiceContext* serviceCtx) const { + return serviceCtx->getStorageEngine()->supportsRecoveryTimestamp(); +} + boost::optional<Timestamp> StorageInterfaceImpl::getRecoveryTimestamp( ServiceContext* serviceCtx) const { return serviceCtx->getStorageEngine()->getRecoveryTimestamp(); diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h index 954ae769c52..48de98cf184 100644 --- a/src/mongo/db/repl/storage_interface_impl.h +++ b/src/mongo/db/repl/storage_interface_impl.h @@ -166,6 +166,8 @@ public: bool supportsRecoverToStableTimestamp(ServiceContext* serviceCtx) const override; + bool supportsRecoveryTimestamp(ServiceContext* serviceCtx) const override; + boost::optional<Timestamp> getRecoveryTimestamp(ServiceContext* serviceCtx) const override; bool supportsDocLocking(ServiceContext* serviceCtx) const override; diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h index 9ea177f7e21..5ccc1c4c632 100644 --- a/src/mongo/db/repl/storage_interface_mock.h +++ b/src/mongo/db/repl/storage_interface_mock.h @@ -303,6 +303,10 @@ public: return false; } + bool supportsRecoveryTimestamp(ServiceContext* serviceCtx) const override { + return false; + } + boost::optional<Timestamp> getRecoveryTimestamp(ServiceContext* serviceCtx) const override { return boost::none; } diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 42af9416555..5025b48ff23 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -72,6 +72,7 @@ #include "mongo/db/session.h" #include "mongo/db/session_txn_record_gen.h" #include "mongo/db/stats/timer_stats.h" +#include "mongo/db/storage/recovery_unit.h" #include "mongo/stdx/memory.h" #include "mongo/util/exit.h" #include "mongo/util/fail_point_service.h" @@ -1253,9 +1254,35 @@ StatusWith<OpTime> SyncTail::multiApply(OperationContext* opCtx, MultiApplier::O // Each node records cumulative batch application stats for itself using this timer. TimerHolder timer(&applyBatchStats); + const bool pinOldestTimestamp = !serverGlobalParams.enableMajorityReadConcern; + std::unique_ptr<RecoveryUnit> pinningTransaction; + if (pinOldestTimestamp) { + // If `enableMajorityReadConcern` is false, storage aggressively trims + // history. Documents may not be inserted before the cutoff point. This piece will pin + // the "oldest timestamp" until after the batch is fully applied. + // + // When `enableMajorityReadConcern` is false, storage sets the "oldest timestamp" to + // the "get all committed" timestamp. Opening a transaction and setting its timestamp + // to the first oplog entry's timestamp will prevent the "get all committed" timestamp + // from advancing. + // + // This transaction will be aborted after all writes from the batch of operations are + // complete. Aborting the transaction allows the "get all committed" point to + // move forward.
+ pinningTransaction = std::unique_ptr<RecoveryUnit>( + opCtx->getServiceContext()->getStorageEngine()->newRecoveryUnit()); + pinningTransaction->beginUnitOfWork(opCtx); + fassert(40677, pinningTransaction->setTimestamp(ops.front().getTimestamp())); + } + // We must wait for the all work we've dispatched to complete before leaving this block // because the spawned threads refer to objects on the stack - ON_BLOCK_EXIT([&] { _writerPool->waitForIdle(); }); + ON_BLOCK_EXIT([&] { + _writerPool->waitForIdle(); + if (pinOldestTimestamp) { + pinningTransaction->abortUnitOfWork(); + } + }); // Write batch of ops into oplog. if (!_options.skipWritesToOplog) { @@ -1303,15 +1330,14 @@ StatusWith<OpTime> SyncTail::multiApply(OperationContext* opCtx, MultiApplier::O } } } - - // Notify the storage engine that a replication batch has completed. - // This means that all the writes associated with the oplog entries in the batch are - // finished and no new writes with timestamps associated with those oplog entries will show - // up in the future. - const auto storageEngine = opCtx->getServiceContext()->getStorageEngine(); - storageEngine->replicationBatchIsComplete(); } + // Notify the storage engine that a replication batch has completed. This means that all the + // writes associated with the oplog entries in the batch are finished and no new writes with + // timestamps associated with those oplog entries will show up in the future. + const auto storageEngine = opCtx->getServiceContext()->getStorageEngine(); + storageEngine->replicationBatchIsComplete(); + // Use this fail point to hold the PBWM lock and prevent the batch from completing. if (MONGO_FAIL_POINT(pauseBatchApplicationBeforeCompletion)) { log() << "pauseBatchApplicationBeforeCompletion fail point enabled. Blocking until fail " diff --git a/src/mongo/db/s/session_catalog_migration_destination_test.cpp b/src/mongo/db/s/session_catalog_migration_destination_test.cpp index 5299f54142f..7ca1a4a6294 100644 --- a/src/mongo/db/s/session_catalog_migration_destination_test.cpp +++ b/src/mongo/db/s/session_catalog_migration_destination_test.cpp @@ -242,8 +242,7 @@ public: // The ephemeral for test storage engine doesn't support document-level locking, so // requests with txnNumbers aren't allowed. To get around this, we have to manually set // up the session state and perform the insert. 
- initializeOperationSessionInfo( - innerOpCtx.get(), insertBuilder.obj(), true, true, true, false); + initializeOperationSessionInfo(innerOpCtx.get(), insertBuilder.obj(), true, true, true); OperationContextSessionMongod sessionTxnState( innerOpCtx.get(), true, boost::none, boost::none); diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h index 1a3f3a96c3e..34b3389da19 100644 --- a/src/mongo/db/server_options.h +++ b/src/mongo/db/server_options.h @@ -248,6 +248,8 @@ struct ServerGlobalParams { AtomicWord<bool> validateFeaturesAsMaster{true}; std::vector<std::string> disabledSecureAllocatorDomains; + + bool enableMajorityReadConcern = true; }; extern ServerGlobalParams serverGlobalParams; diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index 55dedecb657..c6e3b7ad7d1 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -624,8 +624,7 @@ void execCommandDatabase(OperationContext* opCtx, request.body, command->requiresAuth(), replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet, - opCtx->getServiceContext()->getStorageEngine()->supportsDocLocking(), - opCtx->getServiceContext()->getStorageEngine()->supportsRecoverToStableTimestamp()); + opCtx->getServiceContext()->getStorageEngine()->supportsDocLocking()); evaluateFailCommandFailPoint(opCtx, command->getName()); diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index 48e7986c9ea..88ae99e9f82 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -316,7 +316,7 @@ public: /** * See `StorageEngine::setOldestTimestamp` */ - virtual void setOldestTimestamp(Timestamp newOldestTimestamp) {} + virtual void setOldestTimestamp(Timestamp newOldestTimestamp, bool force) {} /** * See `StorageEngine::isCacheUnderPressure()` @@ -338,6 +338,13 @@ public: } /** + * See `StorageEngine::supportsRecoveryTimestamp` + */ + virtual bool supportsRecoveryTimestamp() const { + return false; + } + + /** * See `StorageEngine::recoverToStableTimestamp` */ virtual StatusWith<Timestamp> recoverToStableTimestamp(OperationContext* opCtx) { @@ -370,6 +377,10 @@ public: return false; } + virtual bool supportsReadConcernMajority() const { + return false; + } + /** * See `StorageEngine::replicationBatchIsComplete()` */ diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp index fab3545f636..37788d33008 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.cpp +++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp @@ -644,7 +644,8 @@ void KVStorageEngine::setOldestTimestampFromStable() { } void KVStorageEngine::setOldestTimestamp(Timestamp newOldestTimestamp) { - _engine->setOldestTimestamp(newOldestTimestamp); + const bool force = true; + _engine->setOldestTimestamp(newOldestTimestamp, force); } bool KVStorageEngine::isCacheUnderPressure(OperationContext* opCtx) const { @@ -659,6 +660,10 @@ bool KVStorageEngine::supportsRecoverToStableTimestamp() const { return _engine->supportsRecoverToStableTimestamp(); } +bool KVStorageEngine::supportsRecoveryTimestamp() const { + return _engine->supportsRecoveryTimestamp(); +} + StatusWith<Timestamp> KVStorageEngine::recoverToStableTimestamp(OperationContext* opCtx) { invariant(opCtx->lockState()->isW()); @@ -697,6 +702,10 @@ bool KVStorageEngine::supportsReadConcernSnapshot() const { return _engine->supportsReadConcernSnapshot(); } +bool 
KVStorageEngine::supportsReadConcernMajority() const { + return _engine->supportsReadConcernMajority(); +} + void KVStorageEngine::replicationBatchIsComplete() const { return _engine->replicationBatchIsComplete(); } diff --git a/src/mongo/db/storage/kv/kv_storage_engine.h b/src/mongo/db/storage/kv/kv_storage_engine.h index a25b5438c30..98d8fea43bd 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.h +++ b/src/mongo/db/storage/kv/kv_storage_engine.h @@ -137,6 +137,8 @@ public: virtual bool supportsRecoverToStableTimestamp() const override; + virtual bool supportsRecoveryTimestamp() const override; + virtual StatusWith<Timestamp> recoverToStableTimestamp(OperationContext* opCtx) override; virtual boost::optional<Timestamp> getRecoveryTimestamp() const override; @@ -147,6 +149,8 @@ public: bool supportsReadConcernSnapshot() const final; + bool supportsReadConcernMajority() const final; + virtual void replicationBatchIsComplete() const override; SnapshotManager* getSnapshotManager() const final; diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index 6452044ce30..f236113f6c3 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -320,12 +320,23 @@ public: } /** + * Returns whether the storage engine can provide a recovery timestamp. + */ + virtual bool supportsRecoveryTimestamp() const { + return false; + } + + /** * Returns true if the storage engine supports the readConcern level "snapshot". */ virtual bool supportsReadConcernSnapshot() const { return false; } + virtual bool supportsReadConcernMajority() const { + return false; + } + /** * Recovers the storage engine state to the last stable timestamp. "Stable" in this case * refers to a timestamp that is guaranteed to never be rolled back. The stable timestamp diff --git a/src/mongo/db/storage/storage_init.cpp b/src/mongo/db/storage/storage_init.cpp index 6fdc7de4304..782965e206c 100644 --- a/src/mongo/db/storage/storage_init.cpp +++ b/src/mongo/db/storage/storage_init.cpp @@ -56,7 +56,7 @@ public: const BSONElement& configElement) const { auto engine = opCtx->getClient()->getServiceContext()->getStorageEngine(); return BSON("name" << storageGlobalParams.engine << "supportsCommittedReads" - << bool(engine->getSnapshotManager()) + << engine->supportsReadConcernMajority() << "supportsSnapshotReadConcern" << engine->supportsReadConcernSnapshot() << "readOnly" diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 2bac8fcc5df..2aed5a1feff 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -268,17 +268,24 @@ public: // First, initialDataTimestamp is Timestamp(0, 1) -> Take full checkpoint. This is // when there is no consistent view of the data (i.e: during initial sync). // - // Second, stableTimestamp < initialDataTimestamp: Skip checkpoints. The data on - // disk is prone to being rolled back. Hold off on checkpoints. Hope that the - // stable timestamp surpasses the data on disk, allowing storage to persist newer - // copies to disk. + // Second, enableMajorityReadConcern is false. In this case, we are not tracking a + // stable timestamp. Take a full checkpoint. // - // Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady + // Third, stableTimestamp < initialDataTimestamp: Skip checkpoints. The data on disk + // is prone to being rolled back. Hold off on checkpoints. 
Hope that the stable + // timestamp surpasses the data on disk, allowing storage to persist newer copies to + // disk. + // + // Fourth, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady // state case. if (initialDataTimestamp.asULL() <= 1) { UniqueWiredTigerSession session = _sessionCache->getSession(); WT_SESSION* s = session->getSession(); invariantWTOK(s->checkpoint(s, "use_timestamp=false")); + } else if (!serverGlobalParams.enableMajorityReadConcern) { + UniqueWiredTigerSession session = _sessionCache->getSession(); + WT_SESSION* s = session->getSession(); + invariantWTOK(s->checkpoint(s, "use_timestamp=false")); } else if (stableTimestamp < initialDataTimestamp) { LOG_FOR_RECOVERY(2) << "Stable timestamp is behind the initial data timestamp, skipping " @@ -449,7 +456,8 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName, _durable(durable), _ephemeral(ephemeral), _inRepairMode(repair), - _readOnly(readOnly) { + _readOnly(readOnly), + _keepDataHistory(serverGlobalParams.enableMajorityReadConcern) { boost::filesystem::path journalPath = path; journalPath /= "journal"; if (_durable) { @@ -711,6 +719,10 @@ void WiredTigerKVEngine::cleanShutdown() { invariantWTOK(_conn->reconfigure(_conn, _fileVersion.getDowngradeString().c_str())); } + if (!serverGlobalParams.enableMajorityReadConcern) { + closeConfig += "use_timestamp=false,"; + } + invariantWTOK(_conn->close(_conn, closeConfig.c_str())); _conn = nullptr; } @@ -1362,6 +1374,10 @@ MONGO_FAIL_POINT_DEFINE(WTPreserveSnapshotHistoryIndefinitely); void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp, boost::optional<Timestamp> maximumTruncationTimestamp) { + if (!_keepDataHistory) { + return; + } + if (stableTimestamp.isNull()) { return; } @@ -1449,17 +1465,24 @@ void WiredTigerKVEngine::setOldestTimestampFromStable() { newOldestTimestamp = oplogReadTimestamp; } - _setOldestTimestamp(newOldestTimestamp, false); -} - -void WiredTigerKVEngine::setOldestTimestamp(Timestamp newOldestTimestamp) { - _setOldestTimestamp(newOldestTimestamp, true); + setOldestTimestamp(newOldestTimestamp, false); } -void WiredTigerKVEngine::_setOldestTimestamp(Timestamp newOldestTimestamp, bool force) { +void WiredTigerKVEngine::setOldestTimestamp(Timestamp newOldestTimestamp, bool force) { if (MONGO_FAIL_POINT(WTPreserveSnapshotHistoryIndefinitely)) { return; } + const auto localSnapshotTimestamp = _sessionCache->snapshotManager().getLocalSnapshot(); + if (!force && localSnapshotTimestamp && newOldestTimestamp > *localSnapshotTimestamp) { + // When force is not set, lag the `oldest timestamp` to the local snapshot timestamp. + // Secondary reads are performed at the local snapshot timestamp, so advancing the oldest + // timestamp beyond the local snapshot timestamp could cause secondary reads to fail. This + // is not a problem when majority read concern is enabled, since the replication system will + // not set the stable timestamp ahead of the local snapshot timestamp. However, when + // majority read concern is disabled and the oldest timestamp is set by the oplog manager, + // the oplog manager can set the oldest timestamp ahead of the local snapshot timestamp. 
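+        // Lag the requested value: clamp the new oldest timestamp to the local snapshot timestamp.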
+ newOldestTimestamp = *localSnapshotTimestamp; + } char oldestTSConfigString["force=true,oldest_timestamp=,commit_timestamp="_sd.size() + (2 * 8 * 2) /* 2 timestamps of 16 hexadecimal digits each */ + @@ -1530,6 +1553,13 @@ void WiredTigerKVEngine::setInitialDataTimestamp(Timestamp initialDataTimestamp) } bool WiredTigerKVEngine::supportsRecoverToStableTimestamp() const { + if (!_keepDataHistory) { + return false; + } + return true; +} + +bool WiredTigerKVEngine::supportsRecoveryTimestamp() const { return true; } @@ -1602,8 +1632,8 @@ Timestamp WiredTigerKVEngine::getAllCommittedTimestamp() const { } boost::optional<Timestamp> WiredTigerKVEngine::getRecoveryTimestamp() const { - if (!supportsRecoverToStableTimestamp()) { - severe() << "WiredTiger is configured to not support recover to a stable timestamp"; + if (!supportsRecoveryTimestamp()) { + severe() << "WiredTiger is configured to not support providing a recovery timestamp"; fassertFailed(50745); } @@ -1642,10 +1672,6 @@ boost::optional<Timestamp> WiredTigerKVEngine::getLastStableRecoveryTimestamp() } Timestamp WiredTigerKVEngine::getOplogNeededForRollback() const { - // TODO: SERVER-36982 intends to allow holding onto minimum history (in front of the stable - // timestamp). If that results in never calling `StorageEngine::setStableTimestamp`, oplog - // will never be truncated. This method will need to be updated to accomodate that case, most - // simply by having this return `Timestamp::max()`. return Timestamp(_oplogNeededForRollback.load()); } @@ -1658,6 +1684,10 @@ boost::optional<Timestamp> WiredTigerKVEngine::getOplogNeededForCrashRecovery() } Timestamp WiredTigerKVEngine::getPinnedOplog() const { + if (!_keepDataHistory) { + // We use rollbackViaRefetch and take full checkpoints, so there is no need to pin oplog. + return Timestamp::max(); + } return getOplogNeededForCrashRecovery().value_or(getOplogNeededForRollback()); } @@ -1665,12 +1695,16 @@ bool WiredTigerKVEngine::supportsReadConcernSnapshot() const { return true; } +bool WiredTigerKVEngine::supportsReadConcernMajority() const { + return _keepDataHistory; +} + void WiredTigerKVEngine::startOplogManager(OperationContext* opCtx, const std::string& uri, WiredTigerRecordStore* oplogRecordStore) { stdx::lock_guard<stdx::mutex> lock(_oplogManagerMutex); if (_oplogManagerCount == 0) - _oplogManager->start(opCtx, uri, oplogRecordStore); + _oplogManager->start(opCtx, uri, oplogRecordStore, !_keepDataHistory); _oplogManagerCount++; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 32d8e93d438..6712bd35fc3 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -195,10 +195,17 @@ public: virtual void setOldestTimestampFromStable() override; - virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override; + /** + * Sets the oldest timestamp through which the storage engine must maintain snapshot + * history. If force is true, oldest will be set to the given input value, unmodified, even if + * it is backwards in time from the last oldest timestamp (accommodating initial sync).
+ */ + virtual void setOldestTimestamp(Timestamp newOldestTimestamp, bool force) override; virtual bool supportsRecoverToStableTimestamp() const override; + virtual bool supportsRecoveryTimestamp() const override; + virtual StatusWith<Timestamp> recoverToStableTimestamp(OperationContext* opCtx) override; virtual boost::optional<Timestamp> getRecoveryTimestamp() const override; @@ -229,6 +236,8 @@ public: bool isCacheUnderPressure(OperationContext* opCtx) const override; + bool supportsReadConcernMajority() const final; + // wiredtiger specific // Calls WT_CONNECTION::reconfigure on the underlying WT_CONNECTION // held by this class @@ -384,13 +393,6 @@ private: */ bool _canRecoverToStableTimestamp() const; - /** - * Sets the oldest timestamp for which the storage engine must maintain snapshot history - * through. If force is true, oldest will be set to the given input value, unmodified, even if - * it is backwards in time from the last oldest timestamp (accomodating initial sync). - */ - void _setOldestTimestamp(Timestamp newOldestTimestamp, bool force); - WT_CONNECTION* _conn; WiredTigerFileVersion _fileVersion; WiredTigerEventHandler _eventHandler; @@ -414,6 +416,14 @@ private: bool _ephemeral; // whether we are using the in-memory mode of the WT engine const bool _inRepairMode; bool _readOnly; + + // If _keepDataHistory is true, then the storage engine keeps all history after the stable + // timestamp, and WiredTigerKVEngine is responsible for advancing the oldest timestamp. If + // _keepDataHistory is false (i.e. majority reads are disabled), then we only keep history after + // the "no holes point", and WiredTigerOplogManager is responsible for advancing the oldest + // timestamp. + const bool _keepDataHistory = true; + std::unique_ptr<WiredTigerJournalFlusher> _journalFlusher; // Depends on _sizeStorer std::unique_ptr<WiredTigerCheckpointThread> _checkpointThread; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp index 63c5f4f66e1..9359d1210ae 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp @@ -52,7 +52,8 @@ MONGO_FAIL_POINT_DEFINE(WTPausePrimaryOplogDurabilityLoop); void WiredTigerOplogManager::start(OperationContext* opCtx, const std::string& uri, - WiredTigerRecordStore* oplogRecordStore) { + WiredTigerRecordStore* oplogRecordStore, + bool updateOldestTimestamp) { invariant(!_isRunning); // Prime the oplog read timestamp. 
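// The newest oplog entry already on disk, if any, supplies the initial value.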
std::unique_ptr<SeekableRecordCursor> reverseOplogCursor = @@ -80,7 +81,8 @@ void WiredTigerOplogManager::start(OperationContext* opCtx, _oplogJournalThread = stdx::thread(&WiredTigerOplogManager::_oplogJournalThreadLoop, this, WiredTigerRecoveryUnit::get(opCtx)->getSessionCache(), - oplogRecordStore); + oplogRecordStore, + updateOldestTimestamp); _isRunning = true; _shuttingDown = false; @@ -161,8 +163,9 @@ void WiredTigerOplogManager::triggerJournalFlush() { } } -void WiredTigerOplogManager::_oplogJournalThreadLoop( - WiredTigerSessionCache* sessionCache, WiredTigerRecordStore* oplogRecordStore) noexcept { +void WiredTigerOplogManager::_oplogJournalThreadLoop(WiredTigerSessionCache* sessionCache, + WiredTigerRecordStore* oplogRecordStore, + const bool updateOldestTimestamp) noexcept { Client::initThread("WTOplogJournalThread"); // This thread updates the oplog read timestamp, the timestamp used to read from the oplog with @@ -239,6 +242,11 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop( } lk.unlock(); + if (updateOldestTimestamp) { + const bool force = false; + sessionCache->getKVEngine()->setOldestTimestamp(Timestamp(newTimestamp), force); + } + // Wake up any await_data cursors and tell them more data might be visible now. oplogRecordStore->notifyCappedWaitersIfNeeded(); } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h index 819faf80907..7e8f72a775a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h @@ -52,10 +52,12 @@ public: ~WiredTigerOplogManager() {} // This method will initialize the oplog read timestamp and start the background thread that - // refreshes the value. + // refreshes the value. If `updateOldestTimestamp` is true, the background thread will also take + // responsibility for updating the oldest timestamp. void start(OperationContext* opCtx, const std::string& uri, - WiredTigerRecordStore* oplogRecordStore); + WiredTigerRecordStore* oplogRecordStore, + bool updateOldestTimestamp); void halt(); @@ -84,7 +86,8 @@ public: private: void _oplogJournalThreadLoop(WiredTigerSessionCache* sessionCache, - WiredTigerRecordStore* oplogRecordStore) noexcept; + WiredTigerRecordStore* oplogRecordStore, + const bool updateOldestTimestamp) noexcept; void _setOplogReadTimestamp(WithLock, uint64_t newTimestamp); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index b3eb7e9a9f7..04bdd8d3636 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -1820,23 +1820,31 @@ void WiredTigerRecordStore::cappedTruncateAfter(OperationContext* opCtx, Timestamp truncTs(lastKeptId.repr()); LOG(logLevel) << "Rewinding oplog visibility point to " << truncTs << " after truncation."; + if (!serverGlobalParams.enableMajorityReadConcern) { + // If majority read concern is disabled, we must set the oldest timestamp along with the + // commit timestamp. Otherwise, the commit timestamp might be set behind the oldest + // timestamp. 
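+            // Forcing the update also allows the oldest timestamp to move backwards to the +            // truncate point when necessary (see setOldestTimestamp's force semantics).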
+ const bool force = true; + _kvEngine->setOldestTimestamp(truncTs, force); + } else { + char commitTSConfigString["commit_timestamp="_sd.size() + + (8 * 2) /* 8 hexadecimal characters */ + + 1 /* trailing null */]; + auto size = std::snprintf(commitTSConfigString, + sizeof(commitTSConfigString), + "commit_timestamp=%llx", + truncTs.asULL()); + if (size < 0) { + int e = errno; + error() << "error snprintf " << errnoWithDescription(e); + fassertFailedNoTrace(40662); + } - char commitTSConfigString["commit_timestamp="_sd.size() + - (8 * 2) /* 8 hexadecimal characters */ + 1 /* trailing null */]; - auto size = std::snprintf(commitTSConfigString, - sizeof(commitTSConfigString), - "commit_timestamp=%llx", - truncTs.asULL()); - if (size < 0) { - int e = errno; - error() << "error snprintf " << errnoWithDescription(e); - fassertFailedNoTrace(40662); + invariant(static_cast<std::size_t>(size) < sizeof(commitTSConfigString)); + auto conn = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache()->conn(); + invariantWTOK(conn->set_timestamp(conn, commitTSConfigString)); } - invariant(static_cast<std::size_t>(size) < sizeof(commitTSConfigString)); - auto conn = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache()->conn(); - invariantWTOK(conn->set_timestamp(conn, commitTSConfigString)); - _kvEngine->getOplogManager()->setOplogReadTimestamp(truncTs); LOG(1) << "truncation new read timestamp: " << truncTs; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp index 011db3e3566..c7998afc045 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp @@ -33,6 +33,7 @@ #include "mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h" +#include "mongo/db/server_options.h" #include "mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h" #include "mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h" #include "mongo/db/storage/wiredtiger/wiredtiger_util.h" @@ -63,6 +64,10 @@ void WiredTigerSnapshotManager::dropAllSnapshots() { } boost::optional<Timestamp> WiredTigerSnapshotManager::getMinSnapshotForNextCommittedRead() const { + if (!serverGlobalParams.enableMajorityReadConcern) { + return boost::none; + } + stdx::lock_guard<stdx::mutex> lock(_committedSnapshotMutex); return _committedSnapshot; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp index 004062a6d04..2188df7da8c 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp @@ -38,6 +38,7 @@ #include "mongo/base/simple_string_data_comparator.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/concurrency/write_conflict_exception.h" +#include "mongo/db/server_options.h" #include "mongo/db/server_parameters.h" #include "mongo/db/snapshot_window_options.h" #include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h" @@ -531,6 +532,10 @@ bool WiredTigerUtil::useTableLogging(NamespaceString ns, bool replEnabled) { return true; } + if (!serverGlobalParams.enableMajorityReadConcern) { + return true; + } + // Of the replica set configurations: if (ns.db() != "local") { // All replicated collections are not logged. 
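Taken together, the storage-layer changes above surface enableMajorityReadConcern through serverStatus: storage_init.cpp now reports supportsCommittedReads from supportsReadConcernMajority(). A minimal jstest-style sketch of how this looks to a client (not part of this commit; it assumes only the stock ReplSetTest helper and the serverStatus fields touched by this patch):

    // Start a replica set with majority read concern disabled on every node, using the same
    // nodeOptions the new rollback-via-refetch test above uses.
    const rst = new ReplSetTest({nodes: 2, nodeOptions: {enableMajorityReadConcern: "false"}});
    rst.startSet();
    rst.initiate();

    // With enableMajorityReadConcern=false, WiredTiger reports that committed reads are
    // unsupported, so readConcern "majority" is unavailable on this deployment.
    const storageSection = rst.getPrimary().getDB("admin").serverStatus().storageEngine;
    assert.eq(false, storageSection.supportsCommittedReads);

    rst.stopSet();

This is the same signal secondary_reads_timestamp_visibility.js uses above to drop "majority" from the read-concern levels it exercises.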
diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript
index 52b834e2e12..61fd4c1b8a6 100644
--- a/src/mongo/dbtests/SConscript
+++ b/src/mongo/dbtests/SConscript
@@ -11,6 +11,7 @@ env.Library(
         "framework_options.cpp",
     ],
     LIBDEPS=[
+        '$BUILD_DIR/mongo/db/server_options_core',
         '$BUILD_DIR/mongo/db/storage/storage_options',
         '$BUILD_DIR/mongo/util/options_parser/options_parser_init',
         '$BUILD_DIR/mongo/unittest/unittest',
diff --git a/src/mongo/dbtests/framework_options.cpp b/src/mongo/dbtests/framework_options.cpp
index d9929c5842c..4514ebe5da4 100644
--- a/src/mongo/dbtests/framework_options.cpp
+++ b/src/mongo/dbtests/framework_options.cpp
@@ -95,6 +95,14 @@ Status addTestFrameworkOptions(moe::OptionSection* options) {
     options->addOptionChaining(
         "perfHist", "perfHist", moe::Unsigned, "number of back runs of perf stats to display");
 
+    // If set to true, storage engine maintains the data history. Else, it won't maintain the data
+    // history. This setting applies only to 'wiredTiger' storage engine.
+    options
+        ->addOptionChaining("replication.enableMajorityReadConcern",
+                            "enableMajorityReadConcern",
+                            moe::Bool,
+                            "enables majority readConcern")
+        .setDefault(moe::Value(true));
     options
         ->addOptionChaining(
             "storage.engine", "storageEngine", moe::String, "what storage engine to use")
@@ -188,6 +196,12 @@ Status storeTestFrameworkOptions(const moe::Environment& params,
 
     storageGlobalParams.engine = params["storage.engine"].as<string>();
 
+    if (storageGlobalParams.engine == "wiredTiger" &&
+        params.count("replication.enableMajorityReadConcern")) {
+        serverGlobalParams.enableMajorityReadConcern =
+            params["replication.enableMajorityReadConcern"].as<bool>();
+    }
+
     if (params.count("suites")) {
         frameworkGlobalParams.suites = params["suites"].as<vector<string>>();
     }
diff --git a/src/mongo/dbtests/repltests.cpp b/src/mongo/dbtests/repltests.cpp
index a2d6e66b2c7..0a939bd3a44 100644
--- a/src/mongo/dbtests/repltests.cpp
+++ b/src/mongo/dbtests/repltests.cpp
@@ -234,6 +234,15 @@ protected:
             }
         }
         {
+            if (!serverGlobalParams.enableMajorityReadConcern) {
+                if (ops.size() > 0) {
+                    if (auto tsElem = ops.front()["ts"]) {
+                        _opCtx.getServiceContext()->getStorageEngine()->setOldestTimestamp(
+                            tsElem.timestamp());
+                    }
+                }
+            }
+
             OldClientContext ctx(&_opCtx, ns());
             for (vector<BSONObj>::iterator i = ops.begin(); i != ops.end(); ++i) {
                 if (0) {
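The test-framework hunks wire the new option through dbtests: framework_options.cpp registers replication.enableMajorityReadConcern (defaulting to true) and copies it into serverGlobalParams only when the engine is wiredTiger. A reduced sketch of that store step follows, using a plain map as a hypothetical stand-in for the parsed-options environment (the real code uses moe::Environment).

#include <map>
#include <string>

// Hypothetical stand-ins for the parsed options and the server globals.
using ParsedOptions = std::map<std::string, bool>;
struct ServerGlobalParams {
    bool enableMajorityReadConcern = true;
};

// Mirrors the storeTestFrameworkOptions() logic above: the flag only takes
// effect for wiredTiger, since other engines keep no timestamp history.
void storeMajorityReadConcern(const std::string& engine,
                              const ParsedOptions& params,
                              ServerGlobalParams* serverGlobalParams) {
    auto it = params.find("replication.enableMajorityReadConcern");
    if (engine == "wiredTiger" && it != params.end()) {
        serverGlobalParams->enableMajorityReadConcern = it->second;
    }
}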
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index bb26239575c..f7abe822a24 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -2432,7 +2432,8 @@ class CreateCollectionWithSystemIndex : public StorageTimestampTest {
 public:
     void run() {
         // Only run on 'wiredTiger'. No other storage engines to-date support timestamp writes.
-        if (mongo::storageGlobalParams.engine != "wiredTiger") {
+        if (!(mongo::storageGlobalParams.engine == "wiredTiger" &&
+              mongo::serverGlobalParams.enableMajorityReadConcern)) {
             return;
         }
 
@@ -2688,7 +2689,8 @@ public:
     void setupTests() {
         // Only run on storage engines that support snapshot reads.
         auto storageEngine = cc().getServiceContext()->getStorageEngine();
-        if (!storageEngine->supportsReadConcernSnapshot()) {
+        if (!storageEngine->supportsReadConcernSnapshot() ||
+            !mongo::serverGlobalParams.enableMajorityReadConcern) {
             unittest::log() << "Skipping this test suite because storage engine "
                             << storageGlobalParams.engine << " does not support timestamp writes.";
             return;
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index 861435ae606..99f0d18a807 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -349,8 +349,8 @@ void runCommand(OperationContext* opCtx,
     }
 
     boost::optional<ScopedRouterSession> scopedSession;
-    auto osi = initializeOperationSessionInfo(
-        opCtx, request.body, command->requiresAuth(), true, true, true);
+    auto osi =
+        initializeOperationSessionInfo(opCtx, request.body, command->requiresAuth(), true, true);
 
     try {
         if (osi && osi->getAutocommit()) {
diff --git a/src/mongo/shell/servers.js b/src/mongo/shell/servers.js
index 317dad5755b..5fcb9e05109 100644
--- a/src/mongo/shell/servers.js
+++ b/src/mongo/shell/servers.js
@@ -649,20 +649,6 @@ var MongoRunner, _startMongod, startMongoProgram, runMongoProgram, startMongoPro
             opts.auditDestination = jsTestOptions().auditDestination;
         }
 
-        if (opts.hasOwnProperty("enableMajorityReadConcern")) {
-            // opts.enableMajorityReadConcern, if set, must be an empty string
-            if (opts.enableMajorityReadConcern !== "") {
-                throw new Error("The enableMajorityReadConcern option must be an empty string if " +
-                                "it is specified");
-            }
-        } else if (jsTestOptions().enableMajorityReadConcern !== undefined) {
-            if (jsTestOptions().enableMajorityReadConcern !== "") {
-                throw new Error("The enableMajorityReadConcern option must be an empty string if " +
-                                "it is specified");
-            }
-            opts.enableMajorityReadConcern = "";
-        }
-
         if (opts.noReplSet)
             opts.replSet = null;
         if (opts.arbiter)
@@ -1170,6 +1156,11 @@ var MongoRunner, _startMongod, startMongoProgram, runMongoProgram, startMongoPro
     if (programName.endsWith('mongod')) {
         if (jsTest.options().storageEngine === "wiredTiger" || !jsTest.options().storageEngine) {
+            if (!argArrayContains("--enableMajorityReadConcern")) {
+                argArray.push(
+                    ...['--enableMajorityReadConcern',
+                        jsTest.options().enableMajorityReadConcern.toString()]);
+            }
             if (jsTest.options().storageEngineCacheSizeGB &&
                 !argArrayContains('--wiredTigerCacheSizeGB')) {
                 argArray.push(...['--wiredTigerCacheSizeGB',
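The servers.js hunks drop the old rule that enableMajorityReadConcern had to be an empty string and instead always pass an explicit --enableMajorityReadConcern true|false to mongod when the caller has not already supplied one. Below is a sketch of that append-if-absent pattern, written in C++ rather than the shell's JavaScript, over a hypothetical argument vector.

#include <algorithm>
#include <string>
#include <vector>

// Mirrors the argArrayContains()/argArray.push() logic above: inject the flag
// only when the test has not already chosen a value for it.
void appendMajorityReadConcernFlag(std::vector<std::string>* argArray,
                                   bool enableMajorityReadConcern) {
    const std::string flag = "--enableMajorityReadConcern";
    if (std::find(argArray->begin(), argArray->end(), flag) == argArray->end()) {
        argArray->push_back(flag);
        argArray->push_back(enableMajorityReadConcern ? "true" : "false");
    }
}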