summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--README.third_party.md68
-rw-r--r--buildscripts/resmokelib/core/process.py2
-rw-r--r--etc/backports_required_for_multiversion_tests.yml10
-rw-r--r--etc/evergreen.yml19
-rw-r--r--jstests/auth/repl.js16
-rw-r--r--jstests/concurrency/fsm_workloads/auth_privilege_consistency.js2
-rw-r--r--jstests/core/resume_query_from_non_existent_record.js66
-rw-r--r--jstests/core/shell1.js20
-rw-r--r--jstests/core/views/views_all_commands.js1
-rw-r--r--jstests/hooks/validate_collections.js2
-rw-r--r--jstests/libs/kill_sessions.js6
-rw-r--r--jstests/libs/override_methods/validate_collections_on_shutdown.js4
-rw-r--r--jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js2
-rw-r--r--jstests/noPassthrough/change_stream_error_label.js8
-rw-r--r--jstests/noPassthrough/out_merge_on_secondary_killop.js2
-rw-r--r--jstests/noPassthrough/server_transaction_metrics_secondary.js4
-rw-r--r--jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js2
-rw-r--r--jstests/noPassthrough/stepdown_query.js6
-rw-r--r--jstests/noPassthrough/timestamp_index_builds.js2
-rw-r--r--jstests/noPassthroughWithMongod/geo_polygon.js4
-rw-r--r--jstests/noPassthroughWithMongod/indexbg_interrupts.js3
-rw-r--r--jstests/noPassthroughWithMongod/no_balance_collection.js4
-rw-r--r--jstests/noPassthroughWithMongod/replReads.js6
-rw-r--r--jstests/replsets/auth1.js4
-rw-r--r--jstests/replsets/auth_no_pri.js2
-rw-r--r--jstests/replsets/awaitable_ismaster_fcv_change.js2
-rw-r--r--jstests/replsets/awaitdata_getmore_new_last_committed_optime.js2
-rw-r--r--jstests/replsets/buildindexes.js2
-rw-r--r--jstests/replsets/catchup.js2
-rw-r--r--jstests/replsets/db_reads_while_recovering_all_commands.js1
-rw-r--r--jstests/replsets/disconnect_on_legacy_write_to_secondary.js8
-rw-r--r--jstests/replsets/explain_slaveok.js107
-rw-r--r--jstests/replsets/fsync_lock_read_secondaries.js2
-rw-r--r--jstests/replsets/groupAndMapReduce.js4
-rw-r--r--jstests/replsets/initial_sync4.js2
-rw-r--r--jstests/replsets/initial_sync_ambiguous_index.js2
-rw-r--r--jstests/replsets/initial_sync_applier_error.js2
-rw-r--r--jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js2
-rw-r--r--jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js2
-rw-r--r--jstests/replsets/initial_sync_invalid_views.js2
-rw-r--r--jstests/replsets/initial_sync_move_forward.js2
-rw-r--r--jstests/replsets/initial_sync_oplog_rollover.js2
-rw-r--r--jstests/replsets/initial_sync_replSetGetStatus.js2
-rw-r--r--jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js2
-rw-r--r--jstests/replsets/initial_sync_test_fixture_test.js2
-rw-r--r--jstests/replsets/initial_sync_uuid_not_found.js2
-rw-r--r--jstests/replsets/initial_sync_with_write_load.js4
-rw-r--r--jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js2
-rw-r--r--jstests/replsets/libs/initial_sync_update_missing_doc.js2
-rw-r--r--jstests/replsets/libs/rollback_resumable_index_build.js40
-rw-r--r--jstests/replsets/libs/secondary_reads_test.js4
-rw-r--r--jstests/replsets/maintenance2.js2
-rw-r--r--jstests/replsets/no_disconnect_on_stepdown.js2
-rw-r--r--jstests/replsets/not_master_unacknowledged_write.js16
-rw-r--r--jstests/replsets/plan_cache_slaveok.js12
-rw-r--r--jstests/replsets/prepare_transaction_read_at_cluster_time.js2
-rw-r--r--jstests/replsets/print_secondary_replication_info_unreachable_secondary.js25
-rw-r--r--jstests/replsets/quiesce_mode.js2
-rw-r--r--jstests/replsets/read_committed_after_rollback.js2
-rw-r--r--jstests/replsets/read_committed_no_snapshots.js4
-rw-r--r--jstests/replsets/read_operations_during_rollback.js2
-rw-r--r--jstests/replsets/read_operations_during_step_down.js2
-rw-r--r--jstests/replsets/read_operations_during_step_up.js10
-rw-r--r--jstests/replsets/reconstruct_prepared_transactions_initial_sync.js2
-rw-r--r--jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js2
-rw-r--r--jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js2
-rw-r--r--jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js2
-rw-r--r--jstests/replsets/recover_prepared_transactions_startup_secondary_application.js2
-rw-r--r--jstests/replsets/rename_collection_temp.js2
-rw-r--r--jstests/replsets/replset1.js4
-rw-r--r--jstests/replsets/replset2.js2
-rw-r--r--jstests/replsets/replset5.js4
-rw-r--r--jstests/replsets/replset6.js2
-rw-r--r--jstests/replsets/resync_majority_member.js4
-rw-r--r--jstests/replsets/rollback_auth.js4
-rw-r--r--jstests/replsets/rollback_creates_rollback_directory.js4
-rw-r--r--jstests/replsets/rollback_crud_op_sequences.js4
-rw-r--r--jstests/replsets/rollback_ddl_op_sequences.js4
-rw-r--r--jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js12
-rw-r--r--jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js16
-rw-r--r--jstests/replsets/rollback_resumable_index_build_complete.js8
-rw-r--r--jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js39
-rw-r--r--jstests/replsets/rslib.js4
-rw-r--r--jstests/replsets/server8070.js4
-rw-r--r--jstests/replsets/slavedelay3.js2
-rw-r--r--jstests/replsets/slaveok_read_pref.js20
-rw-r--r--jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js2
-rw-r--r--jstests/replsets/step_down_on_secondary.js2
-rw-r--r--jstests/replsets/tenant_migration_donor_state_machine.js26
-rw-r--r--jstests/replsets/transactions_only_allowed_on_primaries.js4
-rw-r--r--jstests/sharding/agg_mongos_slaveok.js6
-rw-r--r--jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js2
-rw-r--r--jstests/sharding/auth_repl.js12
-rw-r--r--jstests/sharding/auth_slaveok_routing.js8
-rw-r--r--jstests/sharding/autodiscover_config_rs_from_secondary.js2
-rw-r--r--jstests/sharding/balance_repl.js2
-rw-r--r--jstests/sharding/chunk_history_window.js28
-rw-r--r--jstests/sharding/chunk_operations_invalidate_single_shard.js1
-rw-r--r--jstests/sharding/cluster_create_indexes_always_routes_through_primary.js4
-rw-r--r--jstests/sharding/config_rs_no_primary.js4
-rw-r--r--jstests/sharding/count_config_servers.js2
-rw-r--r--jstests/sharding/count_slaveok.js14
-rw-r--r--jstests/sharding/error_propagation.js2
-rw-r--r--jstests/sharding/mongos_forwards_api_parameters_to_shards.js213
-rw-r--r--jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js28
-rw-r--r--jstests/sharding/mongos_rs_shard_failure_tolerance.js46
-rw-r--r--jstests/sharding/query/explain_read_pref.js2
-rw-r--r--jstests/sharding/read_pref.js2
-rw-r--r--jstests/sharding/read_pref_cmd.js4
-rw-r--r--jstests/sharding/read_write_concern_defaults_application.js1
-rw-r--r--jstests/sharding/recovering_slaveok.js16
-rw-r--r--jstests/sharding/session_info_in_oplog.js4
-rw-r--r--jstests/sharding/shard_aware_init_secondaries.js4
-rw-r--r--jstests/sharding/shard_identity_config_update.js4
-rw-r--r--jstests/sharding/shard_identity_rollback.js4
-rw-r--r--jstests/sharding/shard_insert_getlasterror_w2.js2
-rw-r--r--jstests/slow1/replsets_priority1.js4
-rw-r--r--jstests/ssl/mongo_uri_secondaries.js2
-rw-r--r--src/mongo/client/dbclient_base.cpp2
-rw-r--r--src/mongo/db/SConscript27
-rw-r--r--src/mongo/db/api_parameters.cpp79
-rw-r--r--src/mongo/db/api_parameters.h122
-rw-r--r--src/mongo/db/api_parameters.idl (renamed from src/mongo/db/initialize_api_parameters.idl)0
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp4
-rw-r--r--src/mongo/db/catalog_raii.h2
-rw-r--r--src/mongo/db/catalog_raii_test.cpp6
-rw-r--r--src/mongo/db/clientcursor.h2
-rw-r--r--src/mongo/db/command_generic_argument.cpp6
-rw-r--r--src/mongo/db/commands.cpp8
-rw-r--r--src/mongo/db/commands.h13
-rw-r--r--src/mongo/db/commands/test_api_version_2_commands.cpp2
-rw-r--r--src/mongo/db/commands/test_deprecation_command.cpp2
-rw-r--r--src/mongo/db/db_raii.cpp35
-rw-r--r--src/mongo/db/db_raii_test.cpp82
-rw-r--r--src/mongo/db/dbdirectclient.cpp1
-rw-r--r--src/mongo/db/exec/sbe/expressions/expression.cpp3
-rw-r--r--src/mongo/db/exec/sbe/stages/loop_join.h3
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp17
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.h2
-rw-r--r--src/mongo/db/free_mon/free_mon_storage.cpp4
-rw-r--r--src/mongo/db/ftdc/collector.cpp5
-rw-r--r--src/mongo/db/index_build_entry_helpers.cpp3
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp11
-rw-r--r--src/mongo/db/initialize_api_parameters.cpp53
-rw-r--r--src/mongo/db/initialize_api_parameters.h66
-rw-r--r--src/mongo/db/mongod_options.cpp3
-rw-r--r--src/mongo/db/namespace_string.cpp12
-rw-r--r--src/mongo/db/namespace_string.h5
-rw-r--r--src/mongo/db/pipeline/document_source_writer.h2
-rw-r--r--src/mongo/db/pipeline/expression_context.h4
-rw-r--r--src/mongo/db/pipeline/process_interface/common_process_interface.cpp14
-rw-r--r--src/mongo/db/pipeline/sharded_agg_helpers.h8
-rw-r--r--src/mongo/db/query/SConscript1
-rw-r--r--src/mongo/db/query/optimizer/SConscript1
-rw-r--r--src/mongo/db/query/optimizer/algebra/operator.h305
-rw-r--r--src/mongo/db/query/optimizer/algebra/polyvalue.h381
-rw-r--r--src/mongo/db/query/optimizer/memo.cpp43
-rw-r--r--src/mongo/db/query/optimizer/memo.h (renamed from src/mongo/db/query/optimizer/visitor.h)24
-rw-r--r--src/mongo/db/query/optimizer/node.cpp216
-rw-r--r--src/mongo/db/query/optimizer/node.h150
-rw-r--r--src/mongo/db/query/optimizer/optimizer_test.cpp20
-rw-r--r--src/mongo/db/query/sbe_stage_builder_coll_scan.cpp68
-rw-r--r--src/mongo/db/query/sbe_stage_builder_expression.cpp100
-rw-r--r--src/mongo/db/repl/SConscript1
-rw-r--r--src/mongo/db/repl/bgsync.cpp10
-rw-r--r--src/mongo/db/repl/collection_bulk_loader_impl.cpp2
-rw-r--r--src/mongo/db/repl/oplog_applier_impl.cpp5
-rw-r--r--src/mongo/db/repl/oplog_batcher.cpp16
-rw-r--r--src/mongo/db/repl/primary_only_service.cpp12
-rw-r--r--src/mongo/db/repl/primary_only_service.h11
-rw-r--r--src/mongo/db/repl/primary_only_service_test.cpp34
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp3
-rw-r--r--src/mongo/db/repl/replication_info.cpp24
-rw-r--r--src/mongo/db/repl/replication_recovery.cpp4
-rw-r--r--src/mongo/db/repl/storage_interface_impl.cpp14
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.cpp7
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.h6
-rw-r--r--src/mongo/db/repl/transaction_oplog_application.cpp14
-rw-r--r--src/mongo/db/s/README.md1
-rw-r--r--src/mongo/db/s/SConscript1
-rw-r--r--src/mongo/db/s/config/configsvr_drop_collection_command.cpp12
-rw-r--r--src/mongo/db/s/config/configsvr_drop_database_command.cpp10
-rw-r--r--src/mongo/db/s/config/configsvr_shard_collection_command.cpp2
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp20
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp5
-rw-r--r--src/mongo/db/s/migration_source_manager.cpp2
-rw-r--r--src/mongo/db/s/migration_util_test.cpp8
-rw-r--r--src/mongo/db/s/set_shard_version_command.cpp14
-rw-r--r--src/mongo/db/s/shard_filtering_metadata_refresh.cpp15
-rw-r--r--src/mongo/db/s/shard_filtering_metadata_refresh.h3
-rw-r--r--src/mongo/db/s/shard_key_util.cpp23
-rw-r--r--src/mongo/db/s/shard_key_util.h4
-rw-r--r--src/mongo/db/service_entry_point_common.cpp20
-rw-r--r--src/mongo/db/stats/api_version_metrics.h4
-rw-r--r--src/mongo/db/storage/SConscript15
-rw-r--r--src/mongo/db/storage/checkpointer.cpp168
-rw-r--r--src/mongo/db/storage/checkpointer.h114
-rw-r--r--src/mongo/db/storage/control/storage_control.cpp15
-rw-r--r--src/mongo/db/storage/durable_catalog.h5
-rw-r--r--src/mongo/db/storage/durable_catalog_impl.cpp15
-rw-r--r--src/mongo/db/storage/durable_catalog_impl.h5
-rw-r--r--src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h4
-rw-r--r--src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp4
-rw-r--r--src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp1
-rw-r--r--src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h2
-rw-r--r--src/mongo/db/storage/kv/kv_engine.h14
-rw-r--r--src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp4
-rw-r--r--src/mongo/db/storage/recovery_unit.h10
-rw-r--r--src/mongo/db/storage/snapshot_helper.cpp80
-rw-r--r--src/mongo/db/storage/snapshot_helper.h4
-rw-r--r--src/mongo/db/storage/storage_engine.h19
-rw-r--r--src/mongo/db/storage/storage_engine_impl.cpp92
-rw-r--r--src/mongo/db/storage/storage_engine_impl.h19
-rw-r--r--src/mongo/db/storage/storage_engine_mock.h8
-rw-r--r--src/mongo/db/storage/storage_options.cpp1
-rw-r--r--src/mongo/db/storage/storage_options.h4
-rw-r--r--src/mongo/db/storage/wiredtiger/SConscript1
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp5
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h2
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp330
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h8
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp32
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp4
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h2
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp12
-rw-r--r--src/mongo/db/transaction_participant.cpp5
-rw-r--r--src/mongo/db/transaction_participant.h2
-rw-r--r--src/mongo/dbtests/querytests.cpp2
-rw-r--r--src/mongo/dbtests/storage_timestamp_tests.cpp12
-rw-r--r--src/mongo/executor/SConscript1
-rw-r--r--src/mongo/executor/remote_command_request.cpp7
-rw-r--r--src/mongo/s/catalog_cache.cpp762
-rw-r--r--src/mongo/s/catalog_cache.h352
-rw-r--r--src/mongo/s/catalog_cache_refresh_test.cpp16
-rw-r--r--src/mongo/s/catalog_cache_test.cpp129
-rw-r--r--src/mongo/s/catalog_cache_test_fixture.cpp20
-rw-r--r--src/mongo/s/catalog_cache_test_fixture.h11
-rw-r--r--src/mongo/s/chunk_manager.cpp108
-rw-r--r--src/mongo/s/chunk_manager.h152
-rw-r--r--src/mongo/s/chunk_manager_refresh_bm.cpp13
-rw-r--r--src/mongo/s/commands/SConscript1
-rw-r--r--src/mongo/s/commands/cluster_drop_cmd.cpp4
-rw-r--r--src/mongo/s/commands/cluster_merge_chunks_cmd.cpp6
-rw-r--r--src/mongo/s/commands/cluster_move_chunk_cmd.cpp11
-rw-r--r--src/mongo/s/commands/cluster_shard_collection_cmd.cpp4
-rw-r--r--src/mongo/s/commands/cluster_split_cmd.cpp6
-rw-r--r--src/mongo/s/commands/flush_router_config_cmd.cpp2
-rw-r--r--src/mongo/s/commands/strategy.cpp16
-rw-r--r--src/mongo/s/comparable_chunk_version_test.cpp133
-rw-r--r--src/mongo/s/comparable_database_version_test.cpp8
-rw-r--r--src/mongo/s/query/async_results_merger.cpp3
-rw-r--r--src/mongo/s/query/cluster_client_cursor.h2
-rw-r--r--src/mongo/s/query/cluster_client_cursor_params.h2
-rw-r--r--src/mongo/s/query/cluster_find.cpp17
-rw-r--r--src/mongo/s/request_types/set_shard_version_request.h1
-rw-r--r--src/mongo/s/sessions_collection_sharded.cpp2
-rw-r--r--src/mongo/s/sharding_test_fixture_common.cpp6
-rw-r--r--src/mongo/s/sharding_test_fixture_common.h3
-rw-r--r--src/mongo/s/transaction_router.cpp6
-rw-r--r--src/mongo/s/transaction_router.h2
-rw-r--r--src/mongo/s/transaction_router_test.cpp120
-rw-r--r--src/mongo/s/write_ops/chunk_manager_targeter.cpp2
-rw-r--r--src/mongo/scripting/engine.cpp1
-rw-r--r--src/mongo/shell/collection.js4
-rw-r--r--src/mongo/shell/db.js33
-rw-r--r--src/mongo/shell/mongo.js4
-rw-r--r--src/mongo/transport/service_executor_fixed.cpp4
-rw-r--r--src/mongo/util/concurrency/thread_pool.cpp429
-rw-r--r--src/mongo/util/concurrency/thread_pool.h181
-rw-r--r--src/mongo/util/fail_point.cpp35
-rw-r--r--src/mongo/util/fail_point.h33
-rw-r--r--src/mongo/util/fail_point_test.cpp62
-rw-r--r--src/mongo/util/invalidating_lru_cache.h53
-rw-r--r--src/mongo/util/invalidating_lru_cache_test.cpp21
-rw-r--r--src/mongo/util/read_through_cache.h18
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py50
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/wttest.py10
280 files changed, 4509 insertions, 2766 deletions
diff --git a/.gitignore b/.gitignore
index 4c35f527d92..7cb6d71c0d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -176,6 +176,9 @@ src/mongo/embedded/java/aar/build/
src/mongo/embedded/java/jar/build/
local.properties
+# clangd language server
+.clangd/
+
compile_commands.json
generated_resmoke_config
selected_tests_config
diff --git a/README.third_party.md b/README.third_party.md
index 11ade4b1b24..b91064f9d7a 100644
--- a/README.third_party.md
+++ b/README.third_party.md
@@ -19,40 +19,40 @@ not authored by MongoDB, and has a license which requires reproduction,
a notice will be included in
`THIRD-PARTY-NOTICES`.
-| Name | License | Upstream Version | Vendored Version | Emits persisted data | Distributed in Release Binaries |
-| ---------------------------| ----------------- | ---------------- | ------------------| :------------------: | :-----------------------------: |
-| [abseil-cpp] | Apache-2.0 | | 070f6e47b3 | | ✗ |
-| Aladdin MD5 | Zlib | | Unknown | ✗ | ✗ |
-| [ASIO] | BSL-1.0 | 1.16.1 | b0926b61b0 | | ✗ |
-| [benchmark] | Apache-2.0 | 1.5.1 | 1.5.0 | | |
-| [Boost] | BSL-1.0 | 1.73.0 | 1.70.0 | | ✗ |
-| [fmt] | BSD-2-Clause | 6.2.1 | 6.1.1 | | ✗ |
-| [GPerfTools] | BSD-3-Clause | 2.8 | 2.8 | | ✗ |
-| [ICU4] | ICU | 67.1 | 57.1 | ✗ | ✗ |
-| [Intel Decimal FP Library] | BSD-3-Clause | 2.0 Update 2 | 2.0 Update 1 | | ✗ |
-| [JSON-Schema-Test-Suite] | MIT | | 728066f9c5 | | |
-| [kms-message] | | | 75e391a037 | | ✗ |
-| [libstemmer] | BSD-3-Clause | | Unknown | ✗ | ✗ |
-| [linenoise] | BSD-3-Clause | | Unknown + changes | | ✗ |
-| [MozJS] | MPL-2.0 | ESR 68.9 | ESR 60.3.0 | | ✗ |
-| [MurmurHash3] | Public Domain | | Unknown + changes | ✗ | ✗ |
-| [ocspbuilder] | MIT | 0.10.2 | 0.10.2 | | |
-| [ocspresponder] | Apache-2.0 | 0.5.0 | 0.5.0 | | |
-| [peglib] | MIT | 0.1.12 | 0.1.12 | | ✗ |
-| [Pcre] | BSD-3-Clause | 8.44 | 8.42 | | ✗ |
-| [S2] | Apache-2.0 | | Unknown | ✗ | ✗ |
-| [SafeInt] | MIT | 3.24 | 3.23 | | |
-| [scons] | MIT | 3.1.2 | 3.1.2 | | |
-| [Snappy] | BSD-3-Clause | 1.1.8 | 1.1.7 | ✗ | ✗ |
-| [timelib] | MIT | 2018.03 | 2018.01 | | ✗ |
-| [TomCrypt] | Public Domain | 1.18.2 | 1.18.2 | ✗ | ✗ |
-| [Unicode] | Unicode-DFS-2015 | 13.0.0 | 8.0.0 | ✗ | ✗ |
-| [Valgrind] | BSD-3-Clause<sup>\[<a href="#note_vg" id="ref_vg">1</a>]</sup> | 3.16.1 | 3.11.0 | | ✗ |
-| [variant] | BSL-1.0 | 1.4.0 | 1.4.0 | | ✗ |
-| [wiredtiger] | | | <sup>\[<a href="#note_wt" id="ref_wt">2</a>]</sup> | ✗ | ✗ |
-| [yaml-cpp] | MIT | 0.6.3 | 0.6.2 | | ✗ |
-| [Zlib] | Zlib | 1.2.11 | 1.2.11 | ✗ | ✗ |
-| [Zstandard] | BSD-3-Clause | 1.4.5 | 1.4.4 | ✗ | ✗ |
+| Name | License | Vendored Version | Emits persisted data | Distributed in Release Binaries |
+| ---------------------------| ----------------- | ------------------| :------------------: | :-----------------------------: |
+| [abseil-cpp] | Apache-2.0 | 070f6e47b3 | | ✗ |
+| Aladdin MD5 | Zlib | Unknown | ✗ | ✗ |
+| [ASIO] | BSL-1.0 | b0926b61b0 | | ✗ |
+| [benchmark] | Apache-2.0 | 1.5.0 | | |
+| [Boost] | BSL-1.0 | 1.70.0 | | ✗ |
+| [fmt] | BSD-2-Clause | 6.1.1 | | ✗ |
+| [GPerfTools] | BSD-3-Clause | 2.8 | | ✗ |
+| [ICU4] | ICU | 57.1 | ✗ | ✗ |
+| [Intel Decimal FP Library] | BSD-3-Clause | 2.0 Update 1 | | ✗ |
+| [JSON-Schema-Test-Suite] | MIT | 728066f9c5 | | |
+| [kms-message] | | 75e391a037 | | ✗ |
+| [libstemmer] | BSD-3-Clause | Unknown | ✗ | ✗ |
+| [linenoise] | BSD-3-Clause | Unknown + changes | | ✗ |
+| [MozJS] | MPL-2.0 | ESR 60.3.0 | | ✗ |
+| [MurmurHash3] | Public Domain | Unknown + changes | ✗ | ✗ |
+| [ocspbuilder] | MIT | 0.10.2 | | |
+| [ocspresponder] | Apache-2.0 | 0.5.0 | | |
+| [peglib] | MIT | 0.1.12 | | ✗ |
+| [Pcre] | BSD-3-Clause | 8.42 | | ✗ |
+| [S2] | Apache-2.0 | Unknown | ✗ | ✗ |
+| [SafeInt] | MIT | 3.23 | | |
+| [scons] | MIT | 3.1.2 | | |
+| [Snappy] | BSD-3-Clause | 1.1.7 | ✗ | ✗ |
+| [timelib] | MIT | 2018.01 | | ✗ |
+| [TomCrypt] | Public Domain | 1.18.2 | ✗ | ✗ |
+| [Unicode] | Unicode-DFS-2015 | 8.0.0 | ✗ | ✗ |
+| [Valgrind] | BSD-3-Clause<sup>\[<a href="#note_vg" id="ref_vg">1</a>]</sup> | 3.11.0 | | ✗ |
+| [variant] | BSL-1.0 | 1.4.0 | | ✗ |
+| [wiredtiger] | | <sup>\[<a href="#note_wt" id="ref_wt">2</a>]</sup> | ✗ | ✗ |
+| [yaml-cpp] | MIT | 0.6.2 | | ✗ |
+| [Zlib] | Zlib | 1.2.11 | ✗ | ✗ |
+| [Zstandard] | BSD-3-Clause | 1.4.4 | ✗ | ✗ |
[abseil-cpp]: https://github.com/abseil/abseil-cpp
[ASIO]: https://github.com/chriskohlhoff/asio
diff --git a/buildscripts/resmokelib/core/process.py b/buildscripts/resmokelib/core/process.py
index 2c458e4320f..b3d19bf599a 100644
--- a/buildscripts/resmokelib/core/process.py
+++ b/buildscripts/resmokelib/core/process.py
@@ -128,7 +128,7 @@ class Process(object):
logger=self.logger.name.replace('/', '-'),
process=os.path.basename(self.args[0]), pid=self.pid, t=now_str)
recorder_args = [
- _config.UNDO_RECORDER_PATH, "--thread-fuzzing", "-p",
+ _config.UNDO_RECORDER_PATH, "-p",
str(self.pid), "-o", recorder_output_file
]
self._recorder = subprocess.Popen(recorder_args, bufsize=buffer_size, env=self.env,
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 322f0dad785..ae045bcedf4 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -66,6 +66,16 @@ all:
test_file: jstests/replsets/secondaryOk_slaveOk_aliases.js
- ticket: SERVER-43902
test_file: jstests/sharding/scaled_collection_stats.js
+ - ticket: SERVER-50416
+ test_file: jstests/replsets/disconnect_on_legacy_write_to_secondary.js
+ - ticket: SERVER-50417
+ test_file: jstests/replsets/no_disconnect_on_stepdown.js
+ - ticket: SERVER-50417
+ test_file: jstests/replsets/not_master_unacknowledged_write.js
+ - ticket: SERVER-50417
+ test_file: jstests/replsets/read_operations_during_step_down.js
+ - ticket: SERVER-50417
+ test_file: jstests/replsets/read_operations_during_step_up.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 3935a97758d..de9f17fc7f7 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -12751,6 +12751,7 @@ buildvariants:
num_scons_link_jobs_available: 0.99
record_with: --recordWith /opt/undodb5/bin/live-record
exec_timeout_secs: 14400 # 4 hours
+ test_flags: --excludeWithAnyTags=requires_fast_memory
tasks:
- name: compile_without_package_TG
- name: .jscore .common
@@ -12764,6 +12765,24 @@ buildvariants:
- name: sharded_jscore_txns_sharded_collections
- name: sharding_jscore_passthrough
- name: sharding_jscore_op_query_passthrough
+ - name: aggregation
+ - name: aggregation_auth
+ - name: aggregation_disabled_optimization
+ - name: aggregation_ese
+ - name: aggregation_ese_gcm
+ - name: aggregation_facet_unwind_passthrough
+ - name: aggregation_slot_based_execution
+ - name: .auth .gle
+ - name: .jscore .encrypt
+ - name: noPassthroughWithMongod_gen
+ - name: parallel_compatibility
+ - name: serial_run
+ - name: session_jscore_passthrough
+ - name: .aggfuzzer
+ - name: query_fuzzer_standalone_gen
+ - name: update_fuzzer_gen
+ - name: jstestfuzz_gen
+ - name: jstestfuzz_interrupt_gen
- <<: *enterprise-rhel-62-64-bit-dynamic-required-template
name: rhel-62-64-bit-dynamic-visibility-test
diff --git a/jstests/auth/repl.js b/jstests/auth/repl.js
index 06aa38a38a7..6f5b7ed0dcb 100644
--- a/jstests/auth/repl.js
+++ b/jstests/auth/repl.js
@@ -1,4 +1,4 @@
-// Test that authorization information gets propogated correctly to secondaries and slaves.
+// Test that authorization information gets propogated correctly to secondaries.
var baseName = "jstests_auth_repl";
var rsName = baseName + "_rs";
@@ -26,7 +26,7 @@ var AuthReplTest = function(spec) {
assert(adminPri.auth("super", "super"), "could not authenticate as superuser");
if (secondaryConn != null) {
- secondaryConn.setSlaveOk(true);
+ secondaryConn.setSecondaryOk();
adminSec = secondaryConn.getDB("admin");
}
@@ -38,7 +38,7 @@ var AuthReplTest = function(spec) {
/**
* Use the rolesInfo command to check that the test
- * role is as expected on the secondary/slave
+ * role is as expected on the secondary
*/
var confirmRolesInfo = function(actionType) {
var role = adminSec.getRole(testRole, {showPrivileges: true});
@@ -48,7 +48,7 @@ var AuthReplTest = function(spec) {
/**
* Use the usersInfo command to check that the test
- * user is as expected on the secondary/slave
+ * user is as expected on the secondary
*/
var confirmUsersInfo = function(roleName) {
var user = adminSec.getUser(testUser);
@@ -58,7 +58,7 @@ var AuthReplTest = function(spec) {
/**
* Ensure that the test user has the proper privileges
- * on the secondary/slave
+ * on the secondary
*/
var confirmPrivilegeBeforeUpdate = function() {
// can run hostInfo
@@ -87,7 +87,7 @@ var AuthReplTest = function(spec) {
/**
* Ensure that the auth changes have taken effect
- * properly on the secondary/slave
+ * properly on the secondary
*/
var confirmPrivilegeAfterUpdate = function() {
// cannot run hostInfo
@@ -117,7 +117,7 @@ var AuthReplTest = function(spec) {
*/
that.setSecondary = function(secondary) {
secondaryConn = secondary;
- secondaryConn.setSlaveOk(true);
+ secondaryConn.setSecondaryOk();
adminSec = secondaryConn.getDB("admin");
};
@@ -149,7 +149,7 @@ var AuthReplTest = function(spec) {
/**
* Top-level test for updating users and roles and ensuring that the update
- * has the correct effect on the secondary/slave
+ * has the correct effect on the secondary
*/
that.testAll = function() {
authOnSecondary();
diff --git a/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js b/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js
index 054f0c5ca15..f1c6ad28b9b 100644
--- a/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js
+++ b/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js
@@ -72,7 +72,7 @@ var $config = (function() {
// Create a new connection to any node which isn't "me".
const conn = new Mongo(node);
assert(conn);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
RSnodes.push(conn);
});
diff --git a/jstests/core/resume_query_from_non_existent_record.js b/jstests/core/resume_query_from_non_existent_record.js
new file mode 100644
index 00000000000..954325a5763
--- /dev/null
+++ b/jstests/core/resume_query_from_non_existent_record.js
@@ -0,0 +1,66 @@
+/**
+ * Test that an error is raised when we try to resume a query from a record which doesn't exist.
+ *
+ * @tags: [
+ * assumes_against_mongod_not_mongos,
+ * requires_find_command,
+ * multiversion_incompatible,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+const collName = "resume_query_from_non_existent_record";
+const coll = db[collName];
+
+coll.drop();
+
+const testData = [{_id: 0, a: 1}, {_id: 1, a: 2}, {_id: 2, a: 3}];
+assert.commandWorked(coll.insert(testData));
+
+// Run the initial query and request to return a resume token. We're interested only in a single
+// document, so 'batchSize' is set to 1.
+let res = assert.commandWorked(
+ db.runCommand({find: collName, hint: {$natural: 1}, batchSize: 1, $_requestResumeToken: true}));
+assert.eq(1, res.cursor.firstBatch.length);
+assert.contains(res.cursor.firstBatch[0], testData);
+const savedData = res.cursor.firstBatch;
+
+// Make sure the query returned a resume token which will be used to resume the query from.
+assert.hasFields(res.cursor, ["postBatchResumeToken"]);
+const resumeToken = res.cursor.postBatchResumeToken;
+
+// Kill the cursor before attempting to resume.
+assert.commandWorked(db.runCommand({killCursors: collName, cursors: [res.cursor.id]}));
+
+// Try to resume the query from the saved resume token.
+res = assert.commandWorked(db.runCommand({
+ find: collName,
+ hint: {$natural: 1},
+ batchSize: 1,
+ $_requestResumeToken: true,
+ $_resumeAfter: resumeToken
+}));
+assert.eq(1, res.cursor.firstBatch.length);
+assert.contains(res.cursor.firstBatch[0], testData);
+assert.neq(savedData[0], res.cursor.firstBatch[0]);
+
+// Kill the cursor before attempting to resume.
+assert.commandWorked(db.runCommand({killCursors: collName, cursors: [res.cursor.id]}));
+
+// Delete a document which corresponds to the saved resume token, so that we can guarantee it does
+// not exist.
+assert.commandWorked(coll.remove({_id: savedData[0]._id}, {justOne: true}));
+
+// Try to resume the query from the same token and check that it fails to position the cursor to
+// the record specified in the resume token.
+assert.commandFailedWithCode(db.runCommand({
+ find: collName,
+ hint: {$natural: 1},
+ batchSize: 1,
+ $_requestResumeToken: true,
+ $_resumeAfter: resumeToken
+}),
+ ErrorCodes.KeyNotFound);
+})();
diff --git a/jstests/core/shell1.js b/jstests/core/shell1.js
index 7ea23f8d3a5..4fc4c3a1c15 100644
--- a/jstests/core/shell1.js
+++ b/jstests/core/shell1.js
@@ -4,11 +4,15 @@ shellHelper("show", "tables;");
shellHelper("show", "tables");
shellHelper("show", "tables ;");
-// test slaveOk levels
-assert(!db.getSlaveOk() && !db.test.getSlaveOk() && !db.getMongo().getSlaveOk(), "slaveOk 1");
-db.getMongo().setSlaveOk();
-assert(db.getSlaveOk() && db.test.getSlaveOk() && db.getMongo().getSlaveOk(), "slaveOk 2");
-db.setSlaveOk(false);
-assert(!db.getSlaveOk() && !db.test.getSlaveOk() && db.getMongo().getSlaveOk(), "slaveOk 3");
-db.test.setSlaveOk(true);
-assert(!db.getSlaveOk() && db.test.getSlaveOk() && db.getMongo().getSlaveOk(), "slaveOk 4");
+// test secondaryOk levels
+assert(!db.getSecondaryOk() && !db.test.getSecondaryOk() && !db.getMongo().getSecondaryOk(),
+ "secondaryOk 1");
+db.getMongo().setSecondaryOk();
+assert(db.getSecondaryOk() && db.test.getSecondaryOk() && db.getMongo().getSecondaryOk(),
+ "secondaryOk 2");
+db.setSecondaryOk(false);
+assert(!db.getSecondaryOk() && !db.test.getSecondaryOk() && db.getMongo().getSecondaryOk(),
+ "secondaryOk 3");
+db.test.setSecondaryOk();
+assert(!db.getSecondaryOk() && db.test.getSecondaryOk() && db.getMongo().getSecondaryOk(),
+ "secondaryOk 4");
diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js
index 2f9d0cc1c60..04c4a34eabd 100644
--- a/jstests/core/views/views_all_commands.js
+++ b/jstests/core/views/views_all_commands.js
@@ -338,6 +338,7 @@ let viewsCommandTests = {
hello: {skip: isUnrelated},
hostInfo: {skip: isUnrelated},
httpClientRequest: {skip: isAnInternalCommand},
+ importCollection: {skip: isUnrelated},
insert: {command: {insert: "view", documents: [{x: 1}]}, expectFailure: true},
internalRenameIfOptionsAndIndexesMatch: {skip: isAnInternalCommand},
invalidateUserCache: {skip: isUnrelated},
diff --git a/jstests/hooks/validate_collections.js b/jstests/hooks/validate_collections.js
index cf7f1be9707..856191ca51c 100644
--- a/jstests/hooks/validate_collections.js
+++ b/jstests/hooks/validate_collections.js
@@ -88,7 +88,7 @@ function CollectionValidator() {
try {
print('Running validate() on ' + host);
const conn = new Mongo(host);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
jsTest.authenticate(conn);
// Skip validating collections for arbiters.
diff --git a/jstests/libs/kill_sessions.js b/jstests/libs/kill_sessions.js
index b4643ec8ed5..1dabe9c9cea 100644
--- a/jstests/libs/kill_sessions.js
+++ b/jstests/libs/kill_sessions.js
@@ -120,7 +120,7 @@ var _kill_sessions_api_module = (function() {
// hosts. We identify particular ops by secs sleeping.
this.visit(function(client) {
let admin = client.getDB("admin");
- admin.getMongo().setSlaveOk();
+ admin.getMongo().setSecondaryOk();
assert.soon(function() {
let inProgressOps = admin.aggregate([{$currentOp: {'allUsers': true}}]);
@@ -183,7 +183,7 @@ var _kill_sessions_api_module = (function() {
Fixture.prototype.assertNoSessionsInCursors = function() {
this.visit(function(client) {
var db = client.getDB("admin");
- db.setSlaveOk();
+ db.setSecondaryOk();
assert.soon(() => {
let cursors = db.aggregate([
{"$currentOp": {"idleCursors": true, "allUsers": true}}
@@ -205,7 +205,7 @@ var _kill_sessions_api_module = (function() {
});
var db = client.getDB("admin");
- db.setSlaveOk();
+ db.setSecondaryOk();
var cursors = db.aggregate([
{"$currentOp": {"idleCursors": true, "allUsers": true}},
{"$match": {type: "idleCursor"}}
diff --git a/jstests/libs/override_methods/validate_collections_on_shutdown.js b/jstests/libs/override_methods/validate_collections_on_shutdown.js
index a1e56fd1ca8..a378d6e390a 100644
--- a/jstests/libs/override_methods/validate_collections_on_shutdown.js
+++ b/jstests/libs/override_methods/validate_collections_on_shutdown.js
@@ -29,8 +29,8 @@ MongoRunner.validateCollectionsCallback = function(port) {
return;
}
- // Set slaveOk=true so that we can run commands against any secondaries.
- conn.setSlaveOk();
+ // Set secondaryOk=true so that we can run commands against any secondaries.
+ conn.setSecondaryOk();
let dbNames;
let result =
diff --git a/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js b/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js
index 3e855455985..e6191f97449 100644
--- a/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js
+++ b/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js
@@ -29,7 +29,7 @@ assert.commandWorked(secondary.getDB("admin").runCommand(
{configureFailPoint: "waitInFindBeforeMakingBatch", mode: "alwaysOn"}));
const findWait = startParallelShell(function() {
- db.getMongo().setSlaveOk();
+ db.getMongo().setSecondaryOk();
assert.eq(
db.getSiblingDB('read').getCollection('readColl').find().comment('read hangs').itcount(),
1);
diff --git a/jstests/noPassthrough/change_stream_error_label.js b/jstests/noPassthrough/change_stream_error_label.js
index 899207b3bd1..2b326a22cd7 100644
--- a/jstests/noPassthrough/change_stream_error_label.js
+++ b/jstests/noPassthrough/change_stream_error_label.js
@@ -12,9 +12,9 @@ rst.startSet();
rst.initiate();
rst.awaitSecondaryNodes();
-// Disable "slaveOk" on the connection so that we are not allowed to run on the Secondary.
+// Disable "secondaryOk" on the connection so that we are not allowed to run on the Secondary.
const testDB = rst.getSecondary().getDB(jsTestName());
-testDB.getMongo().setSlaveOk(false);
+testDB.getMongo().setSecondaryOk(false);
const coll = testDB.test;
// Issue a change stream. We should fail with a NotPrimaryNoSecondaryOk error.
@@ -28,8 +28,8 @@ assert.contains("ResumableChangeStreamError", err.errorLabels, err);
// Now verify that the 'failGetMoreAfterCursorCheckout' failpoint can effectively exercise the
// error label generation logic for change stream getMores.
function testFailGetMoreAfterCursorCheckoutFailpoint({errorCode, expectedLabel}) {
- // Re-enable "slaveOk" on the test connection.
- testDB.getMongo().setSlaveOk(true);
+ // Re-enable "secondaryOk" on the test connection.
+ testDB.getMongo().setSecondaryOk();
// Activate the failpoint and set the exception that it will throw.
assert.commandWorked(testDB.adminCommand({
diff --git a/jstests/noPassthrough/out_merge_on_secondary_killop.js b/jstests/noPassthrough/out_merge_on_secondary_killop.js
index 7cdc25d8eae..d5863374f96 100644
--- a/jstests/noPassthrough/out_merge_on_secondary_killop.js
+++ b/jstests/noPassthrough/out_merge_on_secondary_killop.js
@@ -57,7 +57,7 @@ function testKillOp(pipeline, comment, failpointName) {
// Run the aggregate and ensure that it is interrupted.
const runAggregate = `
const testDB = db.getSiblingDB("${kDBName}");
- testDB.setSlaveOk(true);
+ testDB.setSecondaryOk();
const res = testDB.runCommand({
aggregate: "inputColl",
pipeline: ${tojson(pipeline)},
diff --git a/jstests/noPassthrough/server_transaction_metrics_secondary.js b/jstests/noPassthrough/server_transaction_metrics_secondary.js
index 9282b19bea0..3a337e17fde 100644
--- a/jstests/noPassthrough/server_transaction_metrics_secondary.js
+++ b/jstests/noPassthrough/server_transaction_metrics_secondary.js
@@ -19,8 +19,8 @@ replTest.initiate(config);
const primary = replTest.getPrimary();
const secondary = replTest.getSecondary();
-// Set slaveOk=true so that normal read commands would be allowed on the secondary.
-secondary.setSlaveOk(true);
+// Set secondaryOk=true so that normal read commands would be allowed on the secondary.
+secondary.setSecondaryOk();
// Create a test collection that we can run commands against.
assert.commandWorked(primary.getDB(dbName)[collName].insert({_id: 0}));
diff --git a/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js b/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
index 3a43603e935..a00cabc89a9 100644
--- a/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
+++ b/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js
@@ -87,7 +87,7 @@ assert.soonNoExcept(function() {
});
// Confirm that the write with the oplog hold behind it is now gone (truncated) as expected.
-primary.setSlaveOk();
+primary.setSecondaryOk();
const find = primary.getDB(dbName).getCollection(collName).findOne({_id: "writeAfterHole"});
assert.eq(find, null);
diff --git a/jstests/noPassthrough/stepdown_query.js b/jstests/noPassthrough/stepdown_query.js
index 4809e471f7a..239497725ff 100644
--- a/jstests/noPassthrough/stepdown_query.js
+++ b/jstests/noPassthrough/stepdown_query.js
@@ -25,8 +25,8 @@ var collName = jsTest.name();
function runTest(host, rst, waitForPrimary) {
// We create a new connection to 'host' here instead of passing in the original connection.
- // This to work around the fact that connections created by ReplSetTest already have slaveOk
- // set on them, but we need a connection with slaveOk not set for this test.
+ // This is to work around the fact that connections created by ReplSetTest already have secondaryOk
+ // set on them, but we need a connection with secondaryOk not set for this test.
var conn = new Mongo(host);
var coll = conn.getDB(dbName).getCollection(collName);
assert(!coll.exists());
@@ -51,7 +51,7 @@ function runTest(host, rst, waitForPrimary) {
} catch (e) {
}
- // Even though our connection doesn't have slaveOk set, we should still be able to iterate
+ // Even though our connection doesn't have secondaryOk set, we should still be able to iterate
// our cursor and kill our cursor.
assert(cursor.hasNext());
assert.doesNotThrow(function() {
diff --git a/jstests/noPassthrough/timestamp_index_builds.js b/jstests/noPassthrough/timestamp_index_builds.js
index 8e13ff0d21c..f7995108f34 100644
--- a/jstests/noPassthrough/timestamp_index_builds.js
+++ b/jstests/noPassthrough/timestamp_index_builds.js
@@ -87,7 +87,7 @@ for (let nodeIdx = 0; nodeIdx < 2; ++nodeIdx) {
jsTestLog("Starting as a replica set. Both indexes should exist. Node: " + nodeIdentity);
let conn = rst.start(nodeIdx, {startClean: false}, true);
rst.waitForState(conn, ReplSetTest.State.SECONDARY);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
IndexBuildTest.assertIndexes(getColl(conn), 2, ['_id_', 'foo_1']);
rst.stop(nodeIdx);
}
diff --git a/jstests/noPassthroughWithMongod/geo_polygon.js b/jstests/noPassthroughWithMongod/geo_polygon.js
index ce7f9ebf67c..d0085fa1f92 100644
--- a/jstests/noPassthroughWithMongod/geo_polygon.js
+++ b/jstests/noPassthroughWithMongod/geo_polygon.js
@@ -16,7 +16,9 @@ for (x = -180; x < 180; x += .5) {
assert.commandWorked(bulk.execute());
var numTests = 31;
-for (var n = 0; n < numTests; n++) {
+// Reduce the number of repetitions on the live-record buildvariant
+var start = (TestData.undoRecorderPath ? 20 : 0);
+for (var n = start; n < numTests; n++) {
t.dropIndexes();
t.ensureIndex({loc: "2d"}, {bits: 2 + n});
diff --git a/jstests/noPassthroughWithMongod/indexbg_interrupts.js b/jstests/noPassthroughWithMongod/indexbg_interrupts.js
index a1bf783f032..420fb2b6d96 100644
--- a/jstests/noPassthroughWithMongod/indexbg_interrupts.js
+++ b/jstests/noPassthroughWithMongod/indexbg_interrupts.js
@@ -32,7 +32,8 @@ var checkOp = function(checkDB) {
var dbname = 'bgIndexSec';
var collection = 'jstests_feh';
-var size = 100000;
+// Reduce the amount of data on live-record buildvariant
+var size = (TestData.undoRecorderPath ? 10000 : 100000);
// Set up replica set
var replTest = new ReplSetTest({name: 'bgIndex', nodes: 3});
diff --git a/jstests/noPassthroughWithMongod/no_balance_collection.js b/jstests/noPassthroughWithMongod/no_balance_collection.js
index 38182f1c481..2ffaf7aecfc 100644
--- a/jstests/noPassthroughWithMongod/no_balance_collection.js
+++ b/jstests/noPassthroughWithMongod/no_balance_collection.js
@@ -78,7 +78,9 @@ st.waitForBalancer(true, 60000);
var lastMigration = sh._lastMigration(collB);
var bulk = collB.initializeUnorderedBulkOp();
-for (var i = 0; i < 1000000; i++) {
+// Reduce the amount of data on live-record buildvariant
+var n = (TestData.undoRecorderPath ? 100000 : 1000000);
+for (var i = 0; i < n; i++) {
bulk.insert({_id: i, hello: "world"});
}
assert.commandWorked(bulk.execute());
diff --git a/jstests/noPassthroughWithMongod/replReads.js b/jstests/noPassthroughWithMongod/replReads.js
index 5c40dbd900c..fde1143911c 100644
--- a/jstests/noPassthroughWithMongod/replReads.js
+++ b/jstests/noPassthroughWithMongod/replReads.js
@@ -1,4 +1,4 @@
-// Test that doing slaveOk reads from secondaries hits all the secondaries evenly
+// Test that doing secondaryOk reads from secondaries hits all the secondaries evenly
// @tags: [requires_sharding]
function testReadLoadBalancing(numReplicas) {
@@ -52,7 +52,7 @@ function testReadLoadBalancing(numReplicas) {
for (var i = 0; i < secondaries.length * 10; i++) {
conn = new Mongo(s._mongos[0].host);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
conn.getDB('test').foo.findOne();
connections.push(conn);
}
@@ -103,7 +103,7 @@ function testReadLoadBalancing(numReplicas) {
for (var i = 0; i < secondaries.length * 10; i++) {
conn = new Mongo(s._mongos[0].host);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
conn.getDB('test').foo.findOne();
connections.push(conn);
}
diff --git a/jstests/replsets/auth1.js b/jstests/replsets/auth1.js
index cfc9c405dd1..a91137391a8 100644
--- a/jstests/replsets/auth1.js
+++ b/jstests/replsets/auth1.js
@@ -88,7 +88,7 @@ print("try some legal and illegal reads");
var r = primary.getDB("test").foo.findOne();
assert.eq(r.x, 1);
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
function doQueryOn(p) {
var error = assert.throws(function() {
@@ -200,7 +200,7 @@ wait(function() {
print("make sure it has the config, too");
assert.soon(function() {
for (var i in rs.nodes) {
- rs.nodes[i].setSlaveOk();
+ rs.nodes[i].setSecondaryOk();
rs.nodes[i].getDB("admin").auth("foo", "bar");
config = rs.nodes[i].getDB("local").system.replset.findOne();
// We expect the config version to be 3 due to the initial config and then the
diff --git a/jstests/replsets/auth_no_pri.js b/jstests/replsets/auth_no_pri.js
index 16a94763b04..bc606dc913a 100644
--- a/jstests/replsets/auth_no_pri.js
+++ b/jstests/replsets/auth_no_pri.js
@@ -23,7 +23,7 @@ rs.waitForState(nodes[2], ReplSetTest.State.SECONDARY);
// Make sure you can still authenticate a replset connection with no primary
var conn2 = new Mongo(rs.getURL());
-conn2.setSlaveOk(true);
+conn2.setSecondaryOk();
assert(conn2.getDB('admin').auth({user: 'admin', pwd: 'pwd', mechanism: "SCRAM-SHA-1"}));
assert.eq(1, conn2.getDB('admin').foo.findOne().a);
diff --git a/jstests/replsets/awaitable_ismaster_fcv_change.js b/jstests/replsets/awaitable_ismaster_fcv_change.js
index 41ed644e5e6..9e22fde507d 100644
--- a/jstests/replsets/awaitable_ismaster_fcv_change.js
+++ b/jstests/replsets/awaitable_ismaster_fcv_change.js
@@ -21,7 +21,7 @@ const secondaryAdminDB = secondary.getDB("admin");
function runAwaitableIsMasterBeforeFCVChange(
topologyVersionField, targetFCV, isPrimary, prevMinWireVersion, serverMaxWireVersion) {
- db.getMongo().setSlaveOk();
+ db.getMongo().setSecondaryOk();
let response = assert.commandWorked(db.runCommand({
isMaster: 1,
topologyVersion: topologyVersionField,
diff --git a/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js b/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js
index f98e2fb4326..4d17389c209 100644
--- a/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js
+++ b/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js
@@ -56,7 +56,7 @@ let waitForGetMoreToFinish = startParallelShell(() => {
load('jstests/replsets/rslib.js');
const secondary = db.getMongo();
- secondary.setSlaveOk();
+ secondary.setSecondaryOk();
const dbName = 'test';
const collName = 'coll';
diff --git a/jstests/replsets/buildindexes.js b/jstests/replsets/buildindexes.js
index e00a9e94ef7..3be0ba68896 100644
--- a/jstests/replsets/buildindexes.js
+++ b/jstests/replsets/buildindexes.js
@@ -21,7 +21,7 @@ var primary = replTest.getPrimary().getDB(name);
var secondaryConns = replTest.getSecondaries();
var secondaries = [];
for (var i in secondaryConns) {
- secondaryConns[i].setSlaveOk();
+ secondaryConns[i].setSecondaryOk();
secondaries.push(secondaryConns[i].getDB(name));
}
replTest.awaitReplication();
diff --git a/jstests/replsets/catchup.js b/jstests/replsets/catchup.js
index a8284ad1772..7ab31e4d76c 100644
--- a/jstests/replsets/catchup.js
+++ b/jstests/replsets/catchup.js
@@ -37,7 +37,7 @@ rst.nodes.forEach(function(node) {
});
function checkOpInOplog(node, op, count) {
- node.getDB("admin").getMongo().setSlaveOk();
+ node.getDB("admin").getMongo().setSecondaryOk();
var oplog = node.getDB("local")['oplog.rs'];
var oplogArray = oplog.find().toArray();
assert.eq(oplog.count(op), count, "op: " + tojson(op) + ", oplog: " + tojson(oplogArray));
diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js
index e742e1b43af..a5b3f9d3ab0 100644
--- a/jstests/replsets/db_reads_while_recovering_all_commands.js
+++ b/jstests/replsets/db_reads_while_recovering_all_commands.js
@@ -192,6 +192,7 @@ const allCommands = {
hello: {skip: isNotAUserDataRead},
hostInfo: {skip: isNotAUserDataRead},
httpClientRequest: {skip: isNotAUserDataRead},
+ importCollection: {skip: isNotAUserDataRead},
insert: {skip: isPrimaryOnly},
internalRenameIfOptionsAndIndexesMatch: {skip: isAnInternalCommand},
invalidateUserCache: {skip: isNotAUserDataRead},
diff --git a/jstests/replsets/disconnect_on_legacy_write_to_secondary.js b/jstests/replsets/disconnect_on_legacy_write_to_secondary.js
index edf5950a62b..9678fc1a98e 100644
--- a/jstests/replsets/disconnect_on_legacy_write_to_secondary.js
+++ b/jstests/replsets/disconnect_on_legacy_write_to_secondary.js
@@ -52,16 +52,16 @@ const primaryDb = primaryDataConn.getDB("test");
const primaryColl = primaryDb[collname];
primaryDataConn.forceWriteMode('legacy');
-function getNotMasterLegacyUnackWritesCounter() {
+function getNotPrimaryLegacyUnackWritesCounter() {
return assert.commandWorked(primaryAdmin.adminCommand({serverStatus: 1}))
- .metrics.repl.network.notMasterLegacyUnacknowledgedWrites;
+ .metrics.repl.network.notPrimaryLegacyUnacknowledgedWrites;
}
function runStepDownTest({description, failpoint, operation}) {
jsTestLog("Enabling failpoint to block " + description + "s");
let failPoint = configureFailPoint(primaryAdmin, failpoint);
- let failedLegacyUnackWritesBefore = getNotMasterLegacyUnackWritesCounter();
+ let failedLegacyUnackWritesBefore = getNotPrimaryLegacyUnackWritesCounter();
jsTestLog("Trying legacy " + description + " on stepping-down primary");
operation();
@@ -77,7 +77,7 @@ function runStepDownTest({description, failpoint, operation}) {
// Validate the number of legacy unacknowledged writes failed due to step down resulted
// in network disconnection.
- let failedLegacyUnackWritesAfter = getNotMasterLegacyUnackWritesCounter();
+ let failedLegacyUnackWritesAfter = getNotPrimaryLegacyUnackWritesCounter();
assert.eq(failedLegacyUnackWritesAfter, failedLegacyUnackWritesBefore + 1);
// Allow the primary to be re-elected, and wait for it.
diff --git a/jstests/replsets/explain_slaveok.js b/jstests/replsets/explain_slaveok.js
index 68eda89bce7..f3215af9ab6 100644
--- a/jstests/replsets/explain_slaveok.js
+++ b/jstests/replsets/explain_slaveok.js
@@ -1,12 +1,12 @@
// Test the explain command on the primary and on secondaries:
//
-// 1) Explain of read operations should work on the secondaries iff slaveOk is set.
+// 1) Explain of read operations should work on the secondaries iff secondaryOk is set.
//
// 2) Explain of write operations should
-// --fail on secondaries, even if slaveOk is set,
+// --fail on secondaries, even if secondaryOk is set,
// --succeed on primary without applying any writes.
-var name = "explain_slaveok";
+var name = "explain_secondaryok";
print("Start replica set with two nodes");
var replTest = new ReplSetTest({name: name, nodes: 2});
@@ -16,22 +16,22 @@ var primary = replTest.getPrimary();
// Insert a document and let it sync to the secondary.
print("Initial sync");
-primary.getDB("test").explain_slaveok.insert({a: 1});
+primary.getDB("test").explain_secondaryok.insert({a: 1});
replTest.awaitReplication();
// Check that the document is present on the primary.
-assert.eq(1, primary.getDB("test").explain_slaveok.findOne({a: 1})["a"]);
+assert.eq(1, primary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]);
-// We shouldn't be able to read from the secondary with slaveOk off.
+// We shouldn't be able to read from the secondary with secondaryOk off.
var secondary = replTest.getSecondary();
-secondary.getDB("test").getMongo().setSlaveOk(false);
+secondary.getDB("test").getMongo().setSecondaryOk(false);
assert.throws(function() {
- secondary.getDB("test").explain_slaveok.findOne({a: 1});
+ secondary.getDB("test").explain_secondaryok.findOne({a: 1});
});
-// With slaveOk on, we should be able to read from the secondary.
-secondary.getDB("test").getMongo().setSlaveOk(true);
-assert.eq(1, secondary.getDB("test").explain_slaveok.findOne({a: 1})["a"]);
+// With secondaryOk on, we should be able to read from the secondary.
+secondary.getDB("test").getMongo().setSecondaryOk();
+assert.eq(1, secondary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]);
//
// Test explains on primary.
@@ -39,12 +39,12 @@ assert.eq(1, secondary.getDB("test").explain_slaveok.findOne({a: 1})["a"]);
// Explain a count on the primary.
var explainOut = primary.getDB("test").runCommand(
- {explain: {count: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"});
+ {explain: {count: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"});
assert.commandWorked(explainOut, "explain read op on primary");
// Explain an update on the primary.
explainOut = primary.getDB("test").runCommand({
- explain: {update: "explain_slaveok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]},
+ explain: {update: "explain_secondaryok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]},
verbosity: "executionStats"
});
assert.commandWorked(explainOut, "explain write op on primary");
@@ -57,52 +57,52 @@ assert.eq(1, stages.nWouldModify);
// Confirm that the document did not actually get modified on the primary
// or on the secondary.
-assert.eq(1, primary.getDB("test").explain_slaveok.findOne({a: 1})["a"]);
-secondary.getDB("test").getMongo().setSlaveOk(true);
-assert.eq(1, secondary.getDB("test").explain_slaveok.findOne({a: 1})["a"]);
+assert.eq(1, primary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]);
+secondary.getDB("test").getMongo().setSecondaryOk();
+assert.eq(1, secondary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]);
//
// Test explains on secondary.
//
-// Explain a count on the secondary with slaveOk off. Should fail because
-// slaveOk is required for explains on a secondary.
-secondary.getDB("test").getMongo().setSlaveOk(false);
+// Explain a count on the secondary with secondaryOk off. Should fail because
+// secondaryOk is required for explains on a secondary.
+secondary.getDB("test").getMongo().setSecondaryOk(false);
explainOut = secondary.getDB("test").runCommand(
- {explain: {count: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"});
-assert.commandFailed(explainOut, "explain read op on secondary, slaveOk false");
+ {explain: {count: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"});
+assert.commandFailed(explainOut, "explain read op on secondary, secondaryOk false");
-// Explain of count should succeed once slaveOk is true.
-secondary.getDB("test").getMongo().setSlaveOk(true);
+// Explain of count should succeed once secondaryOk is true.
+secondary.getDB("test").getMongo().setSecondaryOk();
explainOut = secondary.getDB("test").runCommand(
- {explain: {count: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"});
-assert.commandWorked(explainOut, "explain read op on secondary, slaveOk true");
+ {explain: {count: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"});
+assert.commandWorked(explainOut, "explain read op on secondary, secondaryOk true");
-// Explain .find() on a secondary, setting slaveOk directly on the query.
-secondary.getDB("test").getMongo().setSlaveOk(false);
+// Explain .find() on a secondary, setting secondaryOk directly on the query.
+secondary.getDB("test").getMongo().setSecondaryOk(false);
assert.throws(function() {
- secondary.getDB("test").explain_slaveok.explain("executionStats").find({a: 1}).finish();
+ secondary.getDB("test").explain_secondaryok.explain("executionStats").find({a: 1}).finish();
});
-secondary.getDB("test").getMongo().setSlaveOk(false);
+secondary.getDB("test").getMongo().setSecondaryOk(false);
explainOut = secondary.getDB("test")
- .explain_slaveok.explain("executionStats")
+ .explain_secondaryok.explain("executionStats")
.find({a: 1})
.addOption(DBQuery.Option.slaveOk)
.finish();
-assert.commandWorked(explainOut, "explain read op on secondary, slaveOk set to true on query");
+assert.commandWorked(explainOut, "explain read op on secondary, slaveOk bit set to true on query");
-secondary.getDB("test").getMongo().setSlaveOk(true);
+secondary.getDB("test").getMongo().setSecondaryOk();
explainOut =
- secondary.getDB("test").explain_slaveok.explain("executionStats").find({a: 1}).finish();
-assert.commandWorked(explainOut, "explain .find() on secondary, slaveOk set to true");
+ secondary.getDB("test").explain_secondaryok.explain("executionStats").find({a: 1}).finish();
+assert.commandWorked(explainOut, "explain .find() on secondary, secondaryOk set to true");
-// Explain .find() on a secondary, setting slaveOk to false with various read preferences.
+// Explain .find() on a secondary, setting secondaryOk to false with various read preferences.
var readPrefModes = ["secondary", "secondaryPreferred", "primaryPreferred", "nearest"];
readPrefModes.forEach(function(prefString) {
- secondary.getDB("test").getMongo().setSlaveOk(false);
+ secondary.getDB("test").getMongo().setSecondaryOk(false);
explainOut = secondary.getDB("test")
- .explain_slaveok.explain("executionStats")
+ .explain_secondaryok.explain("executionStats")
.find({a: 1})
.readPref(prefString)
.finish();
@@ -112,7 +112,7 @@ readPrefModes.forEach(function(prefString) {
// Similarly should succeed if a read preference is set on the connection.
secondary.setReadPref(prefString);
explainOut =
- secondary.getDB("test").explain_slaveok.explain("executionStats").find({a: 1}).finish();
+ secondary.getDB("test").explain_secondaryok.explain("executionStats").find({a: 1}).finish();
assert.commandWorked(
explainOut,
"explain .find() on secondary, '" + prefString + "' read preference on connection");
@@ -120,35 +120,36 @@ readPrefModes.forEach(function(prefString) {
secondary.setReadPref();
});
-// Fail explain find() on a secondary, setting slaveOk to false with read preference set to primary.
+// Fail explain find() on a secondary, setting secondaryOk to false with read preference set to
+// primary.
var prefStringPrimary = "primary";
-secondary.getDB("test").getMongo().setSlaveOk(false);
+secondary.getDB("test").getMongo().setSecondaryOk(false);
explainOut = secondary.getDB("test").runCommand(
- {explain: {find: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"});
-assert.commandFailed(explainOut, "not master and slaveOk=false");
+ {explain: {find: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"});
+assert.commandFailed(explainOut, "not primary and secondaryOk=false");
// Similarly should fail if a read preference is set on the connection.
secondary.setReadPref(prefStringPrimary);
explainOut = secondary.getDB("test").runCommand(
- {explain: {find: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"});
-assert.commandFailed(explainOut, "not master and slaveOk=false");
+ {explain: {find: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"});
+assert.commandFailed(explainOut, "not primary and secondaryOk=false");
// Unset read pref on the connection.
secondary.setReadPref();
-// Explain an update on the secondary with slaveOk off. Should fail because
-// slaveOk is required for explains on a secondary.
-secondary.getDB("test").getMongo().setSlaveOk(false);
+// Explain an update on the secondary with secondaryOk off. Should fail because
+// secondaryOk is required for explains on a secondary.
+secondary.getDB("test").getMongo().setSecondaryOk(false);
explainOut = secondary.getDB("test").runCommand({
- explain: {update: "explain_slaveok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]},
+ explain: {update: "explain_secondaryok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]},
verbosity: "executionStats"
});
-assert.commandFailed(explainOut, "explain write op on secondary, slaveOk false");
+assert.commandFailed(explainOut, "explain write op on secondary, secondaryOk false");
-// Explain of the update should also fail with slaveOk on.
-secondary.getDB("test").getMongo().setSlaveOk(true);
+// Explain of the update should also fail with secondaryOk on.
+secondary.getDB("test").getMongo().setSecondaryOk();
explainOut = secondary.getDB("test").runCommand({
- explain: {update: "explain_slaveok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]},
+ explain: {update: "explain_secondaryok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]},
verbosity: "executionStats"
});
-assert.commandFailed(explainOut, "explain write op on secondary, slaveOk true");
+assert.commandFailed(explainOut, "explain write op on secondary, secondaryOk true");
replTest.stopSet();
diff --git a/jstests/replsets/fsync_lock_read_secondaries.js b/jstests/replsets/fsync_lock_read_secondaries.js
index e73ceab58ba..daed9de7ad6 100644
--- a/jstests/replsets/fsync_lock_read_secondaries.js
+++ b/jstests/replsets/fsync_lock_read_secondaries.js
@@ -50,7 +50,7 @@ replTest.awaitReplication();
// Calling getPrimary also populates '_secondaries'.
var secondaries = replTest.getSecondaries();
-secondaries[0].setSlaveOk();
+secondaries[0].setSecondaryOk();
assert.commandWorked(secondaries[0].getDB("admin").runCommand({fsync: 1, lock: 1}));
var docNum = 1000;
diff --git a/jstests/replsets/groupAndMapReduce.js b/jstests/replsets/groupAndMapReduce.js
index 270436bf62c..2723f800a07 100644
--- a/jstests/replsets/groupAndMapReduce.js
+++ b/jstests/replsets/groupAndMapReduce.js
@@ -36,7 +36,7 @@ doTest = function(signal) {
assert(secondaries.length == 2, "Expected 2 secondaries but length was " + secondaries.length);
secondaries.forEach(function(secondary) {
// try to read from secondary
- secondary.slaveOk = true;
+ secondary.setSecondaryOk();
var count = secondary.getDB("foo").foo.find().itcount();
printjson(count);
assert.eq(len, count, "secondary count wrong: " + secondary);
@@ -46,7 +46,7 @@ doTest = function(signal) {
printjson(one);
print("Calling inline mr() with slaveOk=true, must succeed");
- secondary.slaveOk = true;
+ secondary.setSecondaryOk();
map = function() {
emit(this.a, 1);
};
diff --git a/jstests/replsets/initial_sync4.js b/jstests/replsets/initial_sync4.js
index 80103839bfb..35dbd632715 100644
--- a/jstests/replsets/initial_sync4.js
+++ b/jstests/replsets/initial_sync4.js
@@ -45,7 +45,7 @@
jsTestLog("5. Wait for new node to start cloning");
- s.setSlaveOk();
+ s.setSecondaryOk();
var sc = s.getDB("d")["c"];
wait(function() {
diff --git a/jstests/replsets/initial_sync_ambiguous_index.js b/jstests/replsets/initial_sync_ambiguous_index.js
index 7e415fade43..c50324db935 100644
--- a/jstests/replsets/initial_sync_ambiguous_index.js
+++ b/jstests/replsets/initial_sync_ambiguous_index.js
@@ -44,7 +44,7 @@ const secondary = rst.add({
rsConfig: {votes: 0, priority: 0},
setParameter: {"numInitialSyncAttempts": 1, 'collectionClonerBatchSize': clonerBatchSize}
});
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryColl = secondary.getDB(dbName).getCollection(collectionName);
// We set the collectionClonerBatchSize low above, so we will definitely hit
diff --git a/jstests/replsets/initial_sync_applier_error.js b/jstests/replsets/initial_sync_applier_error.js
index 7ef7058aea4..e880c739ef1 100644
--- a/jstests/replsets/initial_sync_applier_error.js
+++ b/jstests/replsets/initial_sync_applier_error.js
@@ -31,7 +31,7 @@ assert.commandWorked(coll.insert({_id: 0, content: "hi"}));
// but before copying databases.
var secondary =
replSet.add({setParameter: "numInitialSyncAttempts=2", rsConfig: {votes: 0, priority: 0}});
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
let failPoint = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases');
replSet.reInitiate();
diff --git a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js
index 023184c5dfb..292b0318ecd 100644
--- a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js
+++ b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js
@@ -139,7 +139,7 @@ replTest.awaitReplication();
jsTestLog("Initial sync completed");
// Make sure the secondary fetched enough transaction oplog entries.
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryOplog = secondary.getDB("local").getCollection("oplog.rs");
assert.eq(secondaryOplog.find({"ts": beginFetchingTs}).itcount(), 1);
diff --git a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js
index a4420ff9940..9b1839b4c43 100644
--- a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js
+++ b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js
@@ -98,7 +98,7 @@ replTest.waitForState(secondary, ReplSetTest.State.SECONDARY);
jsTestLog("Initial sync completed");
// Make sure the secondary fetched enough transaction oplog entries.
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryOplog = secondary.getDB("local").getCollection("oplog.rs");
assert.eq(secondaryOplog.find({"ts": beginFetchingTs}).itcount(), 1);
diff --git a/jstests/replsets/initial_sync_invalid_views.js b/jstests/replsets/initial_sync_invalid_views.js
index fb5a1975323..9faf5207608 100644
--- a/jstests/replsets/initial_sync_invalid_views.js
+++ b/jstests/replsets/initial_sync_invalid_views.js
@@ -18,7 +18,7 @@ assert.commandWorked(coll.insert({a: 1}));
// Add a secondary node but make it hang before copying databases.
let secondary = replSet.add({rsConfig: {votes: 0, priority: 0}});
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
assert.commandWorked(secondary.getDB('admin').runCommand(
{configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'}));
diff --git a/jstests/replsets/initial_sync_move_forward.js b/jstests/replsets/initial_sync_move_forward.js
index d5142d06b98..c99b529a700 100644
--- a/jstests/replsets/initial_sync_move_forward.js
+++ b/jstests/replsets/initial_sync_move_forward.js
@@ -41,7 +41,7 @@ assert.commandWorked(masterColl.ensureIndex({x: 1}, {unique: true}));
// Add a secondary.
var secondary =
rst.add({setParameter: "numInitialSyncAttempts=1", rsConfig: {votes: 0, priority: 0}});
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
var secondaryColl = secondary.getDB("test").coll;
// Pause initial sync when the secondary has copied {_id: 0, x: 0} and {_id: 1, x: 1}.
diff --git a/jstests/replsets/initial_sync_oplog_rollover.js b/jstests/replsets/initial_sync_oplog_rollover.js
index 268ec261d39..b9c1eda8f2f 100644
--- a/jstests/replsets/initial_sync_oplog_rollover.js
+++ b/jstests/replsets/initial_sync_oplog_rollover.js
@@ -38,7 +38,7 @@ var firstOplogEntry = getFirstOplogEntry(primary);
// Add a secondary node but make it hang before copying databases.
var secondary = replSet.add();
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
var failPoint = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases');
replSet.reInitiate();
diff --git a/jstests/replsets/initial_sync_replSetGetStatus.js b/jstests/replsets/initial_sync_replSetGetStatus.js
index d315421577a..3d999cc9553 100644
--- a/jstests/replsets/initial_sync_replSetGetStatus.js
+++ b/jstests/replsets/initial_sync_replSetGetStatus.js
@@ -24,7 +24,7 @@ assert.commandWorked(coll.insert({a: 2}));
// Add a secondary node but make it hang before copying databases.
var secondary = replSet.add({rsConfig: {votes: 0, priority: 0}});
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
var failPointBeforeCopying = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases');
var failPointBeforeFinish = configureFailPoint(secondary, 'initialSyncHangBeforeFinish');
diff --git a/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js b/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js
index 80012cab607..9ea82c52bb3 100644
--- a/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js
+++ b/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js
@@ -47,7 +47,7 @@ function restartSecondaryAndForceSyncSource(replSet, secondary, syncSource, dbNa
// Wait for the secondary to complete initial sync.
waitForState(secondary, ReplSetTest.State.SECONDARY);
// Allow for secondary reads.
- secondary.setSlaveOk();
+ secondary.setSecondaryOk();
const secondaryDB = secondary.getDB(dbName);
// Confirm that we have a prepared transaction in progress on the secondary.
diff --git a/jstests/replsets/initial_sync_test_fixture_test.js b/jstests/replsets/initial_sync_test_fixture_test.js
index 625620584a4..755df7a2109 100644
--- a/jstests/replsets/initial_sync_test_fixture_test.js
+++ b/jstests/replsets/initial_sync_test_fixture_test.js
@@ -108,7 +108,7 @@ let prepareTimestamp = PrepareHelpers.prepareTransaction(session);
assert(!initialSyncTest.step());
secondary = initialSyncTest.getSecondary();
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
// Make sure that we cannot read from this node yet.
assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
diff --git a/jstests/replsets/initial_sync_uuid_not_found.js b/jstests/replsets/initial_sync_uuid_not_found.js
index 90e0024b76c..2e2911ee6dd 100644
--- a/jstests/replsets/initial_sync_uuid_not_found.js
+++ b/jstests/replsets/initial_sync_uuid_not_found.js
@@ -39,7 +39,7 @@ function ResyncWithFailpoint(failpointName, failpointData) {
assert.eq(primary, rst.getPrimary(), 'Primary changed after reconfig');
jsTestLog('Wait for new node to start cloning');
- secondary.setSlaveOk();
+ secondary.setSecondaryOk();
const secondaryDB = secondary.getDB(primaryDB.getName());
const secondaryColl = secondaryDB[primaryColl.getName()];
diff --git a/jstests/replsets/initial_sync_with_write_load.js b/jstests/replsets/initial_sync_with_write_load.js
index fc1164c6c43..0474c1f9c10 100644
--- a/jstests/replsets/initial_sync_with_write_load.js
+++ b/jstests/replsets/initial_sync_with_write_load.js
@@ -24,8 +24,8 @@ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
var master = replTest.getPrimary();
var a_conn = conns[0];
var b_conn = conns[1];
-a_conn.setSlaveOk();
-b_conn.setSlaveOk();
+a_conn.setSecondaryOk();
+b_conn.setSecondaryOk();
var A = a_conn.getDB("test");
var B = b_conn.getDB("test");
var AID = replTest.getNodeId(a_conn);
diff --git a/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js b/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js
index 1c71eb94b66..da8c8fd544d 100644
--- a/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js
+++ b/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js
@@ -74,7 +74,7 @@ TestData.clusterTime = clusterTimeAfterPrepare;
const waitForSecondaryReadBlockedOnPrepareConflictThread = startParallelShell(() => {
// Allow for secondary reads.
- db.getMongo().setSlaveOk();
+ db.getMongo().setSecondaryOk();
const parallelTestDB = db.getSiblingDB(TestData.dbName);
const parallelTestCollName = TestData.collName;
diff --git a/jstests/replsets/libs/initial_sync_update_missing_doc.js b/jstests/replsets/libs/initial_sync_update_missing_doc.js
index f45d9a4e107..68bb14ac668 100644
--- a/jstests/replsets/libs/initial_sync_update_missing_doc.js
+++ b/jstests/replsets/libs/initial_sync_update_missing_doc.js
@@ -17,7 +17,7 @@ load("jstests/libs/fail_point_util.js");
// must be called after reInitiateSetWithSecondary.
var reInitiateSetWithSecondary = function(replSet, secondaryConfig) {
const secondary = replSet.add(secondaryConfig);
- secondary.setSlaveOk();
+ secondary.setSecondaryOk();
// Make the secondary hang after retrieving the last op on the sync source but before
// copying databases.
diff --git a/jstests/replsets/libs/rollback_resumable_index_build.js b/jstests/replsets/libs/rollback_resumable_index_build.js
index 0ab2148e783..e5c63d91ada 100644
--- a/jstests/replsets/libs/rollback_resumable_index_build.js
+++ b/jstests/replsets/libs/rollback_resumable_index_build.js
@@ -7,8 +7,10 @@ const RollbackResumableIndexBuildTest = class {
* rollback starts is specified by rollbackStartFailPointName. The phase that the index build
* will resume from after rollback completes is specified by rollbackEndFailPointName. If
* either of these points is in the drain writes phase, documents to insert into the side
- * writes table must be specified by sideWrites. Documents specified by insertsToBeRolledBack
- * are inserted after transitioning to rollback operations and will be rolled back.
+ * writes table must be specified by sideWrites. locksYieldedFailPointName specifies a point
+ * during the index build between rollbackEndFailPointName and rollbackStartFailPointName at
+ * which its locks are yielded. Documents specified by insertsToBeRolledBack are inserted after
+ * transitioning to rollback operations and will be rolled back.
*/
static run(rollbackTest,
dbName,
@@ -18,6 +20,7 @@ const RollbackResumableIndexBuildTest = class {
rollbackStartFailPointData,
rollbackEndFailPointName,
rollbackEndFailPointData,
+ locksYieldedFailPointName,
insertsToBeRolledBack,
sideWrites = []) {
const originalPrimary = rollbackTest.getPrimary();
@@ -29,6 +32,14 @@ const RollbackResumableIndexBuildTest = class {
rollbackTest.awaitLastOpCommitted();
+ // Set internalQueryExecYieldIterations to 0 and maxIndexBuildDrainBatchSize to 1 so that
+ // the index build is guaranteed to yield its locks between the rollback end and start
+ // failpoints.
+ assert.commandWorked(
+ originalPrimary.adminCommand({setParameter: 1, internalQueryExecYieldIterations: 0}));
+ assert.commandWorked(
+ originalPrimary.adminCommand({setParameter: 1, maxIndexBuildDrainBatchSize: 1}));
+
const coll = originalPrimary.getDB(dbName).getCollection(collName);
const indexName = "rollback_resumable_index_build";
@@ -57,32 +68,23 @@ const RollbackResumableIndexBuildTest = class {
assert.commandWorked(coll.insert(insertsToBeRolledBack));
- // Disable the failpoint in a parallel shell so that the primary can step down when the
- // rollback test is transitioning to sync source operations before rollback.
- const awaitDisableFailPointAfterContinuingInBackground = startParallelShell(
- funWithArgs(function(failPointName, buildUUID) {
- // Wait for the index build to be continue in the background.
- checkLog.containsJson(db.getMongo(), 4760400, {
- buildUUID: function(uuid) {
- return uuid["uuid"]["$uuid"] === buildUUID;
- }
- });
-
- // Disable the failpoint so that stepdown can proceed.
- assert.commandWorked(
- db.adminCommand({configureFailPoint: failPointName, mode: "off"}));
- }, rollbackEndFp.failPointName, buildUUID), originalPrimary.port);
+ // Move the index build forward to a point at which its locks are yielded. This allows the
+ // primary to step down during the call to transitionToSyncSourceOperationsBeforeRollback()
+ // below.
+ const locksYieldedFp = configureFailPoint(
+ originalPrimary, locksYieldedFailPointName, {namespace: coll.getFullName()});
+ rollbackEndFp.off();
+ locksYieldedFp.wait();
rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
- awaitDisableFailPointAfterContinuingInBackground();
-
// The index creation will report as having failed due to InterruptedDueToReplStateChange,
// but it is still building in the background.
awaitCreateIndex();
// Wait until the index build reaches the desired starting point so that we can start the
// rollback.
+ locksYieldedFp.off();
rollbackStartFp.wait();
// We ignore the return value here because the node will go into rollback immediately upon
diff --git a/jstests/replsets/libs/secondary_reads_test.js b/jstests/replsets/libs/secondary_reads_test.js
index 1d712fce05a..4840708dba2 100644
--- a/jstests/replsets/libs/secondary_reads_test.js
+++ b/jstests/replsets/libs/secondary_reads_test.js
@@ -14,7 +14,7 @@ function SecondaryReadsTest(name = "secondary_reads_test") {
let primaryDB = primary.getDB(dbName);
let secondary = rst.getSecondary();
let secondaryDB = secondary.getDB(dbName);
- secondaryDB.getMongo().setSlaveOk();
+ secondaryDB.getMongo().setSecondaryOk();
let readers = [];
let signalColl = "signalColl";
@@ -37,7 +37,7 @@ function SecondaryReadsTest(name = "secondary_reads_test") {
this.startSecondaryReaders = function(nReaders, readFn) {
let read = function() {
- db.getMongo().setSlaveOk();
+ db.getMongo().setSecondaryOk();
db = db.getSiblingDB(TestData.dbName);
while (true) {
readFn();
diff --git a/jstests/replsets/maintenance2.js b/jstests/replsets/maintenance2.js
index 2b904346945..c62d6bf17b6 100644
--- a/jstests/replsets/maintenance2.js
+++ b/jstests/replsets/maintenance2.js
@@ -40,7 +40,7 @@ secondaries.forEach(function(secondary) {
assert.eq(stats.myState, 3, "Secondary should be in recovering state.");
print("count should fail in recovering state...");
- secondary.slaveOk = true;
+ secondary.setSecondaryOk();
assert.commandFailed(secondary.getDB("foo").runCommand({count: "foo"}));
// unset maintenance mode when done
diff --git a/jstests/replsets/no_disconnect_on_stepdown.js b/jstests/replsets/no_disconnect_on_stepdown.js
index 68877c6fc64..77a5526c50b 100644
--- a/jstests/replsets/no_disconnect_on_stepdown.js
+++ b/jstests/replsets/no_disconnect_on_stepdown.js
@@ -73,7 +73,7 @@ function runStepDownTest({description, failpoint, operation, errorCode}) {
assert.commandWorked(primaryAdmin.adminCommand({serverStatus: 1})).metrics.repl;
assert.eq(replMetrics.stateTransition.lastStateTransition, "stepDown");
assert.eq(replMetrics.stateTransition.userOperationsKilled, 1);
- assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, 0);
+ assert.eq(replMetrics.network.notPrimaryUnacknowledgedWrites, 0);
// Allow the primary to be re-elected, and wait for it.
assert.commandWorked(primaryAdmin.adminCommand({replSetFreeze: 0}));
diff --git a/jstests/replsets/not_master_unacknowledged_write.js b/jstests/replsets/not_master_unacknowledged_write.js
index a1570de931a..1fc65ddb7ba 100644
--- a/jstests/replsets/not_master_unacknowledged_write.js
+++ b/jstests/replsets/not_master_unacknowledged_write.js
@@ -5,12 +5,12 @@
(function() {
"use strict";
-function getNotMasterUnackWritesCounter() {
+function getNotPrimaryUnackWritesCounter() {
return assert.commandWorked(primaryDB.adminCommand({serverStatus: 1}))
- .metrics.repl.network.notMasterUnacknowledgedWrites;
+ .metrics.repl.network.notPrimaryUnacknowledgedWrites;
}
-const collName = "not_master_unacknowledged_write";
+const collName = "not_primary_unacknowledged_write";
var rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]});
rst.startSet();
@@ -22,8 +22,8 @@ var secondaryDB = secondary.getDB("test");
var primaryColl = primaryDB[collName];
var secondaryColl = secondaryDB[collName];
-// Verify that reading from secondaries does not impact `notMasterUnacknowledgedWrites`.
-const preReadingCounter = getNotMasterUnackWritesCounter();
+// Verify that reading from secondaries does not impact `notPrimaryUnacknowledgedWrites`.
+const preReadingCounter = getNotPrimaryUnackWritesCounter();
jsTestLog("Reading from secondary ...");
[{name: "findOne", fn: () => secondaryColl.findOne()},
{name: "distinct", fn: () => secondaryColl.distinct("item")},
@@ -32,7 +32,7 @@ jsTestLog("Reading from secondary ...");
assert.doesNotThrow(fn);
assert.eq(assert.commandWorked(secondary.getDB("admin").isMaster()).ismaster, false);
});
-const postReadingCounter = getNotMasterUnackWritesCounter();
+const postReadingCounter = getNotPrimaryUnackWritesCounter();
assert.eq(preReadingCounter, postReadingCounter);
jsTestLog("Primary on port " + primary.port + " hangs up on unacknowledged writes");
@@ -71,7 +71,7 @@ var command =
var awaitShell = startParallelShell(command, primary.port);
-let failedUnackWritesBefore = getNotMasterUnackWritesCounter();
+let failedUnackWritesBefore = getNotPrimaryUnackWritesCounter();
jsTestLog("Beginning unacknowledged insert");
primaryColl.insertOne({}, {writeConcern: {w: 0}});
@@ -87,7 +87,7 @@ assert.includes(result.toString(), "network error while attempting to run comman
// Validate the number of unacknowledged writes failed due to step down resulted in network
// disconnection.
-let failedUnackWritesAfter = getNotMasterUnackWritesCounter();
+let failedUnackWritesAfter = getNotPrimaryUnackWritesCounter();
assert.eq(failedUnackWritesAfter, failedUnackWritesBefore + 1);
rst.stopSet();
diff --git a/jstests/replsets/plan_cache_slaveok.js b/jstests/replsets/plan_cache_slaveok.js
index 4ef60d93795..c20decf9eb1 100644
--- a/jstests/replsets/plan_cache_slaveok.js
+++ b/jstests/replsets/plan_cache_slaveok.js
@@ -1,7 +1,7 @@
// Verify that the plan cache and index filter commands can be run on secondaries, but only
-// if slave ok is explicitly set.
+// if secondaryOk is explicitly set.
-var name = "plan_cache_slaveok";
+var name = "plan_cache_secondaryok";
function assertPlanCacheCommandsSucceed(db) {
assert.commandWorked(db.runCommand({planCacheClear: name, query: {a: 1}}));
@@ -50,13 +50,13 @@ assert.eq(1, primary.getDB("test")[name].findOne({a: 1})["a"]);
// Make sure the plan cache commands succeed on the primary.
assertPlanCacheCommandsSucceed(primary.getDB("test"));
-// With slave ok false, the commands should fail on the secondary.
+// With secondaryOk false, the commands should fail on the secondary.
var secondary = replTest.getSecondary();
-secondary.getDB("test").getMongo().setSlaveOk(false);
+secondary.getDB("test").getMongo().setSecondaryOk(false);
assertPlanCacheCommandsFail(secondary.getDB("test"));
-// With slave ok true, the commands should succeed on the secondary.
-secondary.getDB("test").getMongo().setSlaveOk(true);
+// With secondaryOk true, the commands should succeed on the secondary.
+secondary.getDB("test").getMongo().setSecondaryOk();
assertPlanCacheCommandsSucceed(secondary.getDB("test"));
replTest.stopSet();
diff --git a/jstests/replsets/prepare_transaction_read_at_cluster_time.js b/jstests/replsets/prepare_transaction_read_at_cluster_time.js
index 24894823b1a..1e6ae30b5d6 100644
--- a/jstests/replsets/prepare_transaction_read_at_cluster_time.js
+++ b/jstests/replsets/prepare_transaction_read_at_cluster_time.js
@@ -16,7 +16,7 @@ const runDBHashFn = (host, dbName, clusterTime, useSnapshot) => {
const conn = new Mongo(host);
const db = conn.getDB(dbName);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
let cmd;
if (useSnapshot) {
cmd = {dbHash: 1, readConcern: {level: "snapshot", atClusterTime: eval(clusterTime)}};
diff --git a/jstests/replsets/print_secondary_replication_info_unreachable_secondary.js b/jstests/replsets/print_secondary_replication_info_unreachable_secondary.js
new file mode 100644
index 00000000000..4948ac85801
--- /dev/null
+++ b/jstests/replsets/print_secondary_replication_info_unreachable_secondary.js
@@ -0,0 +1,25 @@
+// Tests the output of db.printSecondaryReplicationInfo() for unreachable secondaries.
+
+(function() {
+"use strict";
+const name = "printSecondaryReplicationInfo";
+const replSet = new ReplSetTest({name: name, nodes: 2});
+replSet.startSet();
+replSet.initiateWithHighElectionTimeout();
+
+const primary = replSet.getPrimary();
+primary.getDB('test').foo.insert({a: 1});
+replSet.awaitReplication();
+
+const secondary = replSet.getSecondary();
+replSet.stop(replSet.getNodeId(secondary));
+replSet.waitForState(secondary, ReplSetTest.State.DOWN);
+
+const joinShell =
+ startParallelShell("db.getSiblingDB('admin').printSecondaryReplicationInfo();", primary.port);
+joinShell();
+assert(
+ rawMongoProgramOutput().match("no replication info, yet. State: \\(not reachable/healthy\\)"));
+
+replSet.stopSet();
+})();
diff --git a/jstests/replsets/quiesce_mode.js b/jstests/replsets/quiesce_mode.js
index ae47952cbf8..52c39f3edbd 100644
--- a/jstests/replsets/quiesce_mode.js
+++ b/jstests/replsets/quiesce_mode.js
@@ -48,7 +48,7 @@ function runAwaitableIsMaster(topologyVersionField) {
}
function runFind() {
- db.getMongo().setSlaveOk();
+ db.getMongo().setSecondaryOk();
assert.eq(4, db.getSiblingDB("test").coll.find().itcount());
}
diff --git a/jstests/replsets/read_committed_after_rollback.js b/jstests/replsets/read_committed_after_rollback.js
index 41bd1d29268..a7e46e15e86 100644
--- a/jstests/replsets/read_committed_after_rollback.js
+++ b/jstests/replsets/read_committed_after_rollback.js
@@ -75,7 +75,7 @@ assert.eq(doDirtyRead(oldPrimaryColl), 'INVALID');
assert.eq(doCommittedRead(oldPrimaryColl), 'old');
// Change the partitioning so that oldPrimary is isolated, and newPrimary can be elected.
-oldPrimary.setSlaveOk();
+oldPrimary.setSecondaryOk();
oldPrimary.disconnect(arbiters);
newPrimary.reconnect(arbiters);
assert.soon(() => newPrimary.adminCommand('isMaster').ismaster, '', 60 * 1000);
diff --git a/jstests/replsets/read_committed_no_snapshots.js b/jstests/replsets/read_committed_no_snapshots.js
index a0fe52cd565..280b0de7d49 100644
--- a/jstests/replsets/read_committed_no_snapshots.js
+++ b/jstests/replsets/read_committed_no_snapshots.js
@@ -38,9 +38,9 @@ replTest.initiateWithAnyNodeAsPrimary(
var primary = replTest.getPrimary();
var secondaries = replTest.getSecondaries();
var healthySecondary = secondaries[0];
-healthySecondary.setSlaveOk();
+healthySecondary.setSecondaryOk();
var noSnapshotSecondary = secondaries[1];
-noSnapshotSecondary.setSlaveOk();
+noSnapshotSecondary.setSecondaryOk();
// Do a write, wait for it to replicate, and ensure it is visible.
var res = primary.getDB(name).runCommandWithMetadata( //
diff --git a/jstests/replsets/read_operations_during_rollback.js b/jstests/replsets/read_operations_during_rollback.js
index d743c7b8303..f91ba15d31b 100644
--- a/jstests/replsets/read_operations_during_rollback.js
+++ b/jstests/replsets/read_operations_during_rollback.js
@@ -24,7 +24,7 @@ setFailPoint(rollbackNode, "rollbackHangAfterTransitionToRollback");
setFailPoint(rollbackNode, "GetMoreHangBeforeReadLock");
const joinGetMoreThread = startParallelShell(() => {
- db.getMongo().setSlaveOk();
+ db.getMongo().setSecondaryOk();
const cursorID = assert.commandWorked(db.runCommand({"find": "coll", batchSize: 0})).cursor.id;
// Make sure an outstanding read operation gets killed during rollback even though the read
// was started before rollback. Outstanding read operations are killed during rollback and
diff --git a/jstests/replsets/read_operations_during_step_down.js b/jstests/replsets/read_operations_during_step_down.js
index 96fa2651237..4909c7a9177 100644
--- a/jstests/replsets/read_operations_during_step_down.js
+++ b/jstests/replsets/read_operations_during_step_down.js
@@ -113,7 +113,7 @@ assert.eq(replMetrics.stateTransition.lastStateTransition, "stepDown");
assert.eq(replMetrics.stateTransition.userOperationsKilled, 0);
// Should account for find and getmore commands issued before step down.
assert.gte(replMetrics.stateTransition.userOperationsRunning, 2);
-assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, 0);
+assert.eq(replMetrics.network.notPrimaryUnacknowledgedWrites, 0);
rst.stopSet();
})();
diff --git a/jstests/replsets/read_operations_during_step_up.js b/jstests/replsets/read_operations_during_step_up.js
index 91d202659a4..86ea4a3c0d6 100644
--- a/jstests/replsets/read_operations_during_step_up.js
+++ b/jstests/replsets/read_operations_during_step_up.js
@@ -36,10 +36,10 @@ assert.commandWorked(
        primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
rst.awaitReplication();
-// It's possible for notMasterUnacknowledgedWrites to be non-zero because of mirrored reads during
+// It's possible for notPrimaryUnacknowledgedWrites to be non-zero because of mirrored reads during
// initial sync.
let replMetrics = assert.commandWorked(secondaryAdmin.adminCommand({serverStatus: 1})).metrics.repl;
-const startingNumNotMasterErrors = replMetrics.network.notMasterUnacknowledgedWrites;
+const startingNumNotMasterErrors = replMetrics.network.notPrimaryUnacknowledgedWrites;
// Open a cursor on secondary.
const cursorIdToBeReadAfterStepUp =
@@ -49,7 +49,7 @@ jsTestLog("2. Start blocking getMore cmd before step up");
const joinGetMoreThread = startParallelShell(() => {
// Open another cursor on secondary before step up.
secondaryDB = db.getSiblingDB(TestData.dbName);
- secondaryDB.getMongo().setSlaveOk(true);
+ secondaryDB.getMongo().setSecondaryOk();
const cursorIdToBeReadDuringStepUp =
assert.commandWorked(secondaryDB.runCommand({"find": TestData.collName, batchSize: 0}))
@@ -71,7 +71,7 @@ waitForCurOpByFailPoint(
jsTestLog("2. Start blocking find cmd before step up");
const joinFindThread = startParallelShell(() => {
secondaryDB = db.getSiblingDB(TestData.dbName);
- secondaryDB.getMongo().setSlaveOk(true);
+ secondaryDB.getMongo().setSecondaryOk();
// Enable the fail point for find cmd.
assert.commandWorked(
@@ -127,7 +127,7 @@ assert.eq(replMetrics.stateTransition.lastStateTransition, "stepUp");
assert.eq(replMetrics.stateTransition.userOperationsKilled, 0);
// Should account for find and getmore commands issued before step up.
assert.gte(replMetrics.stateTransition.userOperationsRunning, 2);
-assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, startingNumNotMasterErrors);
+assert.eq(replMetrics.network.notPrimaryUnacknowledgedWrites, startingNumNotMasterErrors);
rst.stopSet();
})();
diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js
index 6257f066eab..b72dd806295 100644
--- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js
+++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js
@@ -129,7 +129,7 @@ replTest.awaitSecondaryNodes();
jsTestLog("Initial sync completed");
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryColl = secondary.getDB(dbName).getCollection(collName);
// Make sure that while reading from the node that went through initial sync, we can't read
diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js
index 3574010f636..38f7f431ca2 100644
--- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js
+++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js
@@ -106,7 +106,7 @@ replTest.awaitSecondaryNodes();
jsTestLog("Initial sync completed");
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryColl = secondary.getDB(dbName).getCollection(collName);
// Make sure that while reading from the node that went through initial sync, we can't read
diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js
index dbc2c05dfff..d5b0eb39898 100644
--- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js
+++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js
@@ -80,7 +80,7 @@ replTest.awaitSecondaryNodes();
jsTestLog("Initial sync completed");
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryColl = secondary.getDB(dbName).getCollection(collName);
// Make sure that while reading from the node that went through initial sync, we can't read
diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js
index 85e4b4e9874..f5100c39e35 100644
--- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js
+++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js
@@ -107,7 +107,7 @@ PrepareHelpers.awaitMajorityCommitted(replTest, prepareTimestamp);
jsTestLog("Initial sync completed");
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
const secondaryColl = secondary.getDB(dbName).getCollection(collName);
jsTestLog("Checking that the transaction is properly prepared");
diff --git a/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js b/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js
index 31845da9629..56f40a8793f 100644
--- a/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js
+++ b/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js
@@ -82,7 +82,7 @@ PrepareHelpers.awaitMajorityCommitted(replTest, prepareTimestamp2);
// Wait for the node to complete recovery before trying to read from it.
replTest.awaitSecondaryNodes();
-secondary.setSlaveOk();
+secondary.setSecondaryOk();
jsTestLog("Checking that the first transaction is properly prepared");
diff --git a/jstests/replsets/rename_collection_temp.js b/jstests/replsets/rename_collection_temp.js
index dc6ffd6f84a..1bf716784d6 100644
--- a/jstests/replsets/rename_collection_temp.js
+++ b/jstests/replsets/rename_collection_temp.js
@@ -54,7 +54,7 @@ replTest.awaitReplication();
var secondary = replTest.getSecondary();
var secondaryFoo = secondary.getDB("foo");
-secondaryFoo.permanentColl.setSlaveOk(true);
+secondaryFoo.permanentColl.setSecondaryOk();
// Get the information on the secondary to ensure it was replicated correctly.
checkCollectionTemp(secondaryFoo, "permanentColl", false);
diff --git a/jstests/replsets/replset1.js b/jstests/replsets/replset1.js
index 8225422338d..35ceb70121f 100644
--- a/jstests/replsets/replset1.js
+++ b/jstests/replsets/replset1.js
@@ -104,7 +104,7 @@ var doTest = function(signal) {
var secondaries = replTest.getSecondaries();
assert(secondaries.length == 2, "Expected 2 secondaries but length was " + secondaries.length);
secondaries.forEach(function(secondary) {
- secondary.setSlaveOk();
+ secondary.setSecondaryOk();
var count = secondary.getDB("bar").runCommand({count: "bar"});
printjson(count);
assert.eq(1000, count.n, "secondary count wrong: " + secondary);
@@ -118,7 +118,7 @@ var doTest = function(signal) {
var t = db.foo;
var ts = secondaries.map(function(z) {
- z.setSlaveOk();
+ z.setSecondaryOk();
return z.getDB("foo").foo;
});
diff --git a/jstests/replsets/replset2.js b/jstests/replsets/replset2.js
index 3c9b9613eed..38134794dfb 100644
--- a/jstests/replsets/replset2.js
+++ b/jstests/replsets/replset2.js
@@ -29,7 +29,7 @@ doTest = function(signal) {
var secondaries = replTest.getSecondaries();
secondaries.forEach(function(secondary) {
- secondary.setSlaveOk();
+ secondary.setSecondaryOk();
});
// Test write concern with multiple inserts.
diff --git a/jstests/replsets/replset5.js b/jstests/replsets/replset5.js
index 5488d8a9cd0..9ea1424a426 100644
--- a/jstests/replsets/replset5.js
+++ b/jstests/replsets/replset5.js
@@ -55,8 +55,8 @@ if (wcError != null) {
}
var secondaries = replTest.getSecondaries();
-secondaries[0].setSlaveOk();
-secondaries[1].setSlaveOk();
+secondaries[0].setSecondaryOk();
+secondaries[1].setSecondaryOk();
var secondary0Count = secondaries[0].getDB(testDB).foo.find().itcount();
assert(secondary0Count == docNum,
diff --git a/jstests/replsets/replset6.js b/jstests/replsets/replset6.js
index 40998d7f4f2..fd33175d823 100644
--- a/jstests/replsets/replset6.js
+++ b/jstests/replsets/replset6.js
@@ -10,7 +10,7 @@ var p = rt.getPrimary();
rt.awaitSecondaryNodes();
var secondaries = rt.getSecondaries();
s = secondaries[0];
-s.setSlaveOk();
+s.setSecondaryOk();
admin = p.getDB("admin");
debug = function(foo) {}; // print( foo ); }
diff --git a/jstests/replsets/resync_majority_member.js b/jstests/replsets/resync_majority_member.js
index 4ab48f043ea..df0d233b4e3 100644
--- a/jstests/replsets/resync_majority_member.js
+++ b/jstests/replsets/resync_majority_member.js
@@ -99,8 +99,8 @@ assert.soon(() => {
});
// Observe that the old write does not exist anywhere in the set.
-syncSource.setSlaveOk();
-resyncNode.setSlaveOk();
+syncSource.setSecondaryOk();
+resyncNode.setSecondaryOk();
assert.eq(0, syncSource.getDB(dbName)[collName].find(disappearingDoc).itcount());
assert.eq(0, resyncNode.getDB(dbName)[collName].find(disappearingDoc).itcount());
diff --git a/jstests/replsets/rollback_auth.js b/jstests/replsets/rollback_auth.js
index 0aa7995bdc2..372f84ce645 100644
--- a/jstests/replsets/rollback_auth.js
+++ b/jstests/replsets/rollback_auth.js
@@ -47,8 +47,8 @@ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
var master = replTest.getPrimary();
var a_conn = conns[0];
var b_conn = conns[1];
-a_conn.setSlaveOk();
-b_conn.setSlaveOk();
+a_conn.setSecondaryOk();
+b_conn.setSecondaryOk();
var A = a_conn.getDB("admin");
var B = b_conn.getDB("admin");
var a = a_conn.getDB("test");
diff --git a/jstests/replsets/rollback_creates_rollback_directory.js b/jstests/replsets/rollback_creates_rollback_directory.js
index 3cb47eb65a2..db795769bdf 100644
--- a/jstests/replsets/rollback_creates_rollback_directory.js
+++ b/jstests/replsets/rollback_creates_rollback_directory.js
@@ -31,8 +31,8 @@ function runRollbackDirectoryTest(shouldCreateRollbackFiles) {
var master = replTest.getPrimary();
var a_conn = conns[0];
var b_conn = conns[1];
- a_conn.setSlaveOk();
- b_conn.setSlaveOk();
+ a_conn.setSecondaryOk();
+ b_conn.setSecondaryOk();
var A = a_conn.getDB("test");
var B = b_conn.getDB("test");
var Apath = replTest.getDbPath(a_conn) + '/';
diff --git a/jstests/replsets/rollback_crud_op_sequences.js b/jstests/replsets/rollback_crud_op_sequences.js
index a2e89332141..cd42c303a96 100644
--- a/jstests/replsets/rollback_crud_op_sequences.js
+++ b/jstests/replsets/rollback_crud_op_sequences.js
@@ -45,10 +45,10 @@ replTest.initiate({
replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
var master = replTest.getPrimary();
var a_conn = conns[0];
-a_conn.setSlaveOk();
+a_conn.setSecondaryOk();
var A = a_conn.getDB("admin");
var b_conn = conns[1];
-b_conn.setSlaveOk();
+b_conn.setSecondaryOk();
var B = b_conn.getDB("admin");
assert.eq(master, conns[0], "conns[0] assumed to be master");
assert.eq(a_conn, master);
diff --git a/jstests/replsets/rollback_ddl_op_sequences.js b/jstests/replsets/rollback_ddl_op_sequences.js
index aff9cedaa39..62b2fb9cae2 100644
--- a/jstests/replsets/rollback_ddl_op_sequences.js
+++ b/jstests/replsets/rollback_ddl_op_sequences.js
@@ -54,10 +54,10 @@ replTest.initiate({
replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
var master = replTest.getPrimary();
var a_conn = conns[0];
-a_conn.setSlaveOk();
+a_conn.setSecondaryOk();
var A = a_conn.getDB("admin");
var b_conn = conns[1];
-b_conn.setSlaveOk();
+b_conn.setSecondaryOk();
var B = b_conn.getDB("admin");
assert.eq(master, conns[0], "conns[0] assumed to be master");
assert.eq(a_conn, master);
diff --git a/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js b/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js
index 81631163f06..5b75f1b7b2c 100644
--- a/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js
+++ b/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js
@@ -13,12 +13,6 @@
load('jstests/replsets/libs/rollback_resumable_index_build.js');
-// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests.
-if (true) {
- jsTestLog('Skipping test.');
- return;
-}
-
const dbName = "test";
const rollbackStartFailPointName = "hangIndexBuildDuringBulkLoadPhase";
const insertsToBeRolledBack = [{a: 4}, {a: 5}];
@@ -35,8 +29,9 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
{a: 1},
rollbackStartFailPointName,
{iteration: 1},
- "hangAfterSettingUpIndexBuildUnlocked",
+ "hangAfterSettingUpIndexBuild",
{},
+ "setYieldAllLocksHang",
insertsToBeRolledBack);
// Rollback to the collection scan phase.
@@ -47,7 +42,8 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
rollbackStartFailPointName,
{iteration: 1},
"hangIndexBuildDuringCollectionScanPhaseBeforeInsertion",
- {fieldsToMatch: {a: 2}},
+ {iteration: 1},
+ "setYieldAllLocksHang",
insertsToBeRolledBack);
rollbackTest.stop();
diff --git a/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js b/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js
index 23807d85383..33abffdc2e3 100644
--- a/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js
+++ b/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js
@@ -13,12 +13,6 @@
load('jstests/replsets/libs/rollback_resumable_index_build.js');
-// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests.
-if (true) {
- jsTestLog('Skipping test.');
- return;
-}
-
const dbName = "test";
const rollbackStartFailPointName = "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion";
const insertsToBeRolledBack = [{a: 6}, {a: 7}];
@@ -34,9 +28,10 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
coll.getName(),
{a: 1},
rollbackStartFailPointName,
- {fieldsToMatch: {a: 2}},
- "hangAfterSettingUpIndexBuildUnlocked",
+ {iteration: 3},
+ "hangAfterSettingUpIndexBuild",
{},
+ "setYieldAllLocksHang",
insertsToBeRolledBack);
// Rollback to earlier in the collection scan phase.
@@ -45,9 +40,10 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
coll.getName(),
{a: 1},
rollbackStartFailPointName,
- {iteration: 4},
+ {iteration: 3},
"hangIndexBuildDuringCollectionScanPhaseAfterInsertion",
- {iteration: 2},
+ {iteration: 1},
+ "setYieldAllLocksHang",
insertsToBeRolledBack);
rollbackTest.stop();
diff --git a/jstests/replsets/rollback_resumable_index_build_complete.js b/jstests/replsets/rollback_resumable_index_build_complete.js
index 51f5988f4c9..cb74fc96345 100644
--- a/jstests/replsets/rollback_resumable_index_build_complete.js
+++ b/jstests/replsets/rollback_resumable_index_build_complete.js
@@ -13,12 +13,6 @@
load('jstests/replsets/libs/rollback_resumable_index_build.js');
-// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests.
-if (true) {
- jsTestLog('Skipping test.');
- return;
-}
-
const dbName = "test";
const insertsToBeRolledBack = [{a: 7}, {a: 8}];
@@ -32,7 +26,7 @@ RollbackResumableIndexBuildTest.runIndexBuildComplete(rollbackTest,
dbName,
coll.getName(),
{a: 1},
- "hangAfterSettingUpIndexBuildUnlocked",
+ "hangAfterSettingUpIndexBuild",
{},
insertsToBeRolledBack);
diff --git a/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js b/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js
index 4e025596884..922ee451e01 100644
--- a/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js
+++ b/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js
@@ -13,15 +13,9 @@
load('jstests/replsets/libs/rollback_resumable_index_build.js');
-// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests.
-if (true) {
- jsTestLog('Skipping test.');
- return;
-}
-
const dbName = "test";
const rollbackStartFailPointName = "hangIndexBuildDuringDrainWritesPhase";
-const insertsToBeRolledBack = [{a: 13}, {a: 14}];
+const insertsToBeRolledBack = [{a: 18}, {a: 19}];
const rollbackTest = new RollbackTest(jsTestName());
const coll = rollbackTest.getPrimary().getDB(dbName).getCollection(jsTestName());
@@ -34,11 +28,12 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
coll.getName(),
{a: 1},
rollbackStartFailPointName,
- {iteration: 0},
- "hangAfterSettingUpIndexBuildUnlocked",
+ {iteration: 1},
+ "hangAfterSettingUpIndexBuild",
{},
+ "hangDuringIndexBuildDrainYield",
insertsToBeRolledBack,
- [{a: 4}, {a: 5}]);
+ [{a: 4}, {a: 5}, {a: 6}]);
// Rollback to the collection scan phase.
RollbackResumableIndexBuildTest.run(rollbackTest,
@@ -46,11 +41,12 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
coll.getName(),
{a: 1},
rollbackStartFailPointName,
- {iteration: 0},
+ {iteration: 1},
"hangIndexBuildDuringCollectionScanPhaseBeforeInsertion",
- {fieldsToMatch: {a: 2}},
+ {iteration: 1},
+ "hangDuringIndexBuildDrainYield",
insertsToBeRolledBack,
- [{a: 6}, {a: 7}]);
+ [{a: 7}, {a: 8}, {a: 9}]);
// Rollback to the bulk load phase.
RollbackResumableIndexBuildTest.run(rollbackTest,
@@ -58,26 +54,25 @@ RollbackResumableIndexBuildTest.run(rollbackTest,
coll.getName(),
{a: 1},
rollbackStartFailPointName,
- {iteration: 0},
+ {iteration: 1},
"hangIndexBuildDuringBulkLoadPhase",
{iteration: 1},
+ "hangDuringIndexBuildDrainYield",
insertsToBeRolledBack,
- [{a: 8}, {a: 9}]);
+ [{a: 10}, {a: 11}, {a: 12}]);
-// Rollback to earlier in the drain writes phase. We set maxIndexBuildDrainBatchSize to 1 so that
-// the primary can step down between iterations.
-assert.commandWorked(
- rollbackTest.getPrimary().adminCommand({setParameter: 1, maxIndexBuildDrainBatchSize: 1}));
+// Rollback to earlier in the drain writes phase.
RollbackResumableIndexBuildTest.run(rollbackTest,
dbName,
coll.getName(),
{a: 1},
rollbackStartFailPointName,
- {iteration: 2},
+ {iteration: 3},
"hangIndexBuildDuringDrainWritesPhaseSecond",
- {iteration: 0},
+ {iteration: 1},
+ "hangDuringIndexBuildDrainYield",
insertsToBeRolledBack,
- [{a: 10}, {a: 11}, {a: 12}]);
+ [{a: 13}, {a: 14}, {a: 15}, {a: 16}, {a: 17}]);
rollbackTest.stop();
})();
diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js
index 65567450c96..b53a5030d42 100644
--- a/jstests/replsets/rslib.js
+++ b/jstests/replsets/rslib.js
@@ -147,7 +147,7 @@ reconnect = function(conn) {
};
getLatestOp = function(server) {
- server.getDB("admin").getMongo().setSlaveOk();
+ server.getDB("admin").getMongo().setSecondaryOk();
var log = server.getDB("local")['oplog.rs'];
var cursor = log.find({}).sort({'$natural': -1}).limit(1);
if (cursor.hasNext()) {
@@ -157,7 +157,7 @@ getLatestOp = function(server) {
};
getLeastRecentOp = function({server, readConcern}) {
- server.getDB("admin").getMongo().setSlaveOk();
+ server.getDB("admin").getMongo().setSecondaryOk();
const oplog = server.getDB("local").oplog.rs;
const cursor = oplog.find().sort({$natural: 1}).limit(1).readConcern(readConcern);
if (cursor.hasNext()) {
diff --git a/jstests/replsets/server8070.js b/jstests/replsets/server8070.js
index 876a768fd7a..5bc4fd8f60d 100644
--- a/jstests/replsets/server8070.js
+++ b/jstests/replsets/server8070.js
@@ -36,8 +36,8 @@ replSet.initiate({
// set up common points of access
var master = replSet.getPrimary();
var primary = master.getDB("foo");
-replSet.nodes[1].setSlaveOk();
-replSet.nodes[2].setSlaveOk();
+replSet.nodes[1].setSecondaryOk();
+replSet.nodes[2].setSecondaryOk();
var member2 = replSet.nodes[1].getDB("admin");
var member3 = replSet.nodes[2].getDB("admin");
diff --git a/jstests/replsets/slavedelay3.js b/jstests/replsets/slavedelay3.js
index 9d09fa4486c..1d12d22912b 100644
--- a/jstests/replsets/slavedelay3.js
+++ b/jstests/replsets/slavedelay3.js
@@ -17,7 +17,7 @@ var secondaryConns = replTest.getSecondaries();
var secondaries = [];
for (var i in secondaryConns) {
var d = secondaryConns[i].getDB(name);
- d.getMongo().setSlaveOk();
+ d.getMongo().setSecondaryOk();
secondaries.push(d);
}
diff --git a/jstests/replsets/slaveok_read_pref.js b/jstests/replsets/slaveok_read_pref.js
index 9fc11600e8d..e35f36e9c97 100644
--- a/jstests/replsets/slaveok_read_pref.js
+++ b/jstests/replsets/slaveok_read_pref.js
@@ -1,5 +1,5 @@
-// Test that slaveOk is implicitly allowed for queries on a secondary with a read preference other
-// than 'primary', and that queries which do have 'primary' read preference fail.
+// Test that secondaryOk is implicitly allowed for queries on a secondary with a read preference
+// other than 'primary', and that queries which do have 'primary' read preference fail.
(function() {
"use strict";
@@ -28,18 +28,18 @@ const secDB = rst.getSecondary().getDB(jsTestName());
for (let readMode of ["commands", "legacy"]) {
for (let readPref of readPrefs) {
- for (let slaveOk of [true, false]) {
- const testType = {readMode: readMode, readPref: readPref, slaveOk: slaveOk};
+ for (let secondaryOk of [true, false]) {
+ const testType = {readMode: readMode, readPref: readPref, secondaryOk: secondaryOk};
secDB.getMongo().forceReadMode(readMode);
- secDB.getMongo().setSlaveOk(slaveOk);
+ secDB.getMongo().setSecondaryOk(secondaryOk);
const cursor = (readPref ? secDB.test.find().readPref(readPref) : secDB.test.find());
- if (readPref === "primary" || (!readPref && !slaveOk)) {
+ if (readPref === "primary" || (!readPref && !secondaryOk)) {
// Attempting to run the query throws an error of type NotPrimaryNoSecondaryOk.
- const slaveOkErr = assert.throws(() => cursor.itcount(), [], tojson(testType));
- assert.commandFailedWithCode(slaveOkErr, ErrorCodes.NotPrimaryNoSecondaryOk);
+ const secondaryOkErr = assert.throws(() => cursor.itcount(), [], tojson(testType));
+ assert.commandFailedWithCode(secondaryOkErr, ErrorCodes.NotPrimaryNoSecondaryOk);
} else {
// Succeeds for all non-primary readPrefs, and for no readPref iff slaveOk.
const docCount = assert.doesNotThrow(() => cursor.itcount(), [], tojson(testType));
@@ -51,7 +51,7 @@ for (let readMode of ["commands", "legacy"]) {
function assertNotPrimaryNoSecondaryOk(func) {
secDB.getMongo().forceReadMode("commands");
- secDB.getMongo().setSlaveOk(false);
+ secDB.getMongo().setSecondaryOk(false);
secDB.getMongo().setReadPref("primary");
const res = assert.throws(func);
assert.commandFailedWithCode(res, ErrorCodes.NotPrimaryNoSecondaryOk);
@@ -59,7 +59,7 @@ function assertNotPrimaryNoSecondaryOk(func) {
// Test that agg with $out/$merge and non-inline mapReduce fail with 'NotPrimaryNoSecondaryOk' when
// directed at a secondary with "primary" read preference.
-const secondaryColl = secDB.slaveok_read_pref;
+const secondaryColl = secDB.secondaryok_read_pref;
assertNotPrimaryNoSecondaryOk(() => secondaryColl.aggregate([{$out: "target"}]).itcount());
assertNotPrimaryNoSecondaryOk(
() =>
diff --git a/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js b/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js
index 690151796b3..ec537f873fd 100644
--- a/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js
+++ b/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js
@@ -35,7 +35,7 @@ rst.awaitSecondaryNodes();
// Get the new secondary connection.
secondary = rst.getSecondary();
-secondary.setSlaveOk(true);
+secondary.setSecondaryOk();
const secondaryAdminDb = secondary.getDB("admin");
// Assert that the FCV document was cloned through initial sync on the secondary.
diff --git a/jstests/replsets/step_down_on_secondary.js b/jstests/replsets/step_down_on_secondary.js
index 64fcf73c3a2..5e8933d9017 100644
--- a/jstests/replsets/step_down_on_secondary.js
+++ b/jstests/replsets/step_down_on_secondary.js
@@ -93,7 +93,7 @@ jsTestLog("Do a read that hits a prepare conflict on the old primary");
const wTPrintPrepareConflictLogFailPoint = configureFailPoint(primary, "WTPrintPrepareConflictLog");
const joinReadThread = startParallelShell(() => {
- db.getMongo().setSlaveOk(true);
+ db.getMongo().setSecondaryOk();
oldPrimaryDB = db.getSiblingDB(TestData.dbName);
assert.commandFailedWithCode(oldPrimaryDB.runCommand({
diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js
index 7f21efef1c6..f626ce5d5b9 100644
--- a/jstests/replsets/tenant_migration_donor_state_machine.js
+++ b/jstests/replsets/tenant_migration_donor_state_machine.js
@@ -154,9 +154,6 @@ configDonorsColl.createIndex({expireAt: 1}, {expireAfterSeconds: 0});
jsTest.log("Test the case where the migration aborts");
const migrationId = UUID();
- let configDonorsColl = donorPrimary.getCollection(kConfigDonorsNS);
- configDonorsColl.createIndex({expireAt: 1}, {expireAfterSeconds: 0});
-
let abortFp = configureFailPoint(donorPrimary, "abortTenantMigrationAfterBlockingStarts");
assert.commandFailedWithCode(donorPrimary.adminCommand({
donorStartMigration: 1,
@@ -187,6 +184,29 @@ configDonorsColl.createIndex({expireAt: 1}, {expireAfterSeconds: 0});
testDonorForgetMigration(donorRst, recipientRst, migrationId, kDBPrefix);
})();
+// Drop the TTL index to make sure that the migration state is still available when the
+// donorForgetMigration command is retried.
+configDonorsColl.dropIndex({expireAt: 1});
+
+(() => {
+ jsTest.log("Test that donorForgetMigration can be run multiple times");
+ const migrationId = UUID();
+
+ assert.commandWorked(donorPrimary.adminCommand({
+ donorStartMigration: 1,
+ migrationId: migrationId,
+ recipientConnectionString: kRecipientConnString,
+ databasePrefix: kDBPrefix,
+ readPreference: {mode: "primary"}
+ }));
+
+ assert.commandWorked(
+ donorPrimary.adminCommand({donorForgetMigration: 1, migrationId: migrationId}));
+
+ assert.commandWorked(
+ donorPrimary.adminCommand({donorForgetMigration: 1, migrationId: migrationId}));
+})();
+
donorRst.stopSet();
recipientRst.stopSet();
})();
diff --git a/jstests/replsets/transactions_only_allowed_on_primaries.js b/jstests/replsets/transactions_only_allowed_on_primaries.js
index 7b71cf3eb67..e987eda0205 100644
--- a/jstests/replsets/transactions_only_allowed_on_primaries.js
+++ b/jstests/replsets/transactions_only_allowed_on_primaries.js
@@ -27,8 +27,8 @@ replTest.initiate(config);
const primary = replTest.getPrimary();
const secondary = replTest.getSecondary();
-// Set slaveOk=true so that normal read commands would be allowed on the secondary.
-secondary.setSlaveOk(true);
+// Set secondaryOk=true so that normal read commands would be allowed on the secondary.
+secondary.setSecondaryOk();
// Create a test collection that we can run commands against.
const primaryDB = primary.getDB(dbName);
diff --git a/jstests/sharding/agg_mongos_slaveok.js b/jstests/sharding/agg_mongos_slaveok.js
index 287902092bc..01fb4286429 100644
--- a/jstests/sharding/agg_mongos_slaveok.js
+++ b/jstests/sharding/agg_mongos_slaveok.js
@@ -1,5 +1,5 @@
/**
- * Tests aggregate command against mongos with slaveOk. For more tests on read preference,
+ * Tests aggregate command against mongos with secondaryOk. For more tests on read preference,
* please refer to jstests/sharding/read_pref_cmd.js.
* @tags: [
* requires_replication,
@@ -21,12 +21,12 @@ var doTest = function(st, doSharded) {
}
testDB.user.insert({x: 10}, {writeConcern: {w: NODES}});
- testDB.setSlaveOk(true);
+ testDB.setSecondaryOk();
var secNode = st.rs0.getSecondary();
secNode.getDB('test').setProfilingLevel(2);
- // wait for mongos to recognize that the slave is up
+ // wait for mongos to recognize that the secondary is up
awaitRSClientHosts(st.s, secNode, {ok: true});
var res = testDB.runCommand({aggregate: 'user', pipeline: [{$project: {x: 1}}], cursor: {}});
diff --git a/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js b/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js
index b0bd0f59e8c..a11f8dbc694 100644
--- a/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js
+++ b/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js
@@ -39,7 +39,7 @@ jsTest.log('Config nodes up: 1 of 3, shard nodes up: 1 of 2: ' +
'Only queries will work (no shard primary)');
st.rs0.stop(0);
st.restartMongos(0);
-st.s0.setSlaveOk(true);
+st.s0.setSecondaryOk();
assert.eq([{_id: 0, count: 3}], st.s0.getDB('TestDB').TestColl.find().toArray());
jsTest.log('Config nodes up: 1 of 3, shard nodes up: 0 of 2: ' +
diff --git a/jstests/sharding/auth_repl.js b/jstests/sharding/auth_repl.js
index cd89c91f136..b806090fc3a 100644
--- a/jstests/sharding/auth_repl.js
+++ b/jstests/sharding/auth_repl.js
@@ -19,7 +19,7 @@ var testColl = testDB.user;
// before setting up authentication
assert.commandWorked(adminDB.runCommand({replSetGetStatus: 1}));
-conn.setSlaveOk();
+conn.setSecondaryOk();
assert.commandWorked(adminDB.runCommand({replSetGetStatus: 1}));
// Add admin user using direct connection to primary to simulate connection from remote host
@@ -38,19 +38,19 @@ assert.eq(1, testDB.auth('a', 'a'));
jsTest.log('Sending an authorized query that should be ok');
assert.commandWorked(testColl.insert({x: 1}, {writeConcern: {w: nodeCount}}));
-conn.setSlaveOk(true);
+conn.setSecondaryOk();
doc = testColl.findOne();
assert(doc != null);
doc = testColl.find().readPref('secondary').next();
assert(doc != null);
-conn.setSlaveOk(false);
+conn.setSecondaryOk(false);
doc = testColl.findOne();
assert(doc != null);
var queryToPriShouldFail = function() {
- conn.setSlaveOk(false);
+ conn.setSecondaryOk(false);
assert.throws(function() {
testColl.findOne();
@@ -63,7 +63,7 @@ var queryToPriShouldFail = function() {
};
var queryToSecShouldFail = function() {
- conn.setSlaveOk(true);
+ conn.setSecondaryOk();
assert.throws(function() {
testColl.findOne();
@@ -104,7 +104,7 @@ queryToPriShouldFail();
assert.eq(1, testDB.auth('a', 'a'));
// Find out the current cached secondary in the repl connection
-conn.setSlaveOk(true);
+conn.setSecondaryOk();
var serverInfo = testColl.find().readPref('secondary').explain().serverInfo;
var secNodeIdx = -1;
var secPortStr = serverInfo.port.toString();
diff --git a/jstests/sharding/auth_slaveok_routing.js b/jstests/sharding/auth_slaveok_routing.js
index 8eff7833c9b..1e573fc7c9e 100644
--- a/jstests/sharding/auth_slaveok_routing.js
+++ b/jstests/sharding/auth_slaveok_routing.js
@@ -1,5 +1,5 @@
/**
- * This tests whether slaveOk reads are properly routed through mongos in
+ * This tests whether secondaryOk reads are properly routed through mongos in
* an authenticated environment. This test also includes restarting the
* entire set, then querying afterwards.
*
@@ -59,11 +59,11 @@ priAdminDB.createUser({user: 'user', pwd: 'password', roles: jsTest.adminUserRol
{w: 3, wtimeout: 30000});
coll.drop();
-coll.setSlaveOk(true);
+coll.setSecondaryOk();
/* Secondaries should be up here, but they can still be in RECOVERY
* state, which will make the ReplicaSetMonitor mark them as
- * ok = false and not eligible for slaveOk queries.
+ * ok = false and not eligible for secondaryOk queries.
*/
awaitRSClientHosts(mongos, replTest.getSecondaries(), {ok: true, secondary: true});
@@ -90,7 +90,7 @@ for (var n = 0; n < nodeCount; n++) {
replTest.awaitSecondaryNodes();
-coll.setSlaveOk(true);
+coll.setSecondaryOk();
/* replSetMonitor does not refresh the nodes information when getting secondaries.
* A node that is previously labeled as secondary can now be a primary, so we
diff --git a/jstests/sharding/autodiscover_config_rs_from_secondary.js b/jstests/sharding/autodiscover_config_rs_from_secondary.js
index 9d9bd4adbd5..cc6ca3c11ae 100644
--- a/jstests/sharding/autodiscover_config_rs_from_secondary.js
+++ b/jstests/sharding/autodiscover_config_rs_from_secondary.js
@@ -53,7 +53,7 @@ var mongos = MongoRunner.runMongos({configdb: seedList});
rst.stop(1);
var admin = mongos.getDB('admin');
-mongos.setSlaveOk(true);
+mongos.setSecondaryOk();
assert.eq(1, admin.foo.findOne().a);
MongoRunner.stopMongos(mongos);
rst.stopSet();
diff --git a/jstests/sharding/balance_repl.js b/jstests/sharding/balance_repl.js
index 83c92ff37b1..fb501c979cb 100644
--- a/jstests/sharding/balance_repl.js
+++ b/jstests/sharding/balance_repl.js
@@ -44,7 +44,7 @@ var collPrimary = (new Mongo(s.s0.host)).getDB('TestDB').TestColl;
assert.eq(2100, collPrimary.find().itcount());
var collSlaveOk = (new Mongo(s.s0.host)).getDB('TestDB').TestColl;
-collSlaveOk.setSlaveOk();
+collSlaveOk.setSecondaryOk();
assert.eq(2100, collSlaveOk.find().itcount());
assert.commandWorked(s.s0.adminCommand({
diff --git a/jstests/sharding/chunk_history_window.js b/jstests/sharding/chunk_history_window.js
index 1be21395483..adc2ca7247a 100644
--- a/jstests/sharding/chunk_history_window.js
+++ b/jstests/sharding/chunk_history_window.js
@@ -21,17 +21,29 @@
load("jstests/sharding/libs/sharded_transactions_helpers.js");
-const configHistoryWindowSecs = 10;
+// The snapshot window is the max of minSnapshotHistoryWindowInSeconds and
+// transactionLifetimeLimitSeconds.
+const transactionLifetimeLimitSecs = 15;
+const minSnapshotHistoryWindowSecs = transactionLifetimeLimitSecs;
+const snapshotHistoryWindowSecs =
+ Math.max(minSnapshotHistoryWindowSecs, transactionLifetimeLimitSecs);
+
const st = new ShardingTest({
shards: {rs0: {nodes: 2}, rs1: {nodes: 2}},
other: {
configOptions: {
setParameter: {
- minSnapshotHistoryWindowInSeconds: configHistoryWindowSecs,
+ minSnapshotHistoryWindowInSeconds: minSnapshotHistoryWindowSecs,
+ transactionLifetimeLimitSeconds: transactionLifetimeLimitSecs,
logComponentVerbosity: tojson({sharding: {verbosity: 2}})
}
},
- rsOptions: {setParameter: {minSnapshotHistoryWindowInSeconds: 600}}
+ rsOptions: {
+ setParameter: {
+ minSnapshotHistoryWindowInSeconds: minSnapshotHistoryWindowSecs,
+ transactionLifetimeLimitSeconds: transactionLifetimeLimitSecs,
+ }
+ }
}
});
@@ -40,14 +52,14 @@ assert.eq(assert
.commandWorked(
primaryAdmin.runCommand({getParameter: 1, minSnapshotHistoryWindowInSeconds: 1}))
.minSnapshotHistoryWindowInSeconds,
- 600);
+ minSnapshotHistoryWindowSecs);
const configAdmin = st.configRS.getPrimary().getDB("admin");
assert.eq(assert
.commandWorked(
configAdmin.runCommand({getParameter: 1, minSnapshotHistoryWindowInSeconds: 1}))
.minSnapshotHistoryWindowInSeconds,
- 10);
+ minSnapshotHistoryWindowSecs);
const mongosDB = st.s.getDB(jsTestName());
const mongosColl = mongosDB.test;
@@ -81,9 +93,9 @@ assert.eq(2, chunk.history.length, tojson(chunk));
// Test history window with 1s margin.
const testMarginMS = 1000;
-// Test that reading from a snapshot at insertTS is valid for up to configHistoryWindowSecs
+// Test that reading from a snapshot at insertTS is valid for up to snapshotHistoryWindowSecs
// minus the testMarginMS (as a buffer).
-const testWindowMS = configHistoryWindowSecs * 1000 - testMarginMS;
+const testWindowMS = snapshotHistoryWindowSecs * 1000 - testMarginMS;
while (Date.now() - 1000 * insertTS.getTime() < testWindowMS) {
// Test that reading from a snapshot at insertTS is still valid.
assert.commandWorked(mongosDB.runCommand(
@@ -95,7 +107,7 @@ while (Date.now() - 1000 * insertTS.getTime() < testWindowMS) {
}
// Sleep until our most recent chunk move is before the oldest history in our window.
-const chunkExpirationTime = postMoveChunkTime + configHistoryWindowSecs * 1000;
+const chunkExpirationTime = postMoveChunkTime + snapshotHistoryWindowSecs * 1000;
sleep(chunkExpirationTime + testMarginMS - Date.now());
jsTestLog("Move chunk back to shard 0 to trigger history cleanup");
diff --git a/jstests/sharding/chunk_operations_invalidate_single_shard.js b/jstests/sharding/chunk_operations_invalidate_single_shard.js
index e660cec2305..30a736fcdea 100644
--- a/jstests/sharding/chunk_operations_invalidate_single_shard.js
+++ b/jstests/sharding/chunk_operations_invalidate_single_shard.js
@@ -52,6 +52,7 @@ let testSplit = () => {
const mongosCollectionVersion = getMongosCollVersion(ns);
assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: -500}}));
+ assert.eq(mongosCollectionVersion, getMongosCollVersion(ns));
testColl.findOne({x: 0});
testColl.findOne({x: 1000});
diff --git a/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js b/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js
index 6c661e0abac..6b61bd12a68 100644
--- a/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js
+++ b/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js
@@ -1,5 +1,5 @@
// Ensure that a call to createIndexes in a sharded cluster will route to the primary, even when
-// setSlaveOk() is set to true.
+// setSecondaryOk() is set to true.
(function() {
'use strict';
@@ -12,7 +12,7 @@ assert.commandWorked(testDB.adminCommand({enableSharding: testDBName}));
assert.commandWorked(
testDB.adminCommand({shardCollection: testDB[collName].getFullName(), key: {x: 1}}));
-st.s.setSlaveOk(true);
+st.s.setSecondaryOk();
assert.commandWorked(
testDB.runCommand({createIndexes: collName, indexes: [{key: {a: 1}, name: "index"}]}));
diff --git a/jstests/sharding/config_rs_no_primary.js b/jstests/sharding/config_rs_no_primary.js
index 91ce74de45d..8bcf7e54cd4 100644
--- a/jstests/sharding/config_rs_no_primary.js
+++ b/jstests/sharding/config_rs_no_primary.js
@@ -43,9 +43,9 @@ var testOps = function(mongos) {
assert.throws(function() {
mongos.getDB('config').shards.findOne();
});
- mongos.setSlaveOk(true);
+ mongos.setSecondaryOk();
var shardDoc = mongos.getDB('config').shards.findOne();
- mongos.setSlaveOk(false);
+ mongos.setSecondaryOk(false);
assert.neq(null, shardDoc);
jsTestLog("Doing ops that require metadata writes and thus should fail against: " + mongos);
diff --git a/jstests/sharding/count_config_servers.js b/jstests/sharding/count_config_servers.js
index ded75607cd0..0904a873e52 100644
--- a/jstests/sharding/count_config_servers.js
+++ b/jstests/sharding/count_config_servers.js
@@ -13,7 +13,7 @@ TestData.skipCheckOrphans = true;
"use strict";
var st = new ShardingTest({name: 'sync_conn_cmd', shards: 0});
-st.s.setSlaveOk(true);
+st.s.setSecondaryOk();
var configDB = st.config;
var coll = configDB.test;
diff --git a/jstests/sharding/count_slaveok.js b/jstests/sharding/count_slaveok.js
index e527128a7cd..23612d96220 100644
--- a/jstests/sharding/count_slaveok.js
+++ b/jstests/sharding/count_slaveok.js
@@ -1,5 +1,5 @@
/**
- * Tests count and distinct using slaveOk. Also tests a scenario querying a set where only one
+ * Tests count and distinct using secondaryOk. Also tests a scenario querying a set where only one
* secondary is up.
*/
@@ -20,7 +20,7 @@ var rst = st.rs0;
// Insert data into replica set
var conn = new Mongo(st.s.host);
-var coll = conn.getCollection('test.countSlaveOk');
+var coll = conn.getCollection('test.countSecondaryOk');
coll.drop();
var bulk = coll.initializeUnorderedBulkOp();
@@ -51,9 +51,9 @@ awaitRSClientHosts(conn, sec, {ok: true, secondary: true});
// Make sure that mongos realizes that primary is already down
awaitRSClientHosts(conn, primary, {ok: false});
-// Need to check slaveOk=true first, since slaveOk=false will destroy conn in pool when
+// Need to check secondaryOk=true first, since secondaryOk=false will destroy conn in pool when
// master is down
-conn.setSlaveOk();
+conn.setSecondaryOk();
// count using the command path
assert.eq(30, coll.find({i: 0}).count());
@@ -62,14 +62,14 @@ assert.eq(30, coll.find({i: 0}).itcount());
assert.eq(10, coll.distinct("i").length);
try {
- conn.setSlaveOk(false);
- // Should throw exception, since not slaveOk'd
+ conn.setSecondaryOk(false);
+ // Should throw exception, since not secondaryOk'd
coll.find({i: 0}).count();
print("Should not reach here!");
assert(false);
} catch (e) {
- print("Non-slaveOk'd connection failed.");
+ print("Non-secondaryOk'd connection failed.");
}
st.stop();
diff --git a/jstests/sharding/error_propagation.js b/jstests/sharding/error_propagation.js
index 6f47075f753..6fa9b7da74c 100644
--- a/jstests/sharding/error_propagation.js
+++ b/jstests/sharding/error_propagation.js
@@ -8,7 +8,7 @@
var st = new ShardingTest({mongos: 1, shards: 1, rs: {nodes: 3}});
var db = st.getDB('test');
-db.setSlaveOk(true);
+db.setSecondaryOk();
assert.commandWorked(db.foo.insert({a: 1}, {writeConcern: {w: 3}}));
assert.commandWorked(db.runCommand(
diff --git a/jstests/sharding/mongos_forwards_api_parameters_to_shards.js b/jstests/sharding/mongos_forwards_api_parameters_to_shards.js
new file mode 100644
index 00000000000..e611f716992
--- /dev/null
+++ b/jstests/sharding/mongos_forwards_api_parameters_to_shards.js
@@ -0,0 +1,213 @@
+/**
+ * When a client calls a mongos command with API parameters, mongos must forward them to shards.
+ *
+ * @tags: [multiversion_incompatible]
+ */
+
+(function() {
+'use strict';
+
+load('jstests/sharding/libs/sharded_transactions_helpers.js');
+
+let st = new ShardingTest({
+ mongos: 1,
+ shards: 2,
+ rs: {nodes: 1, setParameter: {logComponentVerbosity: tojson({command: {verbosity: 2}})}}
+});
+
+class APIParameterTest {
+ constructor(
+ command,
+ {dbName = "db", inAPIVersion1 = true, permittedInTxn = true, shardCommandName} = {}) {
+ this.command = command;
+ this.dbName = dbName;
+ this.inAPIVersion1 = inAPIVersion1;
+ this.permittedInTxn = permittedInTxn;
+ if (shardCommandName === undefined) {
+ this.commandName = Object.keys(command)[0];
+ } else {
+ // mongos executes a different command on the shards, e.g. mapReduce becomes aggregate.
+ this.commandName = shardCommandName;
+ }
+ }
+}
+
+const tests = [
+ // Write commands. Note, these rely on _id 1 residing on shard 0.
+ new APIParameterTest({insert: "collection", documents: [{_id: 1}]}),
+ new APIParameterTest({update: "collection", updates: [{q: {_id: 1}, u: {$set: {x: 1}}}]}),
+ new APIParameterTest({delete: "collection", deletes: [{q: {_id: 1}, limit: 1}]}),
+
+ // Read commands.
+ new APIParameterTest({aggregate: "collection", pipeline: [], cursor: {}}),
+ new APIParameterTest({aggregate: "collection", pipeline: [], cursor: {}, explain: true},
+ {shardCommandName: "explain", permittedInTxn: false}),
+ new APIParameterTest({find: "collection"}),
+ new APIParameterTest({count: "collection"}, {permittedInTxn: false}),
+ new APIParameterTest({count: "collection", query: {_id: {$lt: 0}}},
+ {inAPIVersion1: false, permittedInTxn: false}),
+ new APIParameterTest({distinct: "collection", key: "_id"},
+ {inAPIVersion1: false, permittedInTxn: false}),
+ new APIParameterTest(
+ {
+ mapReduce: "collection",
+ map: function() {
+ emit(1, 1);
+ },
+ reduce: function(key, values) {
+ return {count: values.length};
+ },
+ out: {inline: 1}
+ },
+ {inAPIVersion1: false, permittedInTxn: false, shardCommandName: "aggregate"}),
+
+ // FindAndModify.
+ new APIParameterTest({findAndModify: "collection", query: {_id: 1}, remove: true}),
+
+ // DDL. Order matters: we must create, modify, then drop an index on collection2.
+ new APIParameterTest({createIndexes: "collection2", indexes: [{key: {x: 1}, name: "x_1"}]}),
+ new APIParameterTest({collMod: "collection2", index: {keyPattern: {x: 1}, hidden: true}},
+ {permittedInTxn: false}),
+ new APIParameterTest({dropIndexes: "collection2", index: "x_1"}, {permittedInTxn: false}),
+ // We can create indexes on a non-existent collection in a sharded transaction.
+ new APIParameterTest({create: "newCollection"}),
+ new APIParameterTest({renameCollection: "db.newCollection", to: "db.newerCollection"},
+ {inAPIVersion1: false, permittedInTxn: false, dbName: "admin"}),
+ new APIParameterTest({drop: "collection"}, {permittedInTxn: false}),
+ new APIParameterTest({dropDatabase: 1}, {permittedInTxn: false}),
+];
+
+function checkPrimaryLog(conn, commandName, apiVersion, apiStrict, apiDeprecationErrors, message) {
+ const logs = checkLog.getGlobalLog(conn);
+ let lastCommandInvocation;
+
+ for (let logMsg of logs) {
+ const obj = JSON.parse(logMsg);
+ // Search for "About to run the command" logs.
+ if (obj.id !== 21965) {
+ continue;
+ }
+
+ const args = obj.attr.commandArgs;
+ if (commandName !== Object.keys(args)[0]) {
+ continue;
+ }
+
+ lastCommandInvocation = args;
+ if (args.apiVersion !== apiVersion || args.apiStrict !== apiStrict ||
+ args.apiDeprecationErrors !== apiDeprecationErrors) {
+ continue;
+ }
+
+ // Found a match.
+ return;
+ }
+
+ if (lastCommandInvocation === undefined) {
+ doassert(`Primary didn't log ${commandName}`);
+ return;
+ }
+
+ doassert(`Primary didn't log ${message}, last invocation of ${commandName} was` +
+ ` ${tojson(lastCommandInvocation)}`);
+}
+
+for (const sharded of [false, true]) {
+ for (const [apiVersion, apiStrict, apiDeprecationErrors] of [[undefined, undefined, undefined],
+ ["1", undefined, undefined],
+ ["1", undefined, false],
+ ["1", undefined, true],
+ ["1", false, undefined],
+ ["1", false, false],
+ ["1", false, true],
+ ["1", true, undefined],
+ ["1", true, false],
+ ["1", true, true],
+ ]) {
+ for (let inTransaction of [false, true]) {
+ if (sharded) {
+ jsTestLog("Sharded setup");
+ assert.commandWorked(st.s.getDB("db")["collection"].insert(
+ {_id: 0}, {writeConcern: {w: "majority"}}));
+ assert.commandWorked(st.s.getDB("db")["collection"].insert(
+ {_id: 20}, {writeConcern: {w: "majority"}}));
+
+ assert.commandWorked(st.s.adminCommand({enableSharding: "db"}));
+ st.ensurePrimaryShard("db", st.shard0.shardName);
+ assert.commandWorked(
+ st.s.adminCommand({shardCollection: "db.collection", key: {_id: 1}}));
+
+ // The chunk with _id 1 is on shard 0.
+ assert.commandWorked(
+ st.s.adminCommand({split: "db.collection", middle: {_id: 10}}));
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: "db.collection", find: {_id: 20}, to: st.shard1.shardName}));
+ } else {
+ jsTestLog("Unsharded setup");
+ assert.commandWorked(st.s.getDB("db")["collection"].insert(
+ {_id: 0}, {writeConcern: {w: "majority"}}));
+ st.ensurePrimaryShard("db", st.shard0.shardName);
+ }
+
+ // Shard 0's primary.
+ const primary = st.rs0.getPrimary();
+
+ for (const test of tests) {
+ if (inTransaction && !test.permittedInTxn) {
+ continue;
+ }
+
+ if (apiStrict && !test.inAPIVersion1) {
+ continue;
+ }
+
+ // Make a copy of the test's command body, and set its API parameters.
+ const commandWithAPIParams = Object.assign({}, test.command);
+ if (apiVersion !== undefined) {
+ commandWithAPIParams.apiVersion = apiVersion;
+ }
+
+ if (apiStrict !== undefined) {
+ commandWithAPIParams.apiStrict = apiStrict;
+ }
+
+ if (apiDeprecationErrors !== undefined) {
+ commandWithAPIParams.apiDeprecationErrors = apiDeprecationErrors;
+ }
+
+ assert.commandWorked(primary.adminCommand({clearLog: "global"}));
+ const message = `command ${tojson(commandWithAPIParams)}` +
+ ` ${sharded ? "sharded" : "unsharded"},` +
+ ` ${inTransaction ? "in" : "outside"} transaction`;
+
+ flushRoutersAndRefreshShardMetadata(st, {ns: "db.collection"});
+
+ jsTestLog(`Running ${message}`);
+
+ if (inTransaction) {
+ const session = st.s0.startSession();
+ const sessionDb = session.getDatabase(test.dbName);
+ session.startTransaction();
+ assert.commandWorked(sessionDb.runCommand(commandWithAPIParams));
+ assert.commandWorked(session.commitTransaction_forTesting());
+ } else {
+ const db = st.s0.getDB(test.dbName);
+ assert.commandWorked(db.runCommand(commandWithAPIParams));
+ }
+
+ checkPrimaryLog(primary,
+ test.commandName,
+ apiVersion,
+ apiStrict,
+ apiDeprecationErrors,
+ message);
+ }
+
+ jsTestLog("JS test cleanup: Drop database 'db'");
+ st.s0.getDB("db").runCommand({dropDatabase: 1});
+ }
+ }
+}
+
+st.stop();
+})();
diff --git a/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js b/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
index 5cb277197b1..466c4314d45 100644
--- a/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
+++ b/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
@@ -160,9 +160,9 @@ gc(); // Clean up new connections
jsTest.log("Stopping primary of second shard...");
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
mongosConnIdle = authDBUsers(new Mongo(mongos.host));
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
// Need to save this node for later
var rs1Secondary = st.rs1.getSecondary();
@@ -192,13 +192,13 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne(
jsTest.log("Testing new connections with second primary down...");
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
@@ -212,9 +212,9 @@ gc(); // Clean up new connections
jsTest.log("Stopping primary of first shard...");
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
mongosConnIdle = authDBUsers(new Mongo(mongos.host));
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
st.rs0.stop(st.rs0.getPrimary());
@@ -241,13 +241,13 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne(
jsTest.log("Testing new connections with first primary down...");
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
@@ -261,9 +261,9 @@ gc(); // Clean up new connections
jsTest.log("Stopping second shard...");
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
mongosConnIdle = authDBUsers(new Mongo(mongos.host));
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
st.rs1.stop(rs1Secondary);
@@ -288,10 +288,10 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne(
jsTest.log("Testing new connections with second shard down...");
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
mongosConnNew = authDBUsers(new Mongo(mongos.host));
diff --git a/jstests/sharding/mongos_rs_shard_failure_tolerance.js b/jstests/sharding/mongos_rs_shard_failure_tolerance.js
index 34d68c45f6e..89dc4c07986 100644
--- a/jstests/sharding/mongos_rs_shard_failure_tolerance.js
+++ b/jstests/sharding/mongos_rs_shard_failure_tolerance.js
@@ -131,11 +131,11 @@ st.rs1.stop(st.rs1.getPrimary());
jsTest.log("Testing active connection with second primary down...");
// Reads with read prefs
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: 1}));
assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1}));
-mongosConnActive.setSlaveOk(false);
+mongosConnActive.setSecondaryOk(false);
mongosConnActive.setReadPref("primary");
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1}));
@@ -145,14 +145,14 @@ assert.throws(function() {
assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1}));
// Ensure read prefs override slaveOK
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
mongosConnActive.setReadPref("primary");
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.throws(function() {
mongosConnActive.getCollection(collSharded.toString()).findOne({_id: 1});
});
assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1}));
-mongosConnActive.setSlaveOk(false);
+mongosConnActive.setSecondaryOk(false);
mongosConnActive.setReadPref("secondary");
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1}));
@@ -187,11 +187,11 @@ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_
assert.commandWorked(mongosConnIdle.getCollection(collUnsharded.toString()).insert({_id: 6}, wc));
// Reads with read prefs
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: 1}));
assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1}));
-mongosConnIdle.setSlaveOk(false);
+mongosConnIdle.setSecondaryOk(false);
mongosConnIdle.setReadPref("primary");
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1}));
@@ -201,14 +201,14 @@ assert.throws(function() {
assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1}));
// Ensure read prefs override slaveOK
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
mongosConnIdle.setReadPref("primary");
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.throws(function() {
mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: 1});
});
assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1}));
-mongosConnIdle.setSlaveOk(false);
+mongosConnIdle.setSecondaryOk(false);
mongosConnIdle.setReadPref("secondary");
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1}));
@@ -234,13 +234,13 @@ jsTest.log("Testing new connections with second primary down...");
// Reads with read prefs
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1}));
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
gc(); // Clean up new connections incrementally to compensate for slow win32 machine.
@@ -261,17 +261,17 @@ gc(); // Clean up new connections incrementally to compensate for slow win32 ma
// Ensure read prefs override slaveok
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
mongosConnNew.setReadPref("primary");
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
mongosConnNew.setReadPref("primary");
assert.throws(function() {
mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1});
});
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
mongosConnNew.setReadPref("primary");
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
@@ -343,7 +343,7 @@ st.rs0.stop(st.rs0.getPrimary());
jsTest.log("Testing active connection with first primary down...");
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: 1}));
assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1}));
@@ -358,7 +358,7 @@ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_
assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_id: 9}));
assert.writeError(mongosConnIdle.getCollection(collUnsharded.toString()).insert({_id: 9}));
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: 1}));
assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1}));
@@ -366,13 +366,13 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne(
jsTest.log("Testing new connections with first primary down...");
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1}));
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
mongosConnNew = new Mongo(mongos.host);
@@ -392,7 +392,7 @@ st.rs1.stop(rs1Secondary);
jsTest.log("Testing active connection with second shard down...");
-mongosConnActive.setSlaveOk();
+mongosConnActive.setSecondaryOk();
assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1}));
@@ -406,17 +406,17 @@ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_
assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_id: 12}));
assert.writeError(mongosConnIdle.getCollection(collUnsharded.toString()).insert({_id: 12}));
-mongosConnIdle.setSlaveOk();
+mongosConnIdle.setSecondaryOk();
assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1}));
assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1}));
jsTest.log("Testing new connections with second shard down...");
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1}));
mongosConnNew = new Mongo(mongos.host);
-mongosConnNew.setSlaveOk();
+mongosConnNew.setSecondaryOk();
assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1}));
mongosConnNew = new Mongo(mongos.host);
diff --git a/jstests/sharding/query/explain_read_pref.js b/jstests/sharding/query/explain_read_pref.js
index ce5e2cf47af..c3c51d85756 100644
--- a/jstests/sharding/query/explain_read_pref.js
+++ b/jstests/sharding/query/explain_read_pref.js
@@ -58,7 +58,7 @@ var testAllModes = function(conn, isMongos) {
var mode = args[0], tagSets = args[1], secExpected = args[2];
var testDB = conn.getDB('TestDB');
- conn.setSlaveOk(false); // purely rely on readPref
+ conn.setSecondaryOk(false); // purely rely on readPref
jsTest.log('Testing mode: ' + mode + ', tag sets: ' + tojson(tagSets));
// .explain().find()
diff --git a/jstests/sharding/read_pref.js b/jstests/sharding/read_pref.js
index 95c0e9697c3..9267cb18430 100644
--- a/jstests/sharding/read_pref.js
+++ b/jstests/sharding/read_pref.js
@@ -134,7 +134,7 @@ var doTest = function(useDollarQuerySyntax) {
var explainServer = getExplainServer(explain);
assert.neq(primaryNode.name, explainServer);
- conn.setSlaveOk();
+ conn.setSecondaryOk();
// It should also work with slaveOk
explain = getExplain("secondary");
diff --git a/jstests/sharding/read_pref_cmd.js b/jstests/sharding/read_pref_cmd.js
index 2c2a7f3332b..f94dd924f45 100644
--- a/jstests/sharding/read_pref_cmd.js
+++ b/jstests/sharding/read_pref_cmd.js
@@ -165,7 +165,7 @@ let testConnReadPreference = function(conn, isMongos, rsNodes, {readPref, expect
let testDB = conn.getDB(kDbName);
let shardedColl = conn.getCollection(kShardedNs);
- conn.setSlaveOk(false); // purely rely on readPref
+ conn.setSecondaryOk(false); // purely rely on readPref
conn.setReadPref(readPref.mode, readPref.tagSets, readPref.hedge);
/**
@@ -387,7 +387,7 @@ let testCursorReadPreference = function(conn, isMongos, rsNodes, {readPref, expe
tojson(readPref.tagSets)}, hedge ${tojson(readPref.hedge)}`);
let testColl = conn.getCollection(kShardedNs);
- conn.setSlaveOk(false); // purely rely on readPref
+ conn.setSecondaryOk(false); // purely rely on readPref
let bulk = testColl.initializeUnorderedBulkOp();
for (let i = 0; i < kNumDocs; ++i) {
diff --git a/jstests/sharding/read_write_concern_defaults_application.js b/jstests/sharding/read_write_concern_defaults_application.js
index 5db16a6e27f..1fd7146d32f 100644
--- a/jstests/sharding/read_write_concern_defaults_application.js
+++ b/jstests/sharding/read_write_concern_defaults_application.js
@@ -459,6 +459,7 @@ let testCases = {
hello: {skip: "does not accept read or write concern"},
hostInfo: {skip: "does not accept read or write concern"},
httpClientRequest: {skip: "does not accept read or write concern"},
+ importCollection: {skip: "internal command"},
insert: {
setUp: function(conn) {
assert.commandWorked(conn.getDB(db).runCommand({create: coll, writeConcern: {w: 1}}));
diff --git a/jstests/sharding/recovering_slaveok.js b/jstests/sharding/recovering_slaveok.js
index 512719b08b6..d9bcd44da87 100644
--- a/jstests/sharding/recovering_slaveok.js
+++ b/jstests/sharding/recovering_slaveok.js
@@ -1,6 +1,6 @@
/**
- * This tests that slaveOk'd queries in sharded setups get correctly routed when a slave goes into
- * RECOVERING state, and don't break
+ * This tests that secondaryOk'd queries in sharded setups get correctly routed when a secondary
+ * goes into RECOVERING state, and don't break
*/
// Shard secondaries are restarted, which may cause that shard's primary to stepdown while it does
@@ -12,11 +12,11 @@ TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
load("jstests/replsets/rslib.js");
var shardTest =
- new ShardingTest({name: "recovering_slaveok", shards: 2, mongos: 2, other: {rs: true}});
+ new ShardingTest({name: "recovering_secondaryok", shards: 2, mongos: 2, other: {rs: true}});
var mongos = shardTest.s0;
var mongosSOK = shardTest.s1;
-mongosSOK.setSlaveOk();
+mongosSOK.setSecondaryOk();
var admin = mongos.getDB("admin");
var config = mongos.getDB("config");
@@ -50,7 +50,7 @@ shardTest.shardColl(coll,
/* dbname */ null,
/* waitForDelete */ true);
-print("3: test normal and slaveOk queries");
+print("3: test normal and secondaryOk queries");
// Make shardA and rsA the same
var shardA = shardTest.getShard(coll, {_id: -1});
@@ -87,7 +87,7 @@ print("6: stop non-RECOVERING secondary");
rsA.stop(goodSec);
-print("7: check our regular and slaveOk query");
+print("7: check our regular and secondaryOk query");
assert.eq(2, coll.find().itcount());
assert.eq(2, collSOk.find().itcount());
@@ -100,7 +100,7 @@ print("9: wait for recovery");
rsA.waitForState(rsA.getSecondaries(), ReplSetTest.State.SECONDARY, 5 * 60 * 1000);
-print("10: check our regular and slaveOk query");
+print("10: check our regular and secondaryOk query");
// We need to make sure our nodes are considered accessible from mongos - otherwise we fail
// See SERVER-7274
@@ -112,7 +112,7 @@ awaitRSClientHosts(coll.getMongo(), rsB.nodes, {ok: true});
awaitRSClientHosts(collSOk.getMongo(), [rsA.getSecondaries()[0]], {secondary: true, ok: true});
awaitRSClientHosts(collSOk.getMongo(), [rsB.getSecondaries()[0]], {secondary: true, ok: true});
-print("SlaveOK Query...");
+print("SecondaryOk Query...");
var sOKCount = collSOk.find().itcount();
var collCount = null;
diff --git a/jstests/sharding/session_info_in_oplog.js b/jstests/sharding/session_info_in_oplog.js
index 617d5759207..a7644fca599 100644
--- a/jstests/sharding/session_info_in_oplog.js
+++ b/jstests/sharding/session_info_in_oplog.js
@@ -329,7 +329,7 @@ replTest.initiate();
var priConn = replTest.getPrimary();
var secConn = replTest.getSecondary();
-secConn.setSlaveOk(true);
+secConn.setSecondaryOk();
runTests(priConn, priConn, secConn);
@@ -338,7 +338,7 @@ replTest.stopSet();
var st = new ShardingTest({shards: {rs0: {nodes: kNodes}}});
secConn = st.rs0.getSecondary();
-secConn.setSlaveOk(true);
+secConn.setSecondaryOk();
runTests(st.s, st.rs0.getPrimary(), secConn);
st.stop();
diff --git a/jstests/sharding/shard_aware_init_secondaries.js b/jstests/sharding/shard_aware_init_secondaries.js
index 59a8542f44b..f852c6e58a1 100644
--- a/jstests/sharding/shard_aware_init_secondaries.js
+++ b/jstests/sharding/shard_aware_init_secondaries.js
@@ -41,7 +41,7 @@ assert.commandWorked(priConn.getDB('admin').system.version.update(
shardIdentityQuery, shardIdentityUpdate, {upsert: true, writeConcern: {w: 2}}));
var secConn = replTest.getSecondary();
-secConn.setSlaveOk(true);
+secConn.setSecondaryOk();
var res = secConn.getDB('admin').runCommand({shardingState: 1});
@@ -55,7 +55,7 @@ replTest.waitForPrimary();
replTest.awaitSecondaryNodes();
secConn = replTest.getSecondary();
-secConn.setSlaveOk(true);
+secConn.setSecondaryOk();
res = secConn.getDB('admin').runCommand({shardingState: 1});
diff --git a/jstests/sharding/shard_identity_config_update.js b/jstests/sharding/shard_identity_config_update.js
index 3e668c5903c..43c10bbbd22 100644
--- a/jstests/sharding/shard_identity_config_update.js
+++ b/jstests/sharding/shard_identity_config_update.js
@@ -63,7 +63,7 @@ assert.soon(function() {
});
var secConn = st.rs0.getSecondary();
-secConn.setSlaveOk(true);
+secConn.setSecondaryOk();
assert.soon(function() {
return checkConfigStrUpdated(secConn, expectedConfigStr);
});
@@ -96,7 +96,7 @@ assert.soon(function() {
});
secConn = st.rs0.getSecondary();
-secConn.setSlaveOk(true);
+secConn.setSecondaryOk();
assert.soon(function() {
return checkConfigStrUpdated(secConn, origConfigConnStr);
});
diff --git a/jstests/sharding/shard_identity_rollback.js b/jstests/sharding/shard_identity_rollback.js
index d6e47fa3137..25dbc2e19e4 100644
--- a/jstests/sharding/shard_identity_rollback.js
+++ b/jstests/sharding/shard_identity_rollback.js
@@ -52,7 +52,7 @@ assert.eq(shardIdentityDoc.clusterId, res.clusterId);
// Ensure sharding state on the secondaries was *not* initialized
secondaries.forEach(function(secondary) {
- secondary.setSlaveOk(true);
+ secondary.setSecondaryOk();
res = secondary.getDB('admin').runCommand({shardingState: 1});
assert(!res.enabled, tojson(res));
});
@@ -105,7 +105,7 @@ try {
// specified. We do want to wait to be able to connect to the node here however, so we need to pass
// {waitForConnect: true}.
priConn = replTest.start(priConn.nodeId, {shardsvr: '', waitForConnect: true}, true);
-priConn.setSlaveOk();
+priConn.setSecondaryOk();
// Wait for the old primary to replicate the document that was written to the new primary while
// it was shut down.
diff --git a/jstests/sharding/shard_insert_getlasterror_w2.js b/jstests/sharding/shard_insert_getlasterror_w2.js
index 7bde30b2dc5..a4a0f5c540f 100644
--- a/jstests/sharding/shard_insert_getlasterror_w2.js
+++ b/jstests/sharding/shard_insert_getlasterror_w2.js
@@ -70,7 +70,7 @@ replSet1.stop(secondary2);
replSet1.waitForState(primary, ReplSetTest.State.SECONDARY);
testDB.getMongo().adminCommand({setParameter: 1, logLevel: 1});
-testDB.getMongo().setSlaveOk();
+testDB.getMongo().setSecondaryOk();
print("trying some queries");
assert.soon(function() {
try {
diff --git a/jstests/slow1/replsets_priority1.js b/jstests/slow1/replsets_priority1.js
index 3ff6c058cc7..4dea828c793 100644
--- a/jstests/slow1/replsets_priority1.js
+++ b/jstests/slow1/replsets_priority1.js
@@ -146,9 +146,9 @@ for (var i = 0; i < n; i++) {
assert.soon(function() {
var versions = [0, 0];
var secondaries = rs.getSecondaries();
- secondaries[0].setSlaveOk();
+ secondaries[0].setSecondaryOk();
versions[0] = secondaries[0].getDB("local").system.replset.findOne().version;
- secondaries[1].setSlaveOk();
+ secondaries[1].setSecondaryOk();
versions[1] = secondaries[1].getDB("local").system.replset.findOne().version;
return versions[0] == config.version && versions[1] == config.version;
});
diff --git a/jstests/ssl/mongo_uri_secondaries.js b/jstests/ssl/mongo_uri_secondaries.js
index a4ed1eae93c..73cca540c80 100644
--- a/jstests/ssl/mongo_uri_secondaries.js
+++ b/jstests/ssl/mongo_uri_secondaries.js
@@ -39,7 +39,7 @@ const subShellCommand = function(hosts) {
for (var i = 0; i < 10; i++) {
var db = Ms[i].getDB("test");
- db.setSlaveOk(true);
+ db.setSecondaryOk();
db.col.find().readPref("secondary").toArray();
}
};
diff --git a/src/mongo/client/dbclient_base.cpp b/src/mongo/client/dbclient_base.cpp
index 04748a525af..9a7c87eed66 100644
--- a/src/mongo/client/dbclient_base.cpp
+++ b/src/mongo/client/dbclient_base.cpp
@@ -49,8 +49,8 @@
#include "mongo/client/constants.h"
#include "mongo/client/dbclient_cursor.h"
#include "mongo/config.h"
+#include "mongo/db/api_parameters_gen.h"
#include "mongo/db/commands.h"
-#include "mongo/db/initialize_api_parameters_gen.h"
#include "mongo/db/json.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/query/kill_cursors_gen.h"
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 2d90253f008..a0c8de84714 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -869,6 +869,7 @@ env.Library(
'$BUILD_DIR/mongo/db/storage/storage_engine_lock_file',
'$BUILD_DIR/mongo/db/storage/storage_engine_metadata',
'commands/server_status_core',
+ 'initialize_api_parameters',
'introspect',
'lasterror',
'query_exec',
@@ -1460,17 +1461,39 @@ env.Library(
env.Library(
target='shared_request_handling',
source=[
- 'initialize_api_parameters.cpp',
'transaction_validation.cpp',
- env.Idlc('initialize_api_parameters.idl')[0],
],
LIBDEPS=[
+ 'api_parameters',
'error_labels',
'logical_session_cache_impl',
],
)
env.Library(
+ target='api_parameters',
+ source=[
+ 'api_parameters.cpp',
+ env.Idlc('api_parameters.idl')[0],
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/idl/idl_parser',
+ '$BUILD_DIR/mongo/idl/server_parameter',
+ ],
+)
+
+env.Library(
+ target='initialize_api_parameters',
+ source=[
+ 'initialize_api_parameters.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ 'api_parameters',
+ 'commands',
+ ],
+)
+
+env.Library(
target='logical_time',
source=[
'logical_time.cpp',
diff --git a/src/mongo/db/api_parameters.cpp b/src/mongo/db/api_parameters.cpp
new file mode 100644
index 00000000000..05ffe9c49cb
--- /dev/null
+++ b/src/mongo/db/api_parameters.cpp
@@ -0,0 +1,79 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/api_parameters.h"
+
+namespace mongo {
+
+const OperationContext::Decoration<APIParameters> APIParameters::get =
+ OperationContext::declareDecoration<APIParameters>();
+
+APIParameters APIParameters::fromClient(const APIParametersFromClient& apiParamsFromClient) {
+ APIParameters apiParameters = APIParameters();
+ auto apiVersion = apiParamsFromClient.getApiVersion();
+ auto apiStrict = apiParamsFromClient.getApiStrict();
+ auto apiDeprecationErrors = apiParamsFromClient.getApiDeprecationErrors();
+
+ if (apiVersion) {
+ apiParameters.setAPIVersion(apiVersion.value());
+ }
+
+ if (apiStrict) {
+ apiParameters.setAPIStrict(apiStrict.value());
+ }
+
+ if (apiDeprecationErrors) {
+ apiParameters.setAPIDeprecationErrors(apiDeprecationErrors.value());
+ }
+
+ return apiParameters;
+}
+
+APIParameters APIParameters::fromBSON(const BSONObj& cmdObj) {
+ return APIParameters::fromClient(
+ APIParametersFromClient::parse("APIParametersFromClient"_sd, cmdObj));
+}
+
+void APIParameters::appendInfo(BSONObjBuilder* builder) const {
+ if (_apiVersion) {
+ builder->append(kAPIVersionFieldName, *_apiVersion);
+ }
+ if (_apiStrict) {
+ builder->append(kAPIStrictFieldName, *_apiStrict);
+ }
+ if (_apiDeprecationErrors) {
+ builder->append(kAPIDeprecationErrorsFieldName, *_apiDeprecationErrors);
+ }
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/api_parameters.h b/src/mongo/db/api_parameters.h
new file mode 100644
index 00000000000..7539dcb345e
--- /dev/null
+++ b/src/mongo/db/api_parameters.h
@@ -0,0 +1,122 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/api_parameters_gen.h"
+#include "mongo/db/operation_context.h"
+
+namespace mongo {
+
+/**
+ * Stores a client's API version parameters (apiVersion, apiStrict,
+ * apiDeprecationErrors) as a decoration on the OperationContext.
+ */
+class APIParameters {
+
+public:
+ static constexpr StringData kAPIVersionFieldName = "apiVersion"_sd;
+ static constexpr StringData kAPIStrictFieldName = "apiStrict"_sd;
+ static constexpr StringData kAPIDeprecationErrorsFieldName = "apiDeprecationErrors"_sd;
+
+ static const OperationContext::Decoration<APIParameters> get;
+ static APIParameters fromClient(const APIParametersFromClient& apiParamsFromClient);
+ static APIParameters fromBSON(const BSONObj& cmdObj);
+
+ void appendInfo(BSONObjBuilder* builder) const;
+
+ const boost::optional<std::string>& getAPIVersion() const {
+ return _apiVersion;
+ }
+
+ void setAPIVersion(StringData apiVersion) {
+ _apiVersion = apiVersion.toString();
+ }
+
+ const boost::optional<bool>& getAPIStrict() const {
+ return _apiStrict;
+ }
+
+ void setAPIStrict(bool apiStrict) {
+ _apiStrict = apiStrict;
+ }
+
+ const boost::optional<bool>& getAPIDeprecationErrors() const {
+ return _apiDeprecationErrors;
+ }
+
+ void setAPIDeprecationErrors(bool apiDeprecationErrors) {
+ _apiDeprecationErrors = apiDeprecationErrors;
+ }
+
+ const bool getParamsPassed() const {
+ return _apiVersion || _apiStrict || _apiDeprecationErrors;
+ }
+
+private:
+ boost::optional<std::string> _apiVersion;
+ boost::optional<bool> _apiStrict;
+ boost::optional<bool> _apiDeprecationErrors;
+};
+
+
+/**
+ * Temporarily remove the user's API parameters from an OperationContext.
+ */
+class IgnoreAPIParametersBlock {
+public:
+ IgnoreAPIParametersBlock() = delete;
+ IgnoreAPIParametersBlock(const IgnoreAPIParametersBlock&) = delete;
+ IgnoreAPIParametersBlock& operator=(const IgnoreAPIParametersBlock&) = delete;
+
+ explicit IgnoreAPIParametersBlock(OperationContext* opCtx) : _opCtx(opCtx) {
+ _apiParams = APIParameters::get(_opCtx);
+ APIParameters::get(_opCtx) = APIParameters();
+ }
+
+ void release() {
+ if (_released) {
+ return;
+ }
+
+ APIParameters::get(_opCtx) = _apiParams;
+ _released = true;
+ }
+
+ ~IgnoreAPIParametersBlock() {
+ release();
+ }
+
+private:
+ OperationContext* _opCtx;
+ APIParameters _apiParams;
+ bool _released = false;
+};
+
+} // namespace mongo
diff --git a/src/mongo/db/initialize_api_parameters.idl b/src/mongo/db/api_parameters.idl
index cc3a3d13e6c..cc3a3d13e6c 100644
--- a/src/mongo/db/initialize_api_parameters.idl
+++ b/src/mongo/db/api_parameters.idl
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 617145e3abc..63a304a1f30 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -872,7 +872,9 @@ boost::optional<ResumeIndexInfo> MultiIndexBlock::_abortWithoutCleanup(Operation
void MultiIndexBlock::_writeStateToDisk(OperationContext* opCtx) const {
auto obj = _constructStateObject();
- auto rs = opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStore(opCtx);
+ auto rs = opCtx->getServiceContext()
+ ->getStorageEngine()
+ ->makeTemporaryRecordStoreForResumableIndexBuild(opCtx);
WriteUnitOfWork wuow(opCtx);
diff --git a/src/mongo/db/catalog_raii.h b/src/mongo/db/catalog_raii.h
index 47444538dd5..367b87e933b 100644
--- a/src/mongo/db/catalog_raii.h
+++ b/src/mongo/db/catalog_raii.h
@@ -291,7 +291,7 @@ private:
class ReadSourceScope {
public:
ReadSourceScope(OperationContext* opCtx,
- RecoveryUnit::ReadSource readSource = RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource readSource,
boost::optional<Timestamp> provided = boost::none);
~ReadSourceScope();
diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp
index cc222301ca0..e767d1f30ca 100644
--- a/src/mongo/db/catalog_raii_test.cpp
+++ b/src/mongo/db/catalog_raii_test.cpp
@@ -230,7 +230,7 @@ public:
}
private:
- ReadSource _source = ReadSource::kUnset;
+ ReadSource _source = ReadSource::kNoTimestamp;
boost::optional<Timestamp> _timestamp;
};
@@ -257,8 +257,8 @@ TEST_F(ReadSourceScopeTest, RestoreReadSource) {
ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kProvided);
ASSERT_EQ(opCtx()->recoveryUnit()->getPointInTimeReadTimestamp(), Timestamp(1, 2));
{
- ReadSourceScope scope(opCtx());
- ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kUnset);
+ ReadSourceScope scope(opCtx(), ReadSource::kNoTimestamp);
+ ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoTimestamp);
opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kNoOverlap);
ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoOverlap);
diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h
index ee2040764b6..f4d7960a759 100644
--- a/src/mongo/db/clientcursor.h
+++ b/src/mongo/db/clientcursor.h
@@ -32,10 +32,10 @@
#include <boost/optional.hpp>
#include <functional>
+#include "mongo/db/api_parameters.h"
#include "mongo/db/auth/privilege.h"
#include "mongo/db/auth/user_name.h"
#include "mongo/db/cursor_id.h"
-#include "mongo/db/initialize_api_parameters.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/logical_session_id.h"
#include "mongo/db/query/plan_executor.h"
diff --git a/src/mongo/db/command_generic_argument.cpp b/src/mongo/db/command_generic_argument.cpp
index 8434b65a3c3..e15c2498a97 100644
--- a/src/mongo/db/command_generic_argument.cpp
+++ b/src/mongo/db/command_generic_argument.cpp
@@ -56,9 +56,9 @@ static constexpr std::array<SpecialArgRecord, 34> specials{{
// /-isGeneric
// | /-stripFromRequest
// | | /-stripFromReply
- {"apiVersion"_sd, 1, 0, 0},
- {"apiStrict"_sd, 1, 0, 0},
- {"apiDeprecationErrors"_sd, 1, 0, 0},
+ {"apiVersion"_sd, 1, 1, 0},
+ {"apiStrict"_sd, 1, 1, 0},
+ {"apiDeprecationErrors"_sd, 1, 1, 0},
{"$audit"_sd, 1, 1, 0},
{"$client"_sd, 1, 1, 0},
{"$configServerState"_sd, 1, 1, 1},
diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp
index f9ef6f72574..8d9a8de296b 100644
--- a/src/mongo/db/commands.cpp
+++ b/src/mongo/db/commands.cpp
@@ -867,6 +867,14 @@ Command::Command(StringData name, std::vector<StringData> aliases)
globalCommandRegistry()->registerCommand(this, _name, _aliases);
}
+const std::set<std::string>& Command::apiVersions() const {
+ return kNoApiVersions;
+}
+
+const std::set<std::string>& Command::deprecatedApiVersions() const {
+ return kNoApiVersions;
+}
+
bool Command::hasAlias(const StringData& alias) const {
return globalCommandRegistry()->findCommand(alias) == this;
}
diff --git a/src/mongo/db/commands.h b/src/mongo/db/commands.h
index 1877556f356..06803dbad84 100644
--- a/src/mongo/db/commands.h
+++ b/src/mongo/db/commands.h
@@ -358,15 +358,18 @@ public:
/*
* Returns the list of API versions that include this command.
*/
- virtual const std::set<std::string>& apiVersions() const {
- return kNoApiVersions;
- }
+ virtual const std::set<std::string>& apiVersions() const;
/*
* Returns the list of API versions in which this command is deprecated.
*/
- virtual const std::set<std::string>& deprecatedApiVersions() const {
- return kNoApiVersions;
+ virtual const std::set<std::string>& deprecatedApiVersions() const;
+
+ /*
+ * Some commands permit any values for apiVersion, apiStrict, and apiDeprecationErrors.
+ */
+ virtual bool acceptsAnyApiVersionParameters() const {
+ return false;
}
/**
diff --git a/src/mongo/db/commands/test_api_version_2_commands.cpp b/src/mongo/db/commands/test_api_version_2_commands.cpp
index b2c79a7ef70..738e13b1366 100644
--- a/src/mongo/db/commands/test_api_version_2_commands.cpp
+++ b/src/mongo/db/commands/test_api_version_2_commands.cpp
@@ -27,8 +27,8 @@
* it in the license file.
*/
+#include "mongo/db/api_parameters.h"
#include "mongo/db/commands.h"
-#include "mongo/db/initialize_api_parameters.h"
namespace mongo {
diff --git a/src/mongo/db/commands/test_deprecation_command.cpp b/src/mongo/db/commands/test_deprecation_command.cpp
index 44e61edb4a0..74d93942ddd 100644
--- a/src/mongo/db/commands/test_deprecation_command.cpp
+++ b/src/mongo/db/commands/test_deprecation_command.cpp
@@ -27,8 +27,8 @@
* it in the license file.
*/
+#include "mongo/db/api_parameters.h"
#include "mongo/db/commands.h"
-#include "mongo/db/initialize_api_parameters.h"
namespace mongo {
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index a8329f4641d..22a9181f157 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -90,6 +90,10 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline) {
+ // The caller was expecting to conflict with batch application before entering this function.
+ // i.e. the caller does not currently have a ShouldNotConflict... block in scope.
+ bool callerWasConflicting = opCtx->lockState()->shouldConflictWithSecondaryBatchApplication();
+
// Don't take the ParallelBatchWriterMode lock when the server parameter is set and our
// storage engine supports snapshot reads.
if (gAllowSecondaryReadsDuringBatchApplication.load() &&
@@ -100,11 +104,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
const auto collectionLockMode = getLockModeForQuery(opCtx, nsOrUUID.nss());
_autoColl.emplace(opCtx, nsOrUUID, collectionLockMode, viewMode, deadline);
- // If the read source is explicitly set to kNoTimestamp, we read the most up to date data and do
- // not consider changing our ReadSource (e.g. FTDC needs that).
- if (opCtx->recoveryUnit()->getTimestampReadSource() == RecoveryUnit::ReadSource::kNoTimestamp)
- return;
-
repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx);
const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel();
@@ -154,6 +153,32 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
<< afterClusterTime->asTimestamp().toString());
}
+ // This assertion protects operations from reading inconsistent data on secondaries when
+ // using the default ReadSource of kNoTimestamp.
+
+ // Reading at lastApplied on secondaries is the safest behavior and is enabled for all user
+ // and DBDirectClient reads using 'local' and 'available' readConcerns. If an internal
+ // operation wishes to read without a timestamp during a batch, a ShouldNotConflict can
+ // suppress this fatal assertion with the following considerations:
+ // * The operation is not reading replicated data in a replication state where batch
+ // application is active OR
+ // * Reading inconsistent, out-of-order data is either inconsequential or required by
+ // the operation.
+
+ // If the caller entered this function expecting to conflict with batch application
+ // (i.e. no ShouldNotConflict block in scope), but they are reading without a timestamp and
+ // not holding the PBWM lock, then there is a possibility that this reader may
+ // unintentionally see inconsistent data during a batch. Certain namespaces are applied
+ // serially in oplog application, and therefore can be safely read without taking the PBWM
+ // lock or reading at a timestamp.
+ if (readSource == RecoveryUnit::ReadSource::kNoTimestamp && callerWasConflicting &&
+ !nss.mustBeAppliedInOwnOplogBatch() &&
+ SnapshotHelper::shouldReadAtLastApplied(opCtx, nss)) {
+ LOGV2_FATAL(4728700,
+ "Reading from replicated collection without read timestamp or PBWM lock",
+ "collection"_attr = nss);
+ }
+
auto minSnapshot = coll->getMinimumVisibleSnapshot();
if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) {
return;
diff --git a/src/mongo/db/db_raii_test.cpp b/src/mongo/db/db_raii_test.cpp
index b101ce91961..eba322c5581 100644
--- a/src/mongo/db/db_raii_test.cpp
+++ b/src/mongo/db/db_raii_test.cpp
@@ -42,6 +42,7 @@
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/storage/snapshot_manager.h"
#include "mongo/logv2/log.h"
+#include "mongo/unittest/death_test.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/time_support.h"
@@ -219,6 +220,8 @@ TEST_F(DBRAIITestFixture,
Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ client2.first->setInDirectClient(true);
AutoGetCollectionForRead coll(client2.second.get(), nss);
}
@@ -239,6 +242,8 @@ TEST_F(DBRAIITestFixture,
Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ client2.first->setInDirectClient(true);
AutoGetCollectionForRead coll(client2.second.get(), nss);
}
@@ -266,10 +271,12 @@ TEST_F(DBRAIITestFixture,
Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ client2.first->setInDirectClient(true);
AutoGetCollectionForRead coll(client2.second.get(), NamespaceString("local.system.js"));
// Reading from an unreplicated collection does not change the ReadSource to kLastApplied.
ASSERT_EQ(client2.second.get()->recoveryUnit()->getTimestampReadSource(),
- RecoveryUnit::ReadSource::kUnset);
+ RecoveryUnit::ReadSource::kNoTimestamp);
// Reading from a replicated collection will try to switch to kLastApplied. Because we are
// already reading without a timestamp and we can't reacquire the PBWM lock to continue reading
@@ -300,12 +307,15 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedConflict) {
auto snapshotManager =
client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
snapshotManager->setLastApplied(opTime.getTimestamp());
+
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ client1.first->setInDirectClient(true);
AutoGetCollectionForRead coll(client1.second.get(), nss);
// We can't read from kLastApplied in this scenario because there is a catalog conflict. Resort
// to taking the PBWM lock and reading without a timestamp.
ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(),
- RecoveryUnit::ReadSource::kUnset);
+ RecoveryUnit::ReadSource::kNoTimestamp);
ASSERT_TRUE(client1.second.get()->lockState()->isLockHeldForMode(
resourceIdParallelBatchWriterMode, MODE_IS));
}
@@ -325,6 +335,9 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) {
auto snapshotManager =
client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
ASSERT_FALSE(snapshotManager->getLastApplied());
+
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ client1.first->setInDirectClient(true);
AutoGetCollectionForRead coll(client1.second.get(), nss);
ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(),
@@ -334,6 +347,33 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) {
resourceIdParallelBatchWriterMode, MODE_IS));
}
+TEST_F(DBRAIITestFixture, AutoGetCollectionForReadOplogOnSecondary) {
+ // This test simulates a situation where AutoGetCollectionForRead reads at lastApplied on a
+ // secondary.
+ auto replCoord = repl::ReplicationCoordinator::get(client1.second.get());
+ ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+ // Ensure the default ReadSource is used.
+ ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(),
+ RecoveryUnit::ReadSource::kNoTimestamp);
+
+ // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock
+ // class and does not update the correct state in the SnapshotManager.
+ repl::OpTime opTime(Timestamp(2, 1), 1);
+ auto snapshotManager =
+ client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
+ snapshotManager->setLastApplied(opTime.getTimestamp());
+
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ client1.first->setInDirectClient(true);
+ AutoGetCollectionForRead coll(client1.second.get(), NamespaceString::kRsOplogNamespace);
+
+ ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(),
+ RecoveryUnit::ReadSource::kLastApplied);
+ ASSERT_FALSE(client1.second.get()->lockState()->isLockHeldForMode(
+ resourceIdParallelBatchWriterMode, MODE_IS));
+}
+
TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesLastAppliedOnSecondary) {
auto opCtx = client1.second.get();
@@ -342,11 +382,15 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesLastAppliedOnSecondary) {
CollectionOptions options;
options.capped = true;
ASSERT_OK(storageInterface()->createCollection(opCtx, nss, options));
+
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ opCtx->getClient()->setInDirectClient(true);
AutoGetCollectionForRead autoColl(opCtx, nss);
auto exec = makeTailableQueryPlan(opCtx, autoColl.getCollection());
// The collection scan should use the default ReadSource on a primary.
- ASSERT_EQ(RecoveryUnit::ReadSource::kUnset, opCtx->recoveryUnit()->getTimestampReadSource());
+ ASSERT_EQ(RecoveryUnit::ReadSource::kNoTimestamp,
+ opCtx->recoveryUnit()->getTimestampReadSource());
// When the tailable query recovers from its yield, it should discover that the node is
// secondary and change its read source.
@@ -373,6 +417,9 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadChangedReadSourceAfterStepUp)
ASSERT_OK(storageInterface()->createCollection(opCtx, nss, options));
ASSERT_OK(
repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+ // Simulate using a DBDirectClient to test this behavior for user reads.
+ opCtx->getClient()->setInDirectClient(true);
AutoGetCollectionForRead autoColl(opCtx, nss);
auto exec = makeTailableQueryPlan(opCtx, autoColl.getCollection());
@@ -390,9 +437,36 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadChangedReadSourceAfterStepUp)
- // After restoring, the collection scan should now be reading with kUnset, the default on
- // primaries.
+ // After restoring, the collection scan should now be reading with kNoTimestamp, the
+ // default on primaries.
- ASSERT_EQ(RecoveryUnit::ReadSource::kUnset, opCtx->recoveryUnit()->getTimestampReadSource());
+ ASSERT_EQ(RecoveryUnit::ReadSource::kNoTimestamp,
+ opCtx->recoveryUnit()->getTimestampReadSource());
ASSERT_EQUALS(PlanExecutor::IS_EOF, exec->getNext(&unused, nullptr));
}
+DEATH_TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUnsafe, "Fatal assertion") {
+ auto opCtx = client1.second.get();
+ ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {}));
+
+ ASSERT_OK(
+ repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+ // Non-user read on a replicated collection should fail because we are reading on a secondary
+ // without a timestamp.
+ AutoGetCollectionForRead autoColl(opCtx, nss);
+}
+
+TEST_F(DBRAIITestFixture, AutoGetCollectionForReadSafe) {
+ auto opCtx = client1.second.get();
+ ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {}));
+
+ ASSERT_OK(
+ repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+ // Non-user read on a replicated collection should not fail because of the ShouldNotConflict
+ // block.
+ ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState());
+
+ AutoGetCollectionForRead autoColl(opCtx, nss);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/dbdirectclient.cpp b/src/mongo/db/dbdirectclient.cpp
index 5386bf567d2..bb1f5553906 100644
--- a/src/mongo/db/dbdirectclient.cpp
+++ b/src/mongo/db/dbdirectclient.cpp
@@ -143,6 +143,7 @@ DbResponse loopbackBuildResponse(OperationContext* const opCtx,
toSend.header().setId(nextMessageId());
toSend.header().setResponseToMsgId(0);
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
return opCtx->getServiceContext()->getServiceEntryPoint()->handleRequest(opCtx, toSend).get();
}
} // namespace
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index 5c598445272..6b517f293b5 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -359,7 +359,7 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = {
{"addToArray", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::addToArray, true}},
{"addToSet", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::addToSet, true}},
{"doubleDoubleSum",
- BuiltinFn{[](size_t n) { return n > 0; }, vm::Builtin::doubleDoubleSum, true}},
+ BuiltinFn{[](size_t n) { return n > 0; }, vm::Builtin::doubleDoubleSum, false}},
{"bitTestZero", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::bitTestZero, false}},
{"bitTestMask", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::bitTestMask, false}},
{"bitTestPosition",
@@ -402,6 +402,7 @@ static stdx::unordered_map<std::string, InstrFn> kInstrFunctions = {
InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendIsNumber, false}},
{"isBinData",
InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendIsBinData, false}},
+ {"isDate", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendIsDate, false}},
{"sum", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendSum, true}},
{"min", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendMin, true}},
{"max", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendMax, true}},
diff --git a/src/mongo/db/exec/sbe/stages/loop_join.h b/src/mongo/db/exec/sbe/stages/loop_join.h
index bf19c50b8f2..0f94d39a9c1 100644
--- a/src/mongo/db/exec/sbe/stages/loop_join.h
+++ b/src/mongo/db/exec/sbe/stages/loop_join.h
@@ -57,8 +57,7 @@ public:
private:
// Set of variables coming from the outer side.
const value::SlotVector _outerProjects;
- // Set of correlated variables from the outer side that are visible on the inner side. They must
- // be also present in the _outerProjects.
+ // Set of correlated variables from the outer side that are visible on the inner side.
const value::SlotVector _outerCorrelated;
// If not set then this is a cross product.
const std::unique_ptr<EExpression> _predicate;
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index b5890497f45..ba7c849431b 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -96,6 +96,7 @@ int Instruction::stackOffset[Instruction::Tags::lastInstruction] = {
0, // isString
0, // isNumber
0, // isBinData
+ 0, // isDate
0, // typeMatch
0, // function is special, the stack offset is encoded in the instruction itself
@@ -314,6 +315,10 @@ void CodeFragment::appendIsBinData() {
appendSimpleInstruction(Instruction::isBinData);
}
+void CodeFragment::appendIsDate() {
+ appendSimpleInstruction(Instruction::isDate);
+}
+
void CodeFragment::appendTypeMatch(uint32_t typeMask) {
Instruction i;
i.tag = Instruction::typeMatch;
@@ -1814,6 +1819,18 @@ std::tuple<uint8_t, value::TypeTags, value::Value> ByteCode::run(const CodeFragm
}
break;
}
+ case Instruction::isDate: {
+ auto [owned, tag, val] = getFromStack(0);
+
+ if (tag != value::TypeTags::Nothing) {
+ topStack(false, value::TypeTags::Boolean, tag == value::TypeTags::Date);
+ }
+
+ if (owned) {
+ value::releaseValue(tag, val);
+ }
+ break;
+ }
case Instruction::typeMatch: {
auto typeMask = value::readFromMemory<uint32_t>(pcPointer);
pcPointer += sizeof(typeMask);
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index a5197d17437..e4590a79c71 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -149,6 +149,7 @@ struct Instruction {
isString,
isNumber,
isBinData,
+ isDate,
typeMatch,
function,
@@ -259,6 +260,7 @@ public:
void appendIsString();
void appendIsNumber();
void appendIsBinData();
+ void appendIsDate();
void appendTypeMatch(uint32_t typeMask);
void appendFunction(Builtin f, uint8_t arity);
void appendJump(int jumpOffset);
diff --git a/src/mongo/db/free_mon/free_mon_storage.cpp b/src/mongo/db/free_mon/free_mon_storage.cpp
index 7c25c6a671c..89be39295e1 100644
--- a/src/mongo/db/free_mon/free_mon_storage.cpp
+++ b/src/mongo/db/free_mon/free_mon_storage.cpp
@@ -57,6 +57,10 @@ boost::optional<FreeMonStorageState> FreeMonStorage::read(OperationContext* opCt
auto storageInterface = repl::StorageInterface::get(opCtx);
+ // Ensure we read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
+
AutoGetCollectionForRead autoRead(opCtx, NamespaceString::kServerConfigurationNamespace);
auto swObj = storageInterface->findById(
diff --git a/src/mongo/db/ftdc/collector.cpp b/src/mongo/db/ftdc/collector.cpp
index 37dd68b136e..11ba9d4d3a4 100644
--- a/src/mongo/db/ftdc/collector.cpp
+++ b/src/mongo/db/ftdc/collector.cpp
@@ -70,8 +70,9 @@ std::tuple<BSONObj, Date_t> FTDCCollectorCollection::collect(Client* client) {
ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock(opCtx->lockState());
opCtx->lockState()->skipAcquireTicket();
- // Explicitly start future read transactions without a timestamp.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ // Ensure future transactions read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
for (auto& collector : _collectors) {
BSONObjBuilder subObjBuilder(builder.subobjStart(collector->name()));
diff --git a/src/mongo/db/index_build_entry_helpers.cpp b/src/mongo/db/index_build_entry_helpers.cpp
index da3f43b29e2..fc689873f6e 100644
--- a/src/mongo/db/index_build_entry_helpers.cpp
+++ b/src/mongo/db/index_build_entry_helpers.cpp
@@ -254,7 +254,8 @@ Status removeIndexBuildEntry(OperationContext* opCtx, UUID indexBuildUUID) {
StatusWith<IndexBuildEntry> getIndexBuildEntry(OperationContext* opCtx, UUID indexBuildUUID) {
// Read the most up to date data.
- ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp);
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
AutoGetCollectionForRead autoCollection(opCtx, NamespaceString::kIndexBuildEntryNamespace);
const Collection* collection = autoCollection.getCollection();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index c8caafc318f..d27dd0848db 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -2553,7 +2553,8 @@ void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx,
// Read without a timestamp. When we commit, we block writes which guarantees all writes are
// visible.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
// The collection scan might read with a kMajorityCommitted read source, but will restore
// kNoTimestamp afterwards.
_scanCollectionAndInsertSortedKeysIntoIndex(opCtx, replState);
@@ -2655,7 +2656,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites(
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
opCtx,
replState->buildUUID,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kYield));
}
@@ -2681,7 +2682,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
opCtx,
replState->buildUUID,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
}
@@ -2769,7 +2770,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
opCtx,
replState->buildUUID,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
try {
@@ -2916,7 +2917,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexReb
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
opCtx,
replState->buildUUID,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
uassertStatusOK(
diff --git a/src/mongo/db/initialize_api_parameters.cpp b/src/mongo/db/initialize_api_parameters.cpp
index 11a5b68ae10..575fd476827 100644
--- a/src/mongo/db/initialize_api_parameters.cpp
+++ b/src/mongo/db/initialize_api_parameters.cpp
@@ -27,8 +27,17 @@
* it in the license file.
*/
+#include "mongo/platform/basic.h"
+
#include "mongo/db/initialize_api_parameters.h"
+#include <string>
+
+#include "mongo/db/commands.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/str.h"
+
namespace mongo {
const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx,
@@ -44,6 +53,10 @@ const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx,
apiParamsFromClient.getApiVersion());
}
+ if (command->acceptsAnyApiVersionParameters()) {
+ return apiParamsFromClient;
+ }
+
if (apiParamsFromClient.getApiDeprecationErrors() || apiParamsFromClient.getApiStrict()) {
uassert(4886600,
"Provided apiStrict and/or apiDeprecationErrors without passing apiVersion",
@@ -88,44 +101,4 @@ const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx,
return apiParamsFromClient;
}
-const OperationContext::Decoration<APIParameters> handle =
- OperationContext::declareDecoration<APIParameters>();
-
-APIParameters& APIParameters::get(OperationContext* opCtx) {
- return handle(opCtx);
-}
-
-APIParameters APIParameters::fromClient(const APIParametersFromClient& apiParamsFromClient) {
- APIParameters apiParameters = APIParameters();
- auto apiVersion = apiParamsFromClient.getApiVersion();
- auto apiStrict = apiParamsFromClient.getApiStrict();
- auto apiDeprecationErrors = apiParamsFromClient.getApiDeprecationErrors();
-
- if (apiVersion) {
- apiParameters.setAPIVersion(apiVersion.value());
- }
-
- if (apiStrict) {
- apiParameters.setAPIStrict(apiStrict.value());
- }
-
- if (apiDeprecationErrors) {
- apiParameters.setAPIDeprecationErrors(apiDeprecationErrors.value());
- }
-
- return apiParameters;
-}
-
-void APIParameters::appendInfo(BSONObjBuilder* builder) const {
- if (_apiVersion) {
- builder->append(kAPIVersionFieldName, *_apiVersion);
- }
- if (_apiStrict) {
- builder->append(kAPIStrictFieldName, *_apiStrict);
- }
- if (_apiDeprecationErrors) {
- builder->append(kAPIDeprecationErrorsFieldName, *_apiDeprecationErrors);
- }
-}
-
} // namespace mongo
diff --git a/src/mongo/db/initialize_api_parameters.h b/src/mongo/db/initialize_api_parameters.h
index 73215f607c8..e62d0defecc 100644
--- a/src/mongo/db/initialize_api_parameters.h
+++ b/src/mongo/db/initialize_api_parameters.h
@@ -29,73 +29,19 @@
#pragma once
-#include "mongo/db/commands.h"
-#include "mongo/db/initialize_api_parameters_gen.h"
-#include "mongo/db/operation_context.h"
+#include "api_parameters.h"
namespace mongo {
+class BSONObj;
+class Command;
+class OperationContext;
+
/**
- * See VERSIONED_API_README.md for an overview of the Versioned API.
- *
- * This function parses a command's API Version parameters from a request and stores the apiVersion,
+ * Parse a command's API Version parameters from a request and store the apiVersion,
* apiStrict, and apiDeprecationErrors fields.
*/
const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx,
const BSONObj& requestBody,
Command* command);
-
-/**
- * Decorates operation context with methods to retrieve apiVersion, apiStrict, and
- * apiDeprecationErrors.
- */
-class APIParameters {
-
-public:
- static constexpr StringData kAPIVersionFieldName = "apiVersion"_sd;
- static constexpr StringData kAPIStrictFieldName = "apiStrict"_sd;
- static constexpr StringData kAPIDeprecationErrorsFieldName = "apiDeprecationErrors"_sd;
-
- APIParameters() = default;
- static APIParameters& get(OperationContext* opCtx);
- static APIParameters fromClient(const APIParametersFromClient& apiParamsFromClient);
-
- void appendInfo(BSONObjBuilder* builder) const;
-
- const boost::optional<std::string>& getAPIVersion() const {
- return _apiVersion;
- }
-
- void setAPIVersion(StringData apiVersion) {
- _apiVersion = apiVersion.toString();
- }
-
- const boost::optional<bool>& getAPIStrict() const {
- return _apiStrict;
- }
-
- void setAPIStrict(bool apiStrict) {
- _apiStrict = apiStrict;
- }
-
- const boost::optional<bool>& getAPIDeprecationErrors() const {
- return _apiDeprecationErrors;
- }
-
- void setAPIDeprecationErrors(bool apiDeprecationErrors) {
- _apiDeprecationErrors = apiDeprecationErrors;
- }
-
- bool getParamsPassed() const {
- return _apiVersion || _apiStrict || _apiDeprecationErrors;
- }
-
- BSONObj toBSON() const;
-
-private:
- boost::optional<std::string> _apiVersion;
- boost::optional<bool> _apiStrict;
- boost::optional<bool> _apiDeprecationErrors;
-};
-
} // namespace mongo
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp
index f0722782157..e499d04881a 100644
--- a/src/mongo/db/mongod_options.cpp
+++ b/src/mongo/db/mongod_options.cpp
@@ -404,6 +404,9 @@ Status storeMongodOptions(const moe::Environment& params) {
if (params.count("storage.syncPeriodSecs")) {
storageGlobalParams.syncdelay = params["storage.syncPeriodSecs"].as<double>();
+ storageGlobalParams.checkpointDelaySecs =
+ static_cast<size_t>(params["storage.syncPeriodSecs"].as<double>());
+
if (storageGlobalParams.syncdelay < 0 ||
storageGlobalParams.syncdelay > StorageGlobalParams::kMaxSyncdelaySecs) {
return Status(ErrorCodes::BadValue,
diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp
index 9471aca909c..bee7df5ca40 100644
--- a/src/mongo/db/namespace_string.cpp
+++ b/src/mongo/db/namespace_string.cpp
@@ -144,6 +144,18 @@ bool NamespaceString::isLegalClientSystemNS() const {
return false;
}
+/**
+ * Oplog entries on 'system.views' should also be processed one at a time. View catalog immediately
+ * reflects changes for each oplog entry, so we could observe an inconsistent view catalog if
+ * multiple oplog entries on 'system.views' were applied out of their original order.
+ *
+ * Process updates to 'admin.system.version' individually as well so the secondary's FCV when
+ * processing each operation matches the primary's when committing that operation.
+ */
+bool NamespaceString::mustBeAppliedInOwnOplogBatch() const {
+ return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection();
+}
+
NamespaceString NamespaceString::makeListCollectionsNSS(StringData dbName) {
NamespaceString nss(dbName, listCollectionsCursorCol);
dassert(nss.isValid());
diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h
index a43406f8bd4..e5de9877c84 100644
--- a/src/mongo/db/namespace_string.h
+++ b/src/mongo/db/namespace_string.h
@@ -338,6 +338,11 @@ public:
bool isDropPendingNamespace() const;
/**
+ * Returns true if operations on this namespace must be applied in their own oplog batch.
+ */
+ bool mustBeAppliedInOwnOplogBatch() const;
+
+ /**
* Returns the drop-pending namespace name for this namespace, provided the given optime.
*
* Example:
diff --git a/src/mongo/db/pipeline/document_source_writer.h b/src/mongo/db/pipeline/document_source_writer.h
index 9c175890ecf..b91c49a90db 100644
--- a/src/mongo/db/pipeline/document_source_writer.h
+++ b/src/mongo/db/pipeline/document_source_writer.h
@@ -65,7 +65,7 @@ public:
}
repl::ReadConcernArgs::get(_opCtx) = repl::ReadConcernArgs();
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::kUnset);
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
}
~DocumentSourceWriteBlock() {
diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h
index 6cd1bba4f3b..5140d8ea32f 100644
--- a/src/mongo/db/pipeline/expression_context.h
+++ b/src/mongo/db/pipeline/expression_context.h
@@ -328,10 +328,6 @@ public:
// 'jsHeapLimitMB' server parameter.
boost::optional<int> jsHeapLimitMB;
- // When set this timeout limits the allowed execution time for a JavaScript function invocation
- // under any Scope returned by getJsExecWithScope().
- int jsFnTimeoutMillis;
-
// An interface for accessing information or performing operations that have different
// implementations on mongod and mongos, or that only make sense on one of the two.
// Additionally, putting some of this functionality behind an interface prevents aggregation
diff --git a/src/mongo/db/pipeline/process_interface/common_process_interface.cpp b/src/mongo/db/pipeline/process_interface/common_process_interface.cpp
index 330ef41693e..b6b304c348b 100644
--- a/src/mongo/db/pipeline/process_interface/common_process_interface.cpp
+++ b/src/mongo/db/pipeline/process_interface/common_process_interface.cpp
@@ -184,15 +184,11 @@ bool CommonProcessInterface::keyPatternNamesExactPaths(const BSONObj& keyPattern
boost::optional<ChunkVersion> CommonProcessInterface::refreshAndGetCollectionVersion(
const boost::intrusive_ptr<ExpressionContext>& expCtx, const NamespaceString& nss) const {
- const bool forceRefreshFromThisThread = false;
- auto cm = uassertStatusOK(
- Grid::get(expCtx->opCtx)
- ->catalogCache()
- ->getCollectionRoutingInfoWithRefresh(expCtx->opCtx, nss, forceRefreshFromThisThread));
- if (cm.isSharded()) {
- return cm.getVersion();
- }
- return boost::none;
+ const auto cm = uassertStatusOK(Grid::get(expCtx->opCtx)
+ ->catalogCache()
+ ->getCollectionRoutingInfoWithRefresh(expCtx->opCtx, nss));
+
+ return cm.isSharded() ? boost::make_optional(cm.getVersion()) : boost::none;
}
std::vector<FieldPath> CommonProcessInterface::_shardKeyToDocumentKeyFields(
diff --git a/src/mongo/db/pipeline/sharded_agg_helpers.h b/src/mongo/db/pipeline/sharded_agg_helpers.h
index 13a20fee607..c63ac997a32 100644
--- a/src/mongo/db/pipeline/sharded_agg_helpers.h
+++ b/src/mongo/db/pipeline/sharded_agg_helpers.h
@@ -245,13 +245,9 @@ auto shardVersionRetry(OperationContext* opCtx,
str::stream() << "StaleConfig error on unexpected namespace. Expected "
<< nss << ", received " << staleInfo->getNss());
catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
- opCtx,
- nss,
- staleInfo->getVersionWanted(),
- staleInfo->getVersionReceived(),
- staleInfo->getShardId());
+ nss, staleInfo->getVersionWanted(), staleInfo->getShardId());
} else {
- catalogCache->onEpochChange(nss);
+ catalogCache->invalidateCollectionEntry_LINEARIZABLE(nss);
}
if (!logAndTestMaxRetries(e)) {
throw;
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index a82d443d5bf..bfb2bc1dc4c 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -180,6 +180,7 @@ env.Library(
],
LIBDEPS=[
"$BUILD_DIR/mongo/base",
+ "$BUILD_DIR/mongo/db/api_parameters",
"$BUILD_DIR/mongo/db/catalog/collection_catalog",
# TODO: This dependency edge can be removed when the 'allowDiskUse' option no longer depends
# on enabling test commands.
diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript
index 175b109625d..0863192a593 100644
--- a/src/mongo/db/query/optimizer/SConscript
+++ b/src/mongo/db/query/optimizer/SConscript
@@ -8,6 +8,7 @@ env.Library(
target="optimizer",
source=[
"defs.cpp",
+ "memo.cpp",
"node.cpp",
],
LIBDEPS=[
diff --git a/src/mongo/db/query/optimizer/algebra/operator.h b/src/mongo/db/query/optimizer/algebra/operator.h
new file mode 100644
index 00000000000..524b7246413
--- /dev/null
+++ b/src/mongo/db/query/optimizer/algebra/operator.h
@@ -0,0 +1,305 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "mongo/db/query/optimizer/algebra/polyvalue.h"
+
+namespace mongo::optimizer {
+namespace algebra {
+
+template <typename T, int S>
+struct OpNodeStorage {
+ T _nodes[S];
+
+ template <typename... Ts>
+ OpNodeStorage(Ts&&... vals) : _nodes{std::forward<Ts>(vals)...} {}
+};
+
+template <typename T>
+struct OpNodeStorage<T, 0> {};
+
+/*=====-----
+ *
+ * Arity of operator can be:
+ * 1. statically known - A, A, A, ...
+ * 2. dynamic prefix with optional statically known - vector<A>, A, A, A, ...
+ *
+ * Denotations map A to some B.
+ * So static arity <A,A,A> is mapped to <B,B,B>.
+ * Similarly, arity <vector<A>,A> is mapped to <vector<B>,B>
+ *
+ * There is a wrinkle when B is a reference (if allowed at all)
+ * Arity <vector<A>, A, A> is mapped to <vector<B>&, B&, B&> - note that the reference is lifted
+ * outside of the vector.
+ *
+ */
+template <typename Slot, typename Derived, int Arity>
+class OpSpecificArity : public OpNodeStorage<Slot, Arity> {
+ using Base = OpNodeStorage<Slot, Arity>;
+
+public:
+ template <typename... Ts>
+ OpSpecificArity(Ts&&... vals) : Base({std::forward<Ts>(vals)...}) {
+ static_assert(sizeof...(Ts) == Arity, "constructor paramaters do not match");
+ }
+
+ template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0>
+ auto& get() noexcept {
+ return this->_nodes[I];
+ }
+
+ template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0>
+ const auto& get() const noexcept {
+ return this->_nodes[I];
+ }
+};
+/*=====-----
+ *
+ * Operator with dynamic arity
+ *
+ */
+template <typename Slot, typename Derived, int Arity>
+class OpSpecificDynamicArity : public OpSpecificArity<Slot, Derived, Arity> {
+ using Base = OpSpecificArity<Slot, Derived, Arity>;
+
+ std::vector<Slot> _dyNodes;
+
+public:
+ template <typename... Ts>
+ OpSpecificDynamicArity(std::vector<Slot> nodes, Ts&&... vals)
+ : Base({std::forward<Ts>(vals)...}), _dyNodes(std::move(nodes)) {}
+
+ auto& nodes() {
+ return _dyNodes;
+ }
+ const auto& nodes() const {
+ return _dyNodes;
+ }
+};
+
+/*=====-----
+ *
+ * Semantic transport interface
+ *
+ */
+namespace detail {
+template <typename D, typename T, typename = std::void_t<>>
+struct has_prepare : std::false_type {};
+template <typename D, typename T>
+struct has_prepare<D, T, std::void_t<decltype(std::declval<D>().prepare(std::declval<T&>()))>>
+ : std::true_type {};
+
+template <typename D, typename T>
+inline constexpr auto has_prepare_v = has_prepare<D, T>::value;
+
+template <typename Slot, typename Derived, int Arity>
+inline constexpr int get_arity(const OpSpecificArity<Slot, Derived, Arity>*) {
+ return Arity;
+}
+
+template <typename Slot, typename Derived, int Arity>
+inline constexpr bool is_dynamic(const OpSpecificArity<Slot, Derived, Arity>*) {
+ return false;
+}
+
+template <typename Slot, typename Derived, int Arity>
+inline constexpr bool is_dynamic(const OpSpecificDynamicArity<Slot, Derived, Arity>*) {
+ return true;
+}
+
+template <typename T>
+using OpConcreteType = typename std::remove_reference_t<T>::template get_t<0>;
+} // namespace detail
+
+template <typename D, bool withSlot>
+class OpTransporter {
+ D& _domain;
+
+ template <typename T, bool B>
+ struct Deducer {};
+ template <typename T>
+ struct Deducer<T, true> {
+ using type = decltype(std::declval<D>().transport(
+ std::declval<T>(), std::declval<detail::OpConcreteType<T>&>()));
+ };
+ template <typename T>
+ struct Deducer<T, false> {
+ using type =
+ decltype(std::declval<D>().transport(std::declval<detail::OpConcreteType<T>&>()));
+ };
+ template <typename T>
+ using deduced_t = typename Deducer<T, withSlot>::type;
+
+ template <typename N, typename T, typename... Ts>
+ auto transformStep(N&& slot, T&& op, Ts&&... args) {
+ if constexpr (withSlot) {
+ return _domain.transport(
+ std::forward<N>(slot), std::forward<T>(op), std::forward<Ts>(args)...);
+ } else {
+ return _domain.transport(std::forward<T>(op), std::forward<Ts>(args)...);
+ }
+ }
+
+ template <typename N, typename T, size_t... I>
+ auto transportUnpack(N&& slot, T&& op, std::index_sequence<I...>) {
+ return transformStep(
+ std::forward<N>(slot), std::forward<T>(op), op.template get<I>().visit(*this)...);
+ }
+ template <typename N, typename T, size_t... I>
+ auto transportDynamicUnpack(N&& slot, T&& op, std::index_sequence<I...>) {
+ std::vector<decltype(slot.visit(*this))> v;
+ for (auto& node : op.nodes()) {
+ v.emplace_back(node.visit(*this));
+ }
+ return transformStep(std::forward<N>(slot),
+ std::forward<T>(op),
+ std::move(v),
+ op.template get<I>().visit(*this)...);
+ }
+ template <typename N, typename T, size_t... I>
+ void transportUnpackVoid(N&& slot, T&& op, std::index_sequence<I...>) {
+ (op.template get<I>().visit(*this), ...);
+ return transformStep(std::forward<N>(slot), std::forward<T>(op), op.template get<I>()...);
+ }
+ template <typename N, typename T, size_t... I>
+ void transportDynamicUnpackVoid(N&& slot, T&& op, std::index_sequence<I...>) {
+ for (auto& node : op.nodes()) {
+ node.visit(*this);
+ }
+ (op.template get<I>().visit(*this), ...);
+ return transformStep(
+ std::forward<N>(slot), std::forward<T>(op), op.nodes(), op.template get<I>()...);
+ }
+
+public:
+ OpTransporter(D& domain) : _domain(domain) {}
+
+ template <typename N, typename T, typename R = deduced_t<N>>
+ R operator()(N&& slot, T&& op) {
+ // N is either `PolyValue<Ts...>&` or `const PolyValue<Ts...>&` i.e. reference
+ // T is either `A&` or `const A&` where A is one of Ts
+ using type = std::remove_reference_t<T>;
+
+ constexpr int arity = detail::get_arity(static_cast<type*>(nullptr));
+ constexpr bool is_dynamic = detail::is_dynamic(static_cast<type*>(nullptr));
+
+ if constexpr (detail::has_prepare_v<D, type>) {
+ _domain.prepare(std::forward<T>(op));
+ }
+ if constexpr (is_dynamic) {
+ if constexpr (std::is_same_v<R, void>) {
+ return transportDynamicUnpackVoid(
+ std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{});
+ } else {
+ return transportDynamicUnpack(
+ std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{});
+ }
+ } else {
+ if constexpr (std::is_same_v<R, void>) {
+ return transportUnpackVoid(
+ std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{});
+ } else {
+ return transportUnpack(
+ std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{});
+ }
+ }
+ }
+};
+
+template <typename D, bool withSlot>
+class OpWalker {
+ D& _domain;
+
+ template <typename N, typename T, typename... Ts>
+ auto walkStep(N&& slot, T&& op, Ts&&... args) {
+ if constexpr (withSlot) {
+ return _domain.walk(
+ std::forward<N>(slot), std::forward<T>(op), std::forward<Ts>(args)...);
+ } else {
+ return _domain.walk(std::forward<T>(op), std::forward<Ts>(args)...);
+ }
+ }
+
+ template <typename N, typename T, typename... Args, size_t... I>
+ auto walkUnpack(N&& slot, T&& op, std::index_sequence<I...>, Args&&... args) {
+ return walkStep(std::forward<N>(slot),
+ std::forward<T>(op),
+ std::forward<Args>(args)...,
+ op.template get<I>()...);
+ }
+ template <typename N, typename T, typename... Args, size_t... I>
+ auto walkDynamicUnpack(N&& slot, T&& op, std::index_sequence<I...>, Args&&... args) {
+ return walkStep(std::forward<N>(slot),
+ std::forward<T>(op),
+ std::forward<Args>(args)...,
+ op.nodes(),
+ op.template get<I>()...);
+ }
+
+public:
+ OpWalker(D& domain) : _domain(domain) {}
+
+ template <typename N, typename T, typename... Args>
+ auto operator()(N&& slot, T&& op, Args&&... args) {
+ // N is either `PolyValue<Ts...>&` or `const PolyValue<Ts...>&` i.e. reference
+ // T is either `A&` or `const A&` where A is one of Ts
+ using type = std::remove_reference_t<T>;
+
+ constexpr int arity = detail::get_arity(static_cast<type*>(nullptr));
+ constexpr bool is_dynamic = detail::is_dynamic(static_cast<type*>(nullptr));
+
+ if constexpr (is_dynamic) {
+ return walkDynamicUnpack(std::forward<N>(slot),
+ std::forward<T>(op),
+ std::make_index_sequence<arity>{},
+ std::forward<Args>(args)...);
+ } else {
+ return walkUnpack(std::forward<N>(slot),
+ std::forward<T>(op),
+ std::make_index_sequence<arity>{},
+ std::forward<Args>(args)...);
+ }
+ }
+};
+
+template <bool withSlot = false, typename D, typename N>
+auto transport(N&& node, D& domain) {
+ return node.visit(OpTransporter<D, withSlot>{domain});
+}
+
+template <bool withSlot = false, typename D, typename N, typename... Args>
+auto walk(N&& node, D& domain, Args&&... args) {
+ return node.visit(OpWalker<D, withSlot>{domain}, std::forward<Args>(args)...);
+}
+
+} // namespace algebra
+} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/algebra/polyvalue.h b/src/mongo/db/query/optimizer/algebra/polyvalue.h
new file mode 100644
index 00000000000..374041c5704
--- /dev/null
+++ b/src/mongo/db/query/optimizer/algebra/polyvalue.h
@@ -0,0 +1,381 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <array>
+#include <stdexcept>
+#include <type_traits>
+
+namespace mongo::optimizer {
+namespace algebra {
+namespace detail {
+
+template <typename T, typename... Args>
+inline constexpr bool is_one_of_v = std::disjunction_v<std::is_same<T, Args>...>;
+
+template <typename T, typename... Args>
+inline constexpr bool is_one_of_f() {
+ return is_one_of_v<T, Args...>;
+}
+
+template <typename... Args>
+struct is_unique_t : std::true_type {};
+
+template <typename H, typename... T>
+struct is_unique_t<H, T...>
+ : std::bool_constant<!is_one_of_f<H, T...>() && is_unique_t<T...>::value> {};
+
+template <typename... Args>
+inline constexpr bool is_unique_v = is_unique_t<Args...>::value;
+
+// Given the type T find its index in Ts
+template <typename T, typename... Ts>
+static inline constexpr int find_index() {
+ static_assert(detail::is_unique_v<Ts...>, "Types must be unique");
+ constexpr bool matchVector[] = {std::is_same<T, Ts>::value...};
+
+ for (int index = 0; index < static_cast<int>(sizeof...(Ts)); ++index) {
+ if (matchVector[index]) {
+ return index;
+ }
+ }
+
+ return -1;
+}
+
+template <int N, typename T, typename... Ts>
+struct get_type_by_index_impl {
+ using type = typename get_type_by_index_impl<N - 1, Ts...>::type;
+};
+template <typename T, typename... Ts>
+struct get_type_by_index_impl<0, T, Ts...> {
+ using type = T;
+};
+
+// Given the index I return the type from Ts
+template <int I, typename... Ts>
+using get_type_by_index = typename get_type_by_index_impl<I, Ts...>::type;
+
+} // namespace detail
+
+/*=====-----
+ *
+ * The overload trick to construct visitors from lambdas.
+ *
+ */
+template <class... Ts>
+struct overload : Ts... {
+ using Ts::operator()...;
+};
+template <class... Ts>
+overload(Ts...)->overload<Ts...>;
+
+/*=====-----
+ *
+ * Forward declarations
+ *
+ */
+template <typename... Ts>
+class PolyValue;
+
+template <typename T, typename... Ts>
+class ControlBlockVTable;
+
+/*=====-----
+ *
+ * The base control block that PolyValue holds.
+ *
+ * It does not contain anything other than the runtime tag.
+ *
+ */
+template <typename... Ts>
+class ControlBlock {
+ const int _tag;
+
+protected:
+ ControlBlock(int tag) noexcept : _tag(tag) {}
+
+public:
+ auto getRuntimeTag() const noexcept {
+ return _tag;
+ }
+};
+
+/*=====-----
+ *
+ * The concrete control block VTable generator.
+ *
+ * It must be empty, as PolyValue derives from the generators
+ * and we want EBO to kick in.
+ *
+ */
+template <typename T, typename... Ts>
+class ControlBlockVTable {
+ static constexpr int _staticTag = detail::find_index<T, Ts...>();
+ static_assert(_staticTag != -1, "Type must be on the list");
+
+ using AbstractType = ControlBlock<Ts...>;
+ using PolyValueType = PolyValue<Ts...>;
+
+ /*=====-----
+ *
+ * The concrete control block for every type T of Ts.
+ *
+ * It derives from the ControlBlock. All methods are private and only
+ * the friend class ControlBlockVTable can call them.
+ *
+ */
+ class ConcreteType : public AbstractType {
+ T _t;
+
+ public:
+ template <typename... Args>
+ ConcreteType(Args&&... args) : AbstractType(_staticTag), _t(std::forward<Args>(args)...) {}
+
+ const T* getPtr() const {
+ return &_t;
+ }
+
+ T* getPtr() {
+ return &_t;
+ }
+ };
+
+ static constexpr auto concrete(AbstractType* block) noexcept {
+ return static_cast<ConcreteType*>(block);
+ }
+
+ static constexpr auto concrete(const AbstractType* block) noexcept {
+ return static_cast<const ConcreteType*>(block);
+ }
+
+public:
+ template <typename... Args>
+ static AbstractType* make(Args&&... args) {
+ return new ConcreteType(std::forward<Args>(args)...);
+ }
+
+ static AbstractType* clone(const AbstractType* block) {
+ return new ConcreteType(*concrete(block));
+ }
+
+ static void destroy(AbstractType* block) noexcept {
+ delete concrete(block);
+ }
+
+ static bool compareEq(AbstractType* blockLhs, AbstractType* blockRhs) noexcept {
+ if (blockLhs->getRuntimeTag() == blockRhs->getRuntimeTag()) {
+ return *castConst<T>(blockLhs) == *castConst<T>(blockRhs);
+ }
+ return false;
+ }
+
+ template <typename U>
+ static constexpr bool is_v = std::is_base_of_v<U, T>;
+
+ template <typename U>
+ static U* cast(AbstractType* block) {
+ if constexpr (is_v<U>) {
+ return static_cast<U*>(concrete(block)->getPtr());
+ } else {
+ // gcc bug 81676
+ (void)block;
+ return nullptr;
+ }
+ }
+
+ template <typename U>
+ static const U* castConst(const AbstractType* block) {
+ if constexpr (is_v<U>) {
+ return static_cast<const U*>(concrete(block)->getPtr());
+ } else {
+ // gcc bug 81676
+ (void)block;
+ return nullptr;
+ }
+ }
+
+ template <typename V, typename... Args>
+ static auto visit(V&& v, PolyValueType& holder, AbstractType* block, Args&&... args) {
+ return v(holder, *cast<T>(block), std::forward<Args>(args)...);
+ }
+
+ template <typename V, typename... Args>
+ static auto visitConst(V&& v,
+ const PolyValueType& holder,
+ const AbstractType* block,
+ Args&&... args) {
+ return v(holder, *castConst<T>(block), std::forward<Args>(args)...);
+ }
+};
+
+/*=====-----
+ *
+ * This is a variation on variant and polymorphic value theme.
+ *
+ * A tag based dispatch
+ *
+ * Supported operations:
+ * - construction
+ * - destruction
+ * - clone a = b;
+ * - cast a.cast<T>()
+ * - multi-method cast to common base a.cast<B>()
+ * - multi-method visit
+ */
+template <typename... Ts>
+class PolyValue : private ControlBlockVTable<Ts, Ts...>... {
+ static_assert(detail::is_unique_v<Ts...>, "Types must be unique");
+ static_assert(std::conjunction_v<std::is_empty<ControlBlockVTable<Ts, Ts...>>...>,
+ "VTable base classes must be empty");
+
+ ControlBlock<Ts...>* _object{nullptr};
+
+ PolyValue(ControlBlock<Ts...>* object) noexcept : _object(object) {}
+
+ auto tag() const noexcept {
+ return _object->getRuntimeTag();
+ }
+
+ void check() const {
+ if (!_object) {
+ throw std::logic_error("PolyValue is empty");
+ }
+ }
+
+ static void destroy(ControlBlock<Ts...>* object) {
+ static constexpr std::array destroyTbl = {&ControlBlockVTable<Ts, Ts...>::destroy...};
+
+ destroyTbl[object->getRuntimeTag()](object);
+ }
+
+public:
+ PolyValue() = delete;
+
+ PolyValue(const PolyValue& other) {
+ static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
+ if (other._object) {
+ _object = cloneTbl[other.tag()](other._object);
+ }
+ }
+
+ PolyValue(PolyValue&& other) noexcept {
+ swap(other);
+ }
+
+ ~PolyValue() noexcept {
+ if (_object) {
+ destroy(_object);
+ }
+ }
+
+ PolyValue& operator=(PolyValue other) noexcept {
+ swap(other);
+ return *this;
+ }
+
+ template <typename T, typename... Args>
+ static PolyValue make(Args&&... args) {
+ return PolyValue{ControlBlockVTable<T, Ts...>::make(std::forward<Args>(args)...)};
+ }
+
+ template <int I>
+ using get_t = detail::get_type_by_index<I, Ts...>;
+
+ template <typename V, typename... Args>
+ auto visit(V&& v, Args&&... args) {
+ // unfortunately gcc rejects much nicer code, clang and msvc accept
+ // static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
+ // visit<V>... };
+
+ using FunPtrType =
+ decltype(&ControlBlockVTable<get_t<0>, Ts...>::template visit<V, Args...>);
+ static constexpr FunPtrType visitTbl[] = {
+ &ControlBlockVTable<Ts, Ts...>::template visit<V, Args...>...};
+
+ check();
+ return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ }
+
+ template <typename V, typename... Args>
+ auto visit(V&& v, Args&&... args) const {
+ // unfortunately gcc rejects much nicer code, clang and msvc accept
+ // static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
+ // visitConst<V>... };
+
+ using FunPtrType =
+ decltype(&ControlBlockVTable<get_t<0>, Ts...>::template visitConst<V, Args...>);
+ static constexpr FunPtrType visitTbl[] = {
+ &ControlBlockVTable<Ts, Ts...>::template visitConst<V, Args...>...};
+
+ check();
+ return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ }
+
+ template <typename T>
+ T* cast() {
+ check();
+ static constexpr std::array castTbl = {&ControlBlockVTable<Ts, Ts...>::template cast<T>...};
+ return castTbl[tag()](_object);
+ }
+
+ template <typename T>
+ const T* cast() const {
+ static constexpr std::array castTbl = {
+ &ControlBlockVTable<Ts, Ts...>::template castConst<T>...};
+
+ check();
+ return castTbl[tag()](_object);
+ }
+
+ template <typename T>
+ bool is() const {
+ static constexpr std::array isTbl = {ControlBlockVTable<Ts, Ts...>::template is_v<T>...};
+
+ check();
+ return isTbl[tag()];
+ }
+
+ bool empty() const {
+ return !_object;
+ }
+
+ void swap(PolyValue& other) noexcept {
+ std::swap(other._object, _object);
+ }
+
+ bool operator==(const PolyValue& rhs) const noexcept {
+ static constexpr std::array cmp = {ControlBlockVTable<Ts, Ts...>::compareEq...};
+ return cmp[tag()](_object, rhs._object);
+ }
+};
+
+} // namespace algebra
+} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/memo.cpp b/src/mongo/db/query/optimizer/memo.cpp
new file mode 100644
index 00000000000..c4dadbb3d5a
--- /dev/null
+++ b/src/mongo/db/query/optimizer/memo.cpp
@@ -0,0 +1,43 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/optimizer/algebra/operator.h"
+#include "mongo/db/query/optimizer/memo.h"
+#include "mongo/db/query/optimizer/node.h"
+
+namespace mongo::optimizer {
+
+std::string MemoGenerator::generateMemo(const PolymorphicNode& e) {
+ _os.str("");
+ _os.clear();
+ algebra::transport<false>(e, *this);
+ return _os.str();
+}
+
+} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/visitor.h b/src/mongo/db/query/optimizer/memo.h
index 1aa0a886fab..ad3703f8fd8 100644
--- a/src/mongo/db/query/optimizer/visitor.h
+++ b/src/mongo/db/query/optimizer/memo.h
@@ -31,16 +31,24 @@
#include <string>
+#include "mongo/db/query/optimizer/node.h"
+
namespace mongo::optimizer {
-class AbstractVisitor {
+class MemoGenerator {
public:
- virtual void visit(const ScanNode& node) = 0;
- virtual void visit(const MultiJoinNode& node) = 0;
- virtual void visit(const UnionNode& node) = 0;
- virtual void visit(const GroupByNode& node) = 0;
- virtual void visit(const UnwindNode& node) = 0;
- virtual void visit(const WindNode& node) = 0;
+ template <typename T, typename... Ts>
+ void transport(const T&, Ts&&...) {}
+
+ template <typename T>
+ void prepare(const T& n) {
+ n.generateMemo(_os);
+ }
+
+ std::string generateMemo(const PolymorphicNode& e);
+
+private:
+ std::ostringstream _os;
};
-} // namespace mongo::optimizer
+} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp
index 4836dcce39e..a1455efd60f 100644
--- a/src/mongo/db/query/optimizer/node.cpp
+++ b/src/mongo/db/query/optimizer/node.cpp
@@ -30,130 +30,19 @@
#include <functional>
#include <stack>
+#include "mongo/db/query/optimizer/memo.h"
#include "mongo/db/query/optimizer/node.h"
-#include "mongo/db/query/optimizer/visitor.h"
-#include "mongo/util/assert_util.h"
namespace mongo::optimizer {
-Node::Node(Context& ctx) : _nodeId(ctx.getNextNodeId()), _children() {}
-
-Node::Node(Context& ctx, NodePtr child) : _nodeId(ctx.getNextNodeId()) {
- _children.push_back(std::move(child));
-}
-
-Node::Node(Context& ctx, ChildVector children)
- : _nodeId(ctx.getNextNodeId()), _children(std::move(children)) {}
+Node::Node(Context& ctx) : _nodeId(ctx.getNextNodeId()) {}
void Node::generateMemoBase(std::ostringstream& os) const {
os << "NodeId: " << _nodeId << "\n";
}
-void Node::visitPreOrder(AbstractVisitor& visitor) const {
- visit(visitor);
- for (const NodePtr& ptr : _children) {
- ptr->visitPreOrder(visitor);
- }
-}
-
-void Node::visitPostOrder(AbstractVisitor& visitor) const {
- for (const NodePtr& ptr : _children) {
- ptr->visitPostOrder(visitor);
- }
- visit(visitor);
-}
-
-std::string Node::generateMemo() const {
- class MemoVisitor : public AbstractVisitor {
- protected:
- void visit(const ScanNode& node) override {
- node.generateMemo(_os);
- }
- void visit(const MultiJoinNode& node) override {
- node.generateMemo(_os);
- }
- void visit(const UnionNode& node) override {
- node.generateMemo(_os);
- }
- void visit(const GroupByNode& node) override {
- node.generateMemo(_os);
- }
- void visit(const UnwindNode& node) override {
- node.generateMemo(_os);
- }
- void visit(const WindNode& node) override {
- node.generateMemo(_os);
- }
-
- public:
- std::ostringstream _os;
- };
-
- MemoVisitor visitor;
- visitPreOrder(visitor);
- return visitor._os.str();
-}
-
-NodePtr Node::clone(Context& ctx) const {
- class CloneVisitor : public AbstractVisitor {
- public:
- explicit CloneVisitor(Context& ctx) : _ctx(ctx), _childStack() {}
-
- protected:
- void visit(const ScanNode& node) override {
- doClone(node, [&](ChildVector v){ return ScanNode::clone(_ctx, node); });
- }
- void visit(const MultiJoinNode& node) override {
- doClone(node, [&](ChildVector v){ return MultiJoinNode::clone(_ctx, node, std::move(v)); });
- }
- void visit(const UnionNode& node) override {
- doClone(node, [&](ChildVector v){ return UnionNode::clone(_ctx, node, std::move(v)); });
- }
- void visit(const GroupByNode& node) override {
- doClone(node, [&](ChildVector v){ return GroupByNode::clone(_ctx, node, std::move(v.at(0))); });
- }
- void visit(const UnwindNode& node) override {
- doClone(node, [&](ChildVector v){ return UnwindNode::clone(_ctx, node, std::move(v.at(0))); });
- }
- void visit(const WindNode& node) override {
- doClone(node, [&](ChildVector v){ return WindNode::clone(_ctx, node, std::move(v.at(0))); });
- }
-
- private:
- void doClone(const Node& node, const std::function<NodePtr(ChildVector newChildren)>& cloneFn) {
- ChildVector newChildren;
- for (int i = 0; i < node.getChildCount(); i++) {
- newChildren.push_back(std::move(_childStack.top()));
- _childStack.pop();
- }
- _childStack.push(cloneFn(std::move(newChildren)));
- }
-
- public:
- Context& _ctx;
- std::stack<NodePtr> _childStack;
- };
-
- CloneVisitor visitor(ctx);
- visitPostOrder(visitor);
- invariant(visitor._childStack.size() == 1);
- return std::move(visitor._childStack.top());
-}
-
-int Node::getChildCount() const {
- return _children.size();
-}
-
-NodePtr ScanNode::create(Context& ctx, CollectionNameType collectionName) {
- return NodePtr(new ScanNode(ctx, std::move(collectionName)));
-}
-
-NodePtr ScanNode::clone(Context& ctx, const ScanNode& other) {
- return create(ctx, other._collectionName);
-}
-
ScanNode::ScanNode(Context& ctx, CollectionNameType collectionName)
- : Node(ctx), _collectionName(std::move(collectionName)) {}
+ : Base(), Node(ctx), _collectionName(std::move(collectionName)) {}
void ScanNode::generateMemo(std::ostringstream& os) const {
Node::generateMemoBase(os);
@@ -161,27 +50,12 @@ void ScanNode::generateMemo(std::ostringstream& os) const {
<< "\n";
}
-void ScanNode::visit(AbstractVisitor& visitor) const {
- visitor.visit(*this);
-}
-
-NodePtr MultiJoinNode::create(Context& ctx,
- FilterSet filterSet,
- ProjectionMap projectionMap,
- ChildVector children) {
- return NodePtr(new MultiJoinNode(
- ctx, std::move(filterSet), std::move(projectionMap), std::move(children)));
-}
-
-NodePtr MultiJoinNode::clone(Context& ctx, const MultiJoinNode& other, ChildVector newChildren) {
- return create(ctx, other._filterSet, other._projectionMap, std::move(newChildren));
-}
-
MultiJoinNode::MultiJoinNode(Context& ctx,
FilterSet filterSet,
ProjectionMap projectionMap,
- ChildVector children)
- : Node(ctx, std::move(children)),
+ PolymorphicNodeVector children)
+ : Base(std::move(children)),
+ Node(ctx),
_filterSet(std::move(filterSet)),
_projectionMap(std::move(projectionMap)) {}
@@ -191,20 +65,8 @@ void MultiJoinNode::generateMemo(std::ostringstream& os) const {
<< "\n";
}
-void MultiJoinNode::visit(AbstractVisitor& visitor) const {
- visitor.visit(*this);
-}
-
-NodePtr UnionNode::create(Context& ctx, ChildVector children) {
- return NodePtr(new UnionNode(ctx, std::move(children)));
-}
-
-NodePtr UnionNode::clone(Context& ctx, const UnionNode& other, ChildVector newChildren) {
- return create(ctx, std::move(newChildren));
-}
-
-UnionNode::UnionNode(Context& ctx, ChildVector children)
- : Node(ctx, std::move(children)) {}
+UnionNode::UnionNode(Context& ctx, PolymorphicNodeVector children)
+ : Base(std::move(children)), Node(ctx) {}
void UnionNode::generateMemo(std::ostringstream& os) const {
Node::generateMemoBase(os);
@@ -212,27 +74,12 @@ void UnionNode::generateMemo(std::ostringstream& os) const {
<< "\n";
}
-void UnionNode::visit(AbstractVisitor& visitor) const {
- visitor.visit(*this);
-}
-
-NodePtr GroupByNode::create(Context& ctx,
- GroupByNode::GroupByVector groupByVector,
- GroupByNode::ProjectionMap projectionMap,
- NodePtr child) {
- return NodePtr(
- new GroupByNode(ctx, std::move(groupByVector), std::move(projectionMap), std::move(child)));
-}
-
-NodePtr GroupByNode::clone(Context& ctx, const GroupByNode& other, NodePtr newChild) {
- return create(ctx, other._groupByVector, other._projectionMap, std::move(newChild));
-}
-
GroupByNode::GroupByNode(Context& ctx,
GroupByNode::GroupByVector groupByVector,
GroupByNode::ProjectionMap projectionMap,
- NodePtr child)
- : Node(ctx, std::move(child)),
+ PolymorphicNode child)
+ : Base(std::move(child)),
+ Node(ctx),
_groupByVector(std::move(groupByVector)),
_projectionMap(std::move(projectionMap)) {}
@@ -242,27 +89,12 @@ void GroupByNode::generateMemo(std::ostringstream& os) const {
<< "\n";
}
-void GroupByNode::visit(AbstractVisitor& visitor) const {
- visitor.visit(*this);
-}
-
-NodePtr UnwindNode::create(Context& ctx,
- ProjectionName projectionName,
- const bool retainNonArrays,
- NodePtr child) {
- return NodePtr(
- new UnwindNode(ctx, std::move(projectionName), retainNonArrays, std::move(child)));
-}
-
-NodePtr UnwindNode::clone(Context& ctx, const UnwindNode& other, NodePtr newChild) {
- return create(ctx, other._projectionName, other._retainNonArrays, std::move(newChild));
-}
-
UnwindNode::UnwindNode(Context& ctx,
ProjectionName projectionName,
const bool retainNonArrays,
- NodePtr child)
- : Node(ctx, std::move(child)),
+ PolymorphicNode child)
+ : Base(std::move(child)),
+ Node(ctx),
_projectionName(std::move(projectionName)),
_retainNonArrays(retainNonArrays) {}
@@ -272,20 +104,8 @@ void UnwindNode::generateMemo(std::ostringstream& os) const {
<< "\n";
}
-void UnwindNode::visit(AbstractVisitor& visitor) const {
- visitor.visit(*this);
-}
-
-NodePtr WindNode::create(Context& ctx, ProjectionName projectionName, NodePtr child) {
- return NodePtr(new WindNode(ctx, std::move(projectionName), std::move(child)));
-}
-
-NodePtr WindNode::clone(Context& ctx, const WindNode& other, NodePtr newChild) {
- return create(ctx, other._projectionName, std::move(newChild));
-}
-
-WindNode::WindNode(Context& ctx, ProjectionName projectionName, NodePtr child)
- : Node(ctx, std::move(child)), _projectionName(std::move(projectionName)) {}
+WindNode::WindNode(Context& ctx, ProjectionName projectionName, PolymorphicNode child)
+ : Base(std::move(child)), Node(ctx), _projectionName(std::move(projectionName)) {}
void WindNode::generateMemo(std::ostringstream& os) const {
Node::generateMemoBase(os);
@@ -293,8 +113,4 @@ void WindNode::generateMemo(std::ostringstream& os) const {
<< "\n";
}
-void WindNode::visit(AbstractVisitor& visitor) const {
- visitor.visit(*this);
-}
-
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/node.h b/src/mongo/db/query/optimizer/node.h
index 78010d7d333..33215f967e0 100644
--- a/src/mongo/db/query/optimizer/node.h
+++ b/src/mongo/db/query/optimizer/node.h
@@ -37,6 +37,7 @@
#include <utility>
#include <vector>
+#include "mongo/db/query/optimizer/algebra/operator.h"
#include "mongo/db/query/optimizer/defs.h"
#include "mongo/db/query/optimizer/filter.h"
#include "mongo/db/query/optimizer/projection.h"
@@ -45,156 +46,137 @@
namespace mongo::optimizer {
-class Node;
-using NodePtr = std::unique_ptr<Node>;
-class AbstractVisitor;
+class ScanNode;
+class MultiJoinNode;
+class UnionNode;
+class GroupByNode;
+class UnwindNode;
+class WindNode;
-class Node {
-public:
- using ChildVector = std::vector<NodePtr>;
+using PolymorphicNode =
+ algebra::PolyValue<ScanNode, MultiJoinNode, UnionNode, GroupByNode, UnwindNode, WindNode>;
+
+template <typename Derived, size_t Arity>
+using Operator = algebra::OpSpecificArity<PolymorphicNode, Derived, Arity>;
+
+template <typename Derived, size_t Arity>
+using OperatorDynamic = algebra::OpSpecificDynamicArity<PolymorphicNode, Derived, Arity>;
+
+template <typename Derived>
+using OperatorDynamicHomogenous = OperatorDynamic<Derived, 0>;
+
+using PolymorphicNodeVector = std::vector<PolymorphicNode>;
+
+template <typename T, typename... Args>
+inline auto make(Args&&... args) {
+ return PolymorphicNode::make<T>(std::forward<Args>(args)...);
+}
+template <typename... Args>
+inline auto makeSeq(Args&&... args) {
+ PolymorphicNodeVector seq;
+ (seq.emplace_back(std::forward<Args>(args)), ...);
+ return seq;
+}
+
+class Node {
protected:
explicit Node(Context& ctx);
- explicit Node(Context& ctx, NodePtr child);
- explicit Node(Context& ctx, ChildVector children);
void generateMemoBase(std::ostringstream& os) const;
- virtual void visit(AbstractVisitor& visitor) const = 0;
- void visitPreOrder(AbstractVisitor& visitor) const;
- void visitPostOrder(AbstractVisitor& visitor) const;
-
- // clone
public:
Node() = delete;
- std::string generateMemo() const;
-
- NodePtr clone(Context& ctx) const;
-
- int getChildCount() const;
-
private:
const NodeIdType _nodeId;
- ChildVector _children;
};
-class ScanNode : public Node {
+class ScanNode final : public Operator<ScanNode, 0>, public Node {
+ using Base = Operator<ScanNode, 0>;
+
public:
- static NodePtr create(Context& ctx, CollectionNameType collectionName);
- static NodePtr clone(Context& ctx, const ScanNode& other);
+ explicit ScanNode(Context& ctx, CollectionNameType collectionName);
void generateMemo(std::ostringstream& os) const;
-protected:
- void visit(AbstractVisitor& visitor) const override;
-
private:
- explicit ScanNode(Context& ctx, CollectionNameType collectionName);
-
const CollectionNameType _collectionName;
};
-class MultiJoinNode : public Node {
+class MultiJoinNode final : public OperatorDynamicHomogenous<MultiJoinNode>, public Node {
+ using Base = OperatorDynamicHomogenous<MultiJoinNode>;
+
public:
using FilterSet = std::unordered_set<FilterType>;
using ProjectionMap = std::unordered_map<ProjectionName, ProjectionType>;
- static NodePtr create(Context& ctx,
- FilterSet filterSet,
- ProjectionMap projectionMap,
- ChildVector children);
- static NodePtr clone(Context& ctx, const MultiJoinNode& other, ChildVector newChildren);
-
- void generateMemo(std::ostringstream& os) const;
-
-protected:
- void visit(AbstractVisitor& visitor) const override;
-
-private:
explicit MultiJoinNode(Context& ctx,
FilterSet filterSet,
ProjectionMap projectionMap,
- ChildVector children);
+ PolymorphicNodeVector children);
+ void generateMemo(std::ostringstream& os) const;
+
+private:
FilterSet _filterSet;
ProjectionMap _projectionMap;
};
-class UnionNode : public Node {
+class UnionNode final : public OperatorDynamicHomogenous<UnionNode>, public Node {
+ using Base = OperatorDynamicHomogenous<UnionNode>;
+
public:
- static NodePtr create(Context& ctx, ChildVector children);
- static NodePtr clone(Context& ctx, const UnionNode& other, ChildVector newChildren);
+ explicit UnionNode(Context& ctx, PolymorphicNodeVector children);
void generateMemo(std::ostringstream& os) const;
-
-protected:
- void visit(AbstractVisitor& visitor) const override;
-
-private:
- explicit UnionNode(Context& ctx, ChildVector children);
};
-class GroupByNode : public Node {
+class GroupByNode : public Operator<GroupByNode, 1>, public Node {
+ using Base = Operator<GroupByNode, 1>;
+
public:
using GroupByVector = std::vector<ProjectionName>;
using ProjectionMap = std::unordered_map<ProjectionName, ProjectionType>;
- static NodePtr create(Context& ctx,
- GroupByVector groupByVector,
- ProjectionMap projectionMap,
- NodePtr child);
- static NodePtr clone(Context& ctx, const GroupByNode& other, NodePtr newChild);
-
- void generateMemo(std::ostringstream& os) const;
-
-protected:
- void visit(AbstractVisitor& visitor) const override;
-
-private:
explicit GroupByNode(Context& ctx,
GroupByVector groupByVector,
ProjectionMap projectionMap,
- NodePtr child);
+ PolymorphicNode child);
+
+ void generateMemo(std::ostringstream& os) const;
+private:
GroupByVector _groupByVector;
ProjectionMap _projectionMap;
};
-class UnwindNode : public Node {
+class UnwindNode final : public Operator<UnwindNode, 1>, public Node {
+ using Base = Operator<UnwindNode, 1>;
+
public:
- static NodePtr create(Context& ctx,
- ProjectionName projectionName,
- bool retainNonArrays,
- NodePtr child);
- static NodePtr clone(Context& ctx, const UnwindNode& other, NodePtr newChild);
+ explicit UnwindNode(Context& ctx,
+ ProjectionName projectionName,
+ bool retainNonArrays,
+ PolymorphicNode child);
void generateMemo(std::ostringstream& os) const;
-protected:
- void visit(AbstractVisitor& visitor) const override;
-
private:
- UnwindNode(Context& ctx, ProjectionName projectionName, bool retainNonArrays, NodePtr child);
-
const ProjectionName _projectionName;
const bool _retainNonArrays;
};
-class WindNode : public Node {
+class WindNode final : public Operator<WindNode, 1>, public Node {
+ using Base = Operator<WindNode, 1>;
+
public:
- static NodePtr create(Context& ctx, ProjectionName projectionName, NodePtr child);
- static NodePtr clone(Context& ctx, const WindNode& other, NodePtr newChild);
+ explicit WindNode(Context& ctx, ProjectionName projectionName, PolymorphicNode child);
void generateMemo(std::ostringstream& os) const;
-protected:
- void visit(AbstractVisitor& visitor) const override;
-
private:
- WindNode(Context& ctx, ProjectionName projectionName, NodePtr child);
-
const ProjectionName _projectionName;
};
diff --git a/src/mongo/db/query/optimizer/optimizer_test.cpp b/src/mongo/db/query/optimizer/optimizer_test.cpp
index 86966e05a7e..f1cffe77303 100644
--- a/src/mongo/db/query/optimizer/optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/optimizer_test.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/db/query/optimizer/memo.h"
#include "mongo/db/query/optimizer/node.h"
#include "mongo/unittest/unittest.h"
@@ -35,15 +36,20 @@ namespace {
TEST(Optimizer, Basic) {
Context ctx;
+ MemoGenerator gen;
- NodePtr ptrScan = ScanNode::create(ctx, "test");
- Node::ChildVector v;
- v.push_back(std::move(ptrScan));
- NodePtr ptrJoin = MultiJoinNode::create(ctx, {}, {}, std::move(v));
- ASSERT_EQ("NodeId: 1\nMultiJoin\nNodeId: 0\nScan\n", ptrJoin->generateMemo());
+ PolymorphicNode scanNode = make<ScanNode>(ctx, "test");
+ ASSERT_EQ("NodeId: 0\nScan\n", gen.generateMemo(scanNode));
- NodePtr cloned = ptrJoin->clone(ctx);
- ASSERT_EQ("NodeId: 3\nMultiJoin\nNodeId: 2\nScan\n", cloned->generateMemo());
+ PolymorphicNode joinNode = make<MultiJoinNode>(ctx,
+ MultiJoinNode::FilterSet{},
+ MultiJoinNode::ProjectionMap{},
+ makeSeq(std::move(scanNode)));
+ ASSERT_EQ("NodeId: 1\nMultiJoin\nNodeId: 0\nScan\n", gen.generateMemo(joinNode));
+
+
+ PolymorphicNode cloned = joinNode;
+ ASSERT_EQ("NodeId: 1\nMultiJoin\nNodeId: 0\nScan\n", gen.generateMemo(cloned));
}
} // namespace
diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
index 05f9bcefb96..1a338abf238 100644
--- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/exec/sbe/stages/loop_join.h"
#include "mongo/db/exec/sbe/stages/project.h"
#include "mongo/db/exec/sbe/stages/scan.h"
+#include "mongo/db/exec/sbe/stages/union.h"
#include "mongo/db/query/sbe_stage_builder_filter.h"
#include "mongo/db/query/util/make_data_structure.h"
#include "mongo/db/storage/oplog_hack.h"
@@ -330,20 +331,63 @@ generateGenericCollScan(const Collection* collection,
// Check if the scan should be started after the provided resume RecordId and construct a nested
// loop join sub-tree to project out the resume RecordId as a seekRecordIdSlot and feed it to
- // the inner side (scan).
- //
- // Note that we also inject a 'skip 1' stage on top of the inner branch, as we need to start
- // _after_ the resume RecordId.
- //
- // TODO SERVER-48472: raise KeyNotFound error if we cannot position the cursor on
- // seekRecordIdSlot.
+ // the inner side (scan). We will also construct a union sub-tree as an outer side of the loop
+    // join to check that the record to which we are trying to reposition the scan exists.
if (seekRecordIdSlot && !isTailableResumeBranch) {
+ // Project out the RecordId we want to resume from as 'seekSlot'.
+ auto seekSlot = slotIdGenerator->generate();
+ auto projStage = sbe::makeProjectStage(
+ sbe::makeS<sbe::LimitSkipStage>(sbe::makeS<sbe::CoScanStage>(), 1, boost::none),
+ seekSlot,
+ sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::NumberInt64,
+ csn->resumeAfterRecordId->repr()));
+
+    // Construct a 'seek' branch of the 'union'. If we succeed in repositioning the cursor,
+ // the branch will output the 'seekSlot' to start the real scan from, otherwise it will
+ // produce EOF.
+ auto seekBranch =
+ sbe::makeS<sbe::LoopJoinStage>(std::move(projStage),
+ sbe::makeS<sbe::ScanStage>(nss,
+ boost::none,
+ boost::none,
+ std::vector<std::string>{},
+ sbe::makeSV(),
+ seekSlot,
+ forward,
+ yieldPolicy,
+ tracker),
+
+ sbe::makeSV(seekSlot),
+ sbe::makeSV(seekSlot),
+ nullptr);
+
+ // Construct a 'fail' branch of the union. The 'unusedSlot' is needed as each union branch
+ // must have the same number of slots, and we use just one in the 'seek' branch above. This
+ // branch will only be executed if the 'seek' branch produces EOF, which can only happen if
+    // the seek did not find the record id specified in $_resumeAfter.
+ auto unusedSlot = slotIdGenerator->generate();
+ auto failBranch = sbe::makeProjectStage(
+ sbe::makeS<sbe::CoScanStage>(),
+ unusedSlot,
+ sbe::makeE<sbe::EFail>(
+ ErrorCodes::KeyNotFound,
+ str::stream() << "Failed to resume collection scan: the recordId from which we are "
+ << "attempting to resume no longer exists in the collection: "
+ << csn->resumeAfterRecordId));
+
+ // Construct a union stage from the 'seek' and 'fail' branches. Note that this stage will
+    // only ever produce a single call to getNext() due to a 'limit 1' sitting on top of it.
+ auto unionStage = sbe::makeS<sbe::UnionStage>(
+ make_vector<std::unique_ptr<sbe::PlanStage>>(std::move(seekBranch),
+ std::move(failBranch)),
+ std::vector<sbe::value::SlotVector>{sbe::makeSV(seekSlot), sbe::makeSV(unusedSlot)},
+ sbe::makeSV(*seekRecordIdSlot));
+
+ // Construct the final loop join. Note that we also inject a 'skip 1' stage on top of the
+ // inner branch, as we need to start _after_ the resume RecordId, and a 'limit 1' stage on
+ // top of the outer branch, as it should produce just a single seek recordId.
stage = sbe::makeS<sbe::LoopJoinStage>(
- sbe::makeProjectStage(
- sbe::makeS<sbe::LimitSkipStage>(sbe::makeS<sbe::CoScanStage>(), 1, boost::none),
- *seekRecordIdSlot,
- sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::NumberInt64,
- csn->resumeAfterRecordId->repr())),
+ sbe::makeS<sbe::LimitSkipStage>(std::move(unionStage), 1, boost::none),
sbe::makeS<sbe::LimitSkipStage>(std::move(stage), boost::none, 1),
sbe::makeSV(),
sbe::makeSV(*seekRecordIdSlot),
diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp
index b17164df951..22c03198b9b 100644
--- a/src/mongo/db/query/sbe_stage_builder_expression.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp
@@ -883,13 +883,103 @@ public:
_context->pushExpr(
sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(absExpr)));
}
+
void visit(ExpressionAdd* expr) final {
- _context->ensureArity(2);
- auto rhs = _context->popExpr();
- auto lhs = _context->popExpr();
- _context->pushExpr(
- sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::add, std::move(lhs), std::move(rhs)));
+ size_t arity = expr->getChildren().size();
+ _context->ensureArity(arity);
+ auto frameId = _context->frameIdGenerator->generate();
+
+
+ auto generateNotNumberOrDate = [frameId](const sbe::value::SlotId slotId) {
+ sbe::EVariable var{frameId, slotId};
+ return sbe::makeE<sbe::EPrimBinary>(
+ sbe::EPrimBinary::logicAnd,
+ sbe::makeE<sbe::EPrimUnary>(
+ sbe::EPrimUnary::logicNot,
+ sbe::makeE<sbe::EFunction>("isNumber", sbe::makeEs(var.clone()))),
+ sbe::makeE<sbe::EPrimUnary>(
+ sbe::EPrimUnary::logicNot,
+ sbe::makeE<sbe::EFunction>("isDate", sbe::makeEs(var.clone()))));
+ };
+
+ if (arity == 2) {
+ auto rhs = _context->popExpr();
+ auto lhs = _context->popExpr();
+ auto binds = sbe::makeEs(std::move(lhs), std::move(rhs));
+ sbe::EVariable lhsVar{frameId, 0};
+ sbe::EVariable rhsVar{frameId, 1};
+
+ auto addExpr = sbe::makeE<sbe::EIf>(
+ sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::logicOr,
+ generateNullOrMissing(frameId, 0),
+ generateNullOrMissing(frameId, 1)),
+ sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0),
+ sbe::makeE<sbe::EIf>(
+ sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::logicOr,
+ generateNotNumberOrDate(0),
+ generateNotNumberOrDate(1)),
+ sbe::makeE<sbe::EFail>(
+ ErrorCodes::Error{4974201},
+ "only numbers and dates are allowed in an $add expression"),
+ sbe::makeE<sbe::EIf>(
+ sbe::makeE<sbe::EPrimBinary>(
+ sbe::EPrimBinary::logicAnd,
+ sbe::makeE<sbe::EFunction>("isDate", sbe::makeEs(lhsVar.clone())),
+ sbe::makeE<sbe::EFunction>("isDate", sbe::makeEs(rhsVar.clone()))),
+ sbe::makeE<sbe::EFail>(ErrorCodes::Error{4974202},
+ "only one date allowed in an $add expression"),
+ sbe::makeE<sbe::EPrimBinary>(
+ sbe::EPrimBinary::add, lhsVar.clone(), rhsVar.clone()))));
+
+ _context->pushExpr(
+ sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(addExpr)));
+ } else {
+ std::vector<std::unique_ptr<sbe::EExpression>> binds;
+ for (size_t i = 0; i < arity; i++) {
+ binds.push_back(_context->popExpr());
+ }
+ std::reverse(std::begin(binds), std::end(binds));
+
+ std::vector<std::unique_ptr<sbe::EExpression>> checkExprsNull;
+ std::vector<std::unique_ptr<sbe::EExpression>> checkExprsNotNumberOrDate;
+ std::vector<std::unique_ptr<sbe::EExpression>> argVars;
+ for (size_t idx = 0; idx < arity; idx++) {
+ checkExprsNull.push_back(generateNullOrMissing(frameId, idx));
+ checkExprsNotNumberOrDate.push_back(generateNotNumberOrDate(idx));
+ argVars.push_back(sbe::makeE<sbe::EVariable>(frameId, idx));
+ }
+
+ using iter_t = std::vector<std::unique_ptr<sbe::EExpression>>::iterator;
+ auto checkNullAllArguments =
+ std::accumulate(std::move_iterator<iter_t>(checkExprsNull.begin() + 1),
+ std::move_iterator<iter_t>(checkExprsNull.end()),
+ std::move(checkExprsNull.front()),
+ [](auto&& acc, auto&& ex) {
+ return sbe::makeE<sbe::EPrimBinary>(
+ sbe::EPrimBinary::logicOr, std::move(acc), std::move(ex));
+ });
+ auto checkNotNumberOrDateAllArguments =
+ std::accumulate(std::move_iterator<iter_t>(checkExprsNotNumberOrDate.begin() + 1),
+ std::move_iterator<iter_t>(checkExprsNotNumberOrDate.end()),
+ std::move(checkExprsNotNumberOrDate.front()),
+ [](auto&& acc, auto&& ex) {
+ return sbe::makeE<sbe::EPrimBinary>(
+ sbe::EPrimBinary::logicOr, std::move(acc), std::move(ex));
+ });
+ auto addExpr = sbe::makeE<sbe::EIf>(
+ std::move(checkNullAllArguments),
+ sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0),
+ sbe::makeE<sbe::EIf>(
+ std::move(checkNotNumberOrDateAllArguments),
+ sbe::makeE<sbe::EFail>(
+ ErrorCodes::Error{4974203},
+ "only numbers and dates are allowed in an $add expression"),
+ sbe::makeE<sbe::EFunction>("doubleDoubleSum", std::move(argVars))));
+ _context->pushExpr(
+ sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(addExpr)));
+ }
}
+
void visit(ExpressionAllElementsTrue* expr) final {
unsupportedExpression(expr->getOpName());
}
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index eeddbed7948..0ef88b0a410 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -1232,6 +1232,7 @@ env.Library(
'$BUILD_DIR/mongo/db/repl/speculative_authenticate',
'$BUILD_DIR/mongo/db/stats/counters',
'$BUILD_DIR/mongo/transport/message_compressor',
+ 'primary_only_service',
'replication_auth',
'split_horizon',
],
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 77daa595256..a9c8b47e61a 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -700,8 +700,9 @@ void BackgroundSync::_runRollback(OperationContext* opCtx,
ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState());
- // Explicitly start future read transactions without a timestamp.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ // Ensure future transactions read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
// Rollback is a synchronous operation that uses the task executor and may not be
// executed inside the fetcher callback.
@@ -878,8 +879,9 @@ void BackgroundSync::start(OperationContext* opCtx) {
OpTime lastAppliedOpTime;
ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState());
- // Explicitly start future read transactions without a timestamp.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ // Ensure future transactions read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
do {
lastAppliedOpTime = _readLastAppliedOpTime(opCtx);
diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
index eab00297cdd..23fce736413 100644
--- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp
+++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
@@ -278,7 +278,7 @@ Status CollectionBulkLoaderImpl::commit() {
status = _idIndexBlock->drainBackgroundWrites(
_opCtx.get(),
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
_nss.isSystemDotViews() ? IndexBuildInterceptor::DrainYieldPolicy::kNoYield
: IndexBuildInterceptor::DrainYieldPolicy::kYield);
if (!status.isOK()) {
diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp
index 24ff5ad96d6..f769fd14c6d 100644
--- a/src/mongo/db/repl/oplog_applier_impl.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl.cpp
@@ -779,8 +779,9 @@ Status OplogApplierImpl::applyOplogBatchPerWorker(OperationContext* opCtx,
// destroyed by unstash in its destructor. Thus we set the flag explicitly.
opCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false);
- // Explicitly start future read transactions without a timestamp.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ // Ensure future transactions read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
// When querying indexes, we return the record matching the key if it exists, or an adjacent
// document. This means that it is possible for us to hit a prepare conflict if we query for an
diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp
index 99f7077519d..efd257d26d8 100644
--- a/src/mongo/db/repl/oplog_batcher.cpp
+++ b/src/mongo/db/repl/oplog_batcher.cpp
@@ -121,13 +121,6 @@ bool isUnpreparedCommit(const OplogEntry& entry) {
* the final oplog entry in the transaction is processed individually, since the operations are not
* actually run until the commit operation is reached.
*
- * Oplog entries on 'system.views' should also be processed one at a time. View catalog immediately
- * reflects changes for each oplog entry so we can see inconsistent view catalog if multiple oplog
- * entries on 'system.views' are being applied out of the original order.
- *
- * Process updates to 'admin.system.version' individually as well so the secondary's FCV when
- * processing each operation matches the primary's when committing that operation.
- *
* The ends of large transactions (> 16MB) should also be processed immediately on its own in order
* to avoid scenarios where parts of the transaction is batched with other operations not in the
* transaction.
@@ -143,8 +136,7 @@ bool OplogBatcher::mustProcessIndividually(const OplogEntry& entry) {
}
const auto nss = entry.getNss();
- return nss.isSystemDotViews() || nss.isServerConfigurationCollection() ||
- nss.isPrivilegeCollection();
+ return nss.mustBeAppliedInOwnOplogBatch();
}
std::size_t OplogBatcher::getOpCount(const OplogEntry& entry) {
@@ -355,12 +347,6 @@ std::size_t getBatchLimitOplogEntries() {
std::size_t getBatchLimitOplogBytes(OperationContext* opCtx, StorageInterface* storageInterface) {
// We can't change the timestamp source within a write unit of work.
invariant(!opCtx->lockState()->inAWriteUnitOfWork());
- // We're only reading oplog metadata, so the timestamp is not important. If we read with the
- // default (which is lastApplied on secondaries), we may end up with a reader that is at
- // lastApplied. If we then roll back, then when we reconstruct prepared transactions during
- // rollback recovery we will be preparing transactions before the read timestamp, which triggers
- // an assertion in WiredTiger.
- ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp);
auto oplogMaxSizeResult = storageInterface->getOplogMaxSize(opCtx);
auto oplogMaxSize = fassert(40301, oplogMaxSizeResult);
return std::min(oplogMaxSize / 10, std::size_t(replBatchLimitBytes.load()));
diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp
index acf79190d1b..ab7f25ec861 100644
--- a/src/mongo/db/repl/primary_only_service.cpp
+++ b/src/mongo/db/repl/primary_only_service.cpp
@@ -227,9 +227,21 @@ void PrimaryOnlyServiceRegistry::onStepDown() {
}
}
+void PrimaryOnlyServiceRegistry::reportServiceInfo(BSONObjBuilder* result) {
+ BSONObjBuilder subBuilder(result->subobjStart("primaryOnlyServices"));
+ for (auto& service : _servicesByName) {
+ subBuilder.appendNumber(service.first, service.second->getNumberOfInstances());
+ }
+}
+
PrimaryOnlyService::PrimaryOnlyService(ServiceContext* serviceContext)
: _serviceContext(serviceContext) {}
+size_t PrimaryOnlyService::getNumberOfInstances() {
+ stdx::lock_guard lk(_mutex);
+ return _instances.size();
+}
+
bool PrimaryOnlyService::isRunning() const {
stdx::lock_guard lk(_mutex);
return _state == State::kRunning;
diff --git a/src/mongo/db/repl/primary_only_service.h b/src/mongo/db/repl/primary_only_service.h
index 0d57cf96d9b..664ecd9fa11 100644
--- a/src/mongo/db/repl/primary_only_service.h
+++ b/src/mongo/db/repl/primary_only_service.h
@@ -217,6 +217,11 @@ public:
*/
bool isRunning() const;
+ /**
+ * Returns the number of currently running Instances of this service.
+ */
+ size_t getNumberOfInstances();
+
protected:
/**
* Constructs a new Instance object with the given initial state.
@@ -328,6 +333,12 @@ public:
*/
PrimaryOnlyService* lookupServiceByNamespace(const NamespaceString& ns);
+ /**
+ * Adds a 'primaryOnlyServices' sub-obj to the 'result' BSONObjBuilder containing a count of the
+ * number of active instances for each registered service.
+ */
+ void reportServiceInfo(BSONObjBuilder* result);
+
void onStartup(OperationContext*) final;
void onShutdown() final;
void onStepUpBegin(OperationContext*, long long term) final {}
diff --git a/src/mongo/db/repl/primary_only_service_test.cpp b/src/mongo/db/repl/primary_only_service_test.cpp
index d89005e8b1d..d7c76b6b7cb 100644
--- a/src/mongo/db/repl/primary_only_service_test.cpp
+++ b/src/mongo/db/repl/primary_only_service_test.cpp
@@ -367,6 +367,40 @@ TEST_F(PrimaryOnlyServiceTest, DoubleCreateInstance) {
TestServiceHangDuringInitialization.setMode(FailPoint::off);
}
+TEST_F(PrimaryOnlyServiceTest, ReportServiceInfo) {
+ {
+ BSONObjBuilder resultBuilder;
+ _registry->reportServiceInfo(&resultBuilder);
+
+ ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 0)),
+ resultBuilder.obj());
+ }
+
+ // Make sure the instance doesn't complete.
+ TestServiceHangDuringInitialization.setMode(FailPoint::alwaysOn);
+ auto instance = TestService::Instance::getOrCreate(_service, BSON("_id" << 0 << "state" << 0));
+
+ {
+ BSONObjBuilder resultBuilder;
+ _registry->reportServiceInfo(&resultBuilder);
+
+ ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 1)),
+ resultBuilder.obj());
+ }
+
+ auto instance2 = TestService::Instance::getOrCreate(_service, BSON("_id" << 1 << "state" << 0));
+
+ {
+ BSONObjBuilder resultBuilder;
+ _registry->reportServiceInfo(&resultBuilder);
+
+ ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 2)),
+ resultBuilder.obj());
+ }
+
+ TestServiceHangDuringInitialization.setMode(FailPoint::off);
+}
+
TEST_F(PrimaryOnlyServiceTest, CreateWhenNotPrimary) {
_registry->onStepDown();
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 3cfe7be562b..3f03ceb9d28 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -357,7 +357,8 @@ void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown
// Ensure that all writes are visible before reading. If we failed mid-batch, it would be
// possible to read from a kNoOverlap ReadSource where not all writes to the minValid document
// are visible, generating a writeConflict that would not resolve.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
auto loadLastOpTimeAndWallTimeResult = loadLastOpTimeAndWallTime(opCtx);
if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() &&
diff --git a/src/mongo/db/repl/replication_info.cpp b/src/mongo/db/repl/replication_info.cpp
index ec551d390ea..188de5e8d16 100644
--- a/src/mongo/db/repl/replication_info.cpp
+++ b/src/mongo/db/repl/replication_info.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/repl/is_master_response.h"
+#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/repl/replication_auth.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_process.h"
@@ -86,12 +87,17 @@ constexpr auto kHelloString = "hello"_sd;
constexpr auto kCamelCaseIsMasterString = "isMaster"_sd;
constexpr auto kLowerCaseIsMasterString = "ismaster"_sd;
+void appendPrimaryOnlyServiceInfo(ServiceContext* serviceContext, BSONObjBuilder* result) {
+ auto registry = PrimaryOnlyServiceRegistry::get(serviceContext);
+ registry->reportServiceInfo(result);
+}
+
/**
* Appends replication-related fields to the isMaster response. Returns the topology version that
* was included in the response.
*/
TopologyVersion appendReplicationInfo(OperationContext* opCtx,
- BSONObjBuilder& result,
+ BSONObjBuilder* result,
bool appendReplicationProcess,
bool useLegacyResponseFields,
boost::optional<TopologyVersion> clientTopologyVersion,
@@ -108,9 +114,9 @@ TopologyVersion appendReplicationInfo(OperationContext* opCtx,
}
auto isMasterResponse =
replCoord->awaitIsMasterResponse(opCtx, horizonParams, clientTopologyVersion, deadline);
- result.appendElements(isMasterResponse->toBSON(useLegacyResponseFields));
+ result->appendElements(isMasterResponse->toBSON(useLegacyResponseFields));
if (appendReplicationProcess) {
- replCoord->appendSlaveInfoData(&result);
+ replCoord->appendSlaveInfoData(result);
}
invariant(isMasterResponse->getTopologyVersion());
return isMasterResponse->getTopologyVersion().get();
@@ -142,10 +148,10 @@ TopologyVersion appendReplicationInfo(OperationContext* opCtx,
opCtx->sleepFor(Milliseconds(*maxAwaitTimeMS));
}
- result.appendBool((useLegacyResponseFields ? "ismaster" : "isWritablePrimary"),
- ReplicationCoordinator::get(opCtx)->isMasterForReportingPurposes());
+ result->appendBool((useLegacyResponseFields ? "ismaster" : "isWritablePrimary"),
+ ReplicationCoordinator::get(opCtx)->isMasterForReportingPurposes());
- BSONObjBuilder topologyVersionBuilder(result.subobjStart("topologyVersion"));
+ BSONObjBuilder topologyVersionBuilder(result->subobjStart("topologyVersion"));
currentTopologyVersion.serialize(&topologyVersionBuilder);
return currentTopologyVersion;
@@ -171,12 +177,14 @@ public:
// TODO SERVER-50219: Change useLegacyResponseFields to false once the serverStatus changes
// to remove master-slave terminology are merged.
appendReplicationInfo(opCtx,
- result,
+ &result,
appendReplicationProcess,
true /* useLegacyResponseFields */,
boost::none /* clientTopologyVersion */,
boost::none /* maxAwaitTimeMS */);
+ appendPrimaryOnlyServiceInfo(opCtx->getServiceContext(), &result);
+
auto rbid = ReplicationProcess::get(opCtx)->getRollbackID();
if (ReplicationProcess::kUninitializedRollbackId != rbid) {
result.append("rbid", rbid);
@@ -426,7 +434,7 @@ public:
auto result = replyBuilder->getBodyBuilder();
auto currentTopologyVersion = appendReplicationInfo(
- opCtx, result, 0, useLegacyResponseFields, clientTopologyVersion, maxAwaitTimeMS);
+ opCtx, &result, 0, useLegacyResponseFields, clientTopologyVersion, maxAwaitTimeMS);
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
const int configServerModeNumber = 2;
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index bba59beb626..c0c242421f9 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -131,7 +131,9 @@ public:
_oplogApplicationEndPoint(oplogApplicationEndPoint) {}
void startup(OperationContext* opCtx) final {
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ invariant(opCtx->recoveryUnit()->getTimestampReadSource() ==
+ RecoveryUnit::ReadSource::kNoTimestamp);
+
_client = std::make_unique<DBDirectClient>(opCtx);
BSONObj predicate = _oplogApplicationEndPoint
? BSON("$gte" << _oplogApplicationStartPoint << "$lte" << *_oplogApplicationEndPoint)
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 159179530a9..371a2c6af5f 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -74,6 +74,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/rollback_gen.h"
#include "mongo/db/service_context.h"
+#include "mongo/db/storage/checkpointer.h"
#include "mongo/db/storage/control/journal_flusher.h"
#include "mongo/db/storage/control/storage_control.h"
#include "mongo/db/storage/durable_catalog.h"
@@ -1271,7 +1272,18 @@ void StorageInterfaceImpl::setStableTimestamp(ServiceContext* serviceCtx, Timest
"holdStableTimestamp"_attr = holdStableTimestamp);
}
});
- serviceCtx->getStorageEngine()->setStableTimestamp(newStableTimestamp);
+
+ StorageEngine* storageEngine = serviceCtx->getStorageEngine();
+ Timestamp prevStableTimestamp = storageEngine->getStableTimestamp();
+
+ storageEngine->setStableTimestamp(newStableTimestamp);
+
+ Checkpointer* checkpointer = Checkpointer::get(serviceCtx);
+ if (checkpointer && !checkpointer->hasTriggeredFirstStableCheckpoint()) {
+ checkpointer->triggerFirstStableCheckpoint(prevStableTimestamp,
+ storageEngine->getInitialDataTimestamp(),
+ storageEngine->getStableTimestamp());
+ }
}
void StorageInterfaceImpl::setInitialDataTimestamp(ServiceContext* serviceCtx,
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp
index d821c6c3f26..a07833a2caf 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp
@@ -90,6 +90,13 @@ Status TenantMigrationDonorService::Instance::checkIfOptionsConflict(BSONObj opt
return Status::OK();
}
+void TenantMigrationDonorService::Instance::onReceiveDonorForgetMigration() {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (!_receivedDonorForgetMigrationPromise.getFuture().isReady()) {
+ _receivedDonorForgetMigrationPromise.emplaceValue();
+ }
+}
+
repl::OpTime TenantMigrationDonorService::Instance::_insertStateDocument() {
const auto stateDocBson = _stateDoc.toBSON();
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.h b/src/mongo/db/repl/tenant_migration_donor_service.h
index ddf178121e4..6d1da3ac6d4 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.h
+++ b/src/mongo/db/repl/tenant_migration_donor_service.h
@@ -87,9 +87,7 @@ public:
return _decisionPromise.getFuture();
}
- void onReceiveDonorForgetMigration() {
- _receivedDonorForgetMigrationPromise.emplaceValue();
- }
+ void onReceiveDonorForgetMigration();
private:
const NamespaceString _stateDocumentsNS = NamespaceString::kTenantMigrationDonorsNamespace;
@@ -142,6 +140,8 @@ public:
const std::shared_ptr<executor::ScopedTaskExecutor>& executor,
RemoteCommandTargeter* recipientTargeter);
+ mutable Mutex _mutex = MONGO_MAKE_LATCH("TenantMigrationDonorService::_mutex");
+
ServiceContext* _serviceContext;
TenantMigrationDonorDocument _stateDoc;
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index 0c7a1f0727b..67fb840de64 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -262,8 +262,9 @@ std::pair<std::vector<OplogEntry>, bool> _readTransactionOperationsFromOplogChai
const std::vector<OplogEntry*>& cachedOps,
const bool checkForCommands) noexcept {
bool isTransactionWithCommand = false;
- // Traverse the oplog chain with its own snapshot and read timestamp.
- ReadSourceScope readSourceScope(opCtx);
+ // Ensure future transactions read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
std::vector<OplogEntry> ops;
@@ -538,11 +539,10 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica
LOGV2(21848, "Hit skipReconstructPreparedTransactions failpoint");
return;
}
- // Read the transactions table and the oplog collection without a timestamp.
- // The below DBDirectClient read uses AutoGetCollectionForRead which could implicitly change the
- // read source. So we need to explicitly set the read source to kNoTimestamp to force reads in
- // this scope to be untimestamped.
- ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp);
+
+ // Ensure future transactions read without a timestamp.
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp ==
+ opCtx->recoveryUnit()->getTimestampReadSource());
DBDirectClient client(opCtx);
const auto cursor = client.query(NamespaceString::kSessionTransactionsTableNamespace,
diff --git a/src/mongo/db/s/README.md b/src/mongo/db/s/README.md
index bf23835067c..a2a4547f1f8 100644
--- a/src/mongo/db/s/README.md
+++ b/src/mongo/db/s/README.md
@@ -103,7 +103,6 @@ collection or database. A full refresh occurs when:
Methods that will mark routing table cache information as stale (sharded collection).
* [invalidateShardOrEntireCollectionEntryForShardedCollection](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L226-L236)
-* [invalidateShardForShardedCollection](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L262-L268)
* [invalidateEntriesThatReferenceShard](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L270-L274)
* [purgeCollection](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L276-L280)
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 4f97f81b966..f35bda25acc 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -263,6 +263,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog_raii',
'$BUILD_DIR/mongo/db/repl/read_concern_args',
'$BUILD_DIR/mongo/db/rw_concern_d',
+ '$BUILD_DIR/mongo/db/transaction',
'$BUILD_DIR/mongo/executor/network_interface',
'$BUILD_DIR/mongo/s/catalog/sharding_catalog_client',
'$BUILD_DIR/mongo/s/client/sharding_client',
diff --git a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp
index fc74fafc0c5..6743958f1f5 100644
--- a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp
+++ b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp
@@ -59,8 +59,12 @@ class ConfigSvrDropCollectionCommand : public BasicCommand {
public:
ConfigSvrDropCollectionCommand() : BasicCommand("_configsvrDropCollection") {}
- const std::set<std::string>& apiVersions() const {
- return kApiVersions1;
+ /**
+ * We accept any apiVersion, apiStrict, and/or apiDeprecationErrors, and forward it with the
+ * "drop" command to shards.
+ */
+ bool acceptsAnyApiVersionParameters() const override {
+ return true;
}
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
@@ -129,7 +133,9 @@ public:
auto collDistLock = uassertStatusOK(
catalogClient->getDistLockManager()->lock(opCtx, nss.ns(), "dropCollection", waitFor));
- ON_BLOCK_EXIT([opCtx, nss] { Grid::get(opCtx)->catalogCache()->onEpochChange(nss); });
+ ON_BLOCK_EXIT([opCtx, nss] {
+ Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss);
+ });
_dropCollection(opCtx, nss);
diff --git a/src/mongo/db/s/config/configsvr_drop_database_command.cpp b/src/mongo/db/s/config/configsvr_drop_database_command.cpp
index eb3ef547e70..896569a0afe 100644
--- a/src/mongo/db/s/config/configsvr_drop_database_command.cpp
+++ b/src/mongo/db/s/config/configsvr_drop_database_command.cpp
@@ -29,6 +29,7 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+#include "mongo/db/api_parameters.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/client.h"
#include "mongo/db/commands.h"
@@ -54,8 +55,12 @@ class ConfigSvrDropDatabaseCommand : public BasicCommand {
public:
ConfigSvrDropDatabaseCommand() : BasicCommand("_configsvrDropDatabase") {}
- const std::set<std::string>& apiVersions() const {
- return kApiVersions1;
+ /**
+ * We accept any apiVersion, apiStrict, and/or apiDeprecationErrors, and forward it with the
+ * "dropDatabase" command to shards.
+ */
+ bool acceptsAnyApiVersionParameters() const override {
+ return true;
}
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
@@ -177,6 +182,7 @@ public:
status, str::stream() << "Could not remove database '" << dbname << "' from metadata");
// Send _flushDatabaseCacheUpdates to all shards
+ IgnoreAPIParametersBlock ignoreApiParametersBlock{opCtx};
for (const ShardId& shardId : allShardIds) {
const auto shard =
uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId));
diff --git a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
index ee992bef2a9..3af7f601e95 100644
--- a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
+++ b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
@@ -357,7 +357,7 @@ public:
result << "collectionUUID" << *uuid;
}
- catalogCache->onEpochChange(nss);
+ catalogCache->invalidateCollectionEntry_LINEARIZABLE(nss);
return true;
}
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
index 204d8377764..2d40f65eaed 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/s/sharding_logging.h"
#include "mongo/db/server_options.h"
#include "mongo/db/snapshot_window_options_gen.h"
+#include "mongo/db/transaction_participant_gen.h"
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
@@ -334,12 +335,14 @@ BSONObj getShardAndCollectionVersion(OperationContext* opCtx,
ChunkVersion shardVersion;
if (!swDonorShardVersion.isOK()) {
- // The query to find 'nss' chunks belonging to the donor shard didn't return any, meaning
- // the last chunk was donated
- uassert(505770,
- str::stream() << "Couldn't retrieve donor chunks from config server",
- swDonorShardVersion.getStatus().code() == 50577);
- shardVersion = ChunkVersion(0, 0, collectionVersion.epoch());
+ if (swDonorShardVersion.getStatus().code() == 50577) {
+ // The query to find 'nss' chunks belonging to the donor shard didn't return any chunks,
+ // meaning the last chunk for fromShard was donated. Gracefully handle the error.
+ shardVersion = ChunkVersion(0, 0, collectionVersion.epoch());
+ } else {
+ // Bubble up any other error
+ uassertStatusOK(swDonorShardVersion);
+ }
} else {
shardVersion = swDonorShardVersion.getValue();
}
@@ -844,8 +847,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
// Drop old history. Keep at least 1 entry so ChunkInfo::getShardIdAt finds valid history for
// any query younger than the history window.
if (!MONGO_unlikely(skipExpiringOldChunkHistory.shouldFail())) {
- const int kHistorySecs = 10;
- auto windowInSeconds = std::max(minSnapshotHistoryWindowInSeconds.load(), kHistorySecs);
+ auto windowInSeconds = std::max(std::max(minSnapshotHistoryWindowInSeconds.load(),
+ gTransactionLifetimeLimitSeconds.load()),
+ 10);
int entriesDeleted = 0;
while (newHistory.size() > 1 &&
newHistory.back().getValidAfter().getSecs() + windowInSeconds <
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
index d6544e922d2..381a5e62029 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
@@ -42,6 +42,7 @@
#include "mongo/client/read_preference.h"
#include "mongo/client/remote_command_targeter.h"
#include "mongo/client/replica_set_monitor.h"
+#include "mongo/db/api_parameters.h"
#include "mongo/db/auth/authorization_session_impl.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/client.h"
@@ -398,6 +399,7 @@ void sendSSVToAllShards(OperationContext* opCtx, const NamespaceString& nss) {
auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
for (const auto& shardEntry : allShards) {
const auto& shard = uassertStatusOK(shardRegistry->getShard(opCtx, shardEntry.getName()));
@@ -417,6 +419,7 @@ void sendSSVToAllShards(OperationContext* opCtx, const NamespaceString& nss) {
}
void removeChunksAndTagsForDroppedCollection(OperationContext* opCtx, const NamespaceString& nss) {
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
const auto catalogClient = Grid::get(opCtx)->catalogClient();
// Remove chunk data
@@ -502,6 +505,8 @@ void ShardingCatalogManager::ensureDropCollectionCompleted(OperationContext* opC
"Ensuring config entries from previous dropCollection are cleared",
"namespace"_attr = nss.ns());
sendDropCollectionToAllShards(opCtx, nss);
+
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
removeChunksAndTagsForDroppedCollection(opCtx, nss);
sendSSVToAllShards(opCtx, nss);
}
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 11bce269425..07f8f94daf9 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -466,7 +466,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() {
"Starting post-migration commit refresh on the shard",
"migrationId"_attr = _coordinator->getMigrationId());
- forceShardFilteringMetadataRefresh(_opCtx, getNss(), true);
+ forceShardFilteringMetadataRefresh(_opCtx, getNss());
LOGV2_DEBUG_OPTIONS(4817405,
2,
diff --git a/src/mongo/db/s/migration_util_test.cpp b/src/mongo/db/s/migration_util_test.cpp
index 010f476773c..a2decb63c2d 100644
--- a/src/mongo/db/s/migration_util_test.cpp
+++ b/src/mongo/db/s/migration_util_test.cpp
@@ -522,7 +522,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(
Status(ErrorCodes::NamespaceNotFound, "dummy errmsg"));
- forceShardFilteringMetadataRefresh(opCtx, kNss, true);
+ forceShardFilteringMetadataRefresh(opCtx, kNss);
auto cleanupCompleteFuture = migrationutil::submitRangeDeletionTask(opCtx, deletionTask);
@@ -553,7 +553,7 @@ TEST_F(SubmitRangeDeletionTaskTest, SucceedsIfFilteringMetadataUUIDMatchesTaskUU
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
makeChangedChunks(ChunkVersion(1, 0, kEpoch)));
_mockCatalogClient->setCollections({coll});
- forceShardFilteringMetadataRefresh(opCtx, kNss, true);
+ forceShardFilteringMetadataRefresh(opCtx, kNss);
// The task should have been submitted successfully.
auto cleanupCompleteFuture = migrationutil::submitRangeDeletionTask(opCtx, deletionTask);
@@ -596,7 +596,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(
Status(ErrorCodes::NamespaceNotFound, "dummy errmsg"));
- forceShardFilteringMetadataRefresh(opCtx, kNss, true);
+ forceShardFilteringMetadataRefresh(opCtx, kNss);
auto collectionUUID = createCollectionAndGetUUID(kNss);
auto deletionTask = createDeletionTask(kNss, collectionUUID, 0, 10, _myShardName);
@@ -633,7 +633,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
makeChangedChunks(ChunkVersion(1, 0, staleEpoch)));
_mockCatalogClient->setCollections({staleColl});
- forceShardFilteringMetadataRefresh(opCtx, kNss, true);
+ forceShardFilteringMetadataRefresh(opCtx, kNss);
auto collectionUUID = createCollectionAndGetUUID(kNss);
auto deletionTask = createDeletionTask(kNss, collectionUUID, 0, 10, _myShardName);
diff --git a/src/mongo/db/s/set_shard_version_command.cpp b/src/mongo/db/s/set_shard_version_command.cpp
index f8a321aea1a..aba2cd2f632 100644
--- a/src/mongo/db/s/set_shard_version_command.cpp
+++ b/src/mongo/db/s/set_shard_version_command.cpp
@@ -96,7 +96,7 @@ public:
uassertStatusOK(shardingState->canAcceptShardedCommands());
// Steps
- // 1. Set the `authoritative` and `forceRefresh` variables from the command object.
+ // 1. Set the `authoritative` variable from the command object.
//
// 2. Validate all command parameters against the info in our ShardingState, and return an
// error if they do not match.
@@ -117,12 +117,6 @@ public:
LastError::get(client).disable();
const bool authoritative = cmdObj.getBoolField("authoritative");
- // A flag that specifies whether the set shard version catalog refresh
- // is allowed to join an in-progress refresh triggered by an other
- // thread, or whether it's required to either a) trigger its own
- // refresh or b) wait for a refresh to be started after it has entered the
- // getCollectionRoutingInfoWithRefresh function
- const bool forceRefresh = cmdObj.getBoolField("forceRefresh");
// Step 2
@@ -241,11 +235,9 @@ public:
const auto status = [&] {
try {
- // TODO SERVER-48990 remove this if-else: just call onShardVersionMismatch
+ // TODO (SERVER-50812) remove this if-else: just call onShardVersionMismatch
if (requestedVersion == requestedVersion.DROPPED()) {
- // Note: The forceRefresh flag controls whether we make sure to do our own
- // refresh or if we're okay with joining another thread
- forceShardFilteringMetadataRefresh(opCtx, nss, forceRefresh);
+ forceShardFilteringMetadataRefresh(opCtx, nss);
} else {
onShardVersionMismatch(opCtx, nss, requestedVersion);
}
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index 1e39cd26dc8..317d80f2ec4 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -284,7 +284,7 @@ ScopedShardVersionCriticalSection::ScopedShardVersionCriticalSection(OperationCo
migrationutil::recoverMigrationCoordinations(_opCtx, _nss);
}
- forceShardFilteringMetadataRefresh(_opCtx, _nss, true);
+ forceShardFilteringMetadataRefresh(_opCtx, _nss);
}
ScopedShardVersionCriticalSection::~ScopedShardVersionCriticalSection() {
@@ -334,9 +334,8 @@ CollectionMetadata forceGetCurrentMetadata(OperationContext* opCtx, const Namesp
invariant(shardingState->canAcceptShardedCommands());
try {
- const auto cm =
- uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(
- opCtx, nss, true));
+ const auto cm = uassertStatusOK(
+ Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, nss));
if (!cm.isSharded()) {
return CollectionMetadata();
@@ -354,8 +353,7 @@ CollectionMetadata forceGetCurrentMetadata(OperationContext* opCtx, const Namesp
}
ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
- const NamespaceString& nss,
- bool forceRefreshFromThisThread) {
+ const NamespaceString& nss) {
invariant(!opCtx->lockState()->isLocked());
invariant(!opCtx->getClient()->isInDirectClient());
@@ -366,9 +364,8 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
auto* const shardingState = ShardingState::get(opCtx);
invariant(shardingState->canAcceptShardedCommands());
- const auto cm =
- uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(
- opCtx, nss, forceRefreshFromThisThread));
+ const auto cm = uassertStatusOK(
+ Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, nss));
if (!cm.isSharded()) {
// The collection is not sharded. Avoid using AutoGetCollection() as it returns the
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.h b/src/mongo/db/s/shard_filtering_metadata_refresh.h
index 774a370b9ef..317fab32f37 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.h
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.h
@@ -79,8 +79,7 @@ CollectionMetadata forceGetCurrentMetadata(OperationContext* opCtx, const Namesp
* called with a lock
*/
ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
- const NamespaceString& nss,
- bool forceRefreshFromThisThread = false);
+ const NamespaceString& nss);
/**
* Should be called when any client request on this shard generates a StaleDbVersion exception.
diff --git a/src/mongo/db/s/shard_key_util.cpp b/src/mongo/db/s/shard_key_util.cpp
index e216f9f682d..9b71b8e1ec9 100644
--- a/src/mongo/db/s/shard_key_util.cpp
+++ b/src/mongo/db/s/shard_key_util.cpp
@@ -230,18 +230,12 @@ void ValidationBehaviorsShardCollection::createShardKeyIndex(
ValidationBehaviorsRefineShardKey::ValidationBehaviorsRefineShardKey(OperationContext* opCtx,
const NamespaceString& nss)
- : _opCtx(opCtx) {
- const auto cm = uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss));
- uassert(ErrorCodes::NamespaceNotSharded,
- str::stream() << "refineCollectionShardKey namespace " << nss.toString()
- << " is not sharded",
- cm.isSharded());
- const auto minKeyShardId = cm.getMinKeyShardIdWithSimpleCollation();
- _indexShard =
- uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, minKeyShardId));
- _cm = std::move(cm);
-}
+ : _opCtx(opCtx),
+ _cm(uassertStatusOK(
+ Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
+ nss))),
+ _indexShard(uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(
+ opCtx, _cm.getMinKeyShardIdWithSimpleCollation()))) {}
std::vector<BSONObj> ValidationBehaviorsRefineShardKey::loadIndexes(
const NamespaceString& nss) const {
@@ -249,8 +243,7 @@ std::vector<BSONObj> ValidationBehaviorsRefineShardKey::loadIndexes(
_opCtx,
ReadPreferenceSetting(ReadPreference::PrimaryOnly),
nss.db().toString(),
- appendShardVersion(BSON("listIndexes" << nss.coll()),
- _cm->getVersion(_indexShard->getId())),
+ appendShardVersion(BSON("listIndexes" << nss.coll()), _cm.getVersion(_indexShard->getId())),
Milliseconds(-1));
if (indexesRes.getStatus().code() != ErrorCodes::NamespaceNotFound) {
return uassertStatusOK(indexesRes).docs;
@@ -266,7 +259,7 @@ void ValidationBehaviorsRefineShardKey::verifyUsefulNonMultiKeyIndex(
"admin",
appendShardVersion(
BSON(kCheckShardingIndexCmdName << nss.ns() << kKeyPatternField << proposedKey),
- _cm->getVersion(_indexShard->getId())),
+ _cm.getVersion(_indexShard->getId())),
Shard::RetryPolicy::kIdempotent));
if (checkShardingIndexRes.commandStatus == ErrorCodes::UnknownError) {
// CheckShardingIndex returns UnknownError if a compatible shard key index cannot be found,
diff --git a/src/mongo/db/s/shard_key_util.h b/src/mongo/db/s/shard_key_util.h
index d6e1802549c..e5ab23683eb 100644
--- a/src/mongo/db/s/shard_key_util.h
+++ b/src/mongo/db/s/shard_key_util.h
@@ -104,8 +104,10 @@ public:
private:
OperationContext* _opCtx;
+
+ ChunkManager _cm;
+
std::shared_ptr<Shard> _indexShard;
- boost::optional<ChunkManager> _cm;
};
/**
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 2b6d515148a..f01218bba70 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -113,16 +113,16 @@ MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution);
MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand);
// Tracks the number of times a legacy unacknowledged write failed due to
-// not master error resulted in network disconnection.
-Counter64 notMasterLegacyUnackWrites;
-ServerStatusMetricField<Counter64> displayNotMasterLegacyUnackWrites(
- "repl.network.notMasterLegacyUnacknowledgedWrites", &notMasterLegacyUnackWrites);
+// not primary error resulted in network disconnection.
+Counter64 notPrimaryLegacyUnackWrites;
+ServerStatusMetricField<Counter64> displayNotPrimaryLegacyUnackWrites(
+ "repl.network.notPrimaryLegacyUnacknowledgedWrites", &notPrimaryLegacyUnackWrites);
-// Tracks the number of times an unacknowledged write failed due to not master error
+// Tracks the number of times an unacknowledged write failed due to not primary error
// resulted in network disconnection.
-Counter64 notMasterUnackWrites;
-ServerStatusMetricField<Counter64> displayNotMasterUnackWrites(
- "repl.network.notMasterUnacknowledgedWrites", &notMasterUnackWrites);
+Counter64 notPrimaryUnackWrites;
+ServerStatusMetricField<Counter64> displayNotPrimaryUnackWrites(
+ "repl.network.notPrimaryUnacknowledgedWrites", &notPrimaryUnackWrites);
namespace {
@@ -1479,7 +1479,7 @@ DbResponse receivedCommands(OperationContext* opCtx,
// Close the connection to get client to go through server selection again.
if (LastError::get(opCtx->getClient()).hadNotPrimaryError()) {
if (c && c->getReadWriteType() == Command::ReadWriteType::kWrite)
- notMasterUnackWrites.increment();
+ notPrimaryUnackWrites.increment();
uasserted(ErrorCodes::NotWritablePrimary,
str::stream()
<< "Not-master error while processing '" << request.getCommandName()
@@ -1839,7 +1839,7 @@ DbResponse FireAndForgetOpRunner::run() {
// Either way, we want to throw an exception here, which will cause the client to be
// disconnected.
if (LastError::get(hr->client()).hadNotPrimaryError()) {
- notMasterLegacyUnackWrites.increment();
+ notPrimaryLegacyUnackWrites.increment();
uasserted(ErrorCodes::NotWritablePrimary,
str::stream() << "Not-master error while processing '"
<< networkOpToString(hr->op()) << "' operation on '"
diff --git a/src/mongo/db/stats/api_version_metrics.h b/src/mongo/db/stats/api_version_metrics.h
index fc1de1d9766..354312a3992 100644
--- a/src/mongo/db/stats/api_version_metrics.h
+++ b/src/mongo/db/stats/api_version_metrics.h
@@ -29,7 +29,7 @@
#pragma once
-#include "mongo/db/initialize_api_parameters.h"
+#include "mongo/db/api_parameters.h"
#include "mongo/db/service_context.h"
#include "mongo/platform/mutex.h"
#include "mongo/rpc/metadata/client_metadata.h"
@@ -70,4 +70,4 @@ private:
APIVersionMetricsMap _apiVersionMetrics;
};
-} // namespace mongo \ No newline at end of file
+} // namespace mongo
diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript
index 53ac37b0e30..f60d463a976 100644
--- a/src/mongo/db/storage/SConscript
+++ b/src/mongo/db/storage/SConscript
@@ -121,11 +121,13 @@ env.Library(
'control/storage_control.cpp',
],
LIBDEPS=[
+ 'checkpointer',
'journal_flusher',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/service_context',
+ 'storage_options',
],
)
@@ -513,6 +515,19 @@ env.Library(
)
env.Library(
+ target='checkpointer',
+ source=[
+ 'checkpointer.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/service_context',
+ '$BUILD_DIR/mongo/util/background_job',
+ 'storage_options',
+ ],
+)
+
+env.Library(
target='two_phase_index_build_knobs_idl',
source=[
env.Idlc('two_phase_index_build_knobs.idl')[0],
diff --git a/src/mongo/db/storage/checkpointer.cpp b/src/mongo/db/storage/checkpointer.cpp
new file mode 100644
index 00000000000..825e914d062
--- /dev/null
+++ b/src/mongo/db/storage/checkpointer.cpp
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/checkpointer.h"
+
+#include "mongo/db/operation_context.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/storage/kv/kv_engine.h"
+#include "mongo/logv2/log.h"
+#include "mongo/util/concurrency/idle_thread_block.h"
+#include "mongo/util/fail_point.h"
+
+namespace mongo {
+
+namespace {
+
+const auto getCheckpointer = ServiceContext::declareDecoration<std::unique_ptr<Checkpointer>>();
+
+MONGO_FAIL_POINT_DEFINE(pauseCheckpointThread);
+
+} // namespace
+
+Checkpointer* Checkpointer::get(ServiceContext* serviceCtx) {
+ return getCheckpointer(serviceCtx).get();
+}
+
+Checkpointer* Checkpointer::get(OperationContext* opCtx) {
+ return get(opCtx->getServiceContext());
+}
+
+void Checkpointer::set(ServiceContext* serviceCtx, std::unique_ptr<Checkpointer> newCheckpointer) {
+ auto& checkpointer = getCheckpointer(serviceCtx);
+ if (checkpointer) {
+ invariant(!checkpointer->running(),
+ "Tried to reset the Checkpointer without shutting down the original instance.");
+ }
+ checkpointer = std::move(newCheckpointer);
+}
+
+void Checkpointer::run() {
+ ThreadClient tc(name(), getGlobalServiceContext());
+ LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name());
+
+ while (true) {
+ auto opCtx = tc->makeOperationContext();
+
+ {
+ stdx::unique_lock<Latch> lock(_mutex);
+ MONGO_IDLE_THREAD_BLOCK;
+
+ // Wait for 'storageGlobalParams.checkpointDelaySecs' seconds; or until either shutdown
+ // is signaled or a checkpoint is triggered.
+ _sleepCV.wait_for(lock,
+ stdx::chrono::seconds(static_cast<std::int64_t>(
+ storageGlobalParams.checkpointDelaySecs)),
+ [&] { return _shuttingDown || _triggerCheckpoint; });
+
+ // If the checkpointDelaySecs is set to 0, that means we should skip checkpointing.
+ // However, checkpointDelaySecs is adjustable by a runtime server parameter, so we
+ // need to wake up to check periodically. The wakeup to check period is arbitrary.
+ while (storageGlobalParams.checkpointDelaySecs == 0 && !_shuttingDown &&
+ !_triggerCheckpoint) {
+ _sleepCV.wait_for(lock, stdx::chrono::seconds(static_cast<std::int64_t>(3)), [&] {
+ return _shuttingDown || _triggerCheckpoint;
+ });
+ }
+
+ if (_shuttingDown) {
+ invariant(!_shutdownReason.isOK());
+ LOGV2_DEBUG(22309,
+ 1,
+ "Stopping thread",
+ "threadName"_attr = name(),
+ "reason"_attr = _shutdownReason);
+ return;
+ }
+
+ // Clear the trigger so we do not immediately checkpoint again after this.
+ _triggerCheckpoint = false;
+ }
+
+ pauseCheckpointThread.pauseWhileSet();
+
+ const Date_t startTime = Date_t::now();
+
+ // TODO SERVER-50861: Access the storage engine via the ServiceContext.
+ _kvEngine->checkpoint();
+
+ const auto secondsElapsed = durationCount<Seconds>(Date_t::now() - startTime);
+ if (secondsElapsed >= 30) {
+ LOGV2_DEBUG(22308,
+ 1,
+ "Checkpoint was slow to complete",
+ "secondsElapsed"_attr = secondsElapsed);
+ }
+ }
+}
+
+void Checkpointer::triggerFirstStableCheckpoint(Timestamp prevStable,
+ Timestamp initialData,
+ Timestamp currStable) {
+ stdx::unique_lock<Latch> lock(_mutex);
+ invariant(!_hasTriggeredFirstStableCheckpoint);
+ if (prevStable < initialData && currStable >= initialData) {
+ LOGV2(22310,
+ "Triggering the first stable checkpoint",
+ "initialDataTimestamp"_attr = initialData,
+ "prevStableTimestamp"_attr = prevStable,
+ "currStableTimestamp"_attr = currStable);
+ _hasTriggeredFirstStableCheckpoint = true;
+ _triggerCheckpoint = true;
+ _sleepCV.notify_one();
+ }
+}
+
+bool Checkpointer::hasTriggeredFirstStableCheckpoint() {
+ stdx::unique_lock<Latch> lock(_mutex);
+ return _hasTriggeredFirstStableCheckpoint;
+}
+
+void Checkpointer::shutdown(const Status& reason) {
+ LOGV2(22322, "Shutting down checkpoint thread");
+
+ {
+ stdx::unique_lock<Latch> lock(_mutex);
+ _shuttingDown = true;
+ _shutdownReason = reason;
+
+ // Wake up the checkpoint thread early, to take a final checkpoint before shutting down, if
+ // one has not coincidentally just been taken.
+ _sleepCV.notify_one();
+ }
+
+ wait();
+ LOGV2(22323, "Finished shutting down checkpoint thread");
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/checkpointer.h b/src/mongo/db/storage/checkpointer.h
new file mode 100644
index 00000000000..6c50974c2ba
--- /dev/null
+++ b/src/mongo/db/storage/checkpointer.h
@@ -0,0 +1,114 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/platform/mutex.h"
+#include "mongo/stdx/condition_variable.h"
+#include "mongo/util/background.h"
+
+namespace mongo {
+
+class KVEngine;
+class OperationContext;
+class ServiceContext;
+class Timestamp;
+
+class Checkpointer : public BackgroundJob {
+public:
+ Checkpointer(KVEngine* kvEngine)
+ : BackgroundJob(false /* deleteSelf */),
+ _kvEngine(kvEngine),
+ _shuttingDown(false),
+ _shutdownReason(Status::OK()),
+ _hasTriggeredFirstStableCheckpoint(false),
+ _triggerCheckpoint(false) {}
+
+ static Checkpointer* get(ServiceContext* serviceCtx);
+ static Checkpointer* get(OperationContext* opCtx);
+ static void set(ServiceContext* serviceCtx, std::unique_ptr<Checkpointer> newCheckpointer);
+
+ std::string name() const override {
+ return "Checkpointer";
+ }
+
+ /**
+ * Starts the checkpoint thread that runs every storageGlobalParams.checkpointDelaySecs seconds.
+ */
+ void run() override;
+
+ /**
+ * Triggers taking the first stable checkpoint if the stable timestamp has advanced past the
+ * initial data timestamp.
+ *
+ * The checkpoint thread runs automatically every storageGlobalParams.checkpointDelaySecs
+ * seconds. This function avoids potentially waiting that full duration for a stable checkpoint,
+ * initiating one immediately.
+ *
+ * Do not call this function if hasTriggeredFirstStableCheckpoint() returns true.
+ */
+ void triggerFirstStableCheckpoint(Timestamp prevStable,
+ Timestamp initialData,
+ Timestamp currStable);
+
+ /**
+ * Returns whether the first stable checkpoint has already been triggered.
+ */
+ bool hasTriggeredFirstStableCheckpoint();
+
+ /**
+ * Blocks until the checkpoint thread has been fully shutdown.
+ */
+ void shutdown(const Status& reason);
+
+private:
+ // A pointer to the KVEngine is maintained only due to unit testing limitations that don't fully
+ // setup the ServiceContext.
+ // TODO SERVER-50861: Remove this pointer.
+ KVEngine* const _kvEngine;
+
+ // Protects the state below.
+ Mutex _mutex = MONGO_MAKE_LATCH("Checkpointer::_mutex");
+
+ // The checkpoint thread idles on this condition variable for a particular time duration between
+ // taking checkpoints. It can be triggered early to expedite either: immediate checkpointing if
+ // _triggerCheckpoint is set; or shutdown cleanup if _shuttingDown is set.
+ stdx::condition_variable _sleepCV;
+
+ bool _shuttingDown;
+ Status _shutdownReason;
+
+ // This flag ensures the first stable checkpoint is only triggered once.
+ bool _hasTriggeredFirstStableCheckpoint;
+
+ // This flag allows the checkpoint thread to wake up early when _sleepCV is signaled.
+ bool _triggerCheckpoint;
+};
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/control/storage_control.cpp b/src/mongo/db/storage/control/storage_control.cpp
index f0b7e7d825f..50213d44dfc 100644
--- a/src/mongo/db/storage/control/storage_control.cpp
+++ b/src/mongo/db/storage/control/storage_control.cpp
@@ -35,7 +35,9 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/service_context.h"
+#include "mongo/db/storage/checkpointer.h"
#include "mongo/db/storage/control/journal_flusher.h"
+#include "mongo/db/storage/storage_options.h"
#include "mongo/logv2/log.h"
namespace mongo {
@@ -73,12 +75,25 @@ void startStorageControls(ServiceContext* serviceContext, bool forTestOnly) {
journalFlusher->go();
JournalFlusher::set(serviceContext, std::move(journalFlusher));
+ if (storageEngine->supportsCheckpoints() && !storageEngine->isEphemeral() &&
+ !storageGlobalParams.readOnly) {
+ std::unique_ptr<Checkpointer> checkpointer =
+ std::make_unique<Checkpointer>(storageEngine->getEngine());
+ checkpointer->go();
+ Checkpointer::set(serviceContext, std::move(checkpointer));
+ }
+
areControlsStarted = true;
}
void stopStorageControls(ServiceContext* serviceContext, const Status& reason) {
if (areControlsStarted) {
JournalFlusher::get(serviceContext)->shutdown(reason);
+
+ auto checkpointer = Checkpointer::get(serviceContext);
+ if (checkpointer) {
+ checkpointer->shutdown(reason);
+ }
}
}
diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h
index 2d9aecb32ee..b782b144f0b 100644
--- a/src/mongo/db/storage/durable_catalog.h
+++ b/src/mongo/db/storage/durable_catalog.h
@@ -128,6 +128,11 @@ public:
virtual std::string newInternalIdent() = 0;
/**
+ * Generate an internal resumable index build ident name.
+ */
+ virtual std::string newInternalResumableIndexBuildIdent() = 0;
+
+ /**
* On success, returns the RecordId which identifies the new record store in the durable catalog
* in addition to ownership of the new RecordStore.
*/
diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp
index b991c213bc3..de8b719918b 100644
--- a/src/mongo/db/storage/durable_catalog_impl.cpp
+++ b/src/mongo/db/storage/durable_catalog_impl.cpp
@@ -65,6 +65,7 @@ const char kNamespaceFieldName[] = "ns";
const char kNonRepairableFeaturesFieldName[] = "nonRepairable";
const char kRepairableFeaturesFieldName[] = "repairable";
const char kInternalIdentPrefix[] = "internal-";
+const char kResumableIndexBuildIdentStem[] = "resumable-index-build-";
void appendPositionsOfBitsSet(uint64_t value, StringBuilder* sb) {
invariant(sb);
@@ -427,8 +428,17 @@ bool DurableCatalogImpl::_hasEntryCollidingWithRand() const {
}
std::string DurableCatalogImpl::newInternalIdent() {
+ return _newInternalIdent("");
+}
+
+std::string DurableCatalogImpl::newInternalResumableIndexBuildIdent() {
+ return _newInternalIdent(kResumableIndexBuildIdentStem);
+}
+
+std::string DurableCatalogImpl::_newInternalIdent(StringData identStem) {
StringBuilder buf;
buf << kInternalIdentPrefix;
+ buf << identStem;
buf << _next.fetchAndAdd(1) << '-' << _rand;
return buf.str();
}
@@ -765,6 +775,11 @@ bool DurableCatalogImpl::isInternalIdent(StringData ident) const {
return ident.find(kInternalIdentPrefix) != std::string::npos;
}
+bool DurableCatalogImpl::isResumableIndexBuildIdent(StringData ident) const {
+ invariant(isInternalIdent(ident), ident.toString());
+ return ident.find(kResumableIndexBuildIdentStem) != std::string::npos;
+}
+
bool DurableCatalogImpl::isCollectionIdent(StringData ident) const {
// Internal idents prefixed "internal-" should not be considered collections, because
// they are not eligible for orphan recovery through repair.
diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h
index b24816d333d..9b8fad96825 100644
--- a/src/mongo/db/storage/durable_catalog_impl.h
+++ b/src/mongo/db/storage/durable_catalog_impl.h
@@ -92,6 +92,8 @@ public:
bool isInternalIdent(StringData ident) const;
+ bool isResumableIndexBuildIdent(StringData ident) const;
+
bool isCollectionIdent(StringData ident) const;
FeatureTracker* getFeatureTracker() const {
@@ -108,6 +110,7 @@ public:
std::string getFilesystemPathForDb(const std::string& dbName) const;
std::string newInternalIdent();
+ std::string newInternalResumableIndexBuildIdent();
StatusWith<std::pair<RecordId, std::unique_ptr<RecordStore>>> createCollection(
OperationContext* opCtx,
@@ -229,6 +232,8 @@ private:
*/
std::string _newUniqueIdent(NamespaceString nss, const char* kind);
+ std::string _newInternalIdent(StringData identStem);
+
// Helpers only used by constructor and init(). Don't call from elsewhere.
static std::string _newRand();
bool _hasEntryCollidingWithRand() const;
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h
index b3da8bb0085..fd243b0c8c1 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h
@@ -173,6 +173,10 @@ public:
Timestamp getOldestTimestamp() const override;
+ Timestamp getStableTimestamp() const override {
+ return Timestamp();
+ }
+
void setOldestTimestamp(Timestamp newOldestTimestamp, bool force) override;
std::map<Timestamp, std::shared_ptr<StringStore>> getHistory_forTest();
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp
index fcf49f74442..e249daed751 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp
@@ -359,7 +359,7 @@ TEST_F(EphemeralForTestKVEngineTest, ReadOlderSnapshotsSimple) {
ASSERT(!rs->findRecord(&opCtx, loc2, &rd));
opCtx.recoveryUnit()->abandonSnapshot();
- opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
ASSERT(rs->findRecord(&opCtx, loc1, &rd));
ASSERT(rs->findRecord(&opCtx, loc2, &rd));
}
@@ -452,7 +452,7 @@ TEST_F(EphemeralForTestKVEngineTest, SetReadTimestampBehindOldestTimestamp) {
ASSERT_THROWS_CODE(rs->findRecord(&opCtx, loc2, &rd), DBException, ErrorCodes::SnapshotTooOld);
opCtx.recoveryUnit()->abandonSnapshot();
- opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
ASSERT(rs->findRecord(&opCtx, loc1, &rd));
ASSERT(rs->findRecord(&opCtx, loc2, &rd));
}
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp
index 5b2e77e6292..44d73995482 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp
@@ -119,7 +119,6 @@ bool RecoveryUnit::forkIfNeeded() {
boost::optional<Timestamp> readFrom = boost::none;
switch (_timestampReadSource) {
- case ReadSource::kUnset:
case ReadSource::kNoTimestamp:
case ReadSource::kMajorityCommitted:
case ReadSource::kNoOverlap:
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h
index 0e0afbb1a13..c31d0d54d86 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h
@@ -131,7 +131,7 @@ private:
Timestamp _commitTimestamp = Timestamp::min();
// Specifies which external source to use when setting read timestamps on transactions.
- ReadSource _timestampReadSource = ReadSource::kUnset;
+ ReadSource _timestampReadSource = ReadSource::kNoTimestamp;
boost::optional<Timestamp> _readAtTimestamp = boost::none;
};
diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h
index 46dad070544..6c8c67df3c4 100644
--- a/src/mongo/db/storage/kv/kv_engine.h
+++ b/src/mongo/db/storage/kv/kv_engine.h
@@ -53,18 +53,6 @@ class SnapshotManager;
class KVEngine {
public:
/**
- * This function should only be called after the StorageEngine is set on the ServiceContext.
- *
- * Starts asycnhronous threads for a storage engine's integration layer. Any such thread
- * generating an OperationContext should be initialized here.
- *
- * In order for OperationContexts to be generated with real Locker objects, the generation must
- * occur after the StorageEngine is instantiated and set on the ServiceContext. Otherwise,
- * OperationContexts are created with LockerNoops.
- */
- virtual void startAsyncThreads() {}
-
- /**
* During the startup process, the storage engine is one of the first components to be started
* up and fully initialized. But that fully initialized storage engine may not be recognized as
* the end for the remaining storage startup tasks that still need to be performed.
@@ -275,6 +263,8 @@ public:
return false;
}
+ virtual void checkpoint() {}
+
virtual bool isDurable() const = 0;
/**
diff --git a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp
index 57bf3bf714d..1e928738d57 100644
--- a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp
+++ b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp
@@ -372,7 +372,7 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLastAppliedSnapshot) {
// Not reading on the last applied timestamp returns the most recent data.
auto op = makeOperation();
auto ru = op->recoveryUnit();
- ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ ru->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
ASSERT_EQ(itCountOn(op), 1);
ASSERT(readRecordOn(op, id));
@@ -408,7 +408,7 @@ TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) {
// Not reading on the last local timestamp returns the most recent data.
auto op = makeOperation();
auto ru = op->recoveryUnit();
- ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ ru->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
ASSERT_EQ(itCountOn(op), 1);
auto record = readRecordOn(op, id);
ASSERT_EQ(std::string(record->data.data()), "Blue spotted stingray");
diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h
index 5c8be96b528..2057f8854b3 100644
--- a/src/mongo/db/storage/recovery_unit.h
+++ b/src/mongo/db/storage/recovery_unit.h
@@ -392,11 +392,7 @@ public:
*/
enum ReadSource {
/**
- * Do not read from a timestamp. This is the default.
- */
- kUnset,
- /**
- * Read without a timestamp explicitly.
+ * Read without a timestamp. This is the default.
*/
kNoTimestamp,
/**
@@ -424,8 +420,6 @@ public:
static std::string toString(ReadSource rs) {
switch (rs) {
- case ReadSource::kUnset:
- return "kUnset";
case ReadSource::kNoTimestamp:
return "kNoTimestamp";
case ReadSource::kMajorityCommitted:
@@ -455,7 +449,7 @@ public:
boost::optional<Timestamp> provided = boost::none) {}
virtual ReadSource getTimestampReadSource() const {
- return ReadSource::kUnset;
+ return ReadSource::kNoTimestamp;
};
/**
diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp
index 5acbcd3a513..84af208d391 100644
--- a/src/mongo/db/storage/snapshot_helper.cpp
+++ b/src/mongo/db/storage/snapshot_helper.cpp
@@ -38,29 +38,37 @@
#include "mongo/logv2/log.h"
namespace mongo {
-namespace SnapshotHelper {
-bool canSwitchReadSource(OperationContext* opCtx) {
-
- // Most readConcerns have behavior controlled at higher levels. Local and available are the only
- // ReadConcerns that should consider changing, since they read without a timestamp by default.
+namespace {
+bool canReadAtLastApplied(OperationContext* opCtx) {
+ // Local and available are the only ReadConcern levels that allow their ReadSource to be
+ // overridden to read at lastApplied. They read without a timestamp by default, but this check
+    // keeps user secondary reads from conflicting with oplog batch application by reading at a
+ // consistent point in time.
+ // Internal operations use DBDirectClient as a loopback to perform local operations, and they
+ // expect the same level of consistency guarantees as any user operation. For that reason,
+ // DBDirectClient should be able to change the owning operation's ReadSource in order to serve
+ // consistent data.
const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel();
- if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern ||
- readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) {
+ if ((opCtx->getClient()->isFromUserConnection() || opCtx->getClient()->isInDirectClient()) &&
+ (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern ||
+ readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern)) {
return true;
}
-
return false;
}
+} // namespace
+namespace SnapshotHelper {
bool shouldReadAtLastApplied(OperationContext* opCtx,
const NamespaceString& nss,
std::string* reason) {
-
// If this is true, then the operation opted-in to the PBWM lock, implying that it cannot change
// its ReadSource. It's important to note that it is possible for this to be false, but still be
// holding the PBWM lock, explained below.
if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) {
- *reason = "conflicts with batch application";
+ if (reason) {
+ *reason = "conflicts with batch application";
+ }
return false;
}
@@ -71,16 +79,32 @@ bool shouldReadAtLastApplied(OperationContext* opCtx,
// guaranteed to observe all previous writes. This may occur when multiple collection locks are
// held concurrently, which is often the case when DBDirectClient is used.
if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) {
- *reason = "PBWM lock is held";
+ if (reason) {
+ *reason = "PBWM lock is held";
+ }
LOGV2_DEBUG(20577, 1, "not reading at lastApplied because the PBWM lock is held");
return false;
}
- // If we are in a replication state (like secondary or primary catch-up) where we are not
- // accepting writes, we should read at lastApplied. If this node can accept writes, then no
- // conflicting replication batches are being applied and we can read from the default snapshot.
+ // If this node can accept writes (i.e. primary), then no conflicting replication batches are
+ // being applied and we can read from the default snapshot. If we are in a replication state
+ // (like secondary or primary catch-up) where we are not accepting writes, we should read at
+ // lastApplied.
if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) {
- *reason = "primary";
+ if (reason) {
+ *reason = "primary";
+ }
+ return false;
+ }
+
+ // If we are not secondary, then we should not attempt to read at lastApplied because it may not
+ // be available or valid. Any operations reading outside of the primary or secondary states must
+ // be internal. We give these operations the benefit of the doubt rather than attempting to read
+ // at a lastApplied timestamp that is not valid.
+ if (!repl::ReplicationCoordinator::get(opCtx)->isInPrimaryOrSecondaryState(opCtx)) {
+ if (reason) {
+ *reason = "not primary or secondary";
+ }
return false;
}
@@ -88,7 +112,9 @@ bool shouldReadAtLastApplied(OperationContext* opCtx,
// written by the replication system. However, the oplog is special, as it *is* written by the
// replication system.
if (!nss.isReplicated() && !nss.isOplog()) {
- *reason = "unreplicated collection";
+ if (reason) {
+ *reason = "unreplicated collection";
+ }
return false;
}
@@ -96,15 +122,14 @@ bool shouldReadAtLastApplied(OperationContext* opCtx,
}
boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx,
const NamespaceString& nss) {
- const bool canSwitch = canSwitchReadSource(opCtx);
- if (!canSwitch) {
+ if (!canReadAtLastApplied(opCtx)) {
return boost::none;
}
const auto existing = opCtx->recoveryUnit()->getTimestampReadSource();
std::string reason;
const bool readAtLastApplied = shouldReadAtLastApplied(opCtx, nss, &reason);
- if (existing == RecoveryUnit::ReadSource::kUnset) {
+ if (existing == RecoveryUnit::ReadSource::kNoTimestamp) {
// Shifting from reading without a timestamp to reading with a timestamp can be dangerous
// because writes will appear to vanish. This case is intended for new reads on secondaries
// and query yield recovery after state transitions from primary to secondary.
@@ -122,13 +147,16 @@ boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opC
// Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to
// reading without one. More writes will become visible.
if (!readAtLastApplied) {
- LOGV2_DEBUG(
- 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason);
- // This shift to kUnset assumes that callers will not make future attempts to manipulate
- // their ReadSources after performing reads at an un-timetamped snapshot. The only
- // exception is callers of this function that may need to change from kUnset to
- // kLastApplied in the event of a catalog conflict or query yield.
- return RecoveryUnit::ReadSource::kUnset;
+ LOGV2_DEBUG(4452902,
+ 2,
+ "Changing ReadSource to kNoTimestamp",
+ logAttrs(nss),
+ "reason"_attr = reason);
+ // This shift to kNoTimestamp assumes that callers will not make future attempts to
+ // manipulate their ReadSources after performing reads at an un-timetamped snapshot. The
+ // only exception is callers of this function that may need to change from kNoTimestamp
+ // to kLastApplied in the event of a catalog conflict or query yield.
+ return RecoveryUnit::ReadSource::kNoTimestamp;
}
}
return boost::none;
diff --git a/src/mongo/db/storage/snapshot_helper.h b/src/mongo/db/storage/snapshot_helper.h
index fa8fdd85f24..c24dfd16d8c 100644
--- a/src/mongo/db/storage/snapshot_helper.h
+++ b/src/mongo/db/storage/snapshot_helper.h
@@ -37,6 +37,10 @@ namespace SnapshotHelper {
boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx,
const NamespaceString& nss);
+bool shouldReadAtLastApplied(OperationContext* opCtx,
+ const NamespaceString& nss,
+ std::string* reason = nullptr);
+
bool collectionChangesConflictWithRead(boost::optional<Timestamp> collectionMin,
boost::optional<Timestamp> readTimestamp);
} // namespace SnapshotHelper
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index aa06d951c9f..edf31b874fe 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -376,6 +376,14 @@ public:
OperationContext* opCtx) = 0;
/**
+ * Creates a temporary RecordStore on the storage engine for a resumable index build. On
+ * startup after an unclean shutdown, the storage engine will drop any un-dropped temporary
+ * record stores.
+ */
+ virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreForResumableIndexBuild(
+ OperationContext* opCtx) = 0;
+
+ /**
* Creates a temporary RecordStore on the storage engine from an existing ident on disk. On
* startup after an unclean shutdown, the storage engine will drop any un-dropped temporary
* record stores.
@@ -465,6 +473,12 @@ public:
std::shared_ptr<Ident> ident) = 0;
/**
+ * Called when the checkpoint thread instructs the storage engine to take a checkpoint. The
+ * underlying storage engine must take a checkpoint at this point.
+ */
+ virtual void checkpoint() = 0;
+
+ /**
* Recovers the storage engine state to the last stable timestamp. "Stable" in this case
* refers to a timestamp that is guaranteed to never be rolled back. The stable timestamp
* used should be one provided by StorageEngine::setStableTimestamp().
@@ -509,6 +523,11 @@ public:
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) = 0;
/**
+ * Returns the stable timestamp.
+ */
+ virtual Timestamp getStableTimestamp() const = 0;
+
+ /**
* Tells the storage engine the timestamp of the data at startup. This is necessary because
* timestamps are not persisted in the storage layer.
*/
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index 88c183919e1..22c82a09eba 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -326,7 +326,7 @@ Status StorageEngineImpl::_recoverOrphanedCollection(OperationContext* opCtx,
return Status::OK();
}
-bool StorageEngineImpl::_handleInternalIdents(
+bool StorageEngineImpl::_handleInternalIdent(
OperationContext* opCtx,
const std::string& ident,
InternalIdentReconcilePolicy internalIdentReconcilePolicy,
@@ -345,14 +345,15 @@ bool StorageEngineImpl::_handleInternalIdents(
return true;
}
+ if (!_catalog->isResumableIndexBuildIdent(ident)) {
+ return false;
+ }
+
// When starting up after a clean shutdown and resumable index builds are supported, find the
// internal idents that contain the relevant information to resume each index build and recover
// the state.
auto rs = _engine->getRecordStore(opCtx, "", ident, CollectionOptions());
- // Look at the contents to determine whether this ident will contain information for
- // resuming an index build.
- // TODO SERVER-49215: differentiate the internal idents without looking at the contents.
auto cursor = rs->getCursor(opCtx);
auto record = cursor->next();
if (record) {
@@ -360,36 +361,35 @@ bool StorageEngineImpl::_handleInternalIdents(
// Parse the documents here so that we can restart the build if the document doesn't
// contain all the necessary information to be able to resume building the index.
- if (doc.hasField("phase")) {
- ResumeIndexInfo resumeInfo;
- try {
- if (MONGO_unlikely(failToParseResumeIndexInfo.shouldFail())) {
- uasserted(ErrorCodes::FailPointEnabled,
- "failToParseResumeIndexInfo fail point is enabled");
- }
-
- resumeInfo = ResumeIndexInfo::parse(IDLParserErrorContext("ResumeIndexInfo"), doc);
- } catch (const DBException& e) {
- LOGV2(4916300, "Failed to parse resumable index info", "error"_attr = e.toStatus());
-
- // Ignore the error so that we can restart the index build instead of resume it. We
- // should drop the internal ident if we failed to parse.
- internalIdentsToDrop->insert(ident);
- return true;
+ ResumeIndexInfo resumeInfo;
+ try {
+ if (MONGO_unlikely(failToParseResumeIndexInfo.shouldFail())) {
+ uasserted(ErrorCodes::FailPointEnabled,
+ "failToParseResumeIndexInfo fail point is enabled");
}
- reconcileResult->indexBuildsToResume.push_back(resumeInfo);
+ resumeInfo = ResumeIndexInfo::parse(IDLParserErrorContext("ResumeIndexInfo"), doc);
+ } catch (const DBException& e) {
+ LOGV2(4916300, "Failed to parse resumable index info", "error"_attr = e.toStatus());
- // Once we have parsed the resume info, we can safely drop the internal ident.
+ // Ignore the error so that we can restart the index build instead of resume it. We
+ // should drop the internal ident if we failed to parse.
internalIdentsToDrop->insert(ident);
-
- LOGV2(4916301,
- "Found unfinished index build to resume",
- "buildUUID"_attr = resumeInfo.getBuildUUID(),
- "collectionUUID"_attr = resumeInfo.getCollectionUUID(),
- "phase"_attr = IndexBuildPhase_serializer(resumeInfo.getPhase()));
return true;
}
+
+ reconcileResult->indexBuildsToResume.push_back(resumeInfo);
+
+ // Once we have parsed the resume info, we can safely drop the internal ident.
+ internalIdentsToDrop->insert(ident);
+
+ LOGV2(4916301,
+ "Found unfinished index build to resume",
+ "buildUUID"_attr = resumeInfo.getBuildUUID(),
+ "collectionUUID"_attr = resumeInfo.getCollectionUUID(),
+ "phase"_attr = IndexBuildPhase_serializer(resumeInfo.getPhase()));
+
+ return true;
}
return false;
@@ -448,12 +448,12 @@ StatusWith<StorageEngine::ReconcileResult> StorageEngineImpl::reconcileCatalogAn
continue;
}
- if (_handleInternalIdents(opCtx,
- it,
- internalIdentReconcilePolicy,
- &reconcileResult,
- &internalIdentsToDrop,
- &allInternalIdents)) {
+ if (_handleInternalIdent(opCtx,
+ it,
+ internalIdentReconcilePolicy,
+ &reconcileResult,
+ &internalIdentsToDrop,
+ &allInternalIdents)) {
continue;
}
@@ -670,8 +670,6 @@ void StorageEngineImpl::finishInit() {
// A storage engine may need to start threads that require OperationsContexts with real Lockers,
// as opposed to LockerNoops. Placing the start logic here, after the StorageEngine has been
// instantiated, causes makeOperationContext() to create LockerImpls instead of LockerNoops.
- _engine->startAsyncThreads();
-
if (_engine->supportsRecoveryTimestamp()) {
_timestampMonitor = std::make_unique<TimestampMonitor>(
_engine.get(), getGlobalServiceContext()->getPeriodicRunner());
@@ -864,10 +862,18 @@ std::unique_ptr<TemporaryRecordStore> StorageEngineImpl::makeTemporaryRecordStor
OperationContext* opCtx) {
std::unique_ptr<RecordStore> rs =
_engine->makeTemporaryRecordStore(opCtx, _catalog->newInternalIdent());
- LOGV2_DEBUG(22258,
+ LOGV2_DEBUG(22258, 1, "Created temporary record store", "ident"_attr = rs->getIdent());
+ return std::make_unique<TemporaryKVRecordStore>(getEngine(), std::move(rs));
+}
+
+std::unique_ptr<TemporaryRecordStore>
+StorageEngineImpl::makeTemporaryRecordStoreForResumableIndexBuild(OperationContext* opCtx) {
+ std::unique_ptr<RecordStore> rs =
+ _engine->makeTemporaryRecordStore(opCtx, _catalog->newInternalResumableIndexBuildIdent());
+ LOGV2_DEBUG(4921500,
1,
- "created temporary record store: {rs_getIdent}",
- "rs_getIdent"_attr = rs->getIdent());
+ "Created temporary record store for resumable index build",
+ "ident"_attr = rs->getIdent());
return std::make_unique<TemporaryKVRecordStore>(getEngine(), std::move(rs));
}
@@ -885,6 +891,10 @@ void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force
_engine->setStableTimestamp(stableTimestamp, force);
}
+Timestamp StorageEngineImpl::getStableTimestamp() const {
+ return _engine->getStableTimestamp();
+}
+
void StorageEngineImpl::setInitialDataTimestamp(Timestamp initialDataTimestamp) {
_engine->setInitialDataTimestamp(initialDataTimestamp);
}
@@ -1025,6 +1035,10 @@ void StorageEngineImpl::addDropPendingIdent(const Timestamp& dropTimestamp,
_dropPendingIdentReaper.addDropPendingIdent(dropTimestamp, nss, ident);
}
+void StorageEngineImpl::checkpoint() {
+ _engine->checkpoint();
+}
+
void StorageEngineImpl::_onMinOfCheckpointAndOldestTimestampChanged(const Timestamp& timestamp) {
if (timestamp.isNull()) {
return;
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index 4761e1f3a38..fed128f9b59 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -113,6 +113,9 @@ public:
virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(
OperationContext* opCtx) override;
+ virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreForResumableIndexBuild(
+ OperationContext* opCtx) override;
+
virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent(
OperationContext* opCtx, StringData ident) override;
@@ -120,6 +123,8 @@ public:
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override;
+ virtual Timestamp getStableTimestamp() const override;
+
virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override;
virtual Timestamp getInitialDataTimestamp() const override;
@@ -312,6 +317,8 @@ public:
const NamespaceString& nss,
std::shared_ptr<Ident> ident) override;
+ void checkpoint() override;
+
DurableCatalog* getCatalog() override {
return _catalog.get();
}
@@ -386,12 +393,12 @@ private:
* Returns whether the given ident is an internal ident and if it should be dropped or used to
* resume an index build.
*/
- bool _handleInternalIdents(OperationContext* opCtx,
- const std::string& ident,
- InternalIdentReconcilePolicy internalIdentReconcilePolicy,
- ReconcileResult* reconcileResult,
- std::set<std::string>* internalIdentsToDrop,
- std::set<std::string>* allInternalIdents);
+ bool _handleInternalIdent(OperationContext* opCtx,
+ const std::string& ident,
+ InternalIdentReconcilePolicy internalIdentReconcilePolicy,
+ ReconcileResult* reconcileResult,
+ std::set<std::string>* internalIdentsToDrop,
+ std::set<std::string>* allInternalIdents);
class RemoveDBChange;
diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h
index 3a4a14bd9e6..96eb8020b1d 100644
--- a/src/mongo/db/storage/storage_engine_mock.h
+++ b/src/mongo/db/storage/storage_engine_mock.h
@@ -93,6 +93,10 @@ public:
std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(OperationContext* opCtx) final {
return {};
}
+ std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreForResumableIndexBuild(
+ OperationContext* opCtx) final {
+ return {};
+ }
std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent(
OperationContext* opCtx, StringData ident) final {
return {};
@@ -134,6 +138,9 @@ public:
MONGO_UNREACHABLE;
}
void setStableTimestamp(Timestamp stableTimestamp, bool force = false) final {}
+ Timestamp getStableTimestamp() const override {
+ return Timestamp();
+ }
void setInitialDataTimestamp(Timestamp timestamp) final {}
Timestamp getInitialDataTimestamp() const override {
return Timestamp();
@@ -168,6 +175,7 @@ public:
void addDropPendingIdent(const Timestamp& dropTimestamp,
const NamespaceString& nss,
std::shared_ptr<Ident> ident) final {}
+ void checkpoint() final {}
Status currentFilesCompatible(OperationContext* opCtx) const final {
return Status::OK();
}
diff --git a/src/mongo/db/storage/storage_options.cpp b/src/mongo/db/storage/storage_options.cpp
index 7ba94afde29..431698a807d 100644
--- a/src/mongo/db/storage/storage_options.cpp
+++ b/src/mongo/db/storage/storage_options.cpp
@@ -58,6 +58,7 @@ void StorageGlobalParams::reset() {
oplogMinRetentionHours.store(0.0);
allowOplogTruncation = true;
disableLockFreeReads = true;
+ checkpointDelaySecs = 0;
}
StorageGlobalParams storageGlobalParams;
diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h
index f6284a06244..e7fe5331f96 100644
--- a/src/mongo/db/storage/storage_options.h
+++ b/src/mongo/db/storage/storage_options.h
@@ -123,6 +123,10 @@ struct StorageGlobalParams {
// settings with which lock-free reads are incompatible: standalone mode; and
// enableMajorityReadConcern=false.
bool disableLockFreeReads;
+
+ // Delay in seconds between triggering the next checkpoint after the completion of the previous
+ // one. A value of 0 indicates that checkpointing will be skipped.
+ size_t checkpointDelaySecs;
};
extern StorageGlobalParams storageGlobalParams;
diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript
index 0cf7d92ce08..5d24feec685 100644
--- a/src/mongo/db/storage/wiredtiger/SConscript
+++ b/src/mongo/db/storage/wiredtiger/SConscript
@@ -139,6 +139,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/service_context_d',
'$BUILD_DIR/mongo/db/service_context_test_fixture',
+ '$BUILD_DIR/mongo/db/storage/checkpointer',
'$BUILD_DIR/mongo/db/storage/durable_catalog_impl',
'$BUILD_DIR/mongo/db/storage/kv/kv_engine_test_harness',
'$BUILD_DIR/mongo/db/storage/recovery_unit_test_harness',
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp
index d7bba3ee94d..8149bab8757 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp
@@ -43,11 +43,6 @@ WiredTigerGlobalOptions wiredTigerGlobalOptions;
Status WiredTigerGlobalOptions::store(const moe::Environment& params) {
// WiredTiger storage engine options
- if (params.count("storage.syncPeriodSecs")) {
- wiredTigerGlobalOptions.checkpointDelaySecs =
- static_cast<size_t>(params["storage.syncPeriodSecs"].as<double>());
- }
-
if (!wiredTigerGlobalOptions.engineConfig.empty()) {
LOGV2(22293,
"Engine custom option: {wiredTigerGlobalOptions_engineConfig}",
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h
index 21d4c522f3b..51546164c39 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h
@@ -40,7 +40,6 @@ class WiredTigerGlobalOptions {
public:
WiredTigerGlobalOptions()
: cacheSizeGB(0),
- checkpointDelaySecs(0),
statisticsLogDelaySecs(0),
directoryForIndexes(false),
maxCacheOverflowFileSizeGBDeprecated(0),
@@ -50,7 +49,6 @@ public:
Status store(const optionenvironment::Environment& params);
double cacheSizeGB;
- size_t checkpointDelaySecs;
size_t statisticsLogDelaySecs;
std::string journalCompressor;
bool directoryForIndexes;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 1553c1740fe..f169f952e05 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -119,8 +119,6 @@ namespace {
MONGO_FAIL_POINT_DEFINE(WTPreserveSnapshotHistoryIndefinitely);
MONGO_FAIL_POINT_DEFINE(WTSetOldestTSToStableTS);
-MONGO_FAIL_POINT_DEFINE(pauseCheckpointThread);
-
} // namespace
bool WiredTigerFileVersion::shouldDowngrade(bool readOnly,
@@ -255,231 +253,6 @@ std::string toString(const StorageEngine::OldestActiveTransactionTimestampResult
}
}
-class WiredTigerKVEngine::WiredTigerCheckpointThread : public BackgroundJob {
-public:
- explicit WiredTigerCheckpointThread(WiredTigerKVEngine* wiredTigerKVEngine,
- WiredTigerSessionCache* sessionCache)
- : BackgroundJob(false /* deleteSelf */),
- _wiredTigerKVEngine(wiredTigerKVEngine),
- _sessionCache(sessionCache) {}
-
- virtual string name() const {
- return "WTCheckpointThread";
- }
-
- virtual void run() {
- ThreadClient tc(name(), getGlobalServiceContext());
- LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name());
-
- while (true) {
- auto opCtx = tc->makeOperationContext();
-
- {
- stdx::unique_lock<Latch> lock(_mutex);
- MONGO_IDLE_THREAD_BLOCK;
-
- // Wait for 'wiredTigerGlobalOptions.checkpointDelaySecs' seconds; or until either
- // shutdown is signaled or a checkpoint is triggered.
- _condvar.wait_for(lock,
- stdx::chrono::seconds(static_cast<std::int64_t>(
- wiredTigerGlobalOptions.checkpointDelaySecs)),
- [&] { return _shuttingDown || _triggerCheckpoint; });
-
- // If the checkpointDelaySecs is set to 0, that means we should skip checkpointing.
- // However, checkpointDelaySecs is adjustable by a runtime server parameter, so we
- // need to wake up to check periodically. The wakeup to check period is arbitrary.
- while (wiredTigerGlobalOptions.checkpointDelaySecs == 0 && !_shuttingDown &&
- !_triggerCheckpoint) {
- _condvar.wait_for(lock,
- stdx::chrono::seconds(static_cast<std::int64_t>(3)),
- [&] { return _shuttingDown || _triggerCheckpoint; });
- }
-
- if (_shuttingDown) {
- LOGV2_DEBUG(22309, 1, "Stopping thread", "threadName"_attr = name());
- return;
- }
-
- // Clear the trigger so we do not immediately checkpoint again after this.
- _triggerCheckpoint = false;
- }
-
- pauseCheckpointThread.pauseWhileSet();
-
- const Date_t startTime = Date_t::now();
-
- const Timestamp stableTimestamp = _wiredTigerKVEngine->getStableTimestamp();
- const Timestamp initialDataTimestamp = _wiredTigerKVEngine->getInitialDataTimestamp();
-
- // The amount of oplog to keep is primarily dictated by a user setting. However, in
- // unexpected cases, durable, recover to a timestamp storage engines may need to play
- // forward from an oplog entry that would otherwise be truncated by the user
- // setting. Furthermore, the entries in prepared or large transactions can refer to
- // previous entries in the same transaction.
- //
- // Live (replication) rollback will replay oplogs from exactly the stable timestamp.
- // With prepared or large transactions, it may require some additional entries prior to
- // the stable timestamp. These requirements are summarized in getOplogNeededForRollback.
- // Truncating the oplog at this point is sufficient for in-memory configurations, but
- // could cause an unrecoverable scenario if the node crashed and has to play from the
- // last stable checkpoint.
- //
- // By recording the oplog needed for rollback "now", then taking a stable checkpoint,
- // we can safely assume that the oplog needed for crash recovery has caught up to the
- // recorded value. After the checkpoint, this value will be published such that actors
- // which truncate the oplog can read an updated value.
- try {
- // Three cases:
- //
- // First, initialDataTimestamp is Timestamp(0, 1) -> Take full checkpoint. This is
- // when there is no consistent view of the data (i.e: during initial sync).
- //
- // Second, stableTimestamp < initialDataTimestamp: Skip checkpoints. The data on
- // disk is prone to being rolled back. Hold off on checkpoints. Hope that the
- // stable timestamp surpasses the data on disk, allowing storage to persist newer
- // copies to disk.
- //
- // Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady
- // state case.
- if (initialDataTimestamp.asULL() <= 1) {
- UniqueWiredTigerSession session = _sessionCache->getSession();
- WT_SESSION* s = session->getSession();
- invariantWTOK(s->checkpoint(s, "use_timestamp=false"));
- } else if (stableTimestamp < initialDataTimestamp) {
- LOGV2_FOR_RECOVERY(
- 23985,
- 2,
- "Stable timestamp is behind the initial data timestamp, skipping "
- "a checkpoint. StableTimestamp: {stableTimestamp} InitialDataTimestamp: "
- "{initialDataTimestamp}",
- "stableTimestamp"_attr = stableTimestamp.toString(),
- "initialDataTimestamp"_attr = initialDataTimestamp.toString());
- } else {
- auto oplogNeededForRollback = _wiredTigerKVEngine->getOplogNeededForRollback();
-
- LOGV2_FOR_RECOVERY(
- 23986,
- 2,
- "Performing stable checkpoint. StableTimestamp: {stableTimestamp}, "
- "OplogNeededForRollback: {oplogNeededForRollback}",
- "stableTimestamp"_attr = stableTimestamp,
- "oplogNeededForRollback"_attr = toString(oplogNeededForRollback));
-
- UniqueWiredTigerSession session = _sessionCache->getSession();
- WT_SESSION* s = session->getSession();
- invariantWTOK(s->checkpoint(s, "use_timestamp=true"));
-
- if (oplogNeededForRollback.isOK()) {
- // Now that the checkpoint is durable, publish the oplog needed to recover
- // from it.
- stdx::lock_guard<Latch> lk(_oplogNeededForCrashRecoveryMutex);
- _oplogNeededForCrashRecovery.store(
- oplogNeededForRollback.getValue().asULL());
- }
- }
-
- const auto secondsElapsed = durationCount<Seconds>(Date_t::now() - startTime);
- if (secondsElapsed >= 30) {
- LOGV2_DEBUG(22308,
- 1,
- "Checkpoint took {secondsElapsed} seconds to complete.",
- "secondsElapsed"_attr = secondsElapsed);
- }
- } catch (const WriteConflictException&) {
- // Temporary: remove this after WT-3483
- LOGV2_WARNING(22346, "Checkpoint encountered a write conflict exception.");
- } catch (const AssertionException& exc) {
- invariant(ErrorCodes::isShutdownError(exc.code()), exc.what());
- }
- }
- }
-
- /**
- * Returns true if we have already triggered taking the first checkpoint.
- */
- bool hasTriggeredFirstStableCheckpoint() {
- stdx::unique_lock<Latch> lock(_mutex);
- return _hasTriggeredFirstStableCheckpoint;
- }
-
- /**
- * Triggers taking the first stable checkpoint, which is when the stable timestamp advances past
- * the initial data timestamp.
- *
- * The checkpoint thread runs automatically every wiredTigerGlobalOptions.checkpointDelaySecs
- * seconds. This function avoids potentially waiting that full duration for a stable checkpoint,
- * initiating one immediately.
- *
- * Do not call this function if hasTriggeredFirstStableCheckpoint() returns true.
- */
- void triggerFirstStableCheckpoint(Timestamp prevStable,
- Timestamp initialData,
- Timestamp currStable) {
- stdx::unique_lock<Latch> lock(_mutex);
- invariant(!_hasTriggeredFirstStableCheckpoint);
- if (prevStable < initialData && currStable >= initialData) {
- LOGV2(22310,
- "Triggering the first stable checkpoint. Initial Data: {initialData} PrevStable: "
- "{prevStable} CurrStable: {currStable}",
- "Triggering the first stable checkpoint",
- "initialData"_attr = initialData,
- "prevStable"_attr = prevStable,
- "currStable"_attr = currStable);
- _hasTriggeredFirstStableCheckpoint = true;
- _triggerCheckpoint = true;
- _condvar.notify_one();
- }
- }
-
- std::uint64_t getOplogNeededForCrashRecovery() const {
- return _oplogNeededForCrashRecovery.load();
- }
-
- /*
- * Atomically assign _oplogNeededForCrashRecovery to a variable.
- * _oplogNeededForCrashRecovery will not change during assignment.
- */
- void assignOplogNeededForCrashRecoveryTo(boost::optional<Timestamp>* timestamp) {
- stdx::lock_guard<Latch> lk(_oplogNeededForCrashRecoveryMutex);
- *timestamp = Timestamp(_oplogNeededForCrashRecovery.load());
- }
-
- void shutdown() {
- {
- stdx::unique_lock<Latch> lock(_mutex);
- _shuttingDown = true;
- // Wake up the checkpoint thread early, to take a final checkpoint before shutting
- // down, if one has not coincidentally just been taken.
- _condvar.notify_one();
- }
- wait();
- }
-
-private:
- WiredTigerKVEngine* _wiredTigerKVEngine;
- WiredTigerSessionCache* _sessionCache;
-
- Mutex _oplogNeededForCrashRecoveryMutex =
- MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_oplogNeededForCrashRecoveryMutex");
- AtomicWord<std::uint64_t> _oplogNeededForCrashRecovery;
-
- // Protects the state below.
- Mutex _mutex = MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_mutex");
-
- // The checkpoint thread idles on this condition variable for a particular time duration between
- // taking checkpoints. It can be triggered early to expedite either: immediate checkpointing if
- // _triggerCheckpoint is set; or shutdown cleanup if _shuttingDown is set.
- stdx::condition_variable _condvar;
-
- bool _shuttingDown = false;
-
- // This flag ensures the first stable checkpoint is only triggered once.
- bool _hasTriggeredFirstStableCheckpoint = false;
-
- // This flag allows the checkpoint thread to wake up early when _condvar is signaled.
- bool _triggerCheckpoint = false;
-};
-
namespace {
TicketHolder openWriteTransaction(128);
TicketHolder openReadTransaction(128);
@@ -759,16 +532,6 @@ WiredTigerKVEngine::~WiredTigerKVEngine() {
_sessionCache.reset(nullptr);
}
-void WiredTigerKVEngine::startAsyncThreads() {
- if (!_ephemeral) {
- if (!_readOnly) {
- _checkpointThread =
- std::make_unique<WiredTigerCheckpointThread>(this, _sessionCache.get());
- _checkpointThread->go();
- }
- }
-}
-
void WiredTigerKVEngine::notifyStartupComplete() {
WiredTigerUtil::notifyStartupComplete();
}
@@ -898,11 +661,6 @@ void WiredTigerKVEngine::cleanShutdown() {
_sessionSweeper->shutdown();
LOGV2(22319, "Finished shutting down session sweeper thread");
}
- if (_checkpointThread) {
- LOGV2(22322, "Shutting down checkpoint thread");
- _checkpointThread->shutdown();
- LOGV2(22323, "Finished shutting down checkpoint thread");
- }
LOGV2_FOR_RECOVERY(23988,
2,
"Shutdown timestamps.",
@@ -1385,7 +1143,7 @@ WiredTigerKVEngine::beginNonBlockingBackup(OperationContext* opCtx,
// Oplog truncation thread won't remove oplog since the checkpoint pinned by the backup cursor.
stdx::lock_guard<Latch> lock(_oplogPinnedByBackupMutex);
- _checkpointThread->assignOplogNeededForCrashRecoveryTo(&_oplogPinnedByBackup);
+ _oplogPinnedByBackup = Timestamp(_oplogNeededForCrashRecovery.load());
auto pinOplogGuard = makeGuard([&] { _oplogPinnedByBackup = boost::none; });
// Persist the sizeStorer information to disk before opening the backup cursor. We aren't
@@ -1907,6 +1665,74 @@ bool WiredTigerKVEngine::supportsDirectoryPerDB() const {
return true;
}
+void WiredTigerKVEngine::checkpoint() {
+ const Timestamp stableTimestamp = getStableTimestamp();
+ const Timestamp initialDataTimestamp = getInitialDataTimestamp();
+
+ // The amount of oplog to keep is primarily dictated by a user setting. However, in unexpected
+ // cases, durable, recover to a timestamp storage engines may need to play forward from an oplog
+ // entry that would otherwise be truncated by the user setting. Furthermore, the entries in
+ // prepared or large transactions can refer to previous entries in the same transaction.
+ //
+ // Live (replication) rollback will replay the oplog from exactly the stable timestamp. With
+ // prepared or large transactions, it may require some additional entries prior to the stable
+ // timestamp. These requirements are summarized in getOplogNeededForRollback. Truncating the
+ // oplog at this point is sufficient for in-memory configurations, but could cause an
+ // unrecoverable scenario if the node crashed and has to play from the last stable checkpoint.
+ //
+ // By recording the oplog needed for rollback "now", then taking a stable checkpoint, we can
+ // safely assume that the oplog needed for crash recovery has caught up to the recorded value.
+ // After the checkpoint, this value will be published such that actors which truncate the oplog
+ // can read an updated value.
+ try {
+ // Three cases:
+ //
+ // First, initialDataTimestamp is Timestamp(0, 1) -> Take full checkpoint. This is when
+ // there is no consistent view of the data (i.e: during initial sync).
+ //
+ // Second, stableTimestamp < initialDataTimestamp: Skip checkpoints. The data on disk is
+ // prone to being rolled back. Hold off on checkpoints. Hope that the stable timestamp
+ // surpasses the data on disk, allowing storage to persist newer copies to disk.
+ //
+ // Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady state
+ // case.
+ if (initialDataTimestamp.asULL() <= 1) {
+ UniqueWiredTigerSession session = _sessionCache->getSession();
+ WT_SESSION* s = session->getSession();
+ invariantWTOK(s->checkpoint(s, "use_timestamp=false"));
+ } else if (stableTimestamp < initialDataTimestamp) {
+ LOGV2_FOR_RECOVERY(
+ 23985,
+ 2,
+ "Stable timestamp is behind the initial data timestamp, skipping a checkpoint.",
+ "stableTimestamp"_attr = stableTimestamp.toString(),
+ "initialDataTimestamp"_attr = initialDataTimestamp.toString());
+ } else {
+ auto oplogNeededForRollback = getOplogNeededForRollback();
+
+ LOGV2_FOR_RECOVERY(23986,
+ 2,
+ "Performing stable checkpoint.",
+ "stableTimestamp"_attr = stableTimestamp,
+ "oplogNeededForRollback"_attr = toString(oplogNeededForRollback));
+
+ UniqueWiredTigerSession session = _sessionCache->getSession();
+ WT_SESSION* s = session->getSession();
+ invariantWTOK(s->checkpoint(s, "use_timestamp=true"));
+
+ if (oplogNeededForRollback.isOK()) {
+ // Now that the checkpoint is durable, publish the oplog needed to recover from it.
+ _oplogNeededForCrashRecovery.store(oplogNeededForRollback.getValue().asULL());
+ }
+ }
+ } catch (const WriteConflictException&) {
+ // TODO SERVER-50824: Check if this can be removed now that WT-3483 is done.
+ LOGV2_WARNING(22346, "Checkpoint encountered a write conflict exception.");
+ } catch (const AssertionException& exc) {
+ invariant(ErrorCodes::isShutdownError(exc.code()), exc.what());
+ }
+}
+
bool WiredTigerKVEngine::hasIdent(OperationContext* opCtx, StringData ident) const {
return _hasUri(WiredTigerRecoveryUnit::get(opCtx)->getSession()->getSession(), _uri(ident));
}
@@ -2045,10 +1871,6 @@ void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp, bool forc
// After publishing a stable timestamp to WT, we can record the updated stable timestamp value
// for the necessary oplog to keep.
_stableTimestamp.store(stableTimestamp.asULL());
- if (_checkpointThread && !_checkpointThread->hasTriggeredFirstStableCheckpoint()) {
- _checkpointThread->triggerFirstStableCheckpoint(
- prevStable, Timestamp(_initialDataTimestamp.load()), stableTimestamp);
- }
// If 'force' is set, then we have already set the oldest timestamp equal to the stable
// timestamp, so there is nothing left to do.
@@ -2193,13 +2015,6 @@ StatusWith<Timestamp> WiredTigerKVEngine::recoverToStableTimestamp(OperationCont
23989, 2, "WiredTiger::RecoverToStableTimestamp syncing size storer to disk.");
syncSizeInfo(true);
- if (!_ephemeral) {
- LOGV2_FOR_ROLLBACK(
- 23990, 2, "WiredTiger::RecoverToStableTimestamp shutting down checkpoint thread.");
- // Shutdown WiredTigerKVEngine owned accesses into the storage engine.
- _checkpointThread->shutdown();
- }
-
const Timestamp stableTimestamp(_stableTimestamp.load());
const Timestamp initialDataTimestamp(_initialDataTimestamp.load());
@@ -2216,11 +2031,6 @@ StatusWith<Timestamp> WiredTigerKVEngine::recoverToStableTimestamp(OperationCont
str::stream() << "Error rolling back to stable. Err: " << wiredtiger_strerror(ret)};
}
- if (!_ephemeral) {
- _checkpointThread = std::make_unique<WiredTigerCheckpointThread>(this, _sessionCache.get());
- _checkpointThread->go();
- }
-
_sizeStorer = std::make_unique<WiredTigerSizeStorer>(_conn, _sizeStorerUri, _readOnly);
return {stableTimestamp};
@@ -2345,7 +2155,7 @@ boost::optional<Timestamp> WiredTigerKVEngine::getOplogNeededForCrashRecovery()
return boost::none;
}
- return Timestamp(_checkpointThread->getOplogNeededForCrashRecovery());
+ return Timestamp(_oplogNeededForCrashRecovery.load());
}
Timestamp WiredTigerKVEngine::getPinnedOplog() const {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index 9327ae7454f..bfd539e7815 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -103,8 +103,6 @@ public:
~WiredTigerKVEngine();
- void startAsyncThreads() override;
-
void notifyStartupComplete() override;
void setRecordStoreExtraOptions(const std::string& options);
@@ -119,6 +117,8 @@ public:
return !isEphemeral();
}
+ void checkpoint() override;
+
bool isDurable() const override {
return _durable;
}
@@ -369,7 +369,6 @@ public:
private:
class WiredTigerSessionSweeper;
- class WiredTigerCheckpointThread;
/**
* Opens a connection on the WiredTiger database 'path' with the configuration 'wtOpenConfig'.
@@ -458,7 +457,6 @@ private:
const bool _keepDataHistory = true;
std::unique_ptr<WiredTigerSessionSweeper> _sessionSweeper;
- std::unique_ptr<WiredTigerCheckpointThread> _checkpointThread;
std::string _rsOptions;
std::string _indexOptions;
@@ -485,6 +483,8 @@ private:
// timestamp. Provided by replication layer because WT does not persist timestamps.
AtomicWord<std::uint64_t> _initialDataTimestamp;
+ AtomicWord<std::uint64_t> _oplogNeededForCrashRecovery;
+
std::unique_ptr<WiredTigerEngineRuntimeConfigParameter> _runTimeConfigParam;
mutable Mutex _highestDurableTimestampMutex =
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp
index b870c017798..2580960a76c 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp
@@ -43,7 +43,7 @@
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/service_context.h"
#include "mongo/db/service_context_test_fixture.h"
-#include "mongo/db/storage/wiredtiger/wiredtiger_global_options.h"
+#include "mongo/db/storage/checkpointer.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_record_store.h"
#include "mongo/logv2/log.h"
@@ -82,19 +82,16 @@ public:
private:
std::unique_ptr<WiredTigerKVEngine> makeEngine() {
- auto engine = std::make_unique<WiredTigerKVEngine>(kWiredTigerEngineName,
- _dbpath.path(),
- _cs.get(),
- "",
- 1,
- 0,
- false,
- false,
- _forRepair,
- false);
- // There are unit tests expecting checkpoints to occur asynchronously.
- engine->startAsyncThreads();
- return engine;
+ return std::make_unique<WiredTigerKVEngine>(kWiredTigerEngineName,
+ _dbpath.path(),
+ _cs.get(),
+ "",
+ 1,
+ 0,
+ false,
+ false,
+ _forRepair,
+ false);
}
const std::unique_ptr<ClockSource> _cs = std::make_unique<ClockSourceMock>();
@@ -246,6 +243,9 @@ TEST_F(WiredTigerKVEngineRepairTest, UnrecoverableOrphanedDataFilesAreRebuilt) {
}
TEST_F(WiredTigerKVEngineTest, TestOplogTruncation) {
+ std::unique_ptr<Checkpointer> checkpointer = std::make_unique<Checkpointer>(_engine);
+ checkpointer->go();
+
auto opCtxPtr = makeOperationContext();
// The initial data timestamp has to be set to take stable checkpoints. The first stable
// timestamp greater than this will also trigger a checkpoint. The following loop of the
@@ -262,7 +262,7 @@ TEST_F(WiredTigerKVEngineTest, TestOplogTruncation) {
#endif
#endif
{
- wiredTigerGlobalOptions.checkpointDelaySecs = 1;
+ storageGlobalParams.checkpointDelaySecs = 1;
}
();
@@ -341,6 +341,8 @@ TEST_F(WiredTigerKVEngineTest, TestOplogTruncation) {
_engine->setStableTimestamp(Timestamp(30, 1), false);
callbackShouldFail.store(false);
assertPinnedMovesSoon(Timestamp(40, 1));
+
+ checkpointer->shutdown({ErrorCodes::ShutdownInProgress, "Test finished"});
}
std::unique_ptr<KVHarnessHelper> makeHelper() {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
index 1167fd673f3..b3cc4c6dde7 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
@@ -445,7 +445,6 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp()
// transaction to establish a read timestamp, but only for ReadSources that are expected to have
// read timestamps.
switch (_timestampReadSource) {
- case ReadSource::kUnset:
case ReadSource::kNoTimestamp:
return boost::none;
case ReadSource::kMajorityCommitted:
@@ -484,7 +483,6 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp()
return _readAtTimestamp;
// The follow ReadSources returned values in the first switch block.
- case ReadSource::kUnset:
case ReadSource::kNoTimestamp:
case ReadSource::kMajorityCommitted:
case ReadSource::kProvided:
@@ -507,7 +505,6 @@ void WiredTigerRecoveryUnit::_txnOpen() {
WT_SESSION* session = _session->getSession();
switch (_timestampReadSource) {
- case ReadSource::kUnset:
case ReadSource::kNoTimestamp: {
if (_isOplogReader) {
_oplogVisibleTs = static_cast<std::int64_t>(_oplogManager->getOplogReadTimestamp());
@@ -827,7 +824,6 @@ void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource,
"setting timestamp read source",
"readSource"_attr = toString(readSource),
"provided"_attr = ((provided) ? provided->toString() : "none"));
-
invariant(!_isActive() || _timestampReadSource == readSource,
str::stream() << "Current state: " << toString(_getState())
<< ". Invalid internal state while setting timestamp read source: "
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
index 312a46f5c09..0d557fc6329 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
@@ -250,7 +250,7 @@ private:
bool _isTimestamped = false;
// Specifies which external source to use when setting read timestamps on transactions.
- ReadSource _timestampReadSource = ReadSource::kUnset;
+ ReadSource _timestampReadSource = ReadSource::kNoTimestamp;
// Commits are assumed ordered. Unordered commits are assumed to always need to reserve a
// new optime, and thus always call oplogDiskLocRegister() on the record store.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
index b50d4b79889..2dde320ceeb 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
@@ -61,13 +61,6 @@ public:
false, // .repair
false // .readOnly
) {
- // Deliberately not calling _engine->startAsyncThreads() because it starts an asynchronous
- // checkpointing thread that can interfere with unit tests manipulating checkpoints
- // manually.
- //
- // Alternatively, we would have to start using wiredTigerGlobalOptions.checkpointDelaySecs
- // to set a high enough value such that the async thread never runs during testing.
-
repl::ReplicationCoordinator::set(
getGlobalServiceContext(),
std::unique_ptr<repl::ReplicationCoordinator>(new repl::ReplicationCoordinatorMock(
@@ -203,7 +196,8 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) {
}
// Read without a timestamp. The write should be visible.
- ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), RecoveryUnit::ReadSource::kUnset);
+ ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(),
+ RecoveryUnit::ReadSource::kNoTimestamp);
RecordData unused;
ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
@@ -237,7 +231,7 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) {
// Read without a timestamp, and we should see the first and third records.
opCtx1->recoveryUnit()->abandonSnapshot();
- opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused));
ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused));
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 742bfd087b4..de5874ae3a6 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -124,8 +124,9 @@ struct ActiveTransactionHistory {
ActiveTransactionHistory fetchActiveTransactionHistory(OperationContext* opCtx,
const LogicalSessionId& lsid) {
- // Restore the current timestamp read source after fetching transaction history.
- ReadSourceScope readSourceScope(opCtx);
+ // Restore the current timestamp read source after fetching transaction history using
+ // DBDirectClient, which may change our ReadSource.
+ ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp);
ActiveTransactionHistory result;
diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h
index f898b21c112..37b71ce8589 100644
--- a/src/mongo/db/transaction_participant.h
+++ b/src/mongo/db/transaction_participant.h
@@ -33,11 +33,11 @@
#include <iostream>
#include <map>
+#include "mongo/db/api_parameters.h"
#include "mongo/db/catalog/uncommitted_collections.h"
#include "mongo/db/commands/txn_cmds_gen.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/concurrency/locker.h"
-#include "mongo/db/initialize_api_parameters.h"
#include "mongo/db/logical_session_id.h"
#include "mongo/db/multi_key_path_tracker.h"
#include "mongo/db/ops/update_request.h"
diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp
index 84d533e2069..022dfb970ed 100644
--- a/src/mongo/dbtests/querytests.cpp
+++ b/src/mongo/dbtests/querytests.cpp
@@ -117,7 +117,7 @@ protected:
uassertStatusOK(indexer.insertAllDocumentsInCollection(&_opCtx, _collection));
uassertStatusOK(
indexer.drainBackgroundWrites(&_opCtx,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
uassertStatusOK(indexer.checkConstraints(&_opCtx));
{
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index 750c8ac447d..d270e0467d7 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -103,7 +103,7 @@ public:
OneOffRead(OperationContext* opCtx, const Timestamp& ts) : _opCtx(opCtx) {
_opCtx->recoveryUnit()->abandonSnapshot();
if (ts.isNull()) {
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
} else {
_opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, ts);
}
@@ -111,7 +111,7 @@ public:
~OneOffRead() {
_opCtx->recoveryUnit()->abandonSnapshot();
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
}
private:
@@ -234,7 +234,7 @@ public:
*/
void reset(NamespaceString nss) const {
::mongo::writeConflictRetry(_opCtx, "deleteAll", nss.ns(), [&] {
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
AutoGetCollection collRaii(_opCtx, nss, LockMode::MODE_X);
if (collRaii) {
@@ -2057,7 +2057,7 @@ public:
firstInsert.asTimestamp());
ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
auto indexCatalog = autoColl.getCollection()->getIndexCatalog();
@@ -2100,7 +2100,7 @@ public:
setReplCoordAppliedOpTime(repl::OpTime(afterSecondInsert.asTimestamp(), presentTerm));
ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
{
@@ -2843,7 +2843,7 @@ public:
ASSERT_FALSE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
- RecoveryUnit::ReadSource::kUnset,
+ RecoveryUnit::ReadSource::kNoTimestamp,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
diff --git a/src/mongo/executor/SConscript b/src/mongo/executor/SConscript
index 76956c8818e..b10cf01369a 100644
--- a/src/mongo/executor/SConscript
+++ b/src/mongo/executor/SConscript
@@ -31,6 +31,7 @@ env.Library(
'remote_command_response.cpp',
],
LIBDEPS=[
+ '$BUILD_DIR/mongo/db/api_parameters',
'$BUILD_DIR/mongo/rpc/metadata',
'$BUILD_DIR/mongo/util/net/network',
]
diff --git a/src/mongo/executor/remote_command_request.cpp b/src/mongo/executor/remote_command_request.cpp
index 875da25ef9f..4c35525e6a9 100644
--- a/src/mongo/executor/remote_command_request.cpp
+++ b/src/mongo/executor/remote_command_request.cpp
@@ -34,6 +34,7 @@
#include <fmt/format.h>
#include "mongo/bson/simple_bsonobj_comparator.h"
+#include "mongo/db/api_parameters.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/query/query_request.h"
#include "mongo/platform/atomic_word.h"
@@ -86,6 +87,12 @@ RemoteCommandRequestBase::RemoteCommandRequestBase(RequestId requestId,
cmdObj = cmdObj.addField(BSON("clientOperationKey" << operationKey.get()).firstElement());
}
+ if (opCtx && APIParameters::get(opCtx).getParamsPassed()) {
+ BSONObjBuilder bob(std::move(cmdObj));
+ APIParameters::get(opCtx).appendInfo(&bob);
+ cmdObj = bob.obj();
+ }
+
_updateTimeoutFromOpCtxDeadline(opCtx);
}
diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp
index 19846e62b48..d9c2500f2d3 100644
--- a/src/mongo/s/catalog_cache.cpp
+++ b/src/mongo/s/catalog_cache.cpp
@@ -55,6 +55,7 @@
#include "mongo/util/timer.h"
namespace mongo {
+
const OperationContext::Decoration<bool> operationShouldBlockBehindCatalogCacheRefresh =
OperationContext::declareDecoration<bool>();
@@ -68,81 +69,8 @@ namespace {
const int kMaxInconsistentRoutingInfoRefreshAttempts = 3;
const int kDatabaseCacheSize = 10000;
-/**
- * Returns whether two shard versions have a matching epoch.
- */
-bool shardVersionsHaveMatchingEpoch(boost::optional<ChunkVersion> wanted,
- const ChunkVersion& received) {
- return wanted && wanted->epoch() == received.epoch();
-};
-
-/**
- * Given an (optional) initial routing table and a set of changed chunks returned by the catalog
- * cache loader, produces a new routing table with the changes applied.
- *
- * If the collection is no longer sharded returns nullptr. If the epoch has changed, expects that
- * the 'collectionChunksList' contains the full contents of the chunks collection for that namespace
- * so that the routing table can be built from scratch.
- *
- * Throws ConflictingOperationInProgress if the chunk metadata was found to be inconsistent (not
- * containing all the necessary chunks, contains overlaps or chunks' epoch values are not the same
- * as that of the collection). Since this situation may be transient, due to the collection being
- * dropped or having its shard key refined concurrently, the caller must retry the reload up to some
- * configurable number of attempts.
- */
-std::shared_ptr<RoutingTableHistory> refreshCollectionRoutingInfo(
- OperationContext* opCtx,
- const NamespaceString& nss,
- std::shared_ptr<RoutingTableHistory> existingRoutingInfo,
- StatusWith<CatalogCacheLoader::CollectionAndChangedChunks> swCollectionAndChangedChunks) {
- if (swCollectionAndChangedChunks == ErrorCodes::NamespaceNotFound) {
- return nullptr;
- }
- const auto collectionAndChunks = uassertStatusOK(std::move(swCollectionAndChangedChunks));
-
- auto chunkManager = [&] {
- // If we have routing info already and it's for the same collection epoch, we're updating.
- // Otherwise, we're making a whole new routing table.
- if (existingRoutingInfo &&
- existingRoutingInfo->getVersion().epoch() == collectionAndChunks.epoch) {
- if (collectionAndChunks.changedChunks.size() == 1 &&
- collectionAndChunks.changedChunks[0].getVersion() ==
- existingRoutingInfo->getVersion())
- return existingRoutingInfo;
-
- return std::make_shared<RoutingTableHistory>(
- existingRoutingInfo->makeUpdated(std::move(collectionAndChunks.reshardingFields),
- collectionAndChunks.changedChunks));
- }
-
- auto defaultCollator = [&]() -> std::unique_ptr<CollatorInterface> {
- if (!collectionAndChunks.defaultCollation.isEmpty()) {
- // The collation should have been validated upon collection creation
- return uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext())
- ->makeFromBSON(collectionAndChunks.defaultCollation));
- }
- return nullptr;
- }();
-
- return std::make_shared<RoutingTableHistory>(
- RoutingTableHistory::makeNew(nss,
- collectionAndChunks.uuid,
- KeyPattern(collectionAndChunks.shardKeyPattern),
- std::move(defaultCollator),
- collectionAndChunks.shardKeyIsUnique,
- collectionAndChunks.epoch,
- std::move(collectionAndChunks.reshardingFields),
- collectionAndChunks.changedChunks));
- }();
-
- std::set<ShardId> shardIds;
- chunkManager->getAllShardIds(&shardIds);
- for (const auto& shardId : shardIds) {
- uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId));
- }
- return chunkManager;
-}
+const int kCollectionCacheSize = 10000;
} // namespace
@@ -155,7 +83,8 @@ CatalogCache::CatalogCache(ServiceContext* const service, CatalogCacheLoader& ca
options.maxThreads = 6;
return options;
}())),
- _databaseCache(service, *_executor, _cacheLoader) {
+ _databaseCache(service, *_executor, _cacheLoader),
+ _collectionCache(service, *_executor, _cacheLoader) {
_executor->startup();
}
@@ -190,111 +119,89 @@ StatusWith<CachedDatabaseInfo> CatalogCache::getDatabase(OperationContext* opCtx
}
}
-StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfo(OperationContext* opCtx,
- const NamespaceString& nss) {
- return _getCollectionRoutingInfo(opCtx, nss).statusWithInfo;
-}
-
-CatalogCache::RefreshResult CatalogCache::_getCollectionRoutingInfoWithForcedRefresh(
- OperationContext* opCtx, const NamespaceString& nss) {
- setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true);
- _createOrGetCollectionEntryAndMarkAsNeedsRefresh(nss);
- return _getCollectionRoutingInfo(opCtx, nss);
-}
-
-CatalogCache::RefreshResult CatalogCache::_getCollectionRoutingInfo(OperationContext* opCtx,
- const NamespaceString& nss) {
- return _getCollectionRoutingInfoAt(opCtx, nss, boost::none);
-}
-
-
-StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfoAt(OperationContext* opCtx,
- const NamespaceString& nss,
- Timestamp atClusterTime) {
- return _getCollectionRoutingInfoAt(opCtx, nss, atClusterTime).statusWithInfo;
-}
-
-CatalogCache::RefreshResult CatalogCache::_getCollectionRoutingInfoAt(
+StatusWith<ChunkManager> CatalogCache::_getCollectionRoutingInfoAt(
OperationContext* opCtx, const NamespaceString& nss, boost::optional<Timestamp> atClusterTime) {
- invariant(!opCtx->lockState() || !opCtx->lockState()->isLocked(),
- "Do not hold a lock while refreshing the catalog cache. Doing so would potentially "
- "hold the lock during a network call, and can lead to a deadlock as described in "
- "SERVER-37398.");
- // This default value can cause a single unnecessary extra refresh if this thread did do the
- // refresh but the refresh failed, or if the database or collection was not found, but only if
- // the caller is getCollectionRoutingInfoWithRefresh with the parameter
- // forceRefreshFromThisThread set to true
- RefreshAction refreshActionTaken(RefreshAction::kDidNotPerformRefresh);
- while (true) {
+ invariant(
+ !opCtx->lockState() || !opCtx->lockState()->isLocked(),
+ "Do not hold a lock while refreshing the catalog cache. Doing so would potentially hold "
+ "the lock during a network call, and can lead to a deadlock as described in SERVER-37398.");
+
+ try {
const auto swDbInfo = getDatabase(opCtx, nss.db());
+
if (!swDbInfo.isOK()) {
if (swDbInfo == ErrorCodes::NamespaceNotFound) {
LOGV2_FOR_CATALOG_REFRESH(
- 4947102,
+ 4947103,
2,
"Invalidating cached collection entry because its database has been dropped",
"namespace"_attr = nss);
- purgeCollection(nss);
+ invalidateCollectionEntry_LINEARIZABLE(nss);
}
- return {swDbInfo.getStatus(), refreshActionTaken};
+ return swDbInfo.getStatus();
}
const auto dbInfo = std::move(swDbInfo.getValue());
- stdx::unique_lock<Latch> ul(_mutex);
-
- auto collEntry = _createOrGetCollectionEntry(ul, nss);
+ const auto cacheConsistency = gEnableFinerGrainedCatalogCacheRefresh &&
+ !operationShouldBlockBehindCatalogCacheRefresh(opCtx)
+ ? CacheCausalConsistency::kLatestCached
+ : CacheCausalConsistency::kLatestKnown;
- if (collEntry->needsRefresh &&
- (!gEnableFinerGrainedCatalogCacheRefresh || collEntry->epochHasChanged ||
- operationShouldBlockBehindCatalogCacheRefresh(opCtx))) {
+ auto collEntryFuture = _collectionCache.acquireAsync(nss, cacheConsistency);
- operationBlockedBehindCatalogCacheRefresh(opCtx) = true;
+        // If the entry is in the cache return immediately.
+ if (collEntryFuture.isReady()) {
+ setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, false);
+ return ChunkManager(dbInfo.primaryId(),
+ dbInfo.databaseVersion(),
+ collEntryFuture.get(opCtx),
+ atClusterTime);
+ }
- auto refreshNotification = collEntry->refreshCompletionNotification;
- if (!refreshNotification) {
- refreshNotification = (collEntry->refreshCompletionNotification =
- std::make_shared<Notification<Status>>());
- _scheduleCollectionRefresh(ul, opCtx->getServiceContext(), collEntry, nss, 1);
- refreshActionTaken = RefreshAction::kPerformedRefresh;
- }
+ operationBlockedBehindCatalogCacheRefresh(opCtx) = true;
- // Wait on the notification outside of the mutex
- ul.unlock();
+ size_t acquireTries = 0;
+ Timer t;
- auto refreshStatus = [&]() {
- Timer t;
- ON_BLOCK_EXIT([&] { _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); });
+ while (true) {
+ try {
+ auto collEntry = collEntryFuture.get(opCtx);
+ _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros());
- try {
- const Milliseconds kReportingInterval{250};
- while (!refreshNotification->waitFor(opCtx, kReportingInterval)) {
- _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros());
- t.reset();
- }
+ setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, false);
- return refreshNotification->get(opCtx);
- } catch (const DBException& ex) {
+ return ChunkManager(dbInfo.primaryId(),
+ dbInfo.databaseVersion(),
+ std::move(collEntry),
+ atClusterTime);
+ } catch (ExceptionFor<ErrorCodes::ConflictingOperationInProgress>& ex) {
+ _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros());
+ acquireTries++;
+ if (acquireTries == kMaxInconsistentRoutingInfoRefreshAttempts) {
return ex.toStatus();
}
- }();
-
- if (!refreshStatus.isOK()) {
- return {refreshStatus, refreshActionTaken};
}
- // Once the refresh is complete, loop around to get the latest value
- continue;
+ collEntryFuture = _collectionCache.acquireAsync(nss, cacheConsistency);
+ t.reset();
}
-
- return {ChunkManager(dbInfo.primaryId(),
- dbInfo.databaseVersion(),
- collEntry->routingInfo,
- atClusterTime),
- refreshActionTaken};
+ } catch (const DBException& ex) {
+ return ex.toStatus();
}
}
+StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfo(OperationContext* opCtx,
+ const NamespaceString& nss) {
+ return _getCollectionRoutingInfoAt(opCtx, nss, boost::none);
+}
+
+StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfoAt(OperationContext* opCtx,
+ const NamespaceString& nss,
+ Timestamp atClusterTime) {
+ return _getCollectionRoutingInfoAt(opCtx, nss, atClusterTime);
+}
+
StatusWith<CachedDatabaseInfo> CatalogCache::getDatabaseWithRefresh(OperationContext* opCtx,
StringData dbName) {
// TODO SERVER-49724: Make ReadThroughCache support StringData keys
@@ -303,32 +210,20 @@ StatusWith<CachedDatabaseInfo> CatalogCache::getDatabaseWithRefresh(OperationCon
}
StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfoWithRefresh(
- OperationContext* opCtx, const NamespaceString& nss, bool forceRefreshFromThisThread) {
- auto refreshResult = _getCollectionRoutingInfoWithForcedRefresh(opCtx, nss);
- // We want to ensure that we don't join an in-progress refresh because that
- // could violate causal consistency for this client. We don't need to actually perform the
- // refresh ourselves but we do need the refresh to begin *after* this function is
- // called, so calling it twice is enough regardless of what happens the
- // second time. See SERVER-33954 for reasoning.
- if (forceRefreshFromThisThread &&
- refreshResult.actionTaken == RefreshAction::kDidNotPerformRefresh) {
- refreshResult = _getCollectionRoutingInfoWithForcedRefresh(opCtx, nss);
- }
- return refreshResult.statusWithInfo;
+ OperationContext* opCtx, const NamespaceString& nss) {
+ _collectionCache.invalidate(nss);
+ setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true);
+ return getCollectionRoutingInfo(opCtx, nss);
}
StatusWith<ChunkManager> CatalogCache::getShardedCollectionRoutingInfoWithRefresh(
OperationContext* opCtx, const NamespaceString& nss) {
- auto swRoutingInfo = _getCollectionRoutingInfoWithForcedRefresh(opCtx, nss).statusWithInfo;
- if (!swRoutingInfo.isOK())
- return swRoutingInfo;
-
- auto cri(std::move(swRoutingInfo.getValue()));
- if (!cri.isSharded())
+ auto routingInfoStatus = getCollectionRoutingInfoWithRefresh(opCtx, nss);
+ if (routingInfoStatus.isOK() && !routingInfoStatus.getValue().isSharded()) {
return {ErrorCodes::NamespaceNotSharded,
str::stream() << "Collection " << nss.ns() << " is not sharded."};
-
- return cri;
+ }
+ return routingInfoStatus;
}
void CatalogCache::onStaleDatabaseVersion(const StringData dbName,
@@ -350,48 +245,49 @@ void CatalogCache::setOperationShouldBlockBehindCatalogCacheRefresh(OperationCon
if (gEnableFinerGrainedCatalogCacheRefresh) {
operationShouldBlockBehindCatalogCacheRefresh(opCtx) = shouldBlock;
}
-};
+}
void CatalogCache::invalidateShardOrEntireCollectionEntryForShardedCollection(
- OperationContext* opCtx,
const NamespaceString& nss,
- boost::optional<ChunkVersion> wantedVersion,
- const ChunkVersion& receivedVersion,
- ShardId shardId) {
- if (shardVersionsHaveMatchingEpoch(wantedVersion, receivedVersion)) {
- _createOrGetCollectionEntryAndMarkShardStale(nss, shardId);
- } else {
- _createOrGetCollectionEntryAndMarkEpochStale(nss);
+ const boost::optional<ChunkVersion>& wantedVersion,
+ const ShardId& shardId) {
+ _stats.countStaleConfigErrors.addAndFetch(1);
+
+ auto collectionEntry = _collectionCache.peekLatestCached(nss);
+ if (collectionEntry && collectionEntry->optRt) {
+ collectionEntry->optRt->setShardStale(shardId);
}
-};
-void CatalogCache::onEpochChange(const NamespaceString& nss) {
- _createOrGetCollectionEntryAndMarkEpochStale(nss);
-};
+ if (wantedVersion) {
+ _collectionCache.advanceTimeInStore(
+ nss, ComparableChunkVersion::makeComparableChunkVersion(*wantedVersion));
+ } else {
+ _collectionCache.advanceTimeInStore(
+ nss, ComparableChunkVersion::makeComparableChunkVersionForForcedRefresh());
+ }
+}
void CatalogCache::checkEpochOrThrow(const NamespaceString& nss,
- ChunkVersion targetCollectionVersion,
- const ShardId& shardId) const {
- stdx::lock_guard<Latch> lg(_mutex);
- const auto itDb = _collectionsByDb.find(nss.db());
+ const ChunkVersion& targetCollectionVersion,
+ const ShardId& shardId) {
uassert(StaleConfigInfo(nss, targetCollectionVersion, boost::none, shardId),
str::stream() << "could not act as router for " << nss.ns()
<< ", no entry for database " << nss.db(),
- itDb != _collectionsByDb.end());
+ _databaseCache.peekLatestCached(nss.db().toString()));
- auto itColl = itDb->second.find(nss.ns());
+ auto collectionValueHandle = _collectionCache.peekLatestCached(nss);
uassert(StaleConfigInfo(nss, targetCollectionVersion, boost::none, shardId),
str::stream() << "could not act as router for " << nss.ns()
<< ", no entry for collection.",
- itColl != itDb->second.end());
+ collectionValueHandle);
uassert(StaleConfigInfo(nss, targetCollectionVersion, boost::none, shardId),
str::stream() << "could not act as router for " << nss.ns() << ", wanted "
<< targetCollectionVersion.toString()
<< ", but found the collection was unsharded",
- itColl->second->routingInfo);
+ collectionValueHandle->optRt);
- auto foundVersion = itColl->second->routingInfo->getVersion();
+ auto foundVersion = collectionValueHandle->optRt->getVersion();
uassert(StaleConfigInfo(nss, targetCollectionVersion, foundVersion, shardId),
str::stream() << "could not act as router for " << nss.ns() << ", wanted "
<< targetCollectionVersion.toString() << ", but found "
@@ -399,11 +295,6 @@ void CatalogCache::checkEpochOrThrow(const NamespaceString& nss,
foundVersion.epoch() == targetCollectionVersion.epoch());
}
-void CatalogCache::invalidateShardForShardedCollection(const NamespaceString& nss,
- const ShardId& staleShardId) {
- _createOrGetCollectionEntryAndMarkShardStale(nss, staleShardId);
-}
-
void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
LOGV2_DEBUG(4997600,
1,
@@ -413,32 +304,24 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
_databaseCache.invalidateCachedValueIf(
[&](const DatabaseType& dbt) { return dbt.getPrimary() == shardId; });
- stdx::lock_guard<Latch> lg(_mutex);
-
// Invalidate collections which contain data on this shard.
- for (const auto& [db, collInfoMap] : _collectionsByDb) {
- for (const auto& [collNs, collRoutingInfoEntry] : collInfoMap) {
- if (!collRoutingInfoEntry->needsRefresh && collRoutingInfoEntry->routingInfo) {
- // The set of shards on which this collection contains chunks.
- std::set<ShardId> shardsOwningDataForCollection;
- collRoutingInfoEntry->routingInfo->getAllShardIds(&shardsOwningDataForCollection);
-
- if (shardsOwningDataForCollection.find(shardId) !=
- shardsOwningDataForCollection.end()) {
- LOGV2_DEBUG(22647,
- 3,
- "Invalidating cached collection {namespace} that has data "
- "on shard {shardId}",
- "Invalidating cached collection",
- "namespace"_attr = collNs,
- "shardId"_attr = shardId);
-
- collRoutingInfoEntry->needsRefresh = true;
- collRoutingInfoEntry->routingInfo->setShardStale(shardId);
- }
- }
- }
- }
+ _collectionCache.invalidateCachedValueIf([&](const OptionalRoutingTableHistory& ort) {
+ if (!ort.optRt)
+ return false;
+ const auto& rt = *ort.optRt;
+
+ std::set<ShardId> shardIds;
+ rt.getAllShardIds(&shardIds);
+
+ LOGV2_DEBUG(22647,
+ 3,
+ "Invalidating cached collection {namespace} that has data "
+ "on shard {shardId}",
+ "Invalidating cached collection",
+ "namespace"_attr = rt.nss(),
+ "shardId"_attr = shardId);
+ return shardIds.find(shardId) != shardIds.end();
+ });
LOGV2(22648,
"Finished invalidating databases and collections with data on shard: {shardId}",
@@ -446,46 +329,28 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
"shardId"_attr = shardId);
}
-void CatalogCache::purgeCollection(const NamespaceString& nss) {
- stdx::lock_guard<Latch> lg(_mutex);
-
- auto itDb = _collectionsByDb.find(nss.db());
- if (itDb == _collectionsByDb.end()) {
- return;
- }
-
- itDb->second.erase(nss.ns());
-}
-
void CatalogCache::purgeDatabase(StringData dbName) {
_databaseCache.invalidate(dbName.toString());
- stdx::lock_guard<Latch> lg(_mutex);
- _collectionsByDb.erase(dbName);
+ _collectionCache.invalidateKeyIf(
+ [&](const NamespaceString& nss) { return nss.db() == dbName; });
}
void CatalogCache::purgeAllDatabases() {
_databaseCache.invalidateAll();
- stdx::lock_guard<Latch> lg(_mutex);
- _collectionsByDb.clear();
+ _collectionCache.invalidateAll();
}
void CatalogCache::report(BSONObjBuilder* builder) const {
BSONObjBuilder cacheStatsBuilder(builder->subobjStart("catalogCache"));
- size_t numDatabaseEntries;
- size_t numCollectionEntries{0};
- {
- numDatabaseEntries = _databaseCache.getCacheInfo().size();
- stdx::lock_guard<Latch> ul(_mutex);
- for (const auto& entry : _collectionsByDb) {
- numCollectionEntries += entry.second.size();
- }
- }
+ const size_t numDatabaseEntries = _databaseCache.getCacheInfo().size();
+ const size_t numCollectionEntries = _collectionCache.getCacheInfo().size();
cacheStatsBuilder.append("numDatabaseEntries", static_cast<long long>(numDatabaseEntries));
cacheStatsBuilder.append("numCollectionEntries", static_cast<long long>(numCollectionEntries));
_stats.report(&cacheStatsBuilder);
+ _collectionCache.reportStats(&cacheStatsBuilder);
}
void CatalogCache::checkAndRecordOperationBlockedByRefresh(OperationContext* opCtx,
@@ -519,188 +384,8 @@ void CatalogCache::checkAndRecordOperationBlockedByRefresh(OperationContext* opC
}
}
-void CatalogCache::_scheduleCollectionRefresh(WithLock lk,
- ServiceContext* service,
- std::shared_ptr<CollectionRoutingInfoEntry> collEntry,
- NamespaceString const& nss,
- int refreshAttempt) {
- const auto existingRoutingInfo = collEntry->routingInfo;
-
- // If we have an existing chunk manager, the refresh is considered "incremental", regardless of
- // how many chunks are in the differential
- const bool isIncremental(existingRoutingInfo);
-
- if (isIncremental) {
- _stats.numActiveIncrementalRefreshes.addAndFetch(1);
- _stats.countIncrementalRefreshesStarted.addAndFetch(1);
- } else {
- _stats.numActiveFullRefreshes.addAndFetch(1);
- _stats.countFullRefreshesStarted.addAndFetch(1);
- }
-
- // Invoked when one iteration of getChunksSince has completed, whether with success or error
- const auto onRefreshCompleted = [this, t = Timer(), nss, isIncremental, existingRoutingInfo](
- const Status& status,
- RoutingTableHistory* routingInfoAfterRefresh) {
- if (isIncremental) {
- _stats.numActiveIncrementalRefreshes.subtractAndFetch(1);
- } else {
- _stats.numActiveFullRefreshes.subtractAndFetch(1);
- }
-
- if (!status.isOK()) {
- _stats.countFailedRefreshes.addAndFetch(1);
-
- LOGV2_OPTIONS(24103,
- {logv2::LogComponent::kShardingCatalogRefresh},
- "Error refreshing cached collection {namespace}; Took {duration} and "
- "failed due to {error}",
- "Error refreshing cached collection",
- "namespace"_attr = nss,
- "duration"_attr = Milliseconds(t.millis()),
- "error"_attr = redact(status));
- } else if (routingInfoAfterRefresh) {
- const int logLevel =
- (!existingRoutingInfo ||
- (existingRoutingInfo &&
- routingInfoAfterRefresh->getVersion() != existingRoutingInfo->getVersion()))
- ? 0
- : 1;
- LOGV2_FOR_CATALOG_REFRESH(
- 24104,
- logLevel,
- "Refreshed cached collection {namespace} to version {newVersion} from version "
- "{oldVersion}. Took {duration}",
- "Refreshed cached collection",
- "namespace"_attr = nss,
- "newVersion"_attr = routingInfoAfterRefresh->getVersion(),
- "oldVersion"_attr =
- (existingRoutingInfo
- ? (" from version " + existingRoutingInfo->getVersion().toString())
- : ""),
- "duration"_attr = Milliseconds(t.millis()));
- } else {
- LOGV2_OPTIONS(24105,
- {logv2::LogComponent::kShardingCatalogRefresh},
- "Collection {namespace} was found to be unsharded after refresh that "
- "took {duration}",
- "Collection has found to be unsharded after refresh",
- "namespace"_attr = nss,
- "duration"_attr = Milliseconds(t.millis()));
- }
- };
-
- // Invoked if getChunksSince resulted in error or threw an exception
- const auto onRefreshFailed =
- [ this, service, collEntry, nss, refreshAttempt,
- onRefreshCompleted ](WithLock lk, const Status& status) noexcept {
- onRefreshCompleted(status, nullptr);
-
- // It is possible that the metadata is being changed concurrently, so retry the
- // refresh again
- if (status == ErrorCodes::ConflictingOperationInProgress &&
- refreshAttempt < kMaxInconsistentRoutingInfoRefreshAttempts) {
- _scheduleCollectionRefresh(lk, service, collEntry, nss, refreshAttempt + 1);
- } else {
- // Leave needsRefresh to true so that any subsequent get attempts will kick off
- // another round of refresh
- collEntry->refreshCompletionNotification->set(status);
- collEntry->refreshCompletionNotification = nullptr;
- }
- };
-
- const auto refreshCallback =
- [ this, service, collEntry, nss, existingRoutingInfo, onRefreshFailed, onRefreshCompleted ](
- StatusWith<CatalogCacheLoader::CollectionAndChangedChunks> swCollAndChunks) noexcept {
-
- ThreadClient tc("CatalogCache::collectionRefresh", service);
- auto opCtx = tc->makeOperationContext();
-
- std::shared_ptr<RoutingTableHistory> newRoutingInfo;
- try {
- newRoutingInfo = refreshCollectionRoutingInfo(
- opCtx.get(), nss, std::move(existingRoutingInfo), std::move(swCollAndChunks));
-
- onRefreshCompleted(Status::OK(), newRoutingInfo.get());
- } catch (const DBException& ex) {
- stdx::lock_guard<Latch> lg(_mutex);
- onRefreshFailed(lg, ex.toStatus());
- return;
- }
-
- stdx::lock_guard<Latch> lg(_mutex);
-
- collEntry->epochHasChanged = false;
- collEntry->needsRefresh = false;
- collEntry->refreshCompletionNotification->set(Status::OK());
- collEntry->refreshCompletionNotification = nullptr;
-
- setOperationShouldBlockBehindCatalogCacheRefresh(opCtx.get(), false);
-
- // TODO(SERVER-49876): remove clang-tidy NOLINT comments.
- if (existingRoutingInfo && newRoutingInfo && // NOLINT(bugprone-use-after-move)
- existingRoutingInfo->getVersion() == // NOLINT(bugprone-use-after-move)
- newRoutingInfo->getVersion()) { // NOLINT(bugprone-use-after-move)
- // If the routingInfo hasn't changed, we need to manually reset stale shards.
- newRoutingInfo->setAllShardsRefreshed();
- }
-
- collEntry->routingInfo = std::move(newRoutingInfo);
- };
-
- const ChunkVersion startingCollectionVersion =
- (existingRoutingInfo ? existingRoutingInfo->getVersion() : ChunkVersion::UNSHARDED());
-
- LOGV2_FOR_CATALOG_REFRESH(
- 24106,
- 1,
- "Refreshing cached collection {namespace} with version {currentCollectionVersion}",
- "namespace"_attr = nss,
- "currentCollectionVersion"_attr = startingCollectionVersion);
-
- _cacheLoader.getChunksSince(nss, startingCollectionVersion)
- .thenRunOn(_executor)
- .getAsync(refreshCallback);
-
- // The routing info for this collection shouldn't change, as other threads may try to use the
- // CatalogCache while we are waiting for the refresh to complete.
- invariant(collEntry->routingInfo.get() == existingRoutingInfo.get());
-}
-
-void CatalogCache::_createOrGetCollectionEntryAndMarkEpochStale(const NamespaceString& nss) {
- stdx::lock_guard<Latch> lg(_mutex);
- auto collRoutingInfoEntry = _createOrGetCollectionEntry(lg, nss);
- collRoutingInfoEntry->needsRefresh = true;
- collRoutingInfoEntry->epochHasChanged = true;
-}
-
-void CatalogCache::_createOrGetCollectionEntryAndMarkShardStale(const NamespaceString& nss,
- const ShardId& staleShardId) {
- stdx::lock_guard<Latch> lg(_mutex);
- auto collRoutingInfoEntry = _createOrGetCollectionEntry(lg, nss);
- collRoutingInfoEntry->needsRefresh = true;
- if (collRoutingInfoEntry->routingInfo) {
- collRoutingInfoEntry->routingInfo->setShardStale(staleShardId);
- }
-}
-
-void CatalogCache::_createOrGetCollectionEntryAndMarkAsNeedsRefresh(const NamespaceString& nss) {
- stdx::lock_guard<Latch> lg(_mutex);
- auto collRoutingInfoEntry = _createOrGetCollectionEntry(lg, nss);
- collRoutingInfoEntry->needsRefresh = true;
-}
-
-std::shared_ptr<CatalogCache::CollectionRoutingInfoEntry> CatalogCache::_createOrGetCollectionEntry(
- WithLock wl, const NamespaceString& nss) {
- auto& collectionsForDb = _collectionsByDb[nss.db()];
- if (!collectionsForDb.contains(nss.ns())) {
- // TODO SERVER-46199: ensure collections cache size is capped
- // currently no routine except for dropDatabase is removing cached collection entries and
- // the cache for a specific DB can grow indefinitely.
- collectionsForDb[nss.ns()] = std::make_shared<CollectionRoutingInfoEntry>();
- }
-
- return collectionsForDb[nss.ns()];
+void CatalogCache::invalidateCollectionEntry_LINEARIZABLE(const NamespaceString& nss) {
+ _collectionCache.invalidate(nss);
}
void CatalogCache::Stats::report(BSONObjBuilder* builder) const {
@@ -708,14 +393,6 @@ void CatalogCache::Stats::report(BSONObjBuilder* builder) const {
builder->append("totalRefreshWaitTimeMicros", totalRefreshWaitTimeMicros.load());
- builder->append("numActiveIncrementalRefreshes", numActiveIncrementalRefreshes.load());
- builder->append("countIncrementalRefreshesStarted", countIncrementalRefreshesStarted.load());
-
- builder->append("numActiveFullRefreshes", numActiveFullRefreshes.load());
- builder->append("countFullRefreshesStarted", countFullRefreshesStarted.load());
-
- builder->append("countFailedRefreshes", countFailedRefreshes.load());
-
if (isMongos()) {
BSONObjBuilder operationsBlockedByRefreshBuilder(
builder->subobjStart("operationsBlockedByRefresh"));
@@ -756,7 +433,6 @@ CatalogCache::DatabaseCache::LookupResult CatalogCache::DatabaseCache::_lookupDa
OperationContext* opCtx,
const std::string& dbName,
const ComparableDatabaseVersion& previousDbVersion) {
-
// TODO (SERVER-34164): Track and increment stats for database refreshes
LOGV2_FOR_CATALOG_REFRESH(24102, 2, "Refreshing cached database entry", "db"_attr = dbName);
@@ -788,73 +464,199 @@ CatalogCache::DatabaseCache::LookupResult CatalogCache::DatabaseCache::_lookupDa
}
}
-AtomicWord<uint64_t> ComparableDatabaseVersion::_localSequenceNumSource{1ULL};
+CatalogCache::CollectionCache::CollectionCache(ServiceContext* service,
+ ThreadPoolInterface& threadPool,
+ CatalogCacheLoader& catalogCacheLoader)
+ : ReadThroughCache(_mutex,
+ service,
+ threadPool,
+ [this](OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ValueHandle& collectionHistory,
+ const ComparableChunkVersion& previousChunkVersion) {
+ return _lookupCollection(
+ opCtx, nss, collectionHistory, previousChunkVersion);
+ },
+ kCollectionCacheSize),
+ _catalogCacheLoader(catalogCacheLoader) {}
-ComparableDatabaseVersion ComparableDatabaseVersion::makeComparableDatabaseVersion(
- const DatabaseVersion& version) {
- return ComparableDatabaseVersion(version, _localSequenceNumSource.fetchAndAdd(1));
+void CatalogCache::CollectionCache::reportStats(BSONObjBuilder* builder) const {
+ _stats.report(builder);
}
-const DatabaseVersion& ComparableDatabaseVersion::getVersion() const {
- return _dbVersion;
+void CatalogCache::CollectionCache::_updateRefreshesStats(const bool isIncremental,
+ const bool add) {
+ if (add) {
+ if (isIncremental) {
+ _stats.numActiveIncrementalRefreshes.addAndFetch(1);
+ _stats.countIncrementalRefreshesStarted.addAndFetch(1);
+ } else {
+ _stats.numActiveFullRefreshes.addAndFetch(1);
+ _stats.countFullRefreshesStarted.addAndFetch(1);
+ }
+ } else {
+ if (isIncremental) {
+ _stats.numActiveIncrementalRefreshes.subtractAndFetch(1);
+ } else {
+ _stats.numActiveFullRefreshes.subtractAndFetch(1);
+ }
+ }
}
-uint64_t ComparableDatabaseVersion::getLocalSequenceNum() const {
- return _localSequenceNum;
-}
+void CatalogCache::CollectionCache::Stats::report(BSONObjBuilder* builder) const {
+ builder->append("numActiveIncrementalRefreshes", numActiveIncrementalRefreshes.load());
+ builder->append("countIncrementalRefreshesStarted", countIncrementalRefreshesStarted.load());
-BSONObj ComparableDatabaseVersion::toBSON() const {
- BSONObjBuilder builder;
- _dbVersion.getUuid().appendToBuilder(&builder, "uuid");
- builder.append("lastMod", _dbVersion.getLastMod());
- builder.append("localSequenceNum", std::to_string(_localSequenceNum));
- return builder.obj();
-}
+ builder->append("numActiveFullRefreshes", numActiveFullRefreshes.load());
+ builder->append("countFullRefreshesStarted", countFullRefreshesStarted.load());
-std::string ComparableDatabaseVersion::toString() const {
- return toBSON().toString();
+ builder->append("countFailedRefreshes", countFailedRefreshes.load());
}
+CatalogCache::CollectionCache::LookupResult CatalogCache::CollectionCache::_lookupCollection(
+ OperationContext* opCtx,
+ const NamespaceString& nss,
+ const RoutingTableHistoryValueHandle& existingHistory,
+ const ComparableChunkVersion& previousVersion) {
+ const bool isIncremental(existingHistory && existingHistory->optRt);
+ _updateRefreshesStats(isIncremental, true);
-CachedDatabaseInfo::CachedDatabaseInfo(DatabaseType dbt, std::shared_ptr<Shard> primaryShard)
- : _dbt(std::move(dbt)), _primaryShard(std::move(primaryShard)) {}
+ Timer t{};
+ try {
+ auto lookupVersion =
+ isIncremental ? existingHistory->optRt->getVersion() : ChunkVersion::UNSHARDED();
-const ShardId& CachedDatabaseInfo::primaryId() const {
- return _dbt.getPrimary();
+ LOGV2_FOR_CATALOG_REFRESH(4619900,
+ 1,
+ "Refreshing cached collection",
+ "namespace"_attr = nss,
+ "currentVersion"_attr = previousVersion);
+
+ auto collectionAndChunks = _catalogCacheLoader.getChunksSince(nss, lookupVersion).get();
+
+ auto newRoutingHistory = [&] {
+ // If we have routing info already and it's for the same collection epoch, we're
+ // updating. Otherwise, we're making a whole new routing table.
+ if (isIncremental &&
+ existingHistory->optRt->getVersion().epoch() == collectionAndChunks.epoch) {
+ return existingHistory->optRt->makeUpdated(collectionAndChunks.reshardingFields,
+ collectionAndChunks.changedChunks);
+ }
+
+ auto defaultCollator = [&]() -> std::unique_ptr<CollatorInterface> {
+ if (!collectionAndChunks.defaultCollation.isEmpty()) {
+ // The collation should have been validated upon collection creation
+ return uassertStatusOK(
+ CollatorFactoryInterface::get(opCtx->getServiceContext())
+ ->makeFromBSON(collectionAndChunks.defaultCollation));
+ }
+ return nullptr;
+ }();
+
+ return RoutingTableHistory::makeNew(nss,
+ collectionAndChunks.uuid,
+ KeyPattern(collectionAndChunks.shardKeyPattern),
+ std::move(defaultCollator),
+ collectionAndChunks.shardKeyIsUnique,
+ collectionAndChunks.epoch,
+ std::move(collectionAndChunks.reshardingFields),
+ collectionAndChunks.changedChunks);
+ }();
+
+ newRoutingHistory.setAllShardsRefreshed();
+
+ // Check that the shards all match with what is on the config server
+ std::set<ShardId> shardIds;
+ newRoutingHistory.getAllShardIds(&shardIds);
+ for (const auto& shardId : shardIds) {
+ uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId));
+ }
+
+ const auto newVersion =
+ ComparableChunkVersion::makeComparableChunkVersion(newRoutingHistory.getVersion());
+
+ LOGV2_FOR_CATALOG_REFRESH(4619901,
+ isIncremental || newVersion != previousVersion ? 0 : 1,
+ "Refreshed cached collection",
+ "namespace"_attr = nss,
+ "newVersion"_attr = newVersion,
+ "oldVersion"_attr = previousVersion,
+ "duration"_attr = Milliseconds(t.millis()));
+ _updateRefreshesStats(isIncremental, false);
+
+ return LookupResult(OptionalRoutingTableHistory(std::move(newRoutingHistory)), newVersion);
+ } catch (const DBException& ex) {
+ _stats.countFailedRefreshes.addAndFetch(1);
+ _updateRefreshesStats(isIncremental, false);
+
+ if (ex.code() == ErrorCodes::NamespaceNotFound) {
+ LOGV2_FOR_CATALOG_REFRESH(4619902,
+ 0,
+ "Collection has found to be unsharded after refresh",
+ "namespace"_attr = nss,
+ "duration"_attr = Milliseconds(t.millis()));
+
+ return LookupResult(
+ OptionalRoutingTableHistory(),
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED()));
+ }
+
+ LOGV2_FOR_CATALOG_REFRESH(4619903,
+ 0,
+ "Error refreshing cached collection",
+ "namespace"_attr = nss,
+ "duration"_attr = Milliseconds(t.millis()),
+ "error"_attr = redact(ex));
+
+ throw;
+ }
}
-bool CachedDatabaseInfo::shardingEnabled() const {
- return _dbt.getSharded();
+AtomicWord<uint64_t> ComparableDatabaseVersion::_uuidDisambiguatingSequenceNumSource{1ULL};
+
+ComparableDatabaseVersion ComparableDatabaseVersion::makeComparableDatabaseVersion(
+ const DatabaseVersion& version) {
+ return ComparableDatabaseVersion(version, _uuidDisambiguatingSequenceNumSource.fetchAndAdd(1));
}
-DatabaseVersion CachedDatabaseInfo::databaseVersion() const {
- return _dbt.getVersion();
+std::string ComparableDatabaseVersion::toString() const {
+ return str::stream() << (_dbVersion ? _dbVersion->toBSON().toString() : "NONE") << "|"
+ << _uuidDisambiguatingSequenceNum;
}
-AtomicWord<uint64_t> ComparableChunkVersion::_localSequenceNumSource{1ULL};
+bool ComparableDatabaseVersion::operator==(const ComparableDatabaseVersion& other) const {
+ if (!_dbVersion && !other._dbVersion)
+ return true; // Default constructed value
+ if (_dbVersion.is_initialized() != other._dbVersion.is_initialized())
+ return false; // One side is default constructed value
-ComparableChunkVersion ComparableChunkVersion::makeComparableChunkVersion(
- const ChunkVersion& version) {
- return ComparableChunkVersion(version, _localSequenceNumSource.fetchAndAdd(1));
+ return sameUuid(other) && (_dbVersion->getLastMod() == other._dbVersion->getLastMod());
}
-const ChunkVersion& ComparableChunkVersion::getVersion() const {
- return _chunkVersion;
+bool ComparableDatabaseVersion::operator<(const ComparableDatabaseVersion& other) const {
+ if (!_dbVersion && !other._dbVersion)
+ return false; // Default constructed value
+
+ if (_dbVersion && other._dbVersion && sameUuid(other)) {
+ return _dbVersion->getLastMod() < other._dbVersion->getLastMod();
+ } else {
+ return _uuidDisambiguatingSequenceNum < other._uuidDisambiguatingSequenceNum;
+ }
}
-uint64_t ComparableChunkVersion::getLocalSequenceNum() const {
- return _localSequenceNum;
+CachedDatabaseInfo::CachedDatabaseInfo(DatabaseType dbt, std::shared_ptr<Shard> primaryShard)
+ : _dbt(std::move(dbt)), _primaryShard(std::move(primaryShard)) {}
+
+const ShardId& CachedDatabaseInfo::primaryId() const {
+ return _dbt.getPrimary();
}
-BSONObj ComparableChunkVersion::toBSON() const {
- BSONObjBuilder builder;
- _chunkVersion.appendToCommand(&builder);
- builder.append("localSequenceNum", std::to_string(_localSequenceNum));
- return builder.obj();
+bool CachedDatabaseInfo::shardingEnabled() const {
+ return _dbt.getSharded();
}
-std::string ComparableChunkVersion::toString() const {
- return toBSON().toString();
+DatabaseVersion CachedDatabaseInfo::databaseVersion() const {
+ return _dbt.getVersion();
}
} // namespace mongo
diff --git a/src/mongo/s/catalog_cache.h b/src/mongo/s/catalog_cache.h
index a957189183a..796b9e10136 100644
--- a/src/mongo/s/catalog_cache.h
+++ b/src/mongo/s/catalog_cache.h
@@ -45,8 +45,6 @@
namespace mongo {
class BSONObjBuilder;
-class CachedDatabaseInfo;
-class OperationContext;
static constexpr int kMaxNumStaleVersionRetries = 10;
@@ -64,21 +62,21 @@ extern const OperationContext::Decoration<bool> operationShouldBlockBehindCatalo
* in fact is impossible to compare two different DatabaseVersion that have different UUIDs.
*
* This class wrap a DatabaseVersion object to make it always comparable by timestamping it with a
- * node-local sequence number (_dbVersionLocalSequence).
+ * node-local sequence number (_uuidDisambiguatingSequenceNum).
*
* This class class should go away once a cluster-wide comparable DatabaseVersion will be
* implemented.
*/
class ComparableDatabaseVersion {
public:
- /*
- * Create a ComparableDatabaseVersion that wraps the given DatabaseVersion.
- * Each object created through this method will have a local sequence number grater then the
+ /**
+ * Creates a ComparableDatabaseVersion that wraps the given DatabaseVersion.
+ * Each object created through this method will have a local sequence number greater than the
* previously created ones.
*/
static ComparableDatabaseVersion makeComparableDatabaseVersion(const DatabaseVersion& version);
- /*
+ /**
* Empty constructor needed by the ReadThroughCache.
*
* Instances created through this constructor will be always less then the ones created through
@@ -86,39 +84,28 @@ public:
*/
ComparableDatabaseVersion() = default;
- const DatabaseVersion& getVersion() const;
-
- uint64_t getLocalSequenceNum() const;
-
- BSONObj toBSON() const;
+ const DatabaseVersion& getVersion() const {
+ return *_dbVersion;
+ }
std::string toString() const;
- // Rerturns true if the two versions have the same UUID
bool sameUuid(const ComparableDatabaseVersion& other) const {
- return _dbVersion.getUuid() == other._dbVersion.getUuid();
+ return _dbVersion->getUuid() == other._dbVersion->getUuid();
}
- bool operator==(const ComparableDatabaseVersion& other) const {
- return sameUuid(other) && (_dbVersion.getLastMod() == other._dbVersion.getLastMod());
- }
+ bool operator==(const ComparableDatabaseVersion& other) const;
bool operator!=(const ComparableDatabaseVersion& other) const {
return !(*this == other);
}
- /*
- * In the case the two compared instances have different UUIDs the most recently created one
- * will be grater, otherwise the comparision will be driven by the lastMod field of the
- * underlying DatabaseVersion.
+ /**
+ * In case the two compared instances have different UUIDs, the most recently created one will
+ * be greater, otherwise the comparison will be driven by the lastMod field of the underlying
+ * DatabaseVersion.
*/
- bool operator<(const ComparableDatabaseVersion& other) const {
- if (sameUuid(other)) {
- return _dbVersion.getLastMod() < other._dbVersion.getLastMod();
- } else {
- return _localSequenceNum < other._localSequenceNum;
- }
- }
+ bool operator<(const ComparableDatabaseVersion& other) const;
bool operator>(const ComparableDatabaseVersion& other) const {
return other < *this;
@@ -133,92 +120,18 @@ public:
}
private:
- static AtomicWord<uint64_t> _localSequenceNumSource;
+ static AtomicWord<uint64_t> _uuidDisambiguatingSequenceNumSource;
+
+ ComparableDatabaseVersion(const DatabaseVersion& version,
+ uint64_t uuidDisambiguatingSequenceNum)
+ : _dbVersion(version), _uuidDisambiguatingSequenceNum(uuidDisambiguatingSequenceNum) {}
- ComparableDatabaseVersion(const DatabaseVersion& version, uint64_t localSequenceNum)
- : _dbVersion(version), _localSequenceNum(localSequenceNum) {}
+ boost::optional<DatabaseVersion> _dbVersion;
- DatabaseVersion _dbVersion;
// Locally incremented sequence number that allows to compare two database versions with
// different UUIDs. Each new comparableDatabaseVersion will have a greater sequence number then
// the ones created before.
- uint64_t _localSequenceNum{0};
-};
-
-/**
- * Constructed to be used exclusively by the CatalogCache as a vector clock (Time) to drive
- * CollectionCache's lookups.
- *
- * The ChunkVersion class contains an non comparable epoch, which makes impossible to compare two
- * ChunkVersions when their epochs's differ.
- *
- * This class wraps a ChunkVersion object with a node-local sequence number (_localSequenceNum) that
- * allows the comparision.
- *
- * This class should go away once a cluster-wide comparable ChunkVersion is implemented.
- */
-class ComparableChunkVersion {
-public:
- static ComparableChunkVersion makeComparableChunkVersion(const ChunkVersion& version);
-
- ComparableChunkVersion() = default;
-
- const ChunkVersion& getVersion() const;
-
- uint64_t getLocalSequenceNum() const;
-
- BSONObj toBSON() const;
-
- std::string toString() const;
-
- bool sameEpoch(const ComparableChunkVersion& other) const {
- return _chunkVersion.epoch() == other._chunkVersion.epoch();
- }
-
- bool operator==(const ComparableChunkVersion& other) const {
- return sameEpoch(other) &&
- (_chunkVersion.majorVersion() == other._chunkVersion.majorVersion() &&
- _chunkVersion.minorVersion() == other._chunkVersion.minorVersion());
- }
-
- bool operator!=(const ComparableChunkVersion& other) const {
- return !(*this == other);
- }
-
- bool operator<(const ComparableChunkVersion& other) const {
- if (sameEpoch(other)) {
- return _chunkVersion.majorVersion() < other._chunkVersion.majorVersion() ||
- (_chunkVersion.majorVersion() == other._chunkVersion.majorVersion() &&
- _chunkVersion.minorVersion() < other._chunkVersion.minorVersion());
- } else {
- return _localSequenceNum < other._localSequenceNum;
- }
- }
-
- bool operator>(const ComparableChunkVersion& other) const {
- return other < *this;
- }
-
- bool operator<=(const ComparableChunkVersion& other) const {
- return !(*this > other);
- }
-
- bool operator>=(const ComparableChunkVersion& other) const {
- return !(*this < other);
- }
-
-private:
- static AtomicWord<uint64_t> _localSequenceNumSource;
-
- ComparableChunkVersion(const ChunkVersion& version, uint64_t localSequenceNum)
- : _chunkVersion(version), _localSequenceNum(localSequenceNum) {}
-
- ChunkVersion _chunkVersion;
-
- // Locally incremented sequence number that allows to compare two colection versions with
- // different epochs. Each new comparableChunkVersion will have a greater sequence number than
- // the ones created before.
- uint64_t _localSequenceNum{0};
+ uint64_t _uuidDisambiguatingSequenceNum{0};
};
/**
@@ -298,21 +211,9 @@ public:
/**
* Same as getCollectionRoutingInfo above, but in addition causes the namespace to be refreshed.
- *
- * When forceRefreshFromThisThread is false, it's possible for this call to
- * join an ongoing refresh from another thread forceRefreshFromThisThread.
- * forceRefreshFromThisThread checks whether it joined another thread and
- * then forces it to try again, which is necessary in cases where calls to
- * getCollectionRoutingInfoWithRefresh must be causally consistent
- *
- * TODO: Remove this parameter in favor of using collection creation time +
- * collection version to decide when a refresh is necessary and provide
- * proper causal consistency
*/
- StatusWith<ChunkManager> getCollectionRoutingInfoWithRefresh(
- OperationContext* opCtx,
- const NamespaceString& nss,
- bool forceRefreshFromThisThread = false);
+ StatusWith<ChunkManager> getCollectionRoutingInfoWithRefresh(OperationContext* opCtx,
+ const NamespaceString& nss);
/**
* Same as getCollectionRoutingInfoWithRefresh above, but in addition returns a
@@ -333,11 +234,6 @@ public:
const boost::optional<DatabaseVersion>& wantedVersion);
/**
- * Gets whether this operation should block behind a catalog cache refresh.
- */
- static bool getOperationShouldBlockBehindCatalogCacheRefresh(OperationContext* opCtx);
-
- /**
* Sets whether this operation should block behind a catalog cache refresh.
*/
static void setOperationShouldBlockBehindCatalogCacheRefresh(OperationContext* opCtx,
@@ -349,18 +245,9 @@ public:
* requests to block on an upcoming catalog cache refresh.
*/
void invalidateShardOrEntireCollectionEntryForShardedCollection(
- OperationContext* opCtx,
const NamespaceString& nss,
- boost::optional<ChunkVersion> wantedVersion,
- const ChunkVersion& receivedVersion,
- ShardId shardId);
-
- /**
- * Non-blocking method that marks the current collection entry for the namespace as needing
- * refresh due to an epoch change. Will cause all further targetting attempts for this
- * namespace to block on a catalog cache refresh.
- */
- void onEpochChange(const NamespaceString& nss);
+ const boost::optional<ChunkVersion>& wantedVersion,
+ const ShardId& shardId);
/**
* Throws a StaleConfigException if this catalog cache does not have an entry for the given
@@ -370,16 +257,8 @@ public:
* version to throw a StaleConfigException.
*/
void checkEpochOrThrow(const NamespaceString& nss,
- ChunkVersion targetCollectionVersion,
- const ShardId& shardId) const;
-
- /**
- * Non-blocking method, which invalidates the shard for the routing table for the specified
- * namespace. If that shard is targetted in the future, getCollectionRoutingInfo will wait on a
- * refresh.
- */
- void invalidateShardForShardedCollection(const NamespaceString& nss,
- const ShardId& staleShardId);
+ const ChunkVersion& targetCollectionVersion,
+ const ShardId& shardId);
/**
* Non-blocking method, which invalidates all namespaces which contain data on the specified
@@ -388,12 +267,6 @@ public:
void invalidateEntriesThatReferenceShard(const ShardId& shardId);
/**
- * Non-blocking method, which removes the entire specified collection from the cache (resulting
- * in full refresh on subsequent access)
- */
- void purgeCollection(const NamespaceString& nss);
-
- /**
* Non-blocking method, which removes the entire specified database (including its collections)
* from the cache.
*/
@@ -416,35 +289,17 @@ public:
*/
void checkAndRecordOperationBlockedByRefresh(OperationContext* opCtx, mongo::LogicalOp opType);
+ /**
+ * Non-blocking method that marks the current collection entry for the namespace as needing
+ * refresh. Will cause all further targetting attempts to block on a catalog cache refresh,
+ * even if they do not require causal consistency.
+ */
+ void invalidateCollectionEntry_LINEARIZABLE(const NamespaceString& nss);
+
private:
// Make the cache entries friends so they can access the private classes below
friend class CachedDatabaseInfo;
- /**
- * Cache entry describing a collection.
- */
- struct CollectionRoutingInfoEntry {
- CollectionRoutingInfoEntry() = default;
- // Disable copy (and move) semantics
- CollectionRoutingInfoEntry(const CollectionRoutingInfoEntry&) = delete;
- CollectionRoutingInfoEntry& operator=(const CollectionRoutingInfoEntry&) = delete;
-
- // Specifies whether this cache entry needs a refresh (in which case routingInfo should not
- // be relied on) or it doesn't, in which case there should be a non-null routingInfo.
- bool needsRefresh{true};
-
- // Specifies whether the namespace has had an epoch change, which indicates that every
- // shard should block on an upcoming refresh.
- bool epochHasChanged{true};
-
- // Contains a notification to be waited on for the refresh to complete (only available if
- // needsRefresh is true)
- std::shared_ptr<Notification<Status>> refreshCompletionNotification;
-
- // Contains the cached routing information (only available if needsRefresh is false)
- std::shared_ptr<RoutingTableHistory> routingInfo;
- };
-
class DatabaseCache
: public ReadThroughCache<std::string, DatabaseType, ComparableDatabaseVersion> {
public:
@@ -461,88 +316,54 @@ private:
Mutex _mutex = MONGO_MAKE_LATCH("DatabaseCache::_mutex");
};
- /**
- * Non-blocking call which schedules an asynchronous refresh for the specified namespace. The
- * namespace must be in the 'needRefresh' state.
- */
- void _scheduleCollectionRefresh(WithLock,
- ServiceContext* service,
- std::shared_ptr<CollectionRoutingInfoEntry> collEntry,
- NamespaceString const& nss,
- int refreshAttempt);
+ class CollectionCache : public RoutingTableHistoryCache {
+ public:
+ CollectionCache(ServiceContext* service,
+ ThreadPoolInterface& threadPool,
+ CatalogCacheLoader& catalogCacheLoader);
- /**
- * Marks a collection entry as needing refresh. Will create the collection entry if one does
- * not exist. Also marks the epoch as changed, which will cause all further targetting requests
- * against this namespace to block upon a catalog cache refresh.
- */
- void _createOrGetCollectionEntryAndMarkEpochStale(const NamespaceString& nss);
+ void reportStats(BSONObjBuilder* builder) const;
- /**
- * Marks a collection entry as needing refresh. Will create the collection entry if one does
- * not exist. Will mark the given shard ID as stale, which will cause all further targetting
- * requests for the given shard for this namespace to block upon a catalog cache refresh.
- */
- void _createOrGetCollectionEntryAndMarkShardStale(const NamespaceString& nss,
- const ShardId& shardId);
+ private:
+ LookupResult _lookupCollection(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ValueHandle& collectionHistory,
+ const ComparableChunkVersion& previousChunkVersion);
- /**
- * Marks a collection entry as needing refresh. Will create the collection entry if one does
- * not exist.
- */
- void _createOrGetCollectionEntryAndMarkAsNeedsRefresh(const NamespaceString& nss);
+ CatalogCacheLoader& _catalogCacheLoader;
+ Mutex _mutex = MONGO_MAKE_LATCH("CollectionCache::_mutex");
- /**
- * Retrieves the collection entry for the given namespace, creating the entry if one does not
- * already exist.
- */
- std::shared_ptr<CollectionRoutingInfoEntry> _createOrGetCollectionEntry(
- WithLock wl, const NamespaceString& nss);
+ struct Stats {
+ // Tracks how many incremental refreshes are waiting to complete currently
+ AtomicWord<long long> numActiveIncrementalRefreshes{0};
- /**
- * Used as a flag to indicate whether or not this thread performed its own
- * refresh for certain helper functions
- *
- * kPerformedRefresh is used only when the calling thread performed the
- * refresh *itself*
- *
- * kDidNotPerformRefresh is used either when there was an error or when
- * this thread joined an ongoing refresh
- */
- enum class RefreshAction {
- kPerformedRefresh,
- kDidNotPerformRefresh,
- };
+ // Cumulative, always-increasing counter of how many incremental refreshes have been
+ // kicked off
+ AtomicWord<long long> countIncrementalRefreshesStarted{0};
- /**
- * Return type for helper functions performing refreshes so that they can
- * indicate both status and whether or not this thread performed its own
- * refresh
- */
- struct RefreshResult {
- // Status containing result of refresh
- StatusWith<ChunkManager> statusWithInfo;
- RefreshAction actionTaken;
- };
+ // Tracks how many full refreshes are waiting to complete currently
+ AtomicWord<long long> numActiveFullRefreshes{0};
- /**
- * Retrieves the collection routing info for this namespace after blocking on a catalog cache
- * refresh.
- */
- CatalogCache::RefreshResult _getCollectionRoutingInfoWithForcedRefresh(
- OperationContext* opctx, const NamespaceString& nss);
+ // Cumulative, always-increasing counter of how many full refreshes have been kicked off
+ AtomicWord<long long> countFullRefreshesStarted{0};
- /**
- * Helper function used when we need the refresh action taken (e.g. when we
- * want to force refresh)
- */
- CatalogCache::RefreshResult _getCollectionRoutingInfo(OperationContext* opCtx,
- const NamespaceString& nss);
+ // Cumulative, always-increasing counter of how many full or incremental refreshes
+ // failed for whatever reason
+ AtomicWord<long long> countFailedRefreshes{0};
- CatalogCache::RefreshResult _getCollectionRoutingInfoAt(
- OperationContext* opCtx,
- const NamespaceString& nss,
- boost::optional<Timestamp> atClusterTime);
+ /**
+ * Reports the accumulated statistics for serverStatus.
+ */
+ void report(BSONObjBuilder* builder) const;
+
+ } _stats;
+
+ void _updateRefreshesStats(const bool isIncremental, const bool add);
+ };
+
+ StatusWith<ChunkManager> _getCollectionRoutingInfoAt(OperationContext* opCtx,
+ const NamespaceString& nss,
+ boost::optional<Timestamp> atClusterTime);
// Interface from which chunks will be retrieved
CatalogCacheLoader& _cacheLoader;
@@ -557,23 +378,6 @@ private:
// combined
AtomicWord<long long> totalRefreshWaitTimeMicros{0};
- // Tracks how many incremental refreshes are waiting to complete currently
- AtomicWord<long long> numActiveIncrementalRefreshes{0};
-
- // Cumulative, always-increasing counter of how many incremental refreshes have been kicked
- // off
- AtomicWord<long long> countIncrementalRefreshesStarted{0};
-
- // Tracks how many full refreshes are waiting to complete currently
- AtomicWord<long long> numActiveFullRefreshes{0};
-
- // Cumulative, always-increasing counter of how many full refreshes have been kicked off
- AtomicWord<long long> countFullRefreshesStarted{0};
-
- // Cumulative, always-increasing counter of how many full or incremental refreshes failed
- // for whatever reason
- AtomicWord<long long> countFailedRefreshes{0};
-
// Cumulative, always-increasing counter of how many operations have been blocked by a
// catalog cache refresh. Broken down by operation type to match the operations tracked
// by the OpCounters class.
@@ -595,15 +399,9 @@ private:
std::shared_ptr<ThreadPool> _executor;
-
DatabaseCache _databaseCache;
- // Mutex to serialize access to the collection cache
- mutable Mutex _mutex = MONGO_MAKE_LATCH("CatalogCache::_mutex");
- // Map from full collection name to the routing info for that collection, grouped by database
- using CollectionInfoMap = StringMap<std::shared_ptr<CollectionRoutingInfoEntry>>;
- using CollectionsByDbMap = StringMap<CollectionInfoMap>;
- CollectionsByDbMap _collectionsByDb;
+ CollectionCache _collectionCache;
};
} // namespace mongo
diff --git a/src/mongo/s/catalog_cache_refresh_test.cpp b/src/mongo/s/catalog_cache_refresh_test.cpp
index 70b56845eb1..1e21135a15b 100644
--- a/src/mongo/s/catalog_cache_refresh_test.cpp
+++ b/src/mongo/s/catalog_cache_refresh_test.cpp
@@ -440,7 +440,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadMissingChunkWithLowestVersion) {
ASSERT_EQ(1, initialRoutingInfo.numChunks());
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
const auto incompleteChunks = [&]() {
ChunkVersion version(1, 0, epoch);
@@ -497,7 +497,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadMissingChunkWithHighestVersion) {
ASSERT_EQ(1, initialRoutingInfo.numChunks());
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
const auto incompleteChunks = [&]() {
ChunkVersion version(1, 0, epoch);
@@ -551,7 +551,7 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoad) {
auto initialRoutingInfo(makeChunkManager(kNss, shardKeyPattern, nullptr, true, {}));
ASSERT_EQ(1, initialRoutingInfo.numChunks());
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
ChunkVersion version = initialRoutingInfo.getVersion();
@@ -598,7 +598,7 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoadRecoveryAft
setupNShards(2);
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
ChunkVersion oldVersion = initialRoutingInfo.getVersion();
const OID newEpoch = OID::gen();
@@ -683,7 +683,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterCollectionEpochChange) {
setupNShards(2);
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
ChunkVersion newVersion(1, 0, OID::gen());
@@ -730,7 +730,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterSplit) {
ChunkVersion version = initialRoutingInfo.getVersion();
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
expectGetCollection(version.epoch(), shardKeyPattern);
@@ -776,7 +776,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterMoveWithReshardingFieldsAdde
ChunkVersion version = initialRoutingInfo.getVersion();
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
ChunkVersion expectedDestShardVersion;
@@ -824,7 +824,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterMoveLastChunkWithReshardingF
ChunkVersion version = initialRoutingInfo.getVersion();
- auto future = scheduleRoutingInfoForcedRefresh(kNss);
+ auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
// The collection type won't have resharding fields this time.
expectGetCollection(version.epoch(), shardKeyPattern);
diff --git a/src/mongo/s/catalog_cache_test.cpp b/src/mongo/s/catalog_cache_test.cpp
index fce177bdd4f..8fdb461aca3 100644
--- a/src/mongo/s/catalog_cache_test.cpp
+++ b/src/mongo/s/catalog_cache_test.cpp
@@ -35,6 +35,7 @@
#include "mongo/s/catalog_cache.h"
#include "mongo/s/catalog_cache_loader_mock.h"
#include "mongo/s/sharding_router_test_fixture.h"
+#include "mongo/s/stale_exception.h"
namespace mongo {
namespace {
@@ -72,7 +73,54 @@ protected:
_catalogCacheLoader->setDatabaseRefreshReturnValue(kErrorStatus);
}
+ void loadCollection(const ChunkVersion& version) {
+ const auto coll = makeCollectionType(version);
+ _catalogCacheLoader->setCollectionRefreshReturnValue(coll);
+ _catalogCacheLoader->setChunkRefreshReturnValue(makeChunks(version));
+
+ const auto swChunkManager =
+ _catalogCache->getCollectionRoutingInfo(operationContext(), coll.getNs());
+ ASSERT_OK(swChunkManager.getStatus());
+
+ // Reset the loader return values to avoid false positive results
+ _catalogCacheLoader->setCollectionRefreshReturnValue(kErrorStatus);
+ _catalogCacheLoader->setChunkRefreshReturnValue(kErrorStatus);
+ }
+
+ void loadUnshardedCollection(const NamespaceString& nss) {
+ _catalogCacheLoader->setCollectionRefreshReturnValue(
+ Status(ErrorCodes::NamespaceNotFound, "collection not found"));
+
+ const auto swChunkManager =
+ _catalogCache->getCollectionRoutingInfo(operationContext(), nss);
+ ASSERT_OK(swChunkManager.getStatus());
+
+ // Reset the loader return value to avoid false positive results
+ _catalogCacheLoader->setCollectionRefreshReturnValue(kErrorStatus);
+ }
+
+ std::vector<ChunkType> makeChunks(ChunkVersion version) {
+ ChunkType chunk(kNss,
+ {kShardKeyPattern.getKeyPattern().globalMin(),
+ kShardKeyPattern.getKeyPattern().globalMax()},
+ version,
+ {"0"});
+ chunk.setName(OID::gen());
+ return {chunk};
+ }
+
+ CollectionType makeCollectionType(const ChunkVersion& collVersion) {
+ CollectionType coll;
+ coll.setNs(kNss);
+ coll.setEpoch(collVersion.epoch());
+ coll.setKeyPattern(kShardKeyPattern.getKeyPattern());
+ coll.setUnique(false);
+ return coll;
+ }
+
const NamespaceString kNss{"catalgoCacheTestDB.foo"};
+ const std::string kPattern{"_id"};
+ const ShardKeyPattern kShardKeyPattern{BSON(kPattern << 1)};
const int kDummyPort{12345};
const HostAndPort kConfigHostAndPort{"DummyConfig", kDummyPort};
const std::vector<ShardId> kShards{{"0"}, {"1"}};
@@ -129,5 +177,86 @@ TEST_F(CatalogCacheTest, InvalidateSingleDbOnShardRemoval) {
ASSERT_EQ(cachedDb.primaryId(), kShards[1]);
}
+TEST_F(CatalogCacheTest, CheckEpochNoDatabase) {
+ const auto collVersion = ChunkVersion(1, 0, OID::gen());
+ ASSERT_THROWS_WITH_CHECK(_catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]),
+ StaleConfigException,
+ [&](const StaleConfigException& ex) {
+ const auto staleInfo = ex.extraInfo<StaleConfigInfo>();
+ ASSERT(staleInfo);
+ ASSERT_EQ(staleInfo->getNss(), kNss);
+ ASSERT_EQ(staleInfo->getVersionReceived(), collVersion);
+ ASSERT_EQ(staleInfo->getShardId(), kShards[0]);
+ ASSERT(staleInfo->getVersionWanted() == boost::none);
+ });
+}
+
+TEST_F(CatalogCacheTest, CheckEpochNoCollection) {
+ const auto dbVersion = DatabaseVersion();
+ const auto collVersion = ChunkVersion(1, 0, OID::gen());
+
+ loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)});
+ ASSERT_THROWS_WITH_CHECK(_catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]),
+ StaleConfigException,
+ [&](const StaleConfigException& ex) {
+ const auto staleInfo = ex.extraInfo<StaleConfigInfo>();
+ ASSERT(staleInfo);
+ ASSERT_EQ(staleInfo->getNss(), kNss);
+ ASSERT_EQ(staleInfo->getVersionReceived(), collVersion);
+ ASSERT_EQ(staleInfo->getShardId(), kShards[0]);
+ ASSERT(staleInfo->getVersionWanted() == boost::none);
+ });
+}
+
+TEST_F(CatalogCacheTest, CheckEpochUnshardedCollection) {
+ const auto dbVersion = DatabaseVersion();
+ const auto collVersion = ChunkVersion(1, 0, OID::gen());
+
+ loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)});
+ loadUnshardedCollection(kNss);
+ ASSERT_THROWS_WITH_CHECK(_catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]),
+ StaleConfigException,
+ [&](const StaleConfigException& ex) {
+ const auto staleInfo = ex.extraInfo<StaleConfigInfo>();
+ ASSERT(staleInfo);
+ ASSERT_EQ(staleInfo->getNss(), kNss);
+ ASSERT_EQ(staleInfo->getVersionReceived(), collVersion);
+ ASSERT_EQ(staleInfo->getShardId(), kShards[0]);
+ ASSERT(staleInfo->getVersionWanted() == boost::none);
+ });
+}
+
+TEST_F(CatalogCacheTest, CheckEpochWithMismatch) {
+ const auto dbVersion = DatabaseVersion();
+ const auto wantedCollVersion = ChunkVersion(1, 0, OID::gen());
+ const auto receivedCollVersion = ChunkVersion(1, 0, OID::gen());
+
+ loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)});
+ loadCollection(wantedCollVersion);
+
+ ASSERT_THROWS_WITH_CHECK(
+ _catalogCache->checkEpochOrThrow(kNss, receivedCollVersion, kShards[0]),
+ StaleConfigException,
+ [&](const StaleConfigException& ex) {
+ const auto staleInfo = ex.extraInfo<StaleConfigInfo>();
+ ASSERT(staleInfo);
+ ASSERT_EQ(staleInfo->getNss(), kNss);
+ ASSERT_EQ(staleInfo->getVersionReceived(), receivedCollVersion);
+ ASSERT(staleInfo->getVersionWanted() != boost::none);
+ ASSERT_EQ(*(staleInfo->getVersionWanted()), wantedCollVersion);
+ ASSERT_EQ(staleInfo->getShardId(), kShards[0]);
+ });
+}
+
+TEST_F(CatalogCacheTest, CheckEpochWithMatch) {
+ const auto dbVersion = DatabaseVersion();
+ const auto collVersion = ChunkVersion(1, 0, OID::gen());
+
+ loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)});
+ loadCollection(collVersion);
+
+ _catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/s/catalog_cache_test_fixture.cpp b/src/mongo/s/catalog_cache_test_fixture.cpp
index 71e02e67fac..4f59eeaef8a 100644
--- a/src/mongo/s/catalog_cache_test_fixture.cpp
+++ b/src/mongo/s/catalog_cache_test_fixture.cpp
@@ -81,6 +81,26 @@ CatalogCacheTestFixture::scheduleRoutingInfoUnforcedRefresh(const NamespaceStrin
});
}
+executor::NetworkTestEnv::FutureHandle<boost::optional<ChunkManager>>
+CatalogCacheTestFixture::scheduleRoutingInfoIncrementalRefresh(const NamespaceString& nss) {
+ auto catalogCache = Grid::get(getServiceContext())->catalogCache();
+ const auto cm =
+ uassertStatusOK(catalogCache->getCollectionRoutingInfo(operationContext(), nss));
+ ASSERT(cm.isSharded());
+
+ // Simulates the shard wanting a higher version than the one sent by the router.
+ catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ nss, boost::none, cm.dbPrimary());
+
+ return launchAsync([this, nss] {
+ auto client = getServiceContext()->makeClient("Test");
+ auto const catalogCache = Grid::get(getServiceContext())->catalogCache();
+
+ return boost::make_optional(
+ uassertStatusOK(catalogCache->getCollectionRoutingInfo(operationContext(), nss)));
+ });
+}
+
std::vector<ShardType> CatalogCacheTestFixture::setupNShards(int numShards) {
std::vector<ShardType> shards;
for (int i = 0; i < numShards; i++) {
diff --git a/src/mongo/s/catalog_cache_test_fixture.h b/src/mongo/s/catalog_cache_test_fixture.h
index fb5238a2ba9..3d58f6a8557 100644
--- a/src/mongo/s/catalog_cache_test_fixture.h
+++ b/src/mongo/s/catalog_cache_test_fixture.h
@@ -84,6 +84,17 @@ protected:
scheduleRoutingInfoUnforcedRefresh(const NamespaceString& nss);
/**
+ * Advances the time in the cache for 'kNss' and schedules a thread to make an incremental
+ * refresh.
+ *
+ * NOTE: The returned value is always set. The reason to use optional is a deficiency of
+ * std::future with the MSVC STL library, which requires the templated type to be default
+ * constructible.
+ */
+ executor::NetworkTestEnv::FutureHandle<boost::optional<ChunkManager>>
+ scheduleRoutingInfoIncrementalRefresh(const NamespaceString& nss);
+
+ /**
* Ensures that there are 'numShards' available in the shard registry. The shard ids are
* generated as "0", "1", etc.
*
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index 5713855e01f..9ded562066c 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -336,22 +336,23 @@ void RoutingTableHistory::setAllShardsRefreshed() {
}
Chunk ChunkManager::findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const {
- const bool hasSimpleCollation = (collation.isEmpty() && !_rt->getDefaultCollator()) ||
+ const bool hasSimpleCollation = (collation.isEmpty() && !_rt->optRt->getDefaultCollator()) ||
SimpleBSONObjComparator::kInstance.evaluate(collation == CollationSpec::kSimpleSpec);
if (!hasSimpleCollation) {
for (BSONElement elt : shardKey) {
uassert(ErrorCodes::ShardKeyNotFound,
str::stream() << "Cannot target single shard due to collation of key "
- << elt.fieldNameStringData() << " for namespace " << _rt->nss(),
+ << elt.fieldNameStringData() << " for namespace "
+ << _rt->optRt->nss(),
!CollationIndexKey::isCollatableType(elt.type()));
}
}
- auto chunkInfo = _rt->findIntersectingChunk(shardKey);
+ auto chunkInfo = _rt->optRt->findIntersectingChunk(shardKey);
uassert(ErrorCodes::ShardKeyNotFound,
str::stream() << "Cannot target single shard using key " << shardKey
- << " for namespace " << _rt->nss(),
+ << " for namespace " << _rt->optRt->nss(),
chunkInfo && chunkInfo->containsKey(shardKey));
return Chunk(*chunkInfo, _clusterTime);
@@ -361,7 +362,7 @@ bool ChunkManager::keyBelongsToShard(const BSONObj& shardKey, const ShardId& sha
if (shardKey.isEmpty())
return false;
- auto chunkInfo = _rt->findIntersectingChunk(shardKey);
+ auto chunkInfo = _rt->optRt->findIntersectingChunk(shardKey);
if (!chunkInfo)
return false;
@@ -374,7 +375,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e
const BSONObj& query,
const BSONObj& collation,
std::set<ShardId>* shardIds) const {
- auto qr = std::make_unique<QueryRequest>(_rt->nss());
+ auto qr = std::make_unique<QueryRequest>(_rt->optRt->nss());
qr->setFilter(query);
if (auto uuid = getUUID())
@@ -382,8 +383,8 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e
if (!collation.isEmpty()) {
qr->setCollation(collation);
- } else if (_rt->getDefaultCollator()) {
- auto defaultCollator = _rt->getDefaultCollator();
+ } else if (_rt->optRt->getDefaultCollator()) {
+ auto defaultCollator = _rt->optRt->getDefaultCollator();
qr->setCollation(defaultCollator->getSpec().toBSON());
expCtx->setCollator(defaultCollator->clone());
}
@@ -396,7 +397,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e
MatchExpressionParser::kAllowAllSpecialFeatures));
// Fast path for targeting equalities on the shard key.
- auto shardKeyToFind = _rt->getShardKeyPattern().extractShardKeyFromQuery(*cq);
+ auto shardKeyToFind = _rt->optRt->getShardKeyPattern().extractShardKeyFromQuery(*cq);
if (!shardKeyToFind.isEmpty()) {
try {
auto chunk = findIntersectingChunk(shardKeyToFind, collation);
@@ -413,14 +414,14 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e
// Query { a : { $gte : 1, $lt : 2 },
// b : { $gte : 3, $lt : 4 } }
// => Bounds { a : [1, 2), b : [3, 4) }
- IndexBounds bounds = getIndexBoundsForQuery(_rt->getShardKeyPattern().toBSON(), *cq);
+ IndexBounds bounds = getIndexBoundsForQuery(_rt->optRt->getShardKeyPattern().toBSON(), *cq);
// Transforms bounds for each shard key field into full shard key ranges
// for example :
// Key { a : 1, b : 1 }
// Bounds { a : [1, 2), b : [3, 4) }
// => Ranges { a : 1, b : 3 } => { a : 2, b : 4 }
- BoundList ranges = _rt->getShardKeyPattern().flattenBounds(bounds);
+ BoundList ranges = _rt->optRt->getShardKeyPattern().flattenBounds(bounds);
for (BoundList::const_iterator it = ranges.begin(); it != ranges.end(); ++it) {
getShardIdsForRange(it->first /*min*/, it->second /*max*/, shardIds);
@@ -430,7 +431,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e
// because _shardVersions contains shards with chunks and is built based on the last
// refresh. Therefore, it is possible for _shardVersions to have fewer entries if a shard
// no longer owns chunks when it used to at _clusterTime.
- if (!_clusterTime && shardIds->size() == _rt->_shardVersions.size()) {
+ if (!_clusterTime && shardIds->size() == _rt->optRt->_shardVersions.size()) {
break;
}
}
@@ -439,7 +440,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e
// For now, we satisfy that assumption by adding a shard with no matches rather than returning
// an empty set of shards.
if (shardIds->empty()) {
- _rt->forEachChunk([&](const std::shared_ptr<ChunkInfo>& chunkInfo) {
+ _rt->optRt->forEachChunk([&](const std::shared_ptr<ChunkInfo>& chunkInfo) {
shardIds->insert(chunkInfo->getShardIdAt(_clusterTime));
return false;
});
@@ -459,7 +460,7 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min,
return;
}
- _rt->forEachOverlappingChunk(min, max, true, [&](auto& chunkInfo) {
+ _rt->optRt->forEachOverlappingChunk(min, max, true, [&](auto& chunkInfo) {
shardIds->insert(chunkInfo->getShardIdAt(_clusterTime));
// No need to iterate through the rest of the ranges, because we already know we need to use
@@ -467,7 +468,7 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min,
// because _shardVersions contains shards with chunks and is built based on the last
// refresh. Therefore, it is possible for _shardVersions to have fewer entries if a shard
// no longer owns chunks when it used to at _clusterTime.
- if (!_clusterTime && shardIds->size() == _rt->_shardVersions.size()) {
+ if (!_clusterTime && shardIds->size() == _rt->optRt->_shardVersions.size()) {
return false;
}
@@ -478,14 +479,15 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min,
bool ChunkManager::rangeOverlapsShard(const ChunkRange& range, const ShardId& shardId) const {
bool overlapFound = false;
- _rt->forEachOverlappingChunk(range.getMin(), range.getMax(), false, [&](auto& chunkInfo) {
- if (chunkInfo->getShardIdAt(_clusterTime) == shardId) {
- overlapFound = true;
- return false;
- }
+ _rt->optRt->forEachOverlappingChunk(
+ range.getMin(), range.getMax(), false, [&](auto& chunkInfo) {
+ if (chunkInfo->getShardIdAt(_clusterTime) == shardId) {
+ overlapFound = true;
+ return false;
+ }
- return true;
- });
+ return true;
+ });
return overlapFound;
}
@@ -494,7 +496,7 @@ boost::optional<Chunk> ChunkManager::getNextChunkOnShard(const BSONObj& shardKey
const ShardId& shardId) const {
boost::optional<Chunk> chunk;
- _rt->forEachChunk(
+ _rt->optRt->forEachChunk(
[&](auto& chunkInfo) {
if (chunkInfo->getShardIdAt(_clusterTime) == shardId) {
chunk.emplace(*chunkInfo, _clusterTime);
@@ -654,7 +656,7 @@ ChunkManager ChunkManager::makeAtTime(const ChunkManager& cm, Timestamp clusterT
}
std::string ChunkManager::toString() const {
- return _rt ? _rt->toString() : "UNSHARDED";
+ return _rt->optRt ? _rt->optRt->toString() : "UNSHARDED";
}
bool RoutingTableHistory::compatibleWith(const RoutingTableHistory& other,
@@ -733,7 +735,7 @@ RoutingTableHistory RoutingTableHistory::makeUpdated(
auto changedChunkInfos = flatten(changedChunks);
auto chunkMap = _chunkMap.createMerged(changedChunkInfos);
- // If at least one diff was applied, the collection's version must have advanced
+ // An incremental update must not span an epoch change: the merged chunk map keeps the same epoch.
invariant(getVersion().epoch() == chunkMap.getVersion().epoch());
return RoutingTableHistory(_nss,
@@ -745,4 +747,60 @@ RoutingTableHistory RoutingTableHistory::makeUpdated(
std::move(chunkMap));
}
+AtomicWord<uint64_t> ComparableChunkVersion::_epochDisambiguatingSequenceNumSource{1ULL};
+AtomicWord<uint64_t> ComparableChunkVersion::_forcedRefreshSequenceNumSource{1ULL};
+
+ComparableChunkVersion ComparableChunkVersion::makeComparableChunkVersion(
+ const ChunkVersion& version) {
+ return ComparableChunkVersion(_forcedRefreshSequenceNumSource.load(),
+ version,
+ _epochDisambiguatingSequenceNumSource.fetchAndAdd(1));
+}
+
+ComparableChunkVersion ComparableChunkVersion::makeComparableChunkVersionForForcedRefresh() {
+ return ComparableChunkVersion(_forcedRefreshSequenceNumSource.addAndFetch(2) - 1,
+ boost::none,
+ _epochDisambiguatingSequenceNumSource.fetchAndAdd(1));
+}
+
+std::string ComparableChunkVersion::toString() const {
+ return str::stream() << _forcedRefreshSequenceNum << "|"
+ << (_chunkVersion ? _chunkVersion->toString() : "NONE") << "|"
+ << _epochDisambiguatingSequenceNum;
+}
+
+bool ComparableChunkVersion::operator==(const ComparableChunkVersion& other) const {
+ if (_forcedRefreshSequenceNum == other._forcedRefreshSequenceNum) {
+ if (_forcedRefreshSequenceNum == 0)
+ return true; // Default constructed value
+
+ if (sameEpoch(other)) {
+ if (_chunkVersion->majorVersion() == 0 && other._chunkVersion->majorVersion() == 0) {
+ return _chunkVersion->epoch() == OID();
+ }
+ return _chunkVersion->majorVersion() == other._chunkVersion->majorVersion() &&
+ _chunkVersion->minorVersion() == other._chunkVersion->minorVersion();
+ }
+ }
+ return false;
+}
+
+bool ComparableChunkVersion::operator<(const ComparableChunkVersion& other) const {
+ if (_forcedRefreshSequenceNum < other._forcedRefreshSequenceNum)
+ return true;
+ if (_forcedRefreshSequenceNum > other._forcedRefreshSequenceNum)
+ return false;
+ if (_forcedRefreshSequenceNum == 0)
+ return false; // Default constructed value
+
+ if (sameEpoch(other) && other._chunkVersion->epoch() != OID() &&
+ _chunkVersion->majorVersion() != 0 && other._chunkVersion->majorVersion() != 0) {
+ return _chunkVersion->majorVersion() < other._chunkVersion->majorVersion() ||
+ (_chunkVersion->majorVersion() == other._chunkVersion->majorVersion() &&
+ _chunkVersion->minorVersion() < other._chunkVersion->minorVersion());
+ } else {
+ return _epochDisambiguatingSequenceNum < other._epochDisambiguatingSequenceNum;
+ }
+}
+
} // namespace mongo
diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h
index 7f25a810a4a..e694a94c201 100644
--- a/src/mongo/s/chunk_manager.h
+++ b/src/mongo/s/chunk_manager.h
@@ -43,6 +43,7 @@
#include "mongo/s/shard_key_pattern.h"
#include "mongo/stdx/unordered_map.h"
#include "mongo/util/concurrency/ticketholder.h"
+#include "mongo/util/read_through_cache.h"
namespace mongo {
@@ -324,13 +325,128 @@ private:
};
/**
+ * Constructed to be used exclusively by the CatalogCache as a vector clock (Time) to drive
+ * CollectionCache's lookups.
+ *
+ * The ChunkVersion class contains a non-comparable epoch, which makes it impossible to compare two
+ * ChunkVersions when their epochs differ.
+ *
+ * This class wraps a ChunkVersion object with a node-local sequence number
+ * (_epochDisambiguatingSequenceNum) that allows the comparison.
+ *
+ * This class should go away once a cluster-wide comparable ChunkVersion is implemented.
+ */
+class ComparableChunkVersion {
+public:
+ /**
+ * Creates a ComparableChunkVersion that wraps the given ChunkVersion.
+ * Each object created through this method will have a local sequence number greater than the
+ * previously created ones.
+ */
+ static ComparableChunkVersion makeComparableChunkVersion(const ChunkVersion& version);
+
+ /**
+ * Creates a ComparableChunkVersion object, which will artificially be greater than any that
+ * were previously created by `makeComparableChunkVersion`. Used as means to cause the
+ * collections cache to attempt a refresh in situations where causal consistency cannot be
+ * inferred.
+ */
+ static ComparableChunkVersion makeComparableChunkVersionForForcedRefresh();
+
+ /**
+ * Empty constructor needed by the ReadThroughCache.
+ *
+ * Instances created through this constructor will always be less than the ones created through
+ * the two static constructors, but they do not carry any meaningful value and can only be used
+ * for comparison purposes.
+ */
+ ComparableChunkVersion() = default;
+
+ const ChunkVersion& getVersion() const {
+ return *_chunkVersion;
+ }
+
+ std::string toString() const;
+
+ bool sameEpoch(const ComparableChunkVersion& other) const {
+ return _chunkVersion->epoch() == other._chunkVersion->epoch();
+ }
+
+ bool operator==(const ComparableChunkVersion& other) const;
+
+ bool operator!=(const ComparableChunkVersion& other) const {
+ return !(*this == other);
+ }
+
+ /**
+ * In case the two compared instances have different epochs, the most recently created one will
+ * be greater; otherwise the comparison will be driven by the major/minor versions of the
+ * underlying ChunkVersion.
+ */
+ bool operator<(const ComparableChunkVersion& other) const;
+
+ bool operator>(const ComparableChunkVersion& other) const {
+ return other < *this;
+ }
+
+ bool operator<=(const ComparableChunkVersion& other) const {
+ return !(*this > other);
+ }
+
+ bool operator>=(const ComparableChunkVersion& other) const {
+ return !(*this < other);
+ }
+
+private:
+ static AtomicWord<uint64_t> _epochDisambiguatingSequenceNumSource;
+ static AtomicWord<uint64_t> _forcedRefreshSequenceNumSource;
+
+ ComparableChunkVersion(uint64_t forcedRefreshSequenceNum,
+ boost::optional<ChunkVersion> version,
+ uint64_t epochDisambiguatingSequenceNum)
+ : _forcedRefreshSequenceNum(forcedRefreshSequenceNum),
+ _chunkVersion(std::move(version)),
+ _epochDisambiguatingSequenceNum(epochDisambiguatingSequenceNum) {}
+
+ uint64_t _forcedRefreshSequenceNum{0};
+
+ boost::optional<ChunkVersion> _chunkVersion;
+
+ // Locally incremented sequence number that allows comparison of two collection versions with
+ // different epochs. Each new ComparableChunkVersion will have a greater sequence number than
+ // the ones created before.
+ uint64_t _epochDisambiguatingSequenceNum{0};
+};
+
+/**
+ * This intermediate structure is necessary to be able to store UNSHARDED collections in the routing
+ * table history cache below. The reason is that currently the RoutingTableHistory class only
+ * supports sharded collections (i.e., collections which have entries in config.collections and
+ * config.chunks).
+ */
+struct OptionalRoutingTableHistory {
+ // UNSHARDED collection constructor
+ OptionalRoutingTableHistory() = default;
+
+ // SHARDED collection constructor
+ OptionalRoutingTableHistory(RoutingTableHistory&& rt) : optRt(std::move(rt)) {}
+
+ // If boost::none, the collection is UNSHARDED, otherwise it is SHARDED
+ boost::optional<RoutingTableHistory> optRt;
+};
+
+using RoutingTableHistoryCache =
+ ReadThroughCache<NamespaceString, OptionalRoutingTableHistory, ComparableChunkVersion>;
+using RoutingTableHistoryValueHandle = RoutingTableHistoryCache::ValueHandle;
+
+/**
* Wrapper around a RoutingTableHistory, which pins it to a particular point in time.
*/
class ChunkManager {
public:
ChunkManager(ShardId dbPrimary,
DatabaseVersion dbVersion,
- std::shared_ptr<RoutingTableHistory> rt,
+ RoutingTableHistoryValueHandle rt,
boost::optional<Timestamp> clusterTime)
: _dbPrimary(std::move(dbPrimary)),
_dbVersion(std::move(dbVersion)),
@@ -340,7 +456,7 @@ public:
// Methods supported on both sharded and unsharded collections
bool isSharded() const {
- return bool(_rt);
+ return bool(_rt->optRt);
}
const ShardId& dbPrimary() const {
@@ -352,7 +468,7 @@ public:
}
int numChunks() const {
- return _rt ? _rt->numChunks() : 1;
+ return _rt->optRt ? _rt->optRt->numChunks() : 1;
}
std::string toString() const;
@@ -360,32 +476,32 @@ public:
// Methods only supported on sharded collections (caller must check isSharded())
const ShardKeyPattern& getShardKeyPattern() const {
- return _rt->getShardKeyPattern();
+ return _rt->optRt->getShardKeyPattern();
}
const CollatorInterface* getDefaultCollator() const {
- return _rt->getDefaultCollator();
+ return _rt->optRt->getDefaultCollator();
}
bool isUnique() const {
- return _rt->isUnique();
+ return _rt->optRt->isUnique();
}
ChunkVersion getVersion() const {
- return _rt->getVersion();
+ return _rt->optRt->getVersion();
}
ChunkVersion getVersion(const ShardId& shardId) const {
- return _rt->getVersion(shardId);
+ return _rt->optRt->getVersion(shardId);
}
ChunkVersion getVersionForLogging(const ShardId& shardId) const {
- return _rt->getVersionForLogging(shardId);
+ return _rt->optRt->getVersionForLogging(shardId);
}
template <typename Callable>
void forEachChunk(Callable&& handler) const {
- _rt->forEachChunk(
+ _rt->optRt->forEachChunk(
[this, handler = std::forward<Callable>(handler)](const auto& chunkInfo) mutable {
if (!handler(Chunk{*chunkInfo, _clusterTime}))
return false;
@@ -461,14 +577,14 @@ public:
* Returns the ids of all shards on which the collection has any chunks.
*/
void getAllShardIds(std::set<ShardId>* all) const {
- _rt->getAllShardIds(all);
+ _rt->optRt->getAllShardIds(all);
}
/**
* Returns the number of shards on which the collection has any chunks
*/
int getNShardsOwningChunks() const {
- return _rt->getNShardsOwningChunks();
+ return _rt->optRt->getNShardsOwningChunks();
}
// Transforms query into bounds for each field in the shard key
@@ -500,30 +616,30 @@ public:
* Returns true if, for this shard, the chunks are identical in both chunk managers
*/
bool compatibleWith(const ChunkManager& other, const ShardId& shard) const {
- return _rt->compatibleWith(*other._rt, shard);
+ return _rt->optRt->compatibleWith(*other._rt->optRt, shard);
}
bool uuidMatches(UUID uuid) const {
- return _rt->uuidMatches(uuid);
+ return _rt->optRt->uuidMatches(uuid);
}
boost::optional<UUID> getUUID() const {
- return _rt->getUUID();
+ return _rt->optRt->getUUID();
}
const boost::optional<TypeCollectionReshardingFields>& getReshardingFields() const {
- return _rt->getReshardingFields();
+ return _rt->optRt->getReshardingFields();
}
const RoutingTableHistory& getRoutingTableHistory_ForTest() const {
- return *_rt;
+ return *_rt->optRt;
}
private:
ShardId _dbPrimary;
DatabaseVersion _dbVersion;
- std::shared_ptr<RoutingTableHistory> _rt;
+ RoutingTableHistoryValueHandle _rt;
boost::optional<Timestamp> _clusterTime;
};
diff --git a/src/mongo/s/chunk_manager_refresh_bm.cpp b/src/mongo/s/chunk_manager_refresh_bm.cpp
index a3feba2de1e..bd9b133301c 100644
--- a/src/mongo/s/chunk_manager_refresh_bm.cpp
+++ b/src/mongo/s/chunk_manager_refresh_bm.cpp
@@ -43,8 +43,10 @@ namespace {
const NamespaceString kNss("test", "foo");
-std::shared_ptr<RoutingTableHistory> makeStandaloneRoutingTableHistory(RoutingTableHistory rt) {
- return std::make_shared<RoutingTableHistory>(std::move(rt));
+RoutingTableHistoryValueHandle makeStandaloneRoutingTableHistory(RoutingTableHistory rt) {
+ const auto version = rt.getVersion();
+ return RoutingTableHistoryValueHandle(
+ std::move(rt), ComparableChunkVersion::makeComparableChunkVersion(version));
}
ChunkRange getRangeForChunk(int i, int nChunks) {
@@ -69,6 +71,7 @@ CollectionMetadata makeChunkManagerWithShardSelector(int nShards,
std::vector<ChunkType> chunks;
chunks.reserve(nChunks);
+
for (uint32_t i = 0; i < nChunks; ++i) {
chunks.emplace_back(kNss,
getRangeForChunk(i, nChunks),
@@ -144,13 +147,13 @@ auto BM_FullBuildOfChunkManager(benchmark::State& state, ShardSelectorFn selectS
const uint32_t nChunks = state.range(1);
const auto collEpoch = OID::gen();
- const auto collName = NamespaceString("test.foo");
const auto shardKeyPattern = KeyPattern(BSON("_id" << 1));
std::vector<ChunkType> chunks;
chunks.reserve(nChunks);
+
for (uint32_t i = 0; i < nChunks; ++i) {
- chunks.emplace_back(collName,
+ chunks.emplace_back(kNss,
getRangeForChunk(i, nChunks),
ChunkVersion{i + 1, 0, collEpoch},
selectShard(i, nShards, nChunks));
@@ -158,7 +161,7 @@ auto BM_FullBuildOfChunkManager(benchmark::State& state, ShardSelectorFn selectS
for (auto keepRunning : state) {
auto rt = RoutingTableHistory::makeNew(
- collName, UUID::gen(), shardKeyPattern, nullptr, true, collEpoch, boost::none, chunks);
+ kNss, UUID::gen(), shardKeyPattern, nullptr, true, collEpoch, boost::none, chunks);
benchmark::DoNotOptimize(
CollectionMetadata(ChunkManager(ShardId("shard0"),
DatabaseVersion(UUID::gen(), 1),
diff --git a/src/mongo/s/commands/SConscript b/src/mongo/s/commands/SConscript
index 780d4d4bc9a..8fc761a2e0b 100644
--- a/src/mongo/s/commands/SConscript
+++ b/src/mongo/s/commands/SConscript
@@ -124,6 +124,7 @@ env.Library(
'$BUILD_DIR/mongo/db/commands/test_commands_enabled',
'$BUILD_DIR/mongo/db/commands/write_commands_common',
'$BUILD_DIR/mongo/db/ftdc/ftdc_server',
+ '$BUILD_DIR/mongo/db/initialize_api_parameters',
'$BUILD_DIR/mongo/db/logical_session_cache_impl',
'$BUILD_DIR/mongo/db/pipeline/aggregation',
'$BUILD_DIR/mongo/db/query/command_request_response',
diff --git a/src/mongo/s/commands/cluster_drop_cmd.cpp b/src/mongo/s/commands/cluster_drop_cmd.cpp
index a69e3292597..f727489ccc0 100644
--- a/src/mongo/s/commands/cluster_drop_cmd.cpp
+++ b/src/mongo/s/commands/cluster_drop_cmd.cpp
@@ -88,7 +88,9 @@ public:
// Invalidate the routing table cache entry for this collection so that we reload it the
// next time it is accessed, even if sending the command to the config server fails due
// to e.g. a NetworkError.
- ON_BLOCK_EXIT([opCtx, nss] { Grid::get(opCtx)->catalogCache()->onEpochChange(nss); });
+ ON_BLOCK_EXIT([opCtx, nss] {
+ Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss);
+ });
auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
auto cmdResponse = uassertStatusOK(configShard->runCommandWithFixedRetryAttempts(
diff --git a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp
index b4157bee9d9..531aa1ab41e 100644
--- a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp
+++ b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp
@@ -174,8 +174,10 @@ public:
Shard::RetryPolicy::kNotIdempotent));
uassertStatusOK(response.commandStatus);
- Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(
- nss, firstChunk.getShardId());
+ Grid::get(opCtx)
+ ->catalogCache()
+ ->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ nss, boost::none, firstChunk.getShardId());
CommandHelpers::filterCommandReplyForPassthrough(response.response, &result);
return true;
diff --git a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
index 01cdb91234e..f6e2d27c80f 100644
--- a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
+++ b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
@@ -198,9 +198,14 @@ public:
cmdObj["waitForDelete"].trueValue(),
forceJumbo));
- Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(nss,
- chunk->getShardId());
- Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(nss, to->getId());
+ Grid::get(opCtx)
+ ->catalogCache()
+ ->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ nss, boost::none, chunk->getShardId());
+ Grid::get(opCtx)
+ ->catalogCache()
+ ->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ nss, boost::none, to->getId());
result.append("millis", t.millis());
return true;
diff --git a/src/mongo/s/commands/cluster_shard_collection_cmd.cpp b/src/mongo/s/commands/cluster_shard_collection_cmd.cpp
index d27fd037d30..d4c4d7901ad 100644
--- a/src/mongo/s/commands/cluster_shard_collection_cmd.cpp
+++ b/src/mongo/s/commands/cluster_shard_collection_cmd.cpp
@@ -105,7 +105,9 @@ public:
// Invalidate the routing table cache entry for this collection so that we reload the
// collection the next time it's accessed, even if we receive a failure, e.g. NetworkError.
- ON_BLOCK_EXIT([opCtx, nss] { Grid::get(opCtx)->catalogCache()->onEpochChange(nss); });
+ ON_BLOCK_EXIT([opCtx, nss] {
+ Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss);
+ });
auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
auto cmdResponse = uassertStatusOK(configShard->runCommandWithFixedRetryAttempts(
diff --git a/src/mongo/s/commands/cluster_split_cmd.cpp b/src/mongo/s/commands/cluster_split_cmd.cpp
index 19d33b3f10b..5532fac1daf 100644
--- a/src/mongo/s/commands/cluster_split_cmd.cpp
+++ b/src/mongo/s/commands/cluster_split_cmd.cpp
@@ -270,8 +270,10 @@ public:
ChunkRange(chunk->getMin(), chunk->getMax()),
{splitPoint}));
- Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(nss,
- chunk->getShardId());
+ Grid::get(opCtx)
+ ->catalogCache()
+ ->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ nss, boost::none, chunk->getShardId());
return true;
}
diff --git a/src/mongo/s/commands/flush_router_config_cmd.cpp b/src/mongo/s/commands/flush_router_config_cmd.cpp
index bcc61a82a0a..d27b65a2c4d 100644
--- a/src/mongo/s/commands/flush_router_config_cmd.cpp
+++ b/src/mongo/s/commands/flush_router_config_cmd.cpp
@@ -102,7 +102,7 @@ public:
"Routing metadata flushed for collection {namespace}",
"Routing metadata flushed for collection",
"namespace"_attr = nss);
- catalogCache->purgeCollection(nss);
+ catalogCache->invalidateCollectionEntry_LINEARIZABLE(nss);
}
}
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index 644c10e6bcb..f83b490d0ef 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -722,16 +722,12 @@ void runCommand(OperationContext* opCtx,
auto catalogCache = Grid::get(opCtx)->catalogCache();
if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
- opCtx,
- staleNs,
- staleInfo->getVersionWanted(),
- staleInfo->getVersionReceived(),
- staleInfo->getShardId());
+ staleNs, staleInfo->getVersionWanted(), staleInfo->getShardId());
} else {
// If we don't have the stale config info and therefore don't know the shard's
// id, we have to force all further targetting requests for the namespace to
// block on a refresh.
- catalogCache->onEpochChange(staleNs);
+ catalogCache->invalidateCollectionEntry_LINEARIZABLE(staleNs);
}
@@ -1301,16 +1297,12 @@ void Strategy::explainFind(OperationContext* opCtx,
Grid::get(opCtx)
->catalogCache()
->invalidateShardOrEntireCollectionEntryForShardedCollection(
- opCtx,
- staleNs,
- staleInfo->getVersionWanted(),
- staleInfo->getVersionReceived(),
- staleInfo->getShardId());
+ staleNs, staleInfo->getVersionWanted(), staleInfo->getShardId());
} else {
// If we don't have the stale config info and therefore don't know the shard's id,
// we have to force all further targetting requests for the namespace to block on
// a refresh.
- Grid::get(opCtx)->catalogCache()->onEpochChange(staleNs);
+ Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(staleNs);
}
if (canRetry) {
diff --git a/src/mongo/s/comparable_chunk_version_test.cpp b/src/mongo/s/comparable_chunk_version_test.cpp
index 941d9bad080..8c1fa71fce2 100644
--- a/src/mongo/s/comparable_chunk_version_test.cpp
+++ b/src/mongo/s/comparable_chunk_version_test.cpp
@@ -29,8 +29,7 @@
#include "mongo/platform/basic.h"
-#include "mongo/s/catalog_cache.h"
-#include "mongo/s/chunk_version.h"
+#include "mongo/s/chunk_manager.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -95,9 +94,15 @@ TEST(ComparableChunkVersionTest, VersionLessSameEpoch) {
ASSERT_FALSE(version2 > version3);
}
+TEST(ComparableChunkVersionTest, DefaultConstructedVersionsAreEqual) {
+ const ComparableChunkVersion defaultVersion1{}, defaultVersion2{};
+ ASSERT(defaultVersion1 == defaultVersion2);
+ ASSERT_FALSE(defaultVersion1 < defaultVersion2);
+ ASSERT_FALSE(defaultVersion1 > defaultVersion2);
+}
+
TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLess) {
const ComparableChunkVersion defaultVersion{};
- ASSERT_EQ(defaultVersion.getLocalSequenceNum(), 0);
const auto version1 =
ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen()));
ASSERT(defaultVersion != version1);
@@ -105,5 +110,127 @@ TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLess) {
ASSERT_FALSE(defaultVersion > version1);
}
+TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanUnsharded) {
+ const ComparableChunkVersion defaultVersion{};
+ const auto version1 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
+ ASSERT(defaultVersion != version1);
+ ASSERT(defaultVersion < version1);
+ ASSERT_FALSE(defaultVersion > version1);
+}
+
+TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanDropped) {
+ const ComparableChunkVersion defaultVersion{};
+ const auto version1 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::DROPPED());
+ ASSERT(defaultVersion != version1);
+ ASSERT(defaultVersion < version1);
+ ASSERT_FALSE(defaultVersion > version1);
+}
+
+TEST(ComparableChunkVersionTest, UnshardedAndDroppedAreEqual) {
+ const auto version1 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
+ const auto version2 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::DROPPED());
+ const auto version3 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
+ const auto version4 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::DROPPED());
+ ASSERT(version1 == version2);
+ ASSERT(version1 == version3);
+ ASSERT(version2 == version4);
+}
+
+TEST(ComparableChunkVersionTest, NoChunksAreDifferent) {
+ const auto oid = OID::gen();
+ const auto version1 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid));
+ const auto version2 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid));
+ ASSERT(version1 != version2);
+ ASSERT(version1 < version2);
+ ASSERT_FALSE(version1 > version2);
+}
+
+TEST(ComparableChunkVersionTest, NoChunksCompareBySequenceNum) {
+ const auto oid = OID::gen();
+ const auto version1 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(1, 0, oid));
+ const auto noChunkSV1 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid));
+
+ ASSERT(version1 != noChunkSV1);
+ ASSERT(noChunkSV1 > version1);
+
+ const auto noChunkSV2 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid));
+
+ ASSERT(noChunkSV1 != noChunkSV2);
+ ASSERT_FALSE(noChunkSV1 > noChunkSV2);
+ ASSERT(noChunkSV2 > noChunkSV1);
+
+ const auto version2 =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(2, 0, oid));
+
+ ASSERT(version2 != noChunkSV2);
+ ASSERT(version2 > noChunkSV2);
+}
+
+TEST(ComparableChunkVersionTest, NoChunksGreaterThanUnshardedBySequenceNum) {
+ const auto unsharded =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
+ const auto noChunkSV =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen()));
+
+ ASSERT(noChunkSV != unsharded);
+ ASSERT(noChunkSV > unsharded);
+}
+
+TEST(ComparableChunkVersionTest, UnshardedGreaterThanNoChunksBySequenceNum) {
+ const auto noChunkSV =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen()));
+ const auto unsharded =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
+
+ ASSERT(noChunkSV != unsharded);
+ ASSERT(unsharded > noChunkSV);
+}
+
+TEST(ComparableChunkVersionTest, NoChunksGreaterThanDefault) {
+ const auto noChunkSV =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen()));
+ const ComparableChunkVersion defaultVersion{};
+
+ ASSERT(noChunkSV != defaultVersion);
+ ASSERT(noChunkSV > defaultVersion);
+}
+
+TEST(ComparableChunkVersionTest, ForcedRefreshSequenceNumber) {
+ auto oid = OID::gen();
+ const ComparableChunkVersion defaultVersionBeforeForce;
+ const auto versionBeforeForce =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(100, 0, oid));
+
+ const auto forcedRefreshVersion =
+ ComparableChunkVersion::makeComparableChunkVersionForForcedRefresh();
+
+ const auto versionAfterForce =
+ ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(100, 0, oid));
+ const ComparableChunkVersion defaultVersionAfterForce;
+
+ ASSERT(defaultVersionBeforeForce != forcedRefreshVersion);
+ ASSERT(defaultVersionBeforeForce < forcedRefreshVersion);
+
+ ASSERT(versionBeforeForce != forcedRefreshVersion);
+ ASSERT(versionBeforeForce < forcedRefreshVersion);
+
+ ASSERT(versionAfterForce != forcedRefreshVersion);
+ ASSERT(versionAfterForce > forcedRefreshVersion);
+
+ ASSERT(defaultVersionAfterForce != forcedRefreshVersion);
+ ASSERT(defaultVersionAfterForce < forcedRefreshVersion);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/s/comparable_database_version_test.cpp b/src/mongo/s/comparable_database_version_test.cpp
index 3b2486a5ebd..d4201d56564 100644
--- a/src/mongo/s/comparable_database_version_test.cpp
+++ b/src/mongo/s/comparable_database_version_test.cpp
@@ -82,9 +82,15 @@ TEST(ComparableDatabaseVersionTest, VersionLessSameUuid) {
ASSERT_FALSE(version1 > version2);
}
+TEST(ComparableDatabaseVersionTest, DefaultConstructedVersionsAreEqual) {
+ const ComparableDatabaseVersion defaultVersion1{}, defaultVersion2{};
+ ASSERT(defaultVersion1 == defaultVersion2);
+ ASSERT_FALSE(defaultVersion1 < defaultVersion2);
+ ASSERT_FALSE(defaultVersion1 > defaultVersion2);
+}
+
TEST(ComparableDatabaseVersionTest, DefaultConstructedVersionIsAlwaysLess) {
const ComparableDatabaseVersion defaultVersion{};
- ASSERT_EQ(defaultVersion.getLocalSequenceNum(), 0);
const auto version1 =
ComparableDatabaseVersion::makeComparableDatabaseVersion(DatabaseVersion(UUID::gen(), 0));
ASSERT(defaultVersion != version1);
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 2ad05010afb..98aec3332ec 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -462,8 +462,11 @@ Status AsyncResultsMerger::_askForNextBatch(WithLock, size_t remoteIndex) {
cmdObj = newCmdBob.obj();
}
+ // Never pass API parameters with getMore.
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(_opCtx);
executor::RemoteCommandRequest request(
remote.getTargetHost(), remote.cursorNss.db().toString(), cmdObj, _opCtx);
+ ignoreApiParametersBlock.release();
auto callbackStatus =
_executor->scheduleRemoteCommand(request, [this, remoteIndex](auto const& cbData) {
diff --git a/src/mongo/s/query/cluster_client_cursor.h b/src/mongo/s/query/cluster_client_cursor.h
index 44aae05e34d..87e3271e692 100644
--- a/src/mongo/s/query/cluster_client_cursor.h
+++ b/src/mongo/s/query/cluster_client_cursor.h
@@ -32,8 +32,8 @@
#include <boost/optional.hpp>
#include "mongo/client/read_preference.h"
+#include "mongo/db/api_parameters.h"
#include "mongo/db/auth/user_name.h"
-#include "mongo/db/initialize_api_parameters.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/logical_session_id.h"
#include "mongo/s/query/cluster_client_cursor_params.h"
diff --git a/src/mongo/s/query/cluster_client_cursor_params.h b/src/mongo/s/query/cluster_client_cursor_params.h
index d8bb0ae8da0..b0fae249884 100644
--- a/src/mongo/s/query/cluster_client_cursor_params.h
+++ b/src/mongo/s/query/cluster_client_cursor_params.h
@@ -36,10 +36,10 @@
#include "mongo/bson/bsonobj.h"
#include "mongo/client/read_preference.h"
+#include "mongo/db/api_parameters.h"
#include "mongo/db/auth/privilege.h"
#include "mongo/db/auth/user_name.h"
#include "mongo/db/cursor_id.h"
-#include "mongo/db/initialize_api_parameters.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/pipeline/pipeline.h"
#include "mongo/db/query/cursor_response.h"
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 3996e01c326..57925b873ed 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -504,18 +504,18 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx,
// Re-target and re-send the initial find command to the shards until we have established the
// shard version.
for (size_t retries = 1; retries <= kMaxRetries; ++retries) {
- auto routingInfoStatus = getCollectionRoutingInfoForTxnCmd(opCtx, query.nss());
- if (routingInfoStatus == ErrorCodes::NamespaceNotFound) {
+ auto swCM = getCollectionRoutingInfoForTxnCmd(opCtx, query.nss());
+ if (swCM == ErrorCodes::NamespaceNotFound) {
// If the database doesn't exist, we successfully return an empty result set without
// creating a cursor.
return CursorId(0);
}
- auto routingInfo = uassertStatusOK(routingInfoStatus);
+ const auto cm = uassertStatusOK(std::move(swCM));
try {
return runQueryWithoutRetrying(
- opCtx, query, readPref, routingInfo, results, partialResultsReturned);
+ opCtx, query, readPref, cm, results, partialResultsReturned);
} catch (ExceptionFor<ErrorCodes::StaleDbVersion>& ex) {
if (retries >= kMaxRetries) {
// Check if there are no retries remaining, so the last received error can be
@@ -577,13 +577,9 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx,
if (ex.code() != ErrorCodes::ShardInvalidatedForTargeting) {
if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
- opCtx,
- query.nss(),
- staleInfo->getVersionWanted(),
- staleInfo->getVersionReceived(),
- staleInfo->getShardId());
+ query.nss(), staleInfo->getVersionWanted(), staleInfo->getShardId());
} else {
- catalogCache->onEpochChange(query.nss());
+ catalogCache->invalidateCollectionEntry_LINEARIZABLE(query.nss());
}
}
@@ -776,6 +772,7 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx,
StatusWith<ClusterQueryResult> next =
Status{ErrorCodes::InternalError, "uninitialized cluster query result"};
try {
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
next = pinnedCursor.getValue()->next(context);
} catch (const ExceptionFor<ErrorCodes::CloseChangeStream>&) {
// This exception is thrown when a $changeStream stage encounters an event
diff --git a/src/mongo/s/request_types/set_shard_version_request.h b/src/mongo/s/request_types/set_shard_version_request.h
index bfd7385ffae..44cacff0415 100644
--- a/src/mongo/s/request_types/set_shard_version_request.h
+++ b/src/mongo/s/request_types/set_shard_version_request.h
@@ -98,6 +98,7 @@ private:
SetShardVersionRequest();
bool _isAuthoritative{false};
+ // TODO (SERVER-50812) remove this flag that isn't used anymore
bool _forceRefresh{false};
boost::optional<NamespaceString> _nss;
diff --git a/src/mongo/s/sessions_collection_sharded.cpp b/src/mongo/s/sessions_collection_sharded.cpp
index 060c1158dbd..22915bd2c0a 100644
--- a/src/mongo/s/sessions_collection_sharded.cpp
+++ b/src/mongo/s/sessions_collection_sharded.cpp
@@ -123,8 +123,6 @@ void SessionsCollectionSharded::checkSessionsCollectionExists(OperationContext*
const auto cm = uassertStatusOK(
Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(
opCtx, NamespaceString::kLogicalSessionsNamespace));
-
- uassert(ErrorCodes::NamespaceNotFound, "config.system.sessions does not exist", cm.isSharded());
}
void SessionsCollectionSharded::refreshSessions(OperationContext* opCtx,
diff --git a/src/mongo/s/sharding_test_fixture_common.cpp b/src/mongo/s/sharding_test_fixture_common.cpp
index 95dd505687b..2ac936d3977 100644
--- a/src/mongo/s/sharding_test_fixture_common.cpp
+++ b/src/mongo/s/sharding_test_fixture_common.cpp
@@ -47,9 +47,11 @@ ShardingTestFixtureCommon::ShardingTestFixtureCommon() {
ShardingTestFixtureCommon::~ShardingTestFixtureCommon() = default;
-std::shared_ptr<RoutingTableHistory> ShardingTestFixtureCommon::makeStandaloneRoutingTableHistory(
+RoutingTableHistoryValueHandle ShardingTestFixtureCommon::makeStandaloneRoutingTableHistory(
RoutingTableHistory rt) {
- return std::make_shared<RoutingTableHistory>(std::move(rt));
+ const auto version = rt.getVersion();
+ return RoutingTableHistoryValueHandle(
+ std::move(rt), ComparableChunkVersion::makeComparableChunkVersion(version));
}
void ShardingTestFixtureCommon::onCommand(NetworkTestEnv::OnCommandFunction func) {
diff --git a/src/mongo/s/sharding_test_fixture_common.h b/src/mongo/s/sharding_test_fixture_common.h
index 0ecbbb30695..52377d7fbc5 100644
--- a/src/mongo/s/sharding_test_fixture_common.h
+++ b/src/mongo/s/sharding_test_fixture_common.h
@@ -55,8 +55,7 @@ public:
* which can be used to pass to ChunkManager for tests, which specifically target the behaviour
* of the ChunkManager.
*/
- static std::shared_ptr<RoutingTableHistory> makeStandaloneRoutingTableHistory(
- RoutingTableHistory rt);
+ static RoutingTableHistoryValueHandle makeStandaloneRoutingTableHistory(RoutingTableHistory rt);
protected:
ShardingTestFixtureCommon();
diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp
index b7b26698e78..c269d734365 100644
--- a/src/mongo/s/transaction_router.cpp
+++ b/src/mongo/s/transaction_router.cpp
@@ -125,7 +125,6 @@ BSONObj appendReadConcernForTxn(BSONObj cmd,
}
BSONObjBuilder appendFieldsForStartTransaction(BSONObj cmd,
- APIParameters apiParameters,
repl::ReadConcernArgs readConcernArgs,
boost::optional<LogicalTime> atClusterTime,
bool doAppendStartTransaction) {
@@ -134,8 +133,6 @@ BSONObjBuilder appendFieldsForStartTransaction(BSONObj cmd,
appendReadConcernForTxn(std::move(cmd), readConcernArgs, atClusterTime);
BSONObjBuilder bob(std::move(cmdWithReadConcern));
-
- apiParameters.appendInfo(&bob);
if (doAppendStartTransaction) {
bob.append(OperationSessionInfoFromClient::kStartTransactionFieldName, true);
}
@@ -433,7 +430,6 @@ BSONObj TransactionRouter::Participant::attachTxnFieldsIfNeeded(
BSONObjBuilder newCmd = mustStartTransaction
? appendFieldsForStartTransaction(std::move(cmd),
- sharedOptions.apiParameters,
sharedOptions.readConcernArgs,
sharedOptions.atClusterTime,
!hasStartTxn)
@@ -1203,6 +1199,8 @@ BSONObj TransactionRouter::Router::abortTransaction(OperationContext* opCtx) {
"txnNumber"_attr = o().txnNumber,
"numParticipantShards"_attr = o().participants.size());
+ // Omit API parameters from abortTransaction.
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
const auto responses = gatherResponses(opCtx,
NamespaceString::kAdminDb,
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
diff --git a/src/mongo/s/transaction_router.h b/src/mongo/s/transaction_router.h
index 25ce17831fe..3d6be675077 100644
--- a/src/mongo/s/transaction_router.h
+++ b/src/mongo/s/transaction_router.h
@@ -31,8 +31,8 @@
#include <boost/optional.hpp>
+#include "mongo/db/api_parameters.h"
#include "mongo/db/commands/txn_cmds_gen.h"
-#include "mongo/db/initialize_api_parameters.h"
#include "mongo/db/logical_session_id.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp
index eb827201e84..a507d3e4f3f 100644
--- a/src/mongo/s/transaction_router_test.cpp
+++ b/src/mongo/s/transaction_router_test.cpp
@@ -316,16 +316,9 @@ TEST_F(TransactionRouterTestWithDefaultSession, CannotContiueTxnWithoutStarting)
ErrorCodes::NoSuchTransaction);
}
-TEST_F(TransactionRouterTestWithDefaultSession,
- NewParticipantMustAttachTxnAndReadConcernAndAPIParams) {
+TEST_F(TransactionRouterTestWithDefaultSession, NewParticipantMustAttachTxnAndReadConcern) {
TxnNumber txnNum{3};
- APIParameters apiParameters = APIParameters();
- apiParameters.setAPIVersion("1");
- apiParameters.setAPIStrict(false);
- apiParameters.setAPIDeprecationErrors(false);
- APIParameters::get(operationContext()) = apiParameters;
-
auto txnRouter = TransactionRouter::get(operationContext());
txnRouter.beginOrContinueTxn(
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
@@ -337,9 +330,6 @@ TEST_F(TransactionRouterTestWithDefaultSession,
<< BSON("level"
<< "snapshot"
<< "atClusterTime" << kInMemoryLogicalTime.asTimestamp())
- << "apiVersion"
- << "1"
- << "apiStrict" << false << "apiDeprecationErrors" << false
<< "startTransaction" << true << "coordinator" << true
<< "autocommit" << false << "txnNumber" << txnNum);
@@ -369,9 +359,6 @@ TEST_F(TransactionRouterTestWithDefaultSession,
<< BSON("level"
<< "snapshot"
<< "atClusterTime" << kInMemoryLogicalTime.asTimestamp())
- << "apiVersion"
- << "1"
- << "apiStrict" << false << "apiDeprecationErrors" << false
<< "startTransaction" << true << "autocommit" << false << "txnNumber"
<< txnNum);
@@ -735,40 +722,6 @@ TEST_F(TransactionRouterTestWithDefaultSession, AttachTxnValidatesReadConcernIfA
}
}
-TEST_F(TransactionRouterTestWithDefaultSession, AttachTxnAttachesAPIParameters) {
- APIParameters apiParams = APIParameters();
- apiParams.setAPIVersion("2");
- apiParams.setAPIStrict(true);
- apiParams.setAPIDeprecationErrors(true);
-
- APIParameters::get(operationContext()) = apiParams;
-
- TxnNumber txnNum{3};
- auto txnRouter = TransactionRouter::get(operationContext());
- txnRouter.beginOrContinueTxn(
- operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
- txnRouter.setDefaultAtClusterTime(operationContext());
-
- {
- auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(),
- shard1,
- BSON("insert"
- << "test"));
- ASSERT_BSONOBJ_EQ(BSON("insert"
- << "test"
- << "readConcern"
- << BSON("level"
- << "snapshot"
- << "atClusterTime" << kInMemoryLogicalTime.asTimestamp())
- << "apiVersion"
- << "2"
- << "apiStrict" << true << "apiDeprecationErrors" << true
- << "startTransaction" << true << "coordinator" << true
- << "autocommit" << false << "txnNumber" << txnNum),
- newCmd);
- }
-}
-
TEST_F(TransactionRouterTestWithDefaultSession, CannotSpecifyAPIParametersAfterFirstStatement) {
APIParameters apiParameters = APIParameters();
apiParameters.setAPIVersion("1");
@@ -787,40 +740,6 @@ TEST_F(TransactionRouterTestWithDefaultSession, CannotSpecifyAPIParametersAfterF
4937701);
}
-TEST_F(TransactionRouterTestWithDefaultSession, PassesThroughAPIParametersToParticipants) {
- APIParameters apiParams = APIParameters();
- apiParams.setAPIVersion("2");
- apiParams.setAPIStrict(true);
- apiParams.setAPIDeprecationErrors(true);
-
- APIParameters::get(operationContext()) = apiParams;
-
- TxnNumber txnNum{3};
-
- auto txnRouter = TransactionRouter::get(operationContext());
- txnRouter.beginOrContinueTxn(
- operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
- txnRouter.setDefaultAtClusterTime(operationContext());
-
- BSONObj expectedNewObj = BSON("insert"
- << "test"
- << "readConcern"
- << BSON("level"
- << "snapshot"
- << "atClusterTime" << kInMemoryLogicalTime.asTimestamp())
- << "apiVersion"
- << "2"
- << "apiStrict" << true << "apiDeprecationErrors" << true
- << "startTransaction" << true << "coordinator" << true
- << "autocommit" << false << "txnNumber" << txnNum);
-
- auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(),
- shard1,
- BSON("insert"
- << "test"));
- ASSERT_BSONOBJ_EQ(expectedNewObj, newCmd);
-}
-
TEST_F(TransactionRouterTestWithDefaultSession, CannotSpecifyReadConcernAfterFirstStatement) {
TxnNumber txnNum{3};
@@ -3294,6 +3213,43 @@ TEST_F(TransactionRouterMetricsTest, LogsTransactionsOverSlowMSThreshold) {
assertPrintedExactlyOneSlowLogLine();
}
+TEST_F(TransactionRouterMetricsTest, LogsTransactionsWithAPIParameters) {
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
+ serverGlobalParams.slowMS = 100;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
+ APIParameters::get(operationContext()).setAPIVersion("1");
+ APIParameters::get(operationContext()).setAPIStrict(true);
+ APIParameters::get(operationContext()).setAPIDeprecationErrors(false);
+ beginTxnWithDefaultTxnNumber();
+ tickSource()->advance(Milliseconds(101));
+ runCommit(kDummyOkRes);
+ assertPrintedExactlyOneSlowLogLine();
+
+ int nFound = 0;
+ for (auto&& bson : getCapturedBSONFormatLogMessages()) {
+ if (bson["id"].Int() != 51805) {
+ continue;
+ }
+
+ auto parameters = bson["attr"]["parameters"];
+ ASSERT_EQUALS(parameters["apiVersion"].String(), "1");
+ ASSERT_EQUALS(parameters["apiStrict"].Bool(), true);
+ ASSERT_EQUALS(parameters["apiDeprecationErrors"].Bool(), false);
+ ++nFound;
+ }
+
+ ASSERT_EQUALS(nFound, 1);
+}
+
TEST_F(TransactionRouterMetricsTest, DoesNotLogTransactionsWithSampleRateZero) {
const auto originalSlowMS = serverGlobalParams.slowMS;
const auto originalSampleRate = serverGlobalParams.sampleRate;
diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.cpp b/src/mongo/s/write_ops/chunk_manager_targeter.cpp
index f7189efdfe9..6794dabc3ca 100644
--- a/src/mongo/s/write_ops/chunk_manager_targeter.cpp
+++ b/src/mongo/s/write_ops/chunk_manager_targeter.cpp
@@ -791,7 +791,7 @@ int ChunkManagerTargeter::getNShardsOwningChunks() const {
void ChunkManagerTargeter::_refreshShardVersionNow(OperationContext* opCtx) {
uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, _nss, true));
+ Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, _nss));
_init(opCtx);
}
diff --git a/src/mongo/scripting/engine.cpp b/src/mongo/scripting/engine.cpp
index d941e9834af..42ccc5f7154 100644
--- a/src/mongo/scripting/engine.cpp
+++ b/src/mongo/scripting/engine.cpp
@@ -249,6 +249,7 @@ void Scope::loadStored(OperationContext* opCtx, bool ignoreNotConnected) {
v.type() != BSONType::CodeWScope);
if (MONGO_unlikely(mr_killop_test_fp.shouldFail())) {
+ LOGV2(5062200, "Pausing mr_killop_test_fp for system.js entry", "entryName"_attr = n);
/* This thread sleep makes the interrupts in the test come in at a time
* where the js misses the interrupt and throw an exception instead of
diff --git a/src/mongo/shell/collection.js b/src/mongo/shell/collection.js
index 20522653bc2..bf5aa3a2653 100644
--- a/src/mongo/shell/collection.js
+++ b/src/mongo/shell/collection.js
@@ -1322,9 +1322,7 @@ DBCollection.prototype.getSlaveOk = function() {
return this.getSecondaryOk();
};
-DBCollection.prototype.setSecondaryOk = function(value) {
- if (value === undefined)
- value = true;
+DBCollection.prototype.setSecondaryOk = function(value = true) {
this._secondaryOk = value;
};
diff --git a/src/mongo/shell/db.js b/src/mongo/shell/db.js
index 73fdb9c25e2..16c109e9cb4 100644
--- a/src/mongo/shell/db.js
+++ b/src/mongo/shell/db.js
@@ -1091,28 +1091,17 @@ DB.prototype.printSecondaryReplicationInfo = function() {
return null;
}
- function g(x) {
- assert(x, "how could this be null (printSecondaryReplicationInfo gx)");
- print("source: " + x.host);
- if (x.syncedTo) {
- var st = new Date(DB.tsToSeconds(x.syncedTo) * 1000);
- getReplLag(st);
- } else {
- print("\tdoing initial sync");
- }
- }
-
- function r(x) {
- assert(x, "how could this be null (printSecondaryReplicationInfo rx)");
- if (x.state == 1 || x.state == 7) { // ignore primaries (1) and arbiters (7)
+ function printNodeReplicationInfo(node) {
+ assert(node);
+ if (node.state === 1 || node.state === 7) { // ignore primaries (1) and arbiters (7)
return;
}
- print("source: " + x.name);
- if (x.optime) {
- getReplLag(x.optimeDate);
+ print("source: " + node.name);
+ if (node.optime && node.health != 0) {
+ getReplLag(node.optimeDate);
} else {
- print("\tno replication info, yet. State: " + x.stateStr);
+ print("\tno replication info, yet. State: " + node.stateStr);
}
}
@@ -1136,7 +1125,7 @@ DB.prototype.printSecondaryReplicationInfo = function() {
}
for (i in status.members) {
- r(status.members[i]);
+ printNodeReplicationInfo(status.members[i]);
}
}
};
@@ -1255,7 +1244,7 @@ DB.autocomplete = function(obj) {
return ret;
};
-DB.prototype.setSlaveOk = function(value) {
+DB.prototype.setSlaveOk = function(value = true) {
print(
"WARNING: setSlaveOk() is deprecated and may be removed in the next major release. Please use setSecondaryOk() instead.");
this.setSecondaryOk(value);
@@ -1267,9 +1256,7 @@ DB.prototype.getSlaveOk = function() {
return this.getSecondaryOk();
};
-DB.prototype.setSecondaryOk = function(value) {
- if (value == undefined)
- value = true;
+DB.prototype.setSecondaryOk = function(value = true) {
this._secondaryOk = value;
};
diff --git a/src/mongo/shell/mongo.js b/src/mongo/shell/mongo.js
index 23a2cf775f1..5beecf5646d 100644
--- a/src/mongo/shell/mongo.js
+++ b/src/mongo/shell/mongo.js
@@ -39,9 +39,7 @@ Mongo.prototype.getSlaveOk = function() {
return this.getSecondaryOk();
};
-Mongo.prototype.setSecondaryOk = function(value) {
- if (value == undefined)
- value = true;
+Mongo.prototype.setSecondaryOk = function(value = true) {
this.secondaryOk = value;
};
diff --git a/src/mongo/transport/service_executor_fixed.cpp b/src/mongo/transport/service_executor_fixed.cpp
index bdf75660dce..f48a9d7a170 100644
--- a/src/mongo/transport/service_executor_fixed.cpp
+++ b/src/mongo/transport/service_executor_fixed.cpp
@@ -64,7 +64,9 @@ ServiceExecutorFixed::ServiceExecutorFixed(ThreadPool::Options options)
_options.onCreateThread =
[this, onCreate = std::move(_options.onCreateThread)](const std::string& name) mutable {
_executorContext = std::make_unique<ExecutorThreadContext>(this->weak_from_this());
- onCreate(name);
+ if (onCreate) {
+ onCreate(name);
+ }
};
_threadPool = std::make_unique<ThreadPool>(_options);
}
diff --git a/src/mongo/util/concurrency/thread_pool.cpp b/src/mongo/util/concurrency/thread_pool.cpp
index 680d397946f..0e8eda183b4 100644
--- a/src/mongo/util/concurrency/thread_pool.cpp
+++ b/src/mongo/util/concurrency/thread_pool.cpp
@@ -33,23 +33,37 @@
#include "mongo/util/concurrency/thread_pool.h"
+#include <deque>
+#include <fmt/format.h>
+#include <list>
+#include <sstream>
+#include <vector>
+
#include "mongo/base/status.h"
#include "mongo/logv2/log.h"
#include "mongo/platform/atomic_word.h"
+#include "mongo/platform/mutex.h"
+#include "mongo/stdx/condition_variable.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/concurrency/idle_thread_block.h"
#include "mongo/util/concurrency/thread_name.h"
-#include "mongo/util/str.h"
-
-#include <sstream>
+#include "mongo/util/hierarchical_acquisition.h"
namespace mongo {
namespace {
+using namespace fmt::literals;
+
// Counter used to assign unique names to otherwise-unnamed thread pools.
AtomicWord<int> nextUnnamedThreadPoolId{1};
+std::string threadIdToString(stdx::thread::id id) {
+ std::ostringstream oss;
+ oss << id;
+ return oss.str();
+}
+
/**
* Sets defaults and checks bounds limits on "options", and returns it.
*
@@ -57,10 +71,10 @@ AtomicWord<int> nextUnnamedThreadPoolId{1};
*/
ThreadPool::Options cleanUpOptions(ThreadPool::Options&& options) {
if (options.poolName.empty()) {
- options.poolName = str::stream() << "ThreadPool" << nextUnnamedThreadPoolId.fetchAndAdd(1);
+ options.poolName = "ThreadPool{}"_format(nextUnnamedThreadPoolId.fetchAndAdd(1));
}
if (options.threadNamePrefix.empty()) {
- options.threadNamePrefix = str::stream() << options.poolName << '-';
+ options.threadNamePrefix = "{}-"_format(options.poolName);
}
if (options.maxThreads < 1) {
LOGV2_FATAL(28702,
@@ -85,28 +99,144 @@ ThreadPool::Options cleanUpOptions(ThreadPool::Options&& options) {
} // namespace
-ThreadPool::Options::Options(const ThreadPool::Limits& limits)
- : minThreads(limits.minThreads),
- maxThreads(limits.maxThreads),
- maxIdleThreadAge(limits.maxIdleThreadAge) {}
-ThreadPool::ThreadPool(Options options) : _options(cleanUpOptions(std::move(options))) {}
+// Public functions forwarded from ThreadPool.
+class ThreadPool::Impl {
+public:
+ explicit Impl(Options options);
+ ~Impl();
+ void startup();
+ void shutdown();
+ void join();
+ void schedule(Task task);
+ void waitForIdle();
+ Stats getStats() const;
+
+private:
+ /**
+ * Representation of the stage of life of a thread pool.
+ *
+ * A pool starts out in the preStart state, and ends life in the shutdownComplete state. Work
+ * may only be scheduled in the preStart and running states. Threads may only be started in the
+ * running state. In shutdownComplete, there are no remaining threads or pending tasks to
+ * execute.
+ *
+ * Diagram of legal transitions:
+ *
+ * preStart -> running -> joinRequired -> joining -> shutdownComplete
+ * \ ^
+ * \_____________/
+ */
+ enum LifecycleState { preStart, running, joinRequired, joining, shutdownComplete };
+
+ /** The thread body for worker threads. */
+ void _workerThreadBody(const std::string& threadName) noexcept;
+
+ /**
+ * Starts a worker thread, unless _options.maxThreads threads are already running or
+ * _state is not running.
+ */
+ void _startWorkerThread_inlock();
+
+ /**
+ * This is the run loop of a worker thread, invoked by _workerThreadBody.
+ */
+ void _consumeTasks();
+
+ /**
+ * Implementation of shutdown once _mutex is locked.
+ */
+ void _shutdown_inlock();
+
+ /**
+ * Implementation of join once _mutex is owned by "lk".
+ */
+ void _join_inlock(stdx::unique_lock<Latch>* lk);
+
+ /**
+ * Runs the remaining tasks on a new thread as part of the join process, blocking until
+ * complete. Caller must not hold the mutex!
+ */
+ void _drainPendingTasks();
+
+ /**
+ * Executes one task from _pendingTasks. "lk" must own _mutex, and _pendingTasks must have at
+ * least one entry.
+ */
+ void _doOneTask(stdx::unique_lock<Latch>* lk) noexcept;
+
+ /**
+ * Changes the lifecycle state (_state) of the pool and wakes up any threads waiting for a state
+ * change. Has no effect if _state == newState.
+ */
+ void _setState_inlock(LifecycleState newState);
+
+ /**
+ * Waits for all remaining retired threads to join.
+ * If a thread's _workerThreadBody() were ever to attempt to reacquire
+ * ThreadPool::_mutex after that thread had been added to _retiredThreads,
+ * it could cause a deadlock.
+ */
+ void _joinRetired_inlock();
+
+ // These are the options with which the pool was configured at construction time.
+ const Options _options;
+
+ // Mutex guarding all non-const member variables.
+ mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ThreadPool::_mutex");
+
+ // This variable represents the lifecycle state of the pool.
+ //
+ // Work may only be scheduled in states preStart and running, and only executes in states
+ // running and shuttingDown.
+ LifecycleState _state = preStart;
+
+ // Condition signaled to indicate that there is work in the _pendingTasks queue, or
+ // that the system is shutting down.
+ stdx::condition_variable _workAvailable;
+
+ // Condition signaled to indicate that there is no work in the _pendingTasks queue.
+ stdx::condition_variable _poolIsIdle;
+
+ // Condition variable signaled whenever _state changes.
+ stdx::condition_variable _stateChange;
+
+ // Queue of yet-to-be-executed tasks.
+ std::deque<Task> _pendingTasks;
+
+ // List of threads serving as the worker pool.
+ std::list<stdx::thread> _threads;
+
+ // List of threads that are retired and pending join
+ std::list<stdx::thread> _retiredThreads;
+
+ // Count of idle threads.
+ size_t _numIdleThreads = 0;
+
+ // Id counter for assigning thread names
+ size_t _nextThreadId = 0;
+
+ // The last time that _pendingTasks.size() grew to be at least _threads.size().
+ Date_t _lastFullUtilizationDate;
+};
+
+ThreadPool::Impl::Impl(Options options) : _options(cleanUpOptions(std::move(options))) {}
-ThreadPool::~ThreadPool() {
+ThreadPool::Impl::~Impl() {
stdx::unique_lock<Latch> lk(_mutex);
_shutdown_inlock();
- if (shutdownComplete != _state) {
+ if (_state != shutdownComplete) {
_join_inlock(&lk);
}
- if (shutdownComplete != _state) {
+ if (_state != shutdownComplete) {
LOGV2_FATAL(28704, "Failed to shutdown pool during destruction");
}
invariant(_threads.empty());
invariant(_pendingTasks.empty());
}
-void ThreadPool::startup() {
+void ThreadPool::Impl::startup() {
stdx::lock_guard<Latch> lk(_mutex);
if (_state != preStart) {
LOGV2_FATAL(28698,
@@ -116,19 +246,18 @@ void ThreadPool::startup() {
}
_setState_inlock(running);
invariant(_threads.empty());
- const size_t numToStart =
- std::min(_options.maxThreads, std::max(_options.minThreads, _pendingTasks.size()));
+ size_t numToStart = std::clamp(_pendingTasks.size(), _options.minThreads, _options.maxThreads);
for (size_t i = 0; i < numToStart; ++i) {
_startWorkerThread_inlock();
}
}
-void ThreadPool::shutdown() {
+void ThreadPool::Impl::shutdown() {
stdx::lock_guard<Latch> lk(_mutex);
_shutdown_inlock();
}
-void ThreadPool::_shutdown_inlock() {
+void ThreadPool::Impl::_shutdown_inlock() {
switch (_state) {
case preStart:
case running:
@@ -143,38 +272,30 @@ void ThreadPool::_shutdown_inlock() {
MONGO_UNREACHABLE;
}
-void ThreadPool::join() {
+void ThreadPool::Impl::join() {
stdx::unique_lock<Latch> lk(_mutex);
_join_inlock(&lk);
}
-void ThreadPool::_joinRetired_inlock() {
+void ThreadPool::Impl::_joinRetired_inlock() {
while (!_retiredThreads.empty()) {
auto& t = _retiredThreads.front();
t.join();
- _options.onJoinRetiredThread(t);
+ if (_options.onJoinRetiredThread)
+ _options.onJoinRetiredThread(t);
_retiredThreads.pop_front();
}
}
-void ThreadPool::_join_inlock(stdx::unique_lock<Latch>* lk) {
- _stateChange.wait(*lk, [this] {
- switch (_state) {
- case preStart:
- return false;
- case running:
- return false;
- case joinRequired:
- return true;
- case joining:
- case shutdownComplete:
- LOGV2_FATAL(28700,
- "Attempted to join pool {poolName} more than once",
- "Attempted to join pool more than once",
- "poolName"_attr = _options.poolName);
- }
- MONGO_UNREACHABLE;
- });
+void ThreadPool::Impl::_join_inlock(stdx::unique_lock<Latch>* lk) {
+ _stateChange.wait(*lk, [this] { return _state != preStart && _state != running; });
+ if (_state != joinRequired) {
+ LOGV2_FATAL(28700,
+ "Attempted to join pool {poolName} more than once",
+ "Attempted to join pool more than once",
+ "poolName"_attr = _options.poolName);
+ }
+
_setState_inlock(joining);
++_numIdleThreads;
if (!_pendingTasks.empty()) {
@@ -184,8 +305,7 @@ void ThreadPool::_join_inlock(stdx::unique_lock<Latch>* lk) {
}
--_numIdleThreads;
_joinRetired_inlock();
- ThreadList threadsToJoin;
- swap(threadsToJoin, _threads);
+ auto threadsToJoin = std::exchange(_threads, {});
lk->unlock();
for (auto& t : threadsToJoin) {
t.join();
@@ -195,14 +315,14 @@ void ThreadPool::_join_inlock(stdx::unique_lock<Latch>* lk) {
_setState_inlock(shutdownComplete);
}
-void ThreadPool::_drainPendingTasks() {
+void ThreadPool::Impl::_drainPendingTasks() {
// Tasks cannot be run inline because they can create OperationContexts and the join() caller
// may already have one associated with the thread.
stdx::thread cleanThread = stdx::thread([&] {
- const std::string threadName = str::stream()
- << _options.threadNamePrefix << _nextThreadId++;
+ const std::string threadName = "{}{}"_format(_options.threadNamePrefix, _nextThreadId++);
setThreadName(threadName);
- _options.onCreateThread(threadName);
+ if (_options.onCreateThread)
+ _options.onCreateThread(threadName);
stdx::unique_lock<Latch> lock(_mutex);
while (!_pendingTasks.empty()) {
_doOneTask(&lock);
@@ -211,16 +331,16 @@ void ThreadPool::_drainPendingTasks() {
cleanThread.join();
}
-void ThreadPool::schedule(Task task) {
+void ThreadPool::Impl::schedule(Task task) {
stdx::unique_lock<Latch> lk(_mutex);
switch (_state) {
case joinRequired:
case joining:
case shutdownComplete: {
- auto status = Status(ErrorCodes::ShutdownInProgress,
- str::stream() << "Shutdown of thread pool " << _options.poolName
- << " in progress");
+ auto status =
+ Status(ErrorCodes::ShutdownInProgress,
+ "Shutdown of thread pool {} in progress"_format(_options.poolName));
lk.unlock();
task(status);
@@ -246,15 +366,14 @@ void ThreadPool::schedule(Task task) {
_workAvailable.notify_one();
}
-void ThreadPool::waitForIdle() {
+void ThreadPool::Impl::waitForIdle() {
stdx::unique_lock<Latch> lk(_mutex);
- // If there are any pending tasks, or non-idle threads, the pool is not idle.
- while (!_pendingTasks.empty() || _numIdleThreads < _threads.size()) {
- _poolIsIdle.wait(lk);
- }
+ // True when there are no `_pendingTasks` and all `_threads` are idle.
+ auto isIdle = [this] { return _pendingTasks.empty() && _numIdleThreads >= _threads.size(); };
+ _poolIsIdle.wait(lk, isIdle);
}
-ThreadPool::Stats ThreadPool::getStats() const {
+ThreadPool::Stats ThreadPool::Impl::getStats() const {
stdx::lock_guard<Latch> lk(_mutex);
Stats result;
result.options = _options;
@@ -265,95 +384,91 @@ ThreadPool::Stats ThreadPool::getStats() const {
return result;
}
-void ThreadPool::_workerThreadBody(ThreadPool* pool, const std::string& threadName) noexcept {
+void ThreadPool::Impl::_workerThreadBody(const std::string& threadName) noexcept {
setThreadName(threadName);
- pool->_options.onCreateThread(threadName);
- const auto poolName = pool->_options.poolName;
+ if (_options.onCreateThread)
+ _options.onCreateThread(threadName);
LOGV2_DEBUG(23104,
1,
"Starting thread {threadName} in pool {poolName}",
"Starting thread",
"threadName"_attr = threadName,
- "poolName"_attr = poolName);
- pool->_consumeTasks();
-
- // At this point, another thread may have destroyed "pool", if this thread chose to detach
- // itself and remove itself from pool->_threads before releasing pool->_mutex. Do not access
- // member variables of "pool" from here, on.
- //
- // This can happen if this thread decided to retire, got descheduled after removing itself
- // from _threads and calling detach(), and then the pool was deleted. When this thread resumes,
- // it is no longer safe to access "pool".
+ "poolName"_attr = _options.poolName);
+ _consumeTasks();
LOGV2_DEBUG(23105,
1,
"Shutting down thread {threadName} in pool {poolName}",
"Shutting down thread",
"threadName"_attr = threadName,
- "poolName"_attr = poolName);
+ "poolName"_attr = _options.poolName);
}
-void ThreadPool::_consumeTasks() {
+void ThreadPool::Impl::_consumeTasks() {
stdx::unique_lock<Latch> lk(_mutex);
while (_state == running) {
- if (_pendingTasks.empty()) {
- /**
- * Help with garbage collecting retired threads to:
- * * Reduce the memory overhead of _retiredThreads
- * * Expedite the shutdown process
- */
- _joinRetired_inlock();
-
- if (_threads.size() > _options.minThreads) {
- // Since there are more than minThreads threads, this thread may be eligible for
- // retirement. If it isn't now, it may be later, so it must put a time limit on how
- // long it waits on _workAvailable.
- const auto now = Date_t::now();
- const auto nextThreadRetirementDate =
- _lastFullUtilizationDate + _options.maxIdleThreadAge;
- if (now >= nextThreadRetirementDate) {
- _lastFullUtilizationDate = now;
- LOGV2_DEBUG(23106,
- 1,
- "Reaping this thread; next thread reaped no earlier than "
- "{nextThreadRetirementDate}",
- "Reaping this thread",
- "nextThreadRetirementDate"_attr =
- _lastFullUtilizationDate + _options.maxIdleThreadAge);
- break;
- }
-
- LOGV2_DEBUG(23107,
- 3,
- "Not reaping this thread because the earliest retirement date is "
+ if (!_pendingTasks.empty()) {
+ _doOneTask(&lk);
+ continue;
+ }
+
+ // Help with garbage collecting retired threads to reduce the
+ // memory overhead of _retiredThreads and expedite the shutdown
+ // process.
+ _joinRetired_inlock();
+
+ boost::optional<Date_t> waitDeadline;
+
+ if (_threads.size() > _options.minThreads) {
+ // Since there are more than minThreads threads, this thread may be eligible for
+ // retirement. If it isn't now, it may be later, so it must put a time limit on how
+ // long it waits on _workAvailable.
+ const auto now = Date_t::now();
+ const auto nextRetirement = _lastFullUtilizationDate + _options.maxIdleThreadAge;
+ if (now >= nextRetirement) {
+ _lastFullUtilizationDate = now;
+ LOGV2_DEBUG(23106,
+ 1,
+ "Reaping this thread; next thread reaped no earlier than "
"{nextThreadRetirementDate}",
- "Not reaping this thread",
- "nextThreadRetirementDate"_attr = nextThreadRetirementDate);
- MONGO_IDLE_THREAD_BLOCK;
- _workAvailable.wait_until(lk, nextThreadRetirementDate.toSystemTimePoint());
- } else {
- // Since the number of threads is not more than minThreads, this thread is not
- // eligible for retirement. It is OK to sleep until _workAvailable is signaled,
- // because any new threads that put the number of total threads above minThreads
- // would be eligible for retirement once they had no work left to do.
- LOGV2_DEBUG(23108,
- 3,
- "Waiting for work; the thread pool size is {numThreads}; the minimum "
- "number of threads is {minThreads}",
- "Waiting for work",
- "numThreads"_attr = _threads.size(),
- "minThreads"_attr = _options.minThreads);
- MONGO_IDLE_THREAD_BLOCK;
- _workAvailable.wait(lk);
+ "Reaping this thread",
+ "nextThreadRetirementDate"_attr =
+ _lastFullUtilizationDate + _options.maxIdleThreadAge);
+ break;
}
- continue;
+
+ LOGV2_DEBUG(23107,
+ 3,
+ "Not reaping this thread because the earliest retirement date is "
+ "{nextThreadRetirementDate}",
+ "Not reaping this thread",
+ "nextThreadRetirementDate"_attr = nextRetirement);
+ waitDeadline = nextRetirement;
+ } else {
+ // Since the number of threads is not more than minThreads, this thread is not
+ // eligible for retirement. It is OK to sleep until _workAvailable is signaled,
+ // because any new threads that put the number of total threads above minThreads
+ // would be eligible for retirement once they had no work left to do.
+ LOGV2_DEBUG(23108,
+ 3,
+ "Waiting for work; the thread pool size is {numThreads}; the minimum "
+ "number of threads is {minThreads}",
+ "Waiting for work",
+ "numThreads"_attr = _threads.size(),
+ "minThreads"_attr = _options.minThreads);
}
- _doOneTask(&lk);
+ auto wake = [&] { return _state != running || !_pendingTasks.empty(); };
+ MONGO_IDLE_THREAD_BLOCK;
+ if (waitDeadline) {
+ _workAvailable.wait_until(lk, waitDeadline->toSystemTimePoint(), wake);
+ } else {
+ _workAvailable.wait(lk, wake);
+ }
}
// We still hold the lock, but this thread is retiring. If the whole pool is shutting down, this
// thread lends a hand in draining the work pool and returns so it can be joined. Otherwise, it
- // falls through to the detach code, below.
+ // falls through to the thread retirement code, below.
if (_state == joinRequired || _state == joining) {
// Drain the leftover pending tasks.
@@ -375,29 +490,22 @@ void ThreadPool::_consumeTasks() {
"expectedState"_attr = static_cast<int32_t>(running));
}
- // This thread is ending because it was idle for too long. Find self in _threads, remove self
- // from _threads, and add self to the list of retired threads.
- for (size_t i = 0; i < _threads.size(); ++i) {
- auto& t = _threads[i];
- if (t.get_id() != stdx::this_thread::get_id()) {
- continue;
- }
- std::swap(t, _threads.back());
- _retiredThreads.push_back(std::move(_threads.back()));
- _threads.pop_back();
- return;
+ // This thread is ending because it was idle for too long.
+ // Move self from _threads to _retiredThreads.
+ auto selfId = stdx::this_thread::get_id();
+ auto pos = std::find_if(
+ _threads.begin(), _threads.end(), [&](auto&& t) { return t.get_id() == selfId; });
+ if (pos == _threads.end()) {
+ LOGV2_FATAL_NOTRACE(28703,
+ "Could not find thread with id {threadId} in pool {poolName}",
+ "Could not find thread",
+ "threadId"_attr = threadIdToString(selfId),
+ "poolName"_attr = _options.poolName);
}
-
- std::ostringstream threadId;
- threadId << stdx::this_thread::get_id();
- LOGV2_FATAL_NOTRACE(28703,
- "Could not find thread with id {threadId} in pool {poolName}",
- "Could not find thread",
- "threadId"_attr = threadId.str(),
- "poolName"_attr = _options.poolName);
+ _retiredThreads.splice(_retiredThreads.end(), _threads, pos);
}
-void ThreadPool::_doOneTask(stdx::unique_lock<Latch>* lk) noexcept {
+void ThreadPool::Impl::_doOneTask(stdx::unique_lock<Latch>* lk) noexcept {
invariant(!_pendingTasks.empty());
LOGV2_DEBUG(23109,
3,
@@ -416,7 +524,7 @@ void ThreadPool::_doOneTask(stdx::unique_lock<Latch>* lk) noexcept {
}
}
-void ThreadPool::_startWorkerThread_inlock() {
+void ThreadPool::Impl::_startWorkerThread_inlock() {
switch (_state) {
case preStart:
LOGV2_DEBUG(
@@ -452,9 +560,9 @@ void ThreadPool::_startWorkerThread_inlock() {
return;
}
invariant(_threads.size() < _options.maxThreads);
- const std::string threadName = str::stream() << _options.threadNamePrefix << _nextThreadId++;
+ std::string threadName = "{}{}"_format(_options.threadNamePrefix, _nextThreadId++);
try {
- _threads.emplace_back([this, threadName] { _workerThreadBody(this, threadName); });
+ _threads.emplace_back([this, threadName] { _workerThreadBody(threadName); });
++_numIdleThreads;
} catch (const std::exception& ex) {
LOGV2_ERROR(23113,
@@ -468,7 +576,7 @@ void ThreadPool::_startWorkerThread_inlock() {
}
}
-void ThreadPool::_setState_inlock(const LifecycleState newState) {
+void ThreadPool::Impl::_setState_inlock(const LifecycleState newState) {
if (newState == _state) {
return;
}
@@ -476,4 +584,35 @@ void ThreadPool::_setState_inlock(const LifecycleState newState) {
_stateChange.notify_all();
}
+// ========================================
+// ThreadPool public functions that simply forward to the `_impl`.
+
+ThreadPool::ThreadPool(Options options) : _impl{std::make_unique<Impl>(std::move(options))} {}
+
+ThreadPool::~ThreadPool() = default;
+
+void ThreadPool::startup() {
+ _impl->startup();
+}
+
+void ThreadPool::shutdown() {
+ _impl->shutdown();
+}
+
+void ThreadPool::join() {
+ _impl->join();
+}
+
+void ThreadPool::schedule(Task task) {
+ _impl->schedule(std::move(task));
+}
+
+void ThreadPool::waitForIdle() {
+ _impl->waitForIdle();
+}
+
+ThreadPool::Stats ThreadPool::getStats() const {
+ return _impl->getStats();
+}
+
} // namespace mongo
diff --git a/src/mongo/util/concurrency/thread_pool.h b/src/mongo/util/concurrency/thread_pool.h
index a6e56f8c9bf..29acd9e09c0 100644
--- a/src/mongo/util/concurrency/thread_pool.h
+++ b/src/mongo/util/concurrency/thread_pool.h
@@ -29,47 +29,52 @@
#pragma once
-#include <deque>
#include <functional>
+#include <memory>
#include <string>
-#include <vector>
-#include "mongo/platform/mutex.h"
-#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/concurrency/thread_pool_interface.h"
-#include "mongo/util/hierarchical_acquisition.h"
+#include "mongo/util/duration.h"
#include "mongo/util/time_support.h"
namespace mongo {
-class Status;
-
/**
* A configurable thread pool, for general use.
*
* See the Options struct for information about how to configure an instance.
*/
class ThreadPool final : public ThreadPoolInterface {
- ThreadPool(const ThreadPool&) = delete;
- ThreadPool& operator=(const ThreadPool&) = delete;
-
public:
- struct Limits;
+ /**
+ * Contains a subset of the fields from Options related to limiting the number of concurrent
+ * threads in the pool. Used in places where we want a way to specify limits to the size of a
+ * ThreadPool without overriding the other behaviors of the pool such thread names or onCreate
+ * behaviors. Each field of Limits maps directly to the same-named field in Options.
+ */
+ struct Limits {
+ size_t minThreads = 1;
+ size_t maxThreads = 8;
+ Milliseconds maxIdleThreadAge = Seconds{30};
+ };
/**
* Structure used to configure an instance of ThreadPool.
*/
struct Options {
-
- Options() = default;
- explicit Options(const Limits& limits);
-
// Set maxThreads to this if you don't want to limit the number of threads in the pool.
// Note: the value used here is high enough that it will never be reached, but low enough
// that it won't cause overflows if mixed with signed ints or math.
static constexpr size_t kUnlimited = 1'000'000'000;
+ Options() = default;
+
+ explicit Options(const Limits& limits)
+ : minThreads(limits.minThreads),
+ maxThreads(limits.maxThreads),
+ maxIdleThreadAge(limits.maxIdleThreadAge) {}
+
// Name of the thread pool. If this string is empty, the pool will be assigned a
// name unique to the current process.
std::string poolName;
@@ -95,29 +100,15 @@ public:
// a thread.
Milliseconds maxIdleThreadAge = Seconds{30};
- // This function is run before each worker thread begins consuming tasks.
- using OnCreateThreadFn = std::function<void(const std::string& threadName)>;
- OnCreateThreadFn onCreateThread = [](const std::string&) {};
+ /** If callable, called before each worker thread begins consuming tasks. */
+ std::function<void(const std::string&)> onCreateThread;
/**
- * This function is called after joining each retired thread.
+ * If callable, called after joining each retired thread.
* Since there could be multiple calls to this function in a single critical section,
* avoid complex logic in the callback.
*/
- using OnJoinRetiredThreadFn = std::function<void(const stdx::thread&)>;
- OnJoinRetiredThreadFn onJoinRetiredThread = [](const stdx::thread&) {};
- };
-
- /**
- * Contains a subset of the fields from Options related to limiting the number of concurrent
- * threads in the pool. Used in places where we want a way to specify limits to the size of a
- * ThreadPool without overriding the other behaviors of the pool such thread names or onCreate
- * behaviors. Each field of Limits maps directly to the same-named field in Options.
- */
- struct Limits {
- size_t minThreads = 1;
- size_t maxThreads = 8;
- Milliseconds maxIdleThreadAge = Seconds{30};
+ std::function<void(const stdx::thread&)> onJoinRetiredThread;
};
/**
@@ -145,12 +136,18 @@ public:
*/
explicit ThreadPool(Options options);
+ ThreadPool(const ThreadPool&) = delete;
+ ThreadPool& operator=(const ThreadPool&) = delete;
+
~ThreadPool() override;
+ // from OutOfLineExecutor (base of ThreadPoolInterface)
+ void schedule(Task task) override;
+
+ // from ThreadPoolInterface
void startup() override;
void shutdown() override;
void join() override;
- void schedule(Task task) override;
/**
* Blocks the caller until there are no pending tasks on this pool.
@@ -170,120 +167,8 @@ public:
Stats getStats() const;
private:
- using TaskList = std::deque<Task>;
- using ThreadList = std::vector<stdx::thread>;
- using RetiredThreadList = std::list<stdx::thread>;
-
- /**
- * Representation of the stage of life of a thread pool.
- *
- * A pool starts out in the preStart state, and ends life in the shutdownComplete state. Work
- * may only be scheduled in the preStart and running states. Threads may only be started in the
- * running state. In shutdownComplete, there are no remaining threads or pending tasks to
- * execute.
- *
- * Diagram of legal transitions:
- *
- * preStart -> running -> joinRequired -> joining -> shutdownComplete
- * \ ^
- * \_____________/
- */
- enum LifecycleState { preStart, running, joinRequired, joining, shutdownComplete };
-
- /**
- * This is the thread body for worker threads. It is a static member function,
- * because late in its execution it is possible for the pool to have been destroyed.
- * As such, it is advisable to pass the pool pointer as an explicit argument, rather
- * than as the implicit "this" argument.
- */
- static void _workerThreadBody(ThreadPool* pool, const std::string& threadName) noexcept;
-
- /**
- * Starts a worker thread, unless _options.maxThreads threads are already running or
- * _state is not running.
- */
- void _startWorkerThread_inlock();
-
- /**
- * This is the run loop of a worker thread, invoked by _workerThreadBody.
- */
- void _consumeTasks();
-
- /**
- * Implementation of shutdown once _mutex is locked.
- */
- void _shutdown_inlock();
-
- /**
- * Implementation of join once _mutex is owned by "lk".
- */
- void _join_inlock(stdx::unique_lock<Latch>* lk);
-
- /**
- * Runs the remaining tasks on a new thread as part of the join process, blocking until
- * complete. Caller must not hold the mutex!
- */
- void _drainPendingTasks();
-
- /**
- * Executes one task from _pendingTasks. "lk" must own _mutex, and _pendingTasks must have at
- * least one entry.
- */
- void _doOneTask(stdx::unique_lock<Latch>* lk) noexcept;
-
- /**
- * Changes the lifecycle state (_state) of the pool and wakes up any threads waiting for a state
- * change. Has no effect if _state == newState.
- */
- void _setState_inlock(LifecycleState newState);
-
- /**
- * Waits for all remaining retired threads to join.
- * If a thread's _workerThreadBody() were ever to attempt to reacquire
- * ThreadPool::_mutex after that thread had been added to _retiredThreads,
- * it could cause a deadlock.
- */
- void _joinRetired_inlock();
-
- // These are the options with which the pool was configured at construction time.
- const Options _options;
-
- // Mutex guarding all non-const member variables.
- mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ThreadPool::_mutex");
-
- // This variable represents the lifecycle state of the pool.
- //
- // Work may only be scheduled in states preStart and running, and only executes in states
- // running and shuttingDown.
- LifecycleState _state = preStart;
-
- // Condition signaled to indicate that there is work in the _pendingTasks queue, or
- // that the system is shutting down.
- stdx::condition_variable _workAvailable;
-
- // Condition signaled to indicate that there is no work in the _pendingTasks queue.
- stdx::condition_variable _poolIsIdle;
-
- // Condition variable signaled whenever _state changes.
- stdx::condition_variable _stateChange;
-
- // Queue of yet-to-be-executed tasks.
- TaskList _pendingTasks;
-
- // List of threads serving as the worker pool.
- ThreadList _threads;
-
- // List of threads that are retired and pending join
- RetiredThreadList _retiredThreads;
-
- // Count of idle threads.
- size_t _numIdleThreads = 0;
-
- // Id counter for assigning thread names
- size_t _nextThreadId = 0;
-
- // The last time that _pendingTasks.size() grew to be at least _threads.size().
- Date_t _lastFullUtilizationDate;
+ class Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace mongo
diff --git a/src/mongo/util/fail_point.cpp b/src/mongo/util/fail_point.cpp
index c0a28ddb3ac..e467ff2d9fb 100644
--- a/src/mongo/util/fail_point.cpp
+++ b/src/mongo/util/fail_point.cpp
@@ -70,7 +70,7 @@ void FailPoint::setThreadPRNGSeed(int32_t seed) {
threadPrng = PseudoRandom(seed);
}
-FailPoint::FailPoint() = default;
+FailPoint::FailPoint(std::string name) : _name(std::move(name)) {}
void FailPoint::_shouldFailCloseBlock() {
_fpInfo.subtractAndFetch(1);
@@ -286,8 +286,8 @@ BSONObj FailPoint::toBSON() const {
return builder.obj();
}
-FailPointRegisterer::FailPointRegisterer(const std::string& name, FailPoint* fp) {
- uassertStatusOK(globalFailPointRegistry().add(name, fp));
+FailPointRegisterer::FailPointRegisterer(FailPoint* fp) {
+ uassertStatusOK(globalFailPointRegistry().add(fp));
}
FailPointRegistry& globalFailPointRegistry() {
@@ -309,12 +309,18 @@ auto setGlobalFailPoint(const std::string& failPointName, const BSONObj& cmdObj)
return timesEntered;
}
-FailPointEnableBlock::FailPointEnableBlock(std::string failPointName)
- : FailPointEnableBlock(std::move(failPointName), {}) {}
+FailPointEnableBlock::FailPointEnableBlock(StringData failPointName)
+ : FailPointEnableBlock(failPointName, {}) {}
+
+FailPointEnableBlock::FailPointEnableBlock(StringData failPointName, BSONObj data)
+ : FailPointEnableBlock(globalFailPointRegistry().find(failPointName), std::move(data)) {}
+
+FailPointEnableBlock::FailPointEnableBlock(FailPoint* failPoint)
+ : FailPointEnableBlock(failPoint, {}) {}
+
+FailPointEnableBlock::FailPointEnableBlock(FailPoint* failPoint, BSONObj data)
+ : _failPoint(failPoint) {
-FailPointEnableBlock::FailPointEnableBlock(std::string failPointName, BSONObj data)
- : _failPointName(std::move(failPointName)) {
- _failPoint = globalFailPointRegistry().find(_failPointName);
invariant(_failPoint != nullptr);
_initialTimesEntered = _failPoint->setMode(FailPoint::alwaysOn, 0, std::move(data));
@@ -322,7 +328,7 @@ FailPointEnableBlock::FailPointEnableBlock(std::string failPointName, BSONObj da
LOGV2_WARNING(23830,
"Set failpoint {failPointName} to: {failPoint}",
"Set failpoint",
- "failPointName"_attr = _failPointName,
+ "failPointName"_attr = _failPoint->getName(),
"failPoint"_attr = _failPoint->toBSON());
}
@@ -331,24 +337,25 @@ FailPointEnableBlock::~FailPointEnableBlock() {
LOGV2_WARNING(23831,
"Set failpoint {failPointName} to: {failPoint}",
"Set failpoint",
- "failPointName"_attr = _failPointName,
+ "failPointName"_attr = _failPoint->getName(),
"failPoint"_attr = _failPoint->toBSON());
}
FailPointRegistry::FailPointRegistry() : _frozen(false) {}
-Status FailPointRegistry::add(const std::string& name, FailPoint* failPoint) {
+Status FailPointRegistry::add(FailPoint* failPoint) {
if (_frozen) {
return {ErrorCodes::CannotMutateObject, "Registry is already frozen"};
}
- auto [pos, ok] = _fpMap.insert({name, failPoint});
+ auto [pos, ok] = _fpMap.insert({failPoint->getName(), failPoint});
if (!ok) {
- return {ErrorCodes::Error(51006), "Fail point already registered: {}"_format(name)};
+ return {ErrorCodes::Error(51006),
+ "Fail point already registered: {}"_format(failPoint->getName())};
}
return Status::OK();
}
-FailPoint* FailPointRegistry::find(const std::string& name) const {
+FailPoint* FailPointRegistry::find(StringData name) const {
auto iter = _fpMap.find(name);
return (iter == _fpMap.end()) ? nullptr : iter->second;
}
diff --git a/src/mongo/util/fail_point.h b/src/mongo/util/fail_point.h
index 5322fad8b67..af02e9f1622 100644
--- a/src/mongo/util/fail_point.h
+++ b/src/mongo/util/fail_point.h
@@ -40,6 +40,7 @@
#include "mongo/stdx/unordered_map.h"
#include "mongo/util/duration.h"
#include "mongo/util/interruptible.h"
+#include "mongo/util/string_map.h"
namespace mongo {
@@ -202,11 +203,15 @@ public:
*/
static StatusWith<ModeOptions> parseBSON(const BSONObj& obj);
- FailPoint();
+ explicit FailPoint(std::string name);
FailPoint(const FailPoint&) = delete;
FailPoint& operator=(const FailPoint&) = delete;
+ const std::string& getName() const {
+ return _name;
+ }
+
/**
* Returns true if fail point is active.
*
@@ -423,6 +428,8 @@ private:
AtomicWord<int> _timesOrPeriod{0};
BSONObj _data;
+ const std::string _name;
+
// protects _mode, _timesOrPeriod, _data
mutable Mutex _modMutex = MONGO_MAKE_LATCH("FailPoint::_modMutex");
};
@@ -439,12 +446,12 @@ public:
* 51006 - if the given name already exists in this registry.
* CannotMutateObject - if this registry is already frozen.
*/
- Status add(const std::string& name, FailPoint* failPoint);
+ Status add(FailPoint* failPoint);
/**
* @return a registered FailPoint, or nullptr if it was not registered.
*/
- FailPoint* find(const std::string& name) const;
+ FailPoint* find(StringData name) const;
/**
* Freezes this registry from being modified.
@@ -460,7 +467,7 @@ public:
private:
bool _frozen;
- stdx::unordered_map<std::string, FailPoint*> _fpMap;
+ StringMap<FailPoint*> _fpMap;
};
/**
@@ -468,10 +475,15 @@ private:
*/
class FailPointEnableBlock {
public:
- explicit FailPointEnableBlock(std::string failPointName);
- FailPointEnableBlock(std::string failPointName, BSONObj data);
+ explicit FailPointEnableBlock(StringData failPointName);
+ FailPointEnableBlock(StringData failPointName, BSONObj data);
+ explicit FailPointEnableBlock(FailPoint* failPoint);
+ FailPointEnableBlock(FailPoint* failPoint, BSONObj data);
~FailPointEnableBlock();
+ FailPointEnableBlock(const FailPointEnableBlock&) = delete;
+ FailPointEnableBlock& operator=(const FailPointEnableBlock&) = delete;
+
// Const access to the underlying FailPoint
const FailPoint* failPoint() const {
return _failPoint;
@@ -488,8 +500,7 @@ public:
}
private:
- std::string _failPointName;
- FailPoint* _failPoint;
+ FailPoint* const _failPoint;
FailPoint::EntryCountT _initialTimesEntered;
};
@@ -507,7 +518,7 @@ FailPoint::EntryCountT setGlobalFailPoint(const std::string& failPointName, cons
*/
class FailPointRegisterer {
public:
- FailPointRegisterer(const std::string& name, FailPoint* fp);
+ explicit FailPointRegisterer(FailPoint* fp);
};
FailPointRegistry& globalFailPointRegistry();
@@ -518,8 +529,8 @@ FailPointRegistry& globalFailPointRegistry();
* Never use in header files, only .cpp files.
*/
#define MONGO_FAIL_POINT_DEFINE(fp) \
- ::mongo::FailPoint fp; \
- ::mongo::FailPointRegisterer fp##failPointRegisterer(#fp, &fp);
+ ::mongo::FailPoint fp(#fp); \
+ ::mongo::FailPointRegisterer fp##failPointRegisterer(&fp);
} // namespace mongo
diff --git a/src/mongo/util/fail_point_test.cpp b/src/mongo/util/fail_point_test.cpp
index 26b051fb7dc..a3c346594c9 100644
--- a/src/mongo/util/fail_point_test.cpp
+++ b/src/mongo/util/fail_point_test.cpp
@@ -55,12 +55,12 @@ namespace stdx = mongo::stdx;
namespace mongo_test {
TEST(FailPoint, InitialState) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
ASSERT_FALSE(failPoint.shouldFail());
}
TEST(FailPoint, AlwaysOn) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::alwaysOn);
ASSERT(failPoint.shouldFail());
@@ -74,7 +74,7 @@ TEST(FailPoint, AlwaysOn) {
}
TEST(FailPoint, NTimes) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::nTimes, 4);
ASSERT(failPoint.shouldFail());
ASSERT(failPoint.shouldFail());
@@ -87,14 +87,14 @@ TEST(FailPoint, NTimes) {
}
TEST(FailPoint, BlockOff) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
bool called = false;
failPoint.execute([&](const BSONObj&) { called = true; });
ASSERT_FALSE(called);
}
TEST(FailPoint, BlockAlwaysOn) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::alwaysOn);
bool called = false;
@@ -104,7 +104,7 @@ TEST(FailPoint, BlockAlwaysOn) {
}
TEST(FailPoint, BlockNTimes) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::nTimes, 1);
size_t counter = 0;
@@ -116,7 +116,7 @@ TEST(FailPoint, BlockNTimes) {
}
TEST(FailPoint, BlockWithException) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::alwaysOn);
bool threw = false;
@@ -134,7 +134,7 @@ TEST(FailPoint, BlockWithException) {
}
TEST(FailPoint, SetGetParam) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::alwaysOn, 0, BSON("x" << 20));
failPoint.execute([&](const BSONObj& data) { ASSERT_EQUALS(20, data["x"].numberInt()); });
@@ -143,12 +143,13 @@ TEST(FailPoint, SetGetParam) {
class FailPointStress : public mongo::unittest::Test {
public:
void setUp() {
- _fp.setMode(FailPoint::alwaysOn, 0, BSON("a" << 44));
+ _fp = std::make_unique<FailPoint>("testFP");
+ _fp->setMode(FailPoint::alwaysOn, 0, BSON("a" << 44));
}
void tearDown() {
// Note: This can loop indefinitely if reference counter was off
- _fp.setMode(FailPoint::off, 0, BSON("a" << 66));
+ _fp->setMode(FailPoint::off, 0, BSON("a" << 66));
}
void startTest() {
@@ -174,7 +175,7 @@ public:
private:
void blockTask() {
while (true) {
- _fp.execute([](const BSONObj& data) {
+ _fp->execute([](const BSONObj& data) {
// Expanded ASSERT_EQUALS since the error is not being
// printed out properly
if (data["a"].numberInt() != 44) {
@@ -196,7 +197,7 @@ private:
void blockWithExceptionTask() {
while (true) {
try {
- _fp.execute([](const BSONObj& data) {
+ _fp->execute([](const BSONObj& data) {
if (data["a"].numberInt() != 44) {
using namespace mongo::literals;
LOGV2_ERROR(24130,
@@ -219,7 +220,7 @@ private:
void simpleTask() {
while (true) {
- static_cast<void>(MONGO_unlikely(_fp.shouldFail()));
+ static_cast<void>(MONGO_unlikely(_fp->shouldFail()));
stdx::lock_guard<mongo::Latch> lk(_mutex);
if (_inShutdown)
break;
@@ -228,10 +229,10 @@ private:
void flipTask() {
while (true) {
- if (_fp.shouldFail()) {
- _fp.setMode(FailPoint::off, 0);
+ if (_fp->shouldFail()) {
+ _fp->setMode(FailPoint::off, 0);
} else {
- _fp.setMode(FailPoint::alwaysOn, 0, BSON("a" << 44));
+ _fp->setMode(FailPoint::alwaysOn, 0, BSON("a" << 44));
}
stdx::lock_guard<mongo::Latch> lk(_mutex);
@@ -240,7 +241,7 @@ private:
}
}
- FailPoint _fp;
+ std::unique_ptr<FailPoint> _fp;
std::vector<stdx::thread> _tasks;
mongo::Mutex _mutex = MONGO_MAKE_LATCH();
@@ -249,7 +250,7 @@ private:
TEST_F(FailPointStress, Basic) {
startTest();
- mongo::sleepsecs(30);
+ mongo::sleepsecs(5);
stopTest();
}
@@ -277,7 +278,7 @@ static int64_t runParallelFailPointTest(FailPoint::Mode fpMode,
const int32_t numEncountersPerThread) {
ASSERT_GT(numThreads, 0);
ASSERT_GT(numEncountersPerThread, 0);
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(fpMode, fpVal);
std::vector<stdx::thread*> tasks;
std::vector<int64_t> counts(numThreads, 0);
@@ -398,7 +399,7 @@ TEST(FailPoint, parseBSONValidDataSucceeds) {
ASSERT_TRUE(swTuple.isOK());
}
-TEST(FailPoint, FailPointBlockBasicTest) {
+TEST(FailPoint, FailPointEnableBlockBasicTest) {
auto failPoint = mongo::globalFailPointRegistry().find("dummy");
ASSERT_FALSE(failPoint->shouldFail());
@@ -411,8 +412,21 @@ TEST(FailPoint, FailPointBlockBasicTest) {
ASSERT_FALSE(failPoint->shouldFail());
}
-TEST(FailPoint, FailPointBlockIfBasicTest) {
- FailPoint failPoint;
+TEST(FailPoint, FailPointEnableBlockByPointer) {
+ auto failPoint = mongo::globalFailPointRegistry().find("dummy");
+
+ ASSERT_FALSE(failPoint->shouldFail());
+
+ {
+ FailPointEnableBlock dummyFp(failPoint);
+ ASSERT_TRUE(failPoint->shouldFail());
+ }
+
+ ASSERT_FALSE(failPoint->shouldFail());
+}
+
+TEST(FailPoint, ExecuteIfBasicTest) {
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::nTimes, 1, BSON("skip" << true));
{
bool hit = false;
@@ -463,7 +477,7 @@ void assertFunctionInterruptable(std::function<void(Interruptible* interruptible
}
TEST(FailPoint, PauseWhileSetInterruptibility) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::alwaysOn);
assertFunctionInterruptable(
@@ -473,7 +487,7 @@ TEST(FailPoint, PauseWhileSetInterruptibility) {
}
TEST(FailPoint, WaitForFailPointTimeout) {
- FailPoint failPoint;
+ FailPoint failPoint("testFP");
failPoint.setMode(FailPoint::alwaysOn);
assertFunctionInterruptable([&failPoint](Interruptible* interruptible) {
diff --git a/src/mongo/util/invalidating_lru_cache.h b/src/mongo/util/invalidating_lru_cache.h
index 18b9a94c9fa..c8ead4adecc 100644
--- a/src/mongo/util/invalidating_lru_cache.h
+++ b/src/mongo/util/invalidating_lru_cache.h
@@ -196,9 +196,9 @@ public:
*/
class ValueHandle {
public:
- // The two constructors below are present in order to offset the fact that the cache doesn't
- // support pinning items. Their only usage must be in the authorization mananager for the
- // internal authentication user.
+ // The three constructors below are present in order to offset the fact that the cache
+ // doesn't support pinning items. Their only usage must be in the authorization mananager
+ // for the internal authentication user.
explicit ValueHandle(Value&& value)
: _value(std::make_shared<StoredValue>(nullptr,
0,
@@ -207,6 +207,10 @@ public:
CacheNotCausallyConsistent(),
CacheNotCausallyConsistent())) {}
+ explicit ValueHandle(Value&& value, const Time& t)
+ : _value(
+ std::make_shared<StoredValue>(nullptr, 0, boost::none, std::move(value), t, t)) {}
+
ValueHandle() = default;
operator bool() const {
@@ -264,15 +268,16 @@ public:
Value&& value,
const Time& time = CacheNotCausallyConsistent()) {
LockGuardWithPostUnlockDestructor guard(_mutex);
- Time timeInStore;
- _invalidate(&guard, key, _cache.find(key), &timeInStore);
- if (auto evicted = _cache.add(key,
- std::make_shared<StoredValue>(this,
- ++_epoch,
- key,
- std::forward<Value>(value),
- time,
- std::max(time, timeInStore)))) {
+ Time currentTime, currentTimeInStore;
+ _invalidate(&guard, key, _cache.find(key), &currentTime, &currentTimeInStore);
+ if (auto evicted =
+ _cache.add(key,
+ std::make_shared<StoredValue>(this,
+ ++_epoch,
+ key,
+ std::forward<Value>(value),
+ time,
+ std::max(time, currentTimeInStore)))) {
const auto& evictedKey = evicted->first;
auto& evictedValue = evicted->second;
@@ -310,15 +315,16 @@ public:
Value&& value,
const Time& time = CacheNotCausallyConsistent()) {
LockGuardWithPostUnlockDestructor guard(_mutex);
- Time timeInStore;
- _invalidate(&guard, key, _cache.find(key), &timeInStore);
- if (auto evicted = _cache.add(key,
- std::make_shared<StoredValue>(this,
- ++_epoch,
- key,
- std::forward<Value>(value),
- time,
- std::max(time, timeInStore)))) {
+ Time currentTime, currentTimeInStore;
+ _invalidate(&guard, key, _cache.find(key), &currentTime, &currentTimeInStore);
+ if (auto evicted =
+ _cache.add(key,
+ std::make_shared<StoredValue>(this,
+ ++_epoch,
+ key,
+ std::forward<Value>(value),
+ time,
+ std::max(time, currentTimeInStore)))) {
const auto& evictedKey = evicted->first;
auto& evictedValue = evicted->second;
@@ -526,10 +532,13 @@ private:
void _invalidate(LockGuardWithPostUnlockDestructor* guard,
const Key& key,
typename Cache::iterator it,
+ Time* outTime = nullptr,
Time* outTimeInStore = nullptr) {
if (it != _cache.end()) {
auto& storedValue = it->second;
storedValue->isValid.store(false);
+ if (outTime)
+ *outTime = storedValue->time;
if (outTimeInStore)
*outTimeInStore = storedValue->timeInStore;
guard->releasePtr(std::move(storedValue));
@@ -545,6 +554,8 @@ private:
// released and drops to zero
if (auto evictedValue = itEvicted->second.lock()) {
evictedValue->isValid.store(false);
+ if (outTime)
+ *outTime = evictedValue->time;
if (outTimeInStore)
*outTimeInStore = evictedValue->timeInStore;
guard->releasePtr(std::move(evictedValue));
diff --git a/src/mongo/util/invalidating_lru_cache_test.cpp b/src/mongo/util/invalidating_lru_cache_test.cpp
index 282a130af68..8476dfc5c9e 100644
--- a/src/mongo/util/invalidating_lru_cache_test.cpp
+++ b/src/mongo/util/invalidating_lru_cache_test.cpp
@@ -67,11 +67,14 @@ TEST(InvalidatingLRUCacheTest, ValueHandleOperators) {
TestValueCache cache(1);
cache.insertOrAssign(100, {"Test value"});
+ // Test non-const operators
{
auto valueHandle = cache.get(100);
ASSERT_EQ("Test value", valueHandle->value);
ASSERT_EQ("Test value", (*valueHandle).value);
}
+
+ // Test const operators
{
const auto valueHandle = cache.get(100);
ASSERT_EQ("Test value", valueHandle->value);
@@ -473,7 +476,7 @@ void parallelTest(size_t cacheSize, TestFunc doTest) {
}
TEST(InvalidatingLRUCacheParallelTest, InsertOrAssignThenGet) {
- parallelTest<TestValueCache>(1, [](auto& cache) mutable {
+ parallelTest<TestValueCache>(1, [](auto& cache) {
const int key = 100;
cache.insertOrAssign(key, TestValue{"Parallel tester value"});
@@ -501,7 +504,7 @@ TEST(InvalidatingLRUCacheParallelTest, InsertOrAssignAndGet) {
}
TEST(InvalidatingLRUCacheParallelTest, CacheSizeZeroInsertOrAssignAndGet) {
- parallelTest<TestValueCache>(0, [](auto& cache) mutable {
+ parallelTest<TestValueCache>(0, [](auto& cache) {
const int key = 300;
auto cachedItem = cache.insertOrAssignAndGet(key, TestValue{"Parallel tester value"});
ASSERT(cachedItem);
@@ -511,12 +514,18 @@ TEST(InvalidatingLRUCacheParallelTest, CacheSizeZeroInsertOrAssignAndGet) {
}
TEST(InvalidatingLRUCacheParallelTest, AdvanceTime) {
- AtomicWord<uint64_t> counter{0};
+ AtomicWord<uint64_t> counter{1};
+ Mutex insertOrAssignMutex = MONGO_MAKE_LATCH("ReadThroughCacheBase::_cancelTokenMutex");
- parallelTest<TestValueCacheCausallyConsistent>(0, [&counter](auto& cache) mutable {
+ parallelTest<TestValueCacheCausallyConsistent>(0, [&](auto& cache) {
const int key = 300;
- cache.insertOrAssign(
- key, TestValue{"Parallel tester value"}, Timestamp(counter.fetchAndAdd(1)));
+ {
+ // The calls to insertOrAssign must always pass strictly incrementing time
+ stdx::lock_guard lg(insertOrAssignMutex);
+ cache.insertOrAssign(
+ key, TestValue{"Parallel tester value"}, Timestamp(counter.fetchAndAdd(1)));
+ }
+
auto latestCached = cache.get(key, CacheCausalConsistency::kLatestCached);
auto latestKnown = cache.get(key, CacheCausalConsistency::kLatestKnown);
diff --git a/src/mongo/util/read_through_cache.h b/src/mongo/util/read_through_cache.h
index 3d5c7bf0923..72b3e7a5771 100644
--- a/src/mongo/util/read_through_cache.h
+++ b/src/mongo/util/read_through_cache.h
@@ -136,10 +136,12 @@ public:
*/
class ValueHandle {
public:
- // The two constructors below are present in order to offset the fact that the cache doesn't
- // support pinning items. Their only usage must be in the authorization mananager for the
- // internal authentication user.
+ // The three constructors below are present in order to offset the fact that the cache
+ // doesn't support pinning items. Their only usage must be in the authorization mananager
+ // for the internal authentication user.
ValueHandle(Value&& value) : _valueHandle({std::move(value), Date_t::min()}) {}
+ ValueHandle(Value&& value, const Time& t)
+ : _valueHandle({std::move(value), Date_t::min()}, t) {}
ValueHandle() = default;
operator bool() const {
@@ -289,6 +291,16 @@ public:
}
/**
+ * Acquires the latest value from the cache, or an empty ValueHandle if the key is not present
+ * in the cache.
+ *
+ * Doesn't attempt to lookup, and so doesn't block.
+ */
+ ValueHandle peekLatestCached(const Key& key) {
+ return {_cache.get(key, CacheCausalConsistency::kLatestCached)};
+ }
+
+ /**
* Invalidates the given 'key' and immediately replaces it with a new value.
*/
ValueHandle insertOrAssignAndGet(const Key& key, Value&& newValue, Date_t updateWallClockTime) {
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 6a630be33db..9bf8a939b70 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.6",
- "commit": "bb92ab603f22ca84c24af3be7bc9194f44ff3e64"
+ "commit": "a68890f718f74cdc9e9961bf5b33f5b125e853dd"
}
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index ac7cef167ff..daee3be92a8 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -267,7 +267,7 @@ __session_close(WT_SESSION *wt_session, const char *config)
SESSION_API_CALL_PREPARE_ALLOWED(session, close, config, cfg);
WT_UNUSED(cfg);
- WT_ERR(__wt_session_close_internal(session));
+ WT_TRET(__wt_session_close_internal(session));
session = NULL;
err:
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py
index 4a322c61998..12a3daeedfc 100755
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py
@@ -38,7 +38,7 @@ from time import sleep
def timestamp_str(t):
return '%x' % t
-def retry_rollback(self, name, code):
+def retry_rollback(self, name, txn_session, code):
retry_limit = 100
retries = 0
completed = False
@@ -46,7 +46,12 @@ def retry_rollback(self, name, code):
while not completed and retries < retry_limit:
if retries != 0:
self.pr("Retrying operation for " + name)
+ if txn_session:
+ txn_session.rollback_transaction()
sleep(0.1)
+ if txn_session:
+ txn_session.begin_transaction('isolation=snapshot')
+ self.pr("Began new transaction for " + name)
try:
code()
completed = True
@@ -164,13 +169,13 @@ class test_rollback_to_stable10(test_rollback_to_stable_base):
# Perform several updates in parallel with checkpoint.
# Rollbacks may occur when checkpoint is running, so retry as needed.
self.pr("updates")
- retry_rollback(self, 'update ds1, e',
+ retry_rollback(self, 'update ds1, e', None,
lambda: self.large_updates(uri_1, value_e, ds_1, nrows, 70))
- retry_rollback(self, 'update ds2, e',
+ retry_rollback(self, 'update ds2, e', None,
lambda: self.large_updates(uri_2, value_e, ds_2, nrows, 70))
- retry_rollback(self, 'update ds1, f',
+ retry_rollback(self, 'update ds1, f', None,
lambda: self.large_updates(uri_1, value_f, ds_1, nrows, 80))
- retry_rollback(self, 'update ds2, f',
+ retry_rollback(self, 'update ds2, f', None,
lambda: self.large_updates(uri_2, value_f, ds_2, nrows, 80))
finally:
done.set()
@@ -271,12 +276,17 @@ class test_rollback_to_stable10(test_rollback_to_stable_base):
else:
self.conn.set_timestamp('stable_timestamp=' + timestamp_str(50))
- # Here's the update operation we'll perform, encapsulated so we can easily retry
+ # Here's the update operations we'll perform, encapsulated so we can easily retry
# it if we get a rollback. Rollbacks may occur when checkpoint is running.
- def simple_update(cursor, key, value):
- cursor.set_key(key)
- cursor.set_value(value)
- self.assertEquals(cursor.update(), 0)
+ def prepare_range_updates(session, cursor, ds, value, nrows, prepare_config):
+ self.pr("updates")
+ for i in range(1, nrows):
+ key = ds.key(i)
+ cursor.set_key(key)
+ cursor.set_value(value)
+ self.assertEquals(cursor.update(), 0)
+ self.pr("prepare")
+ session.prepare_transaction(prepare_config)
# Create a checkpoint thread
done = threading.Event()
@@ -289,23 +299,19 @@ class test_rollback_to_stable10(test_rollback_to_stable_base):
session_p1 = self.conn.open_session()
cursor_p1 = session_p1.open_cursor(uri_1)
session_p1.begin_transaction('isolation=snapshot')
- self.pr("updates 1")
- for i in range(1, nrows):
- retry_rollback(self, 'update ds1',
- lambda: simple_update(cursor_p1, ds_1.key(i), value_e))
- self.pr("prepare 1")
- session_p1.prepare_transaction('prepare_timestamp=' + timestamp_str(69))
+ retry_rollback(self, 'update ds1', session_p1,
+ lambda: prepare_range_updates(
+ session_p1, cursor_p1, ds_1, value_e, nrows,
+ 'prepare_timestamp=' + timestamp_str(69)))
# Perform several updates in parallel with checkpoint.
session_p2 = self.conn.open_session()
cursor_p2 = session_p2.open_cursor(uri_2)
session_p2.begin_transaction('isolation=snapshot')
- self.pr("updates 2")
- for i in range(1, nrows):
- retry_rollback(self, 'update ds2',
- lambda: simple_update(cursor_p2, ds_2.key(i), value_e))
- self.pr("prepare 2")
- session_p2.prepare_transaction('prepare_timestamp=' + timestamp_str(69))
+ retry_rollback(self, 'update ds2', session_p2,
+ lambda: prepare_range_updates(
+ session_p2, cursor_p2, ds_2, value_e, nrows,
+ 'prepare_timestamp=' + timestamp_str(69)))
finally:
done.set()
ckpt.join()
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index a0a86731f1c..617a8326582 100755
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -227,6 +227,7 @@ class WiredTigerTestCase(unittest.TestCase):
if hasattr(self, 'scenarios'):
assert(len(self.scenarios) == len(dict(self.scenarios)))
unittest.TestCase.__init__(self, *args, **kwargs)
+ self.skipped = False
if not self._globalSetup:
WiredTigerTestCase.globalSetup()
@@ -253,6 +254,10 @@ class WiredTigerTestCase(unittest.TestCase):
def buildDirectory(self):
return self._builddir
+ def skipTest(self, reason):
+ self.skipped = True
+ super(WiredTigerTestCase, self).skipTest(reason)
+
# Return the wiredtiger_open extension argument for
# any needed shared library.
def extensionsConfig(self):
@@ -460,9 +465,10 @@ class WiredTigerTestCase(unittest.TestCase):
for f in files:
os.chmod(os.path.join(root, f), 0o666)
self.pr('passed=' + str(passed))
+ self.pr('skipped=' + str(self.skipped))
# Clean up unless there's a failure
- if passed and not WiredTigerTestCase._preserveFiles:
+ if (passed and (not WiredTigerTestCase._preserveFiles)) or self.skipped:
shutil.rmtree(self.testdir, ignore_errors=True)
else:
self.pr('preserving directory ' + self.testdir)
@@ -470,7 +476,7 @@ class WiredTigerTestCase(unittest.TestCase):
elapsed = time.time() - self.starttime
if elapsed > 0.001 and WiredTigerTestCase._verbose >= 2:
print("%s: %.2f seconds" % (str(self), elapsed))
- if not passed:
+ if (not passed) and (not self.skipped):
print("ERROR in " + str(self))
self.pr('FAIL')
self.pr('preserving directory ' + self.testdir)